FreeBSD/Linux Kernel Cross Reference
sys/contrib/openzfs/module/zfs/spa_stats.c

    1 /*
    2  * CDDL HEADER START
    3  *
    4  * The contents of this file are subject to the terms of the
    5  * Common Development and Distribution License (the "License").
    6  * You may not use this file except in compliance with the License.
    7  *
    8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
    9  * or https://opensource.org/licenses/CDDL-1.0.
   10  * See the License for the specific language governing permissions
   11  * and limitations under the License.
   12  *
   13  * When distributing Covered Code, include this CDDL HEADER in each
   14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
   15  * If applicable, add the following below this CDDL HEADER, with the
   16  * fields enclosed by brackets "[]" replaced with your own identifying
   17  * information: Portions Copyright [yyyy] [name of copyright owner]
   18  *
   19  * CDDL HEADER END
   20  */
   21 
   22 #include <sys/zfs_context.h>
   23 #include <sys/spa_impl.h>
   24 #include <sys/vdev_impl.h>
   25 #include <sys/spa.h>
   26 #include <zfs_comutil.h>
   27 
   28 /*
   29  * Keeps stats on the last N reads per spa_t, disabled by default.
   30  */
   31 static uint_t zfs_read_history = B_FALSE;
   32 
   33 /*
   34  * Include cache hits in history, disabled by default.
   35  */
   36 static int zfs_read_history_hits = B_FALSE;
   37 
   38 /*
   39  * Keeps stats on the last 100 txgs by default.
   40  */
   41 static uint_t zfs_txg_history = 100;
   42 
   43 /*
   44  * Keeps stats on the last N MMP updates, disabled by default.
   45  */
   46 static uint_t zfs_multihost_history = B_FALSE;
   47 
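/*
 * These four tunables are exported as module parameters by the
 * ZFS_MODULE_PARAM() declarations at the bottom of this file.  As an
 * illustration (paths assumed, not taken from this file): on Linux, writing
 * 100 to /sys/module/zfs/parameters/zfs_read_history starts keeping the last
 * 100 reads for each pool, and writing 0 disables collection again; on
 * FreeBSD the same knob is expected to surface as the vfs.zfs.read_history
 * sysctl.
 */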
   48 /*
   49  * ==========================================================================
   50  * SPA Read History Routines
   51  * ==========================================================================
   52  */
   53 
   54 /*
   55  * Read statistics - Information exported regarding each arc_read call
   56  */
   57 typedef struct spa_read_history {
   58         hrtime_t        start;          /* time read completed */
   59         uint64_t        objset;         /* read from this objset */
   60         uint64_t        object;         /* read of this object number */
   61         uint64_t        level;          /* block's indirection level */
   62         uint64_t        blkid;          /* read of this block id */
   63         char            origin[24];     /* read originated from here */
   64         uint32_t        aflags;         /* ARC flags (cached, prefetch, etc.) */
   65         pid_t           pid;            /* PID of task doing read */
   66         char            comm[16];       /* process name of task doing read */
   67         procfs_list_node_t      srh_node;
   68 } spa_read_history_t;
   69 
   70 static int
   71 spa_read_history_show_header(struct seq_file *f)
   72 {
   73         seq_printf(f, "%-8s %-16s %-8s %-8s %-8s %-8s %-8s "
   74             "%-24s %-8s %-16s\n", "UID", "start", "objset", "object",
   75             "level", "blkid", "aflags", "origin", "pid", "process");
   76 
   77         return (0);
   78 }
   79 
   80 static int
   81 spa_read_history_show(struct seq_file *f, void *data)
   82 {
   83         spa_read_history_t *srh = (spa_read_history_t *)data;
   84 
   85         seq_printf(f, "%-8llu %-16llu 0x%-6llx "
   86             "%-8lli %-8lli %-8lli 0x%-6x %-24s %-8i %-16s\n",
   87             (u_longlong_t)srh->srh_node.pln_id, srh->start,
   88             (longlong_t)srh->objset, (longlong_t)srh->object,
   89             (longlong_t)srh->level, (longlong_t)srh->blkid,
   90             srh->aflags, srh->origin, srh->pid, srh->comm);
   91 
   92         return (0);
   93 }
   94 
   95 /* Remove oldest elements from list until there are no more than 'size' left */
   96 static void
   97 spa_read_history_truncate(spa_history_list_t *shl, unsigned int size)
   98 {
   99         spa_read_history_t *srh;
  100         while (shl->size > size) {
  101                 srh = list_remove_head(&shl->procfs_list.pl_list);
  102                 ASSERT3P(srh, !=, NULL);
  103                 kmem_free(srh, sizeof (spa_read_history_t));
  104                 shl->size--;
  105         }
  106 
  107         if (size == 0)
  108                 ASSERT(list_is_empty(&shl->procfs_list.pl_list));
  109 }
  110 
  111 static int
  112 spa_read_history_clear(procfs_list_t *procfs_list)
  113 {
  114         spa_history_list_t *shl = procfs_list->pl_private;
  115         mutex_enter(&procfs_list->pl_lock);
  116         spa_read_history_truncate(shl, 0);
  117         mutex_exit(&procfs_list->pl_lock);
  118         return (0);
  119 }
  120 
  121 static void
  122 spa_read_history_init(spa_t *spa)
  123 {
  124         spa_history_list_t *shl = &spa->spa_stats.read_history;
  125 
  126         shl->size = 0;
  127         shl->procfs_list.pl_private = shl;
  128         procfs_list_install("zfs",
  129             spa_name(spa),
  130             "reads",
  131             0600,
  132             &shl->procfs_list,
  133             spa_read_history_show,
  134             spa_read_history_show_header,
  135             spa_read_history_clear,
  136             offsetof(spa_read_history_t, srh_node));
  137 }
  138 
  139 static void
  140 spa_read_history_destroy(spa_t *spa)
  141 {
  142         spa_history_list_t *shl = &spa->spa_stats.read_history;
  143         procfs_list_uninstall(&shl->procfs_list);
  144         spa_read_history_truncate(shl, 0);
  145         procfs_list_destroy(&shl->procfs_list);
  146 }
  147 
  148 void
  149 spa_read_history_add(spa_t *spa, const zbookmark_phys_t *zb, uint32_t aflags)
  150 {
  151         spa_history_list_t *shl = &spa->spa_stats.read_history;
  152         spa_read_history_t *srh;
  153 
  154         ASSERT3P(spa, !=, NULL);
  155         ASSERT3P(zb,  !=, NULL);
  156 
  157         if (zfs_read_history == 0 && shl->size == 0)
  158                 return;
  159 
  160         if (zfs_read_history_hits == 0 && (aflags & ARC_FLAG_CACHED))
  161                 return;
  162 
  163         srh = kmem_zalloc(sizeof (spa_read_history_t), KM_SLEEP);
  164         strlcpy(srh->comm, getcomm(), sizeof (srh->comm));
  165         srh->start  = gethrtime();
  166         srh->objset = zb->zb_objset;
  167         srh->object = zb->zb_object;
  168         srh->level  = zb->zb_level;
  169         srh->blkid  = zb->zb_blkid;
  170         srh->aflags = aflags;
  171         srh->pid    = getpid();
  172 
  173         mutex_enter(&shl->procfs_list.pl_lock);
  174 
  175         procfs_list_add(&shl->procfs_list, srh);
  176         shl->size++;
  177 
  178         spa_read_history_truncate(shl, zfs_read_history);
  179 
  180         mutex_exit(&shl->procfs_list.pl_lock);
  181 }
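/*
 * Illustrative sketch; the caller below is hypothetical and not part of this
 * file.  The read path is expected to hand each bookmark and its ARC flags
 * to spa_read_history_add(), which is a no-op while zfs_read_history is zero
 * and the list is empty, and which drops cache hits unless
 * zfs_read_history_hits is set.
 */
#if 0
static void
example_note_read(spa_t *spa, const zbookmark_phys_t *zb, uint32_t aflags)
{
        spa_read_history_add(spa, zb, aflags);
}
#endif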
  182 
  183 /*
  184  * ==========================================================================
  185  * SPA TXG History Routines
  186  * ==========================================================================
  187  */
  188 
  189 /*
  190  * Txg statistics - Information exported regarding each txg sync
  191  */
  192 
  193 typedef struct spa_txg_history {
  194         uint64_t        txg;            /* txg id */
  195         txg_state_t     state;          /* active txg state */
  196         uint64_t        nread;          /* number of bytes read */
  197         uint64_t        nwritten;       /* number of bytes written */
  198         uint64_t        reads;          /* number of read operations */
  199         uint64_t        writes;         /* number of write operations */
  200         uint64_t        ndirty;         /* number of dirty bytes */
  201         hrtime_t        times[TXG_STATE_COMMITTED]; /* completion times */
  202         procfs_list_node_t      sth_node;
  203 } spa_txg_history_t;
  204 
  205 static int
  206 spa_txg_history_show_header(struct seq_file *f)
  207 {
  208         seq_printf(f, "%-8s %-16s %-5s %-12s %-12s %-12s "
  209             "%-8s %-8s %-12s %-12s %-12s %-12s\n", "txg", "birth", "state",
  210             "ndirty", "nread", "nwritten", "reads", "writes",
  211             "otime", "qtime", "wtime", "stime");
  212         return (0);
  213 }
  214 
  215 static int
  216 spa_txg_history_show(struct seq_file *f, void *data)
  217 {
  218         spa_txg_history_t *sth = (spa_txg_history_t *)data;
  219         uint64_t open = 0, quiesce = 0, wait = 0, sync = 0;
  220         char state;
  221 
  222         switch (sth->state) {
  223                 case TXG_STATE_BIRTH:           state = 'B';    break;
  224                 case TXG_STATE_OPEN:            state = 'O';    break;
  225                 case TXG_STATE_QUIESCED:        state = 'Q';    break;
  226                 case TXG_STATE_WAIT_FOR_SYNC:   state = 'W';    break;
  227                 case TXG_STATE_SYNCED:          state = 'S';    break;
  228                 case TXG_STATE_COMMITTED:       state = 'C';    break;
  229                 default:                        state = '?';    break;
  230         }
  231 
  232         if (sth->times[TXG_STATE_OPEN])
  233                 open = sth->times[TXG_STATE_OPEN] -
  234                     sth->times[TXG_STATE_BIRTH];
  235 
  236         if (sth->times[TXG_STATE_QUIESCED])
  237                 quiesce = sth->times[TXG_STATE_QUIESCED] -
  238                     sth->times[TXG_STATE_OPEN];
  239 
  240         if (sth->times[TXG_STATE_WAIT_FOR_SYNC])
  241                 wait = sth->times[TXG_STATE_WAIT_FOR_SYNC] -
  242                     sth->times[TXG_STATE_QUIESCED];
  243 
  244         if (sth->times[TXG_STATE_SYNCED])
  245                 sync = sth->times[TXG_STATE_SYNCED] -
  246                     sth->times[TXG_STATE_WAIT_FOR_SYNC];
  247 
  248         seq_printf(f, "%-8llu %-16llu %-5c %-12llu "
  249             "%-12llu %-12llu %-8llu %-8llu %-12llu %-12llu %-12llu %-12llu\n",
  250             (longlong_t)sth->txg, sth->times[TXG_STATE_BIRTH], state,
  251             (u_longlong_t)sth->ndirty,
  252             (u_longlong_t)sth->nread, (u_longlong_t)sth->nwritten,
  253             (u_longlong_t)sth->reads, (u_longlong_t)sth->writes,
  254             (u_longlong_t)open, (u_longlong_t)quiesce, (u_longlong_t)wait,
  255             (u_longlong_t)sync);
  256 
  257         return (0);
  258 }
  259 
  260 /* Remove oldest elements from list until there are no more than 'size' left */
  261 static void
  262 spa_txg_history_truncate(spa_history_list_t *shl, unsigned int size)
  263 {
  264         spa_txg_history_t *sth;
  265         while (shl->size > size) {
  266                 sth = list_remove_head(&shl->procfs_list.pl_list);
  267                 ASSERT3P(sth, !=, NULL);
  268                 kmem_free(sth, sizeof (spa_txg_history_t));
  269                 shl->size--;
  270         }
  271 
  272         if (size == 0)
  273                 ASSERT(list_is_empty(&shl->procfs_list.pl_list));
  274 
  275 }
  276 
  277 static int
  278 spa_txg_history_clear(procfs_list_t *procfs_list)
  279 {
  280         spa_history_list_t *shl = procfs_list->pl_private;
  281         mutex_enter(&procfs_list->pl_lock);
  282         spa_txg_history_truncate(shl, 0);
  283         mutex_exit(&procfs_list->pl_lock);
  284         return (0);
  285 }
  286 
  287 static void
  288 spa_txg_history_init(spa_t *spa)
  289 {
  290         spa_history_list_t *shl = &spa->spa_stats.txg_history;
  291 
  292         shl->size = 0;
  293         shl->procfs_list.pl_private = shl;
  294         procfs_list_install("zfs",
  295             spa_name(spa),
  296             "txgs",
  297             0644,
  298             &shl->procfs_list,
  299             spa_txg_history_show,
  300             spa_txg_history_show_header,
  301             spa_txg_history_clear,
  302             offsetof(spa_txg_history_t, sth_node));
  303 }
  304 
  305 static void
  306 spa_txg_history_destroy(spa_t *spa)
  307 {
  308         spa_history_list_t *shl = &spa->spa_stats.txg_history;
  309         procfs_list_uninstall(&shl->procfs_list);
  310         spa_txg_history_truncate(shl, 0);
  311         procfs_list_destroy(&shl->procfs_list);
  312 }
  313 
  314 /*
  315  * Add a new txg to historical record.
  316  */
  317 void
  318 spa_txg_history_add(spa_t *spa, uint64_t txg, hrtime_t birth_time)
  319 {
  320         spa_history_list_t *shl = &spa->spa_stats.txg_history;
  321         spa_txg_history_t *sth;
  322 
  323         if (zfs_txg_history == 0 && shl->size == 0)
  324                 return;
  325 
  326         sth = kmem_zalloc(sizeof (spa_txg_history_t), KM_SLEEP);
  327         sth->txg = txg;
  328         sth->state = TXG_STATE_OPEN;
  329         sth->times[TXG_STATE_BIRTH] = birth_time;
  330 
  331         mutex_enter(&shl->procfs_list.pl_lock);
  332         procfs_list_add(&shl->procfs_list, sth);
  333         shl->size++;
  334         spa_txg_history_truncate(shl, zfs_txg_history);
  335         mutex_exit(&shl->procfs_list.pl_lock);
  336 }
  337 
  338 /*
  339  * Set txg state completion time and increment current state.
  340  */
  341 int
  342 spa_txg_history_set(spa_t *spa, uint64_t txg, txg_state_t completed_state,
  343     hrtime_t completed_time)
  344 {
  345         spa_history_list_t *shl = &spa->spa_stats.txg_history;
  346         spa_txg_history_t *sth;
  347         int error = ENOENT;
  348 
  349         if (zfs_txg_history == 0)
  350                 return (0);
  351 
  352         mutex_enter(&shl->procfs_list.pl_lock);
  353         for (sth = list_tail(&shl->procfs_list.pl_list); sth != NULL;
  354             sth = list_prev(&shl->procfs_list.pl_list, sth)) {
  355                 if (sth->txg == txg) {
  356                         sth->times[completed_state] = completed_time;
  357                         sth->state++;
  358                         error = 0;
  359                         break;
  360                 }
  361         }
  362         mutex_exit(&shl->procfs_list.pl_lock);
  363 
  364         return (error);
  365 }
  366 
  367 /*
  368  * Set txg IO stats.
  369  */
  370 static int
  371 spa_txg_history_set_io(spa_t *spa, uint64_t txg, uint64_t nread,
  372     uint64_t nwritten, uint64_t reads, uint64_t writes, uint64_t ndirty)
  373 {
  374         spa_history_list_t *shl = &spa->spa_stats.txg_history;
  375         spa_txg_history_t *sth;
  376         int error = ENOENT;
  377 
  378         if (zfs_txg_history == 0)
  379                 return (0);
  380 
  381         mutex_enter(&shl->procfs_list.pl_lock);
  382         for (sth = list_tail(&shl->procfs_list.pl_list); sth != NULL;
  383             sth = list_prev(&shl->procfs_list.pl_list, sth)) {
  384                 if (sth->txg == txg) {
  385                         sth->nread = nread;
  386                         sth->nwritten = nwritten;
  387                         sth->reads = reads;
  388                         sth->writes = writes;
  389                         sth->ndirty = ndirty;
  390                         error = 0;
  391                         break;
  392                 }
  393         }
  394         mutex_exit(&shl->procfs_list.pl_lock);
  395 
  396         return (error);
  397 }
  398 
  399 txg_stat_t *
  400 spa_txg_history_init_io(spa_t *spa, uint64_t txg, dsl_pool_t *dp)
  401 {
  402         txg_stat_t *ts;
  403 
  404         if (zfs_txg_history == 0)
  405                 return (NULL);
  406 
  407         ts = kmem_alloc(sizeof (txg_stat_t), KM_SLEEP);
  408 
  409         spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
  410         vdev_get_stats(spa->spa_root_vdev, &ts->vs1);
  411         spa_config_exit(spa, SCL_CONFIG, FTAG);
  412 
  413         ts->txg = txg;
  414         ts->ndirty = dp->dp_dirty_pertxg[txg & TXG_MASK];
  415 
  416         spa_txg_history_set(spa, txg, TXG_STATE_WAIT_FOR_SYNC, gethrtime());
  417 
  418         return (ts);
  419 }
  420 
  421 void
  422 spa_txg_history_fini_io(spa_t *spa, txg_stat_t *ts)
  423 {
  424         if (ts == NULL)
  425                 return;
  426 
  427         if (zfs_txg_history == 0) {
  428                 kmem_free(ts, sizeof (txg_stat_t));
  429                 return;
  430         }
  431 
  432         spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
  433         vdev_get_stats(spa->spa_root_vdev, &ts->vs2);
  434         spa_config_exit(spa, SCL_CONFIG, FTAG);
  435 
  436         spa_txg_history_set(spa, ts->txg, TXG_STATE_SYNCED, gethrtime());
  437         spa_txg_history_set_io(spa, ts->txg,
  438             ts->vs2.vs_bytes[ZIO_TYPE_READ] - ts->vs1.vs_bytes[ZIO_TYPE_READ],
  439             ts->vs2.vs_bytes[ZIO_TYPE_WRITE] - ts->vs1.vs_bytes[ZIO_TYPE_WRITE],
  440             ts->vs2.vs_ops[ZIO_TYPE_READ] - ts->vs1.vs_ops[ZIO_TYPE_READ],
  441             ts->vs2.vs_ops[ZIO_TYPE_WRITE] - ts->vs1.vs_ops[ZIO_TYPE_WRITE],
  442             ts->ndirty);
  443 
  444         kmem_free(ts, sizeof (txg_stat_t));
  445 }
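/*
 * Illustrative sketch of how the routines above fit together over one txg's
 * lifetime; the function below is hypothetical, the real callers live in the
 * txg sync machinery.
 */
#if 0
static void
example_txg_lifecycle(spa_t *spa, dsl_pool_t *dp, uint64_t txg)
{
        txg_stat_t *ts;

        spa_txg_history_add(spa, txg, gethrtime());                /* BIRTH */
        spa_txg_history_set(spa, txg, TXG_STATE_OPEN, gethrtime());
        spa_txg_history_set(spa, txg, TXG_STATE_QUIESCED, gethrtime());

        ts = spa_txg_history_init_io(spa, txg, dp);        /* WAIT_FOR_SYNC */
        /* ... the pool sync for 'txg' would run here ... */
        spa_txg_history_fini_io(spa, ts);             /* SYNCED + I/O deltas */

        spa_txg_history_set(spa, txg, TXG_STATE_COMMITTED, gethrtime());
}
#endif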
  446 
  447 /*
  448  * ==========================================================================
  449  * SPA TX Assign Histogram Routines
  450  * ==========================================================================
  451  */
  452 
  453 /*
  454  * Tx statistics - Information exported regarding dmu_tx_assign time.
  455  */
  456 
  457 /*
   458  * When the kstat is written, zero all buckets.  When the kstat is read,
   459  * count the number of trailing buckets set to zero and update ks_ndata
   460  * such that they are not output.
  461  */
  462 static int
  463 spa_tx_assign_update(kstat_t *ksp, int rw)
  464 {
  465         spa_t *spa = ksp->ks_private;
  466         spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram;
  467         int i;
  468 
  469         if (rw == KSTAT_WRITE) {
  470                 for (i = 0; i < shk->count; i++)
  471                         ((kstat_named_t *)shk->priv)[i].value.ui64 = 0;
  472         }
  473 
  474         for (i = shk->count; i > 0; i--)
  475                 if (((kstat_named_t *)shk->priv)[i-1].value.ui64 != 0)
  476                         break;
  477 
  478         ksp->ks_ndata = i;
  479         ksp->ks_data_size = i * sizeof (kstat_named_t);
  480 
  481         return (0);
  482 }
  483 
  484 static void
  485 spa_tx_assign_init(spa_t *spa)
  486 {
  487         spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram;
  488         char *name;
  489         kstat_named_t *ks;
  490         kstat_t *ksp;
  491         int i;
  492 
  493         mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL);
  494 
  495         shk->count = 42; /* power of two buckets for 1ns to 2,199s */
  496         shk->size = shk->count * sizeof (kstat_named_t);
  497         shk->priv = kmem_alloc(shk->size, KM_SLEEP);
  498 
  499         name = kmem_asprintf("zfs/%s", spa_name(spa));
  500 
  501         for (i = 0; i < shk->count; i++) {
  502                 ks = &((kstat_named_t *)shk->priv)[i];
  503                 ks->data_type = KSTAT_DATA_UINT64;
  504                 ks->value.ui64 = 0;
  505                 (void) snprintf(ks->name, KSTAT_STRLEN, "%llu ns",
  506                     (u_longlong_t)1 << i);
  507         }
  508 
  509         ksp = kstat_create(name, 0, "dmu_tx_assign", "misc",
  510             KSTAT_TYPE_NAMED, 0, KSTAT_FLAG_VIRTUAL);
  511         shk->kstat = ksp;
  512 
  513         if (ksp) {
  514                 ksp->ks_lock = &shk->lock;
  515                 ksp->ks_data = shk->priv;
  516                 ksp->ks_ndata = shk->count;
  517                 ksp->ks_data_size = shk->size;
  518                 ksp->ks_private = spa;
  519                 ksp->ks_update = spa_tx_assign_update;
  520                 kstat_install(ksp);
  521         }
  522         kmem_strfree(name);
  523 }
  524 
  525 static void
  526 spa_tx_assign_destroy(spa_t *spa)
  527 {
  528         spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram;
  529         kstat_t *ksp;
  530 
  531         ksp = shk->kstat;
  532         if (ksp)
  533                 kstat_delete(ksp);
  534 
  535         kmem_free(shk->priv, shk->size);
  536         mutex_destroy(&shk->lock);
  537 }
  538 
  539 void
  540 spa_tx_assign_add_nsecs(spa_t *spa, uint64_t nsecs)
  541 {
  542         spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram;
  543         uint64_t idx = 0;
  544 
   545         while (((1ULL << idx) < nsecs) && (idx < shk->count - 1))
  546                 idx++;
  547 
  548         atomic_inc_64(&((kstat_named_t *)shk->priv)[idx].value.ui64);
  549 }
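/*
 * Illustrative sketch; the caller below is hypothetical.  The transaction
 * assignment path is expected to measure how long an assignment took and
 * feed the delta here.  For example, a 700 ns wait increments the "1024 ns"
 * bucket, 1024 being the smallest power of two that is >= 700.
 */
#if 0
static void
example_time_tx_assign(spa_t *spa)
{
        hrtime_t begin = gethrtime();

        /* ... the transaction assignment work would happen here ... */

        spa_tx_assign_add_nsecs(spa, gethrtime() - begin);
}
#endif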
  550 
  551 /*
  552  * ==========================================================================
  553  * SPA MMP History Routines
  554  * ==========================================================================
  555  */
  556 
  557 /*
  558  * MMP statistics - Information exported regarding attempted MMP writes
  559  *   For MMP writes issued, fields used as per comments below.
  560  *   For MMP writes skipped, an entry represents a span of time when
   561  *      writes were skipped for the same reason (error from mmp_random_leaf).
  562  *      Differences are:
  563  *      timestamp       time first write skipped, if >1 skipped in a row
  564  *      mmp_delay       delay value at timestamp
  565  *      vdev_guid       number of writes skipped
  566  *      io_error        one of enum mmp_error
  567  *      duration        time span (ns) of skipped writes
  568  */
  569 
  570 typedef struct spa_mmp_history {
  571         uint64_t        mmp_node_id;    /* unique # for updates */
  572         uint64_t        txg;            /* txg of last sync */
  573         uint64_t        timestamp;      /* UTC time MMP write issued */
  574         uint64_t        mmp_delay;      /* mmp_thread.mmp_delay at timestamp */
  575         uint64_t        vdev_guid;      /* unique ID of leaf vdev */
  576         char            *vdev_path;
  577         int             vdev_label;     /* vdev label */
  578         int             io_error;       /* error status of MMP write */
  579         hrtime_t        error_start;    /* hrtime of start of error period */
  580         hrtime_t        duration;       /* time from submission to completion */
  581         procfs_list_node_t      smh_node;
  582 } spa_mmp_history_t;
  583 
  584 static int
  585 spa_mmp_history_show_header(struct seq_file *f)
  586 {
  587         seq_printf(f, "%-10s %-10s %-10s %-6s %-10s %-12s %-24s "
  588             "%-10s %s\n", "id", "txg", "timestamp", "error", "duration",
  589             "mmp_delay", "vdev_guid", "vdev_label", "vdev_path");
  590         return (0);
  591 }
  592 
  593 static int
  594 spa_mmp_history_show(struct seq_file *f, void *data)
  595 {
  596         spa_mmp_history_t *smh = (spa_mmp_history_t *)data;
  597         char skip_fmt[] = "%-10llu %-10llu %10llu %#6llx %10lld %12llu %-24llu "
  598             "%-10lld %s\n";
  599         char write_fmt[] = "%-10llu %-10llu %10llu %6lld %10lld %12llu %-24llu "
  600             "%-10lld %s\n";
  601 
  602         seq_printf(f, (smh->error_start ? skip_fmt : write_fmt),
  603             (u_longlong_t)smh->mmp_node_id, (u_longlong_t)smh->txg,
  604             (u_longlong_t)smh->timestamp, (longlong_t)smh->io_error,
  605             (longlong_t)smh->duration, (u_longlong_t)smh->mmp_delay,
  606             (u_longlong_t)smh->vdev_guid, (u_longlong_t)smh->vdev_label,
  607             (smh->vdev_path ? smh->vdev_path : "-"));
  608 
  609         return (0);
  610 }
  611 
  612 /* Remove oldest elements from list until there are no more than 'size' left */
  613 static void
  614 spa_mmp_history_truncate(spa_history_list_t *shl, unsigned int size)
  615 {
  616         spa_mmp_history_t *smh;
  617         while (shl->size > size) {
  618                 smh = list_remove_head(&shl->procfs_list.pl_list);
  619                 if (smh->vdev_path)
  620                         kmem_strfree(smh->vdev_path);
  621                 kmem_free(smh, sizeof (spa_mmp_history_t));
  622                 shl->size--;
  623         }
  624 
  625         if (size == 0)
  626                 ASSERT(list_is_empty(&shl->procfs_list.pl_list));
  627 
  628 }
  629 
  630 static int
  631 spa_mmp_history_clear(procfs_list_t *procfs_list)
  632 {
  633         spa_history_list_t *shl = procfs_list->pl_private;
  634         mutex_enter(&procfs_list->pl_lock);
  635         spa_mmp_history_truncate(shl, 0);
  636         mutex_exit(&procfs_list->pl_lock);
  637         return (0);
  638 }
  639 
  640 static void
  641 spa_mmp_history_init(spa_t *spa)
  642 {
  643         spa_history_list_t *shl = &spa->spa_stats.mmp_history;
  644 
  645         shl->size = 0;
  646 
  647         shl->procfs_list.pl_private = shl;
  648         procfs_list_install("zfs",
  649             spa_name(spa),
  650             "multihost",
  651             0644,
  652             &shl->procfs_list,
  653             spa_mmp_history_show,
  654             spa_mmp_history_show_header,
  655             spa_mmp_history_clear,
  656             offsetof(spa_mmp_history_t, smh_node));
  657 }
  658 
  659 static void
  660 spa_mmp_history_destroy(spa_t *spa)
  661 {
  662         spa_history_list_t *shl = &spa->spa_stats.mmp_history;
  663         procfs_list_uninstall(&shl->procfs_list);
  664         spa_mmp_history_truncate(shl, 0);
  665         procfs_list_destroy(&shl->procfs_list);
  666 }
  667 
  668 /*
  669  * Set duration in existing "skip" record to how long we have waited for a leaf
  670  * vdev to become available.
  671  *
   672  * It is important that we start the search at the tail of the list, where
   673  * new records are inserted, so this is normally an O(1) operation.
  674  */
  675 int
  676 spa_mmp_history_set_skip(spa_t *spa, uint64_t mmp_node_id)
  677 {
  678         spa_history_list_t *shl = &spa->spa_stats.mmp_history;
  679         spa_mmp_history_t *smh;
  680         int error = ENOENT;
  681 
  682         if (zfs_multihost_history == 0 && shl->size == 0)
  683                 return (0);
  684 
  685         mutex_enter(&shl->procfs_list.pl_lock);
  686         for (smh = list_tail(&shl->procfs_list.pl_list); smh != NULL;
  687             smh = list_prev(&shl->procfs_list.pl_list, smh)) {
  688                 if (smh->mmp_node_id == mmp_node_id) {
  689                         ASSERT3U(smh->io_error, !=, 0);
  690                         smh->duration = gethrtime() - smh->error_start;
  691                         smh->vdev_guid++;
  692                         error = 0;
  693                         break;
  694                 }
  695         }
  696         mutex_exit(&shl->procfs_list.pl_lock);
  697 
  698         return (error);
  699 }
  700 
  701 /*
  702  * Set MMP write duration and error status in existing record.
  703  * See comment re: search order above spa_mmp_history_set_skip().
  704  */
  705 int
  706 spa_mmp_history_set(spa_t *spa, uint64_t mmp_node_id, int io_error,
  707     hrtime_t duration)
  708 {
  709         spa_history_list_t *shl = &spa->spa_stats.mmp_history;
  710         spa_mmp_history_t *smh;
  711         int error = ENOENT;
  712 
  713         if (zfs_multihost_history == 0 && shl->size == 0)
  714                 return (0);
  715 
  716         mutex_enter(&shl->procfs_list.pl_lock);
  717         for (smh = list_tail(&shl->procfs_list.pl_list); smh != NULL;
  718             smh = list_prev(&shl->procfs_list.pl_list, smh)) {
  719                 if (smh->mmp_node_id == mmp_node_id) {
  720                         ASSERT(smh->io_error == 0);
  721                         smh->io_error = io_error;
  722                         smh->duration = duration;
  723                         error = 0;
  724                         break;
  725                 }
  726         }
  727         mutex_exit(&shl->procfs_list.pl_lock);
  728 
  729         return (error);
  730 }
  731 
  732 /*
  733  * Add a new MMP historical record.
  734  * error == 0 : a write was issued.
  735  * error != 0 : a write was not issued because no leaves were found.
  736  */
  737 void
  738 spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp,
  739     uint64_t mmp_delay, vdev_t *vd, int label, uint64_t mmp_node_id,
  740     int error)
  741 {
  742         spa_history_list_t *shl = &spa->spa_stats.mmp_history;
  743         spa_mmp_history_t *smh;
  744 
  745         if (zfs_multihost_history == 0 && shl->size == 0)
  746                 return;
  747 
  748         smh = kmem_zalloc(sizeof (spa_mmp_history_t), KM_SLEEP);
  749         smh->txg = txg;
  750         smh->timestamp = timestamp;
  751         smh->mmp_delay = mmp_delay;
  752         if (vd) {
  753                 smh->vdev_guid = vd->vdev_guid;
  754                 if (vd->vdev_path)
  755                         smh->vdev_path = kmem_strdup(vd->vdev_path);
  756         }
  757         smh->vdev_label = label;
  758         smh->mmp_node_id = mmp_node_id;
  759 
  760         if (error) {
  761                 smh->io_error = error;
  762                 smh->error_start = gethrtime();
  763                 smh->vdev_guid = 1;
  764         }
  765 
  766         mutex_enter(&shl->procfs_list.pl_lock);
  767         procfs_list_add(&shl->procfs_list, smh);
  768         shl->size++;
  769         spa_mmp_history_truncate(shl, zfs_multihost_history);
  770         mutex_exit(&shl->procfs_list.pl_lock);
  771 }
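/*
 * Illustrative sketch; the caller and values below are hypothetical.  The
 * MMP thread is expected to add a record when a write is issued (error == 0)
 * and fill in its status on completion, or to add a "skip" record
 * (error != 0) and keep extending it with spa_mmp_history_set_skip() while
 * no leaf vdev is available.
 */
#if 0
static void
example_mmp_accounting(spa_t *spa, vdev_t *vd, uint64_t txg,
    uint64_t mmp_node_id, uint64_t mmp_delay)
{
        hrtime_t io_start = gethrtime();

        /* A write was issued to leaf vdev 'vd'. */
        spa_mmp_history_add(spa, txg, gethrestime_sec(), mmp_delay,
            vd, 0, mmp_node_id, 0);
        /* ... later, from the write's completion callback ... */
        (void) spa_mmp_history_set(spa, mmp_node_id, 0,
            gethrtime() - io_start);

        /* No leaf vdev was available; EIO stands in for an mmp error code. */
        spa_mmp_history_add(spa, txg, gethrestime_sec(), mmp_delay,
            NULL, -1, mmp_node_id + 1, EIO);
        (void) spa_mmp_history_set_skip(spa, mmp_node_id + 1);
}
#endif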
  772 
  773 static void *
  774 spa_state_addr(kstat_t *ksp, loff_t n)
  775 {
  776         if (n == 0)
  777                 return (ksp->ks_private);       /* return the spa_t */
  778         return (NULL);
  779 }
  780 
  781 static int
  782 spa_state_data(char *buf, size_t size, void *data)
  783 {
  784         spa_t *spa = (spa_t *)data;
  785         (void) snprintf(buf, size, "%s\n", spa_state_to_name(spa));
  786         return (0);
  787 }
  788 
  789 /*
  790  * Return the state of the pool in /proc/spl/kstat/zfs/<pool>/state.
  791  *
  792  * This is a lock-less read of the pool's state (unlike using 'zpool', which
   793  * can potentially block for seconds).  Because it doesn't block, it can be
   794  * useful as a pool heartbeat value.
  795  */
  796 static void
  797 spa_state_init(spa_t *spa)
  798 {
  799         spa_history_kstat_t *shk = &spa->spa_stats.state;
  800         char *name;
  801         kstat_t *ksp;
  802 
  803         mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL);
  804 
  805         name = kmem_asprintf("zfs/%s", spa_name(spa));
  806         ksp = kstat_create(name, 0, "state", "misc",
  807             KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
  808 
  809         shk->kstat = ksp;
  810         if (ksp) {
  811                 ksp->ks_lock = &shk->lock;
  812                 ksp->ks_data = NULL;
  813                 ksp->ks_private = spa;
  814                 ksp->ks_flags |= KSTAT_FLAG_NO_HEADERS;
  815                 kstat_set_raw_ops(ksp, NULL, spa_state_data, spa_state_addr);
  816                 kstat_install(ksp);
  817         }
  818 
  819         kmem_strfree(name);
  820 }
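/*
 * Userland sketch; the pool name "tank" and the helper below are
 * placeholders.  Because the read is lock-less, a monitoring tool can poll
 * the state cheaply, e.g. from /proc/spl/kstat/zfs/tank/state on Linux (on
 * FreeBSD the same kstat is expected to appear as the
 * kstat.zfs.tank.misc.state sysctl).
 */
#if 0
#include <stdio.h>

static void
example_poll_pool_state(void)
{
        char state[64] = { 0 };
        FILE *fp = fopen("/proc/spl/kstat/zfs/tank/state", "r");

        if (fp != NULL) {
                if (fgets(state, sizeof (state), fp) != NULL)
                        (void) printf("pool state: %s", state);
                (void) fclose(fp);
        }
}
#endif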
  821 
  822 static int
  823 spa_guid_data(char *buf, size_t size, void *data)
  824 {
  825         spa_t *spa = (spa_t *)data;
  826         (void) snprintf(buf, size, "%llu\n", (u_longlong_t)spa_guid(spa));
  827         return (0);
  828 }
  829 
  830 static void
  831 spa_guid_init(spa_t *spa)
  832 {
  833         spa_history_kstat_t *shk = &spa->spa_stats.guid;
  834         char *name;
  835         kstat_t *ksp;
  836 
  837         mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL);
  838 
  839         name = kmem_asprintf("zfs/%s", spa_name(spa));
  840 
  841         ksp = kstat_create(name, 0, "guid", "misc",
  842             KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
  843 
  844         shk->kstat = ksp;
  845         if (ksp) {
  846                 ksp->ks_lock = &shk->lock;
  847                 ksp->ks_data = NULL;
  848                 ksp->ks_private = spa;
  849                 ksp->ks_flags |= KSTAT_FLAG_NO_HEADERS;
  850                 kstat_set_raw_ops(ksp, NULL, spa_guid_data, spa_state_addr);
  851                 kstat_install(ksp);
  852         }
  853 
  854         kmem_strfree(name);
  855 }
  856 
  857 static void
  858 spa_health_destroy(spa_t *spa)
  859 {
  860         spa_history_kstat_t *shk = &spa->spa_stats.state;
  861         kstat_t *ksp = shk->kstat;
  862         if (ksp)
  863                 kstat_delete(ksp);
  864 
  865         mutex_destroy(&shk->lock);
  866 }
  867 
  868 static void
  869 spa_guid_destroy(spa_t *spa)
  870 {
  871         spa_history_kstat_t *shk = &spa->spa_stats.guid;
  872         kstat_t *ksp = shk->kstat;
  873         if (ksp)
  874                 kstat_delete(ksp);
  875 
  876         mutex_destroy(&shk->lock);
  877 }
  878 
  879 static const spa_iostats_t spa_iostats_template = {
  880         { "trim_extents_written",               KSTAT_DATA_UINT64 },
  881         { "trim_bytes_written",                 KSTAT_DATA_UINT64 },
  882         { "trim_extents_skipped",               KSTAT_DATA_UINT64 },
  883         { "trim_bytes_skipped",                 KSTAT_DATA_UINT64 },
  884         { "trim_extents_failed",                KSTAT_DATA_UINT64 },
  885         { "trim_bytes_failed",                  KSTAT_DATA_UINT64 },
  886         { "autotrim_extents_written",           KSTAT_DATA_UINT64 },
  887         { "autotrim_bytes_written",             KSTAT_DATA_UINT64 },
  888         { "autotrim_extents_skipped",           KSTAT_DATA_UINT64 },
  889         { "autotrim_bytes_skipped",             KSTAT_DATA_UINT64 },
  890         { "autotrim_extents_failed",            KSTAT_DATA_UINT64 },
  891         { "autotrim_bytes_failed",              KSTAT_DATA_UINT64 },
  892         { "simple_trim_extents_written",        KSTAT_DATA_UINT64 },
  893         { "simple_trim_bytes_written",          KSTAT_DATA_UINT64 },
  894         { "simple_trim_extents_skipped",        KSTAT_DATA_UINT64 },
  895         { "simple_trim_bytes_skipped",          KSTAT_DATA_UINT64 },
  896         { "simple_trim_extents_failed",         KSTAT_DATA_UINT64 },
  897         { "simple_trim_bytes_failed",           KSTAT_DATA_UINT64 },
  898 };
  899 
  900 #define SPA_IOSTATS_ADD(stat, val) \
  901     atomic_add_64(&iostats->stat.value.ui64, (val));
  902 
  903 void
  904 spa_iostats_trim_add(spa_t *spa, trim_type_t type,
  905     uint64_t extents_written, uint64_t bytes_written,
  906     uint64_t extents_skipped, uint64_t bytes_skipped,
  907     uint64_t extents_failed, uint64_t bytes_failed)
  908 {
  909         spa_history_kstat_t *shk = &spa->spa_stats.iostats;
  910         kstat_t *ksp = shk->kstat;
  911         spa_iostats_t *iostats;
  912 
  913         if (ksp == NULL)
  914                 return;
  915 
  916         iostats = ksp->ks_data;
  917         if (type == TRIM_TYPE_MANUAL) {
  918                 SPA_IOSTATS_ADD(trim_extents_written, extents_written);
  919                 SPA_IOSTATS_ADD(trim_bytes_written, bytes_written);
  920                 SPA_IOSTATS_ADD(trim_extents_skipped, extents_skipped);
  921                 SPA_IOSTATS_ADD(trim_bytes_skipped, bytes_skipped);
  922                 SPA_IOSTATS_ADD(trim_extents_failed, extents_failed);
  923                 SPA_IOSTATS_ADD(trim_bytes_failed, bytes_failed);
  924         } else if (type == TRIM_TYPE_AUTO) {
  925                 SPA_IOSTATS_ADD(autotrim_extents_written, extents_written);
  926                 SPA_IOSTATS_ADD(autotrim_bytes_written, bytes_written);
  927                 SPA_IOSTATS_ADD(autotrim_extents_skipped, extents_skipped);
  928                 SPA_IOSTATS_ADD(autotrim_bytes_skipped, bytes_skipped);
  929                 SPA_IOSTATS_ADD(autotrim_extents_failed, extents_failed);
  930                 SPA_IOSTATS_ADD(autotrim_bytes_failed, bytes_failed);
  931         } else {
  932                 SPA_IOSTATS_ADD(simple_trim_extents_written, extents_written);
  933                 SPA_IOSTATS_ADD(simple_trim_bytes_written, bytes_written);
  934                 SPA_IOSTATS_ADD(simple_trim_extents_skipped, extents_skipped);
  935                 SPA_IOSTATS_ADD(simple_trim_bytes_skipped, bytes_skipped);
  936                 SPA_IOSTATS_ADD(simple_trim_extents_failed, extents_failed);
  937                 SPA_IOSTATS_ADD(simple_trim_bytes_failed, bytes_failed);
  938         }
  939 }
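/*
 * Illustrative sketch; the caller and the numbers below are made up.  The
 * TRIM code is expected to accumulate per-pass results into the pool's
 * "iostats" kstat through spa_iostats_trim_add().
 */
#if 0
static void
example_account_manual_trim(spa_t *spa)
{
        spa_iostats_trim_add(spa, TRIM_TYPE_MANUAL,
            128, 1ULL << 30,        /* extents / bytes written */
            16, 64ULL << 20,        /* extents / bytes skipped */
            0, 0);                  /* extents / bytes failed */
}
#endif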
  940 
  941 static int
  942 spa_iostats_update(kstat_t *ksp, int rw)
  943 {
  944         if (rw == KSTAT_WRITE) {
  945                 memcpy(ksp->ks_data, &spa_iostats_template,
  946                     sizeof (spa_iostats_t));
  947         }
  948 
  949         return (0);
  950 }
  951 
  952 static void
  953 spa_iostats_init(spa_t *spa)
  954 {
  955         spa_history_kstat_t *shk = &spa->spa_stats.iostats;
  956 
  957         mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL);
  958 
  959         char *name = kmem_asprintf("zfs/%s", spa_name(spa));
  960         kstat_t *ksp = kstat_create(name, 0, "iostats", "misc",
  961             KSTAT_TYPE_NAMED, sizeof (spa_iostats_t) / sizeof (kstat_named_t),
  962             KSTAT_FLAG_VIRTUAL);
  963 
  964         shk->kstat = ksp;
  965         if (ksp) {
  966                 int size = sizeof (spa_iostats_t);
  967                 ksp->ks_lock = &shk->lock;
  968                 ksp->ks_private = spa;
  969                 ksp->ks_update = spa_iostats_update;
  970                 ksp->ks_data = kmem_alloc(size, KM_SLEEP);
  971                 memcpy(ksp->ks_data, &spa_iostats_template, size);
  972                 kstat_install(ksp);
  973         }
  974 
  975         kmem_strfree(name);
  976 }
  977 
  978 static void
  979 spa_iostats_destroy(spa_t *spa)
  980 {
  981         spa_history_kstat_t *shk = &spa->spa_stats.iostats;
  982         kstat_t *ksp = shk->kstat;
  983         if (ksp) {
  984                 kmem_free(ksp->ks_data, sizeof (spa_iostats_t));
  985                 kstat_delete(ksp);
  986         }
  987 
  988         mutex_destroy(&shk->lock);
  989 }
  990 
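/*
 * The two entry points below create and tear down every per-pool statistic
 * defined in this file.  They are expected to be called once per spa_t, from
 * the pool allocation and teardown paths (spa_add() and spa_remove() in
 * spa_misc.c).
 */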
  991 void
  992 spa_stats_init(spa_t *spa)
  993 {
  994         spa_read_history_init(spa);
  995         spa_txg_history_init(spa);
  996         spa_tx_assign_init(spa);
  997         spa_mmp_history_init(spa);
  998         spa_state_init(spa);
  999         spa_guid_init(spa);
 1000         spa_iostats_init(spa);
 1001 }
 1002 
 1003 void
 1004 spa_stats_destroy(spa_t *spa)
 1005 {
 1006         spa_iostats_destroy(spa);
 1007         spa_health_destroy(spa);
 1008         spa_tx_assign_destroy(spa);
 1009         spa_txg_history_destroy(spa);
 1010         spa_read_history_destroy(spa);
 1011         spa_mmp_history_destroy(spa);
 1012         spa_guid_destroy(spa);
 1013 }
 1014 
 1015 ZFS_MODULE_PARAM(zfs, zfs_, read_history, UINT, ZMOD_RW,
 1016         "Historical statistics for the last N reads");
 1017 
 1018 ZFS_MODULE_PARAM(zfs, zfs_, read_history_hits, INT, ZMOD_RW,
 1019         "Include cache hits in read history");
 1020 
 1021 ZFS_MODULE_PARAM(zfs_txg, zfs_txg_, history, UINT, ZMOD_RW,
 1022         "Historical statistics for the last N txgs");
 1023 
 1024 ZFS_MODULE_PARAM(zfs_multihost, zfs_multihost_, history, UINT, ZMOD_RW,
 1025         "Historical statistics for last N multihost writes");

This page is part of the FreeBSD/Linux Kernel Cross-Reference and was automatically generated using a modified version of the LXR engine.