FreeBSD/Linux Kernel Cross Reference
sys/contrib/openzfs/cmd/zpool_influxdb/zpool_influxdb.c

    1 /*
    2  * Gather top-level ZFS pool and resilver/scan statistics and print using
    3  * influxdb line protocol
    4  * usage: [options] [pool_name]
    5  * where options are:
    6  *   --execd, -e           run in telegraf execd input plugin mode, [CR] on
    7  *                         stdin causes a sample to be printed and wait for
    8  *                         the next [CR]
    9  *   --no-histograms, -n   don't print histogram data (reduces cardinality
   10  *                         if you don't care about histograms)
   11  *   --sum-histogram-buckets, -s sum histogram bucket values
   12  *
   13  * To integrate into telegraf use one of:
   14  * 1. the `inputs.execd` plugin with the `--execd` option
   15  * 2. the `inputs.exec` plugin to simply run with no options
   16  *
   17  * NOTE: libzfs is an unstable interface. YMMV.
   18  *
   19  * The design goals of this software include:
   20  * + be as lightweight as possible
   21  * + reduce the number of external dependencies as far as possible, hence
   22  *   there is no dependency on a client library for managing the metric
   23  *   collection -- info is printed, KISS
   24  * + broken pools or kernel bugs can cause this process to hang in an
   25  *   unkillable state. For this reason, it is best to keep the damage limited
   26  *   to a small process like zpool_influxdb rather than a larger collector.
   27  *
   28  * Copyright 2018-2020 Richard Elling
   29  *
   30  * This software is dual-licensed MIT and CDDL.
   31  *
   32  * The MIT License (MIT)
   33  *
   34  * Permission is hereby granted, free of charge, to any person obtaining a copy
   35  * of this software and associated documentation files (the "Software"), to deal
   36  * in the Software without restriction, including without limitation the rights
   37  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   38  * copies of the Software, and to permit persons to whom the Software is
   39  * furnished to do so, subject to the following conditions:
   40  *
   41  * The above copyright notice and this permission notice shall be included in
   42  * all copies or substantial portions of the Software.
   43  *
   44  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   45  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   46  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   47  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   48  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
   49  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   50  * SOFTWARE.
   51  *
   52  * CDDL HEADER START
   53  *
   54  * The contents of this file are subject to the terms of the
   55  * Common Development and Distribution License (the "License").
   56  * You may not use this file except in compliance with the License.
   57  *
   58  * The contents of this file are subject to the terms of the
   59  * Common Development and Distribution License Version 1.0 (CDDL-1.0).
   60  * You can obtain a copy of the license from the top-level file
   61  * "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.
   62  * You may not use this file except in compliance with the license.
   63  *
   64  * See the License for the specific language governing permissions
   65  * and limitations under the License.
   66  *
   67  * CDDL HEADER END
   68  */
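       /*
        * Illustrative sketch of the output and integration described above.
        * The tool emits influxdb line protocol, one line per measurement, in
        * the form "measurement,tags fields timestamp".  A pool summary line
        * might look roughly like this (pool name and values are hypothetical):
        *
        *   zpool_stats,name=rpool,state=ONLINE,vdev=root alloc=1234u,free=5678u,size=6912u,... 1609459200000000000
        *
        * A minimal telegraf `inputs.exec` stanza could be (the install path
        * is an assumption and may differ by platform):
        *
        *   [[inputs.exec]]
        *     commands = ["/usr/libexec/zfs/zpool_influxdb"]
        *     data_format = "influx"
        */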
   69 #include <string.h>
   70 #include <getopt.h>
   71 #include <stdio.h>
   72 #include <stdint.h>
   73 #include <inttypes.h>
   74 #include <libzfs.h>
   75 
   76 #define POOL_MEASUREMENT        "zpool_stats"
   77 #define SCAN_MEASUREMENT        "zpool_scan_stats"
   78 #define VDEV_MEASUREMENT        "zpool_vdev_stats"
   79 #define POOL_LATENCY_MEASUREMENT        "zpool_latency"
   80 #define POOL_QUEUE_MEASUREMENT  "zpool_vdev_queue"
   81 #define MIN_LAT_INDEX   10  /* minimum latency index 10 = 1024ns */
   82 #define POOL_IO_SIZE_MEASUREMENT        "zpool_io_size"
   83 #define MIN_SIZE_INDEX  9  /* minimum size index 9 = 512 bytes */
   84 
   85 /* global options */
   86 int execd_mode = 0;
   87 int no_histograms = 0;
   88 int sum_histogram_buckets = 0;
   89 char metric_data_type = 'u';
   90 uint64_t metric_value_mask = UINT64_MAX;
   91 uint64_t timestamp = 0;
   92 int complained_about_sync = 0;
   93 const char *tags = "";
   94 
   95 typedef int (*stat_printer_f)(nvlist_t *, const char *, const char *);
   96 
   97 /*
   98  * influxdb line protocol rules for escaping are important because the
   99  * zpool name can include characters that need to be escaped
  100  *
  101  * caller is responsible for freeing result
  102  */
  103 static char *
  104 escape_string(const char *s)
  105 {
  106         const char *c;
  107         char *d;
  108         char *t = (char *)malloc(ZFS_MAX_DATASET_NAME_LEN * 2);
  109         if (t == NULL) {
  110                 fprintf(stderr, "error: cannot allocate memory\n");
  111                 exit(1);
  112         }
  113 
  114         for (c = s, d = t; *c != '\0'; c++, d++) {
  115                 switch (*c) {
  116                 case ' ':
  117                 case ',':
  118                 case '=':
  119                 case '\\':
  120                         *d++ = '\\';
  121                         zfs_fallthrough;
  122                 default:
  123                         *d = *c;
  124                 }
  125         }
  126         *d = '\0';
  127         return (t);
  128 }
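       /*
        * Example of the escaping above (hypothetical pool name): the input
        * "my pool,a=b" becomes "my\ pool\,a\=b", so it can be embedded safely
        * in an influxdb tag value.
        */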
  129 
  130 /*
  131  * print key=value where value is a uint64_t
  132  */
  133 static void
  134 print_kv(const char *key, uint64_t value)
  135 {
  136         printf("%s=%llu%c", key,
  137             (u_longlong_t)value & metric_value_mask, metric_data_type);
  138 }
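       /*
        * Example: print_kv("alloc", 1234567) prints "alloc=1234567u" with the
        * default unsigned type, or "alloc=1234567i" when --signed-int is used.
        */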
  139 
  140 /*
  141  * print_scan_status() prints the details as often seen in the "zpool status"
  142  * output. However, unlike the zpool command, which is intended for humans,
  143  * this output is suitable for long-term tracking in influxdb.
  144  * TODO: update to include issued scan data
  145  */
  146 static int
  147 print_scan_status(nvlist_t *nvroot, const char *pool_name)
  148 {
  149         uint_t c;
  150         int64_t elapsed;
  151         uint64_t examined, pass_exam, paused_time, paused_ts, rate;
  152         uint64_t remaining_time;
  153         pool_scan_stat_t *ps = NULL;
  154         double pct_done;
  155         const char *const state[DSS_NUM_STATES] = {
  156             "none", "scanning", "finished", "canceled"};
  157         const char *func;
  158 
  159         (void) nvlist_lookup_uint64_array(nvroot,
  160             ZPOOL_CONFIG_SCAN_STATS,
  161             (uint64_t **)&ps, &c);
  162 
  163         /*
  164          * ignore if there are no stats
  165          */
  166         if (ps == NULL)
  167                 return (0);
  168 
  169         /*
  170          * return error if state is bogus
  171          */
  172         if (ps->pss_state >= DSS_NUM_STATES ||
  173             ps->pss_func >= POOL_SCAN_FUNCS) {
  174                 if (complained_about_sync % 1000 == 0) {
  175                         fprintf(stderr, "error: cannot decode scan stats: "
   176                             "ZFS is out of sync with compiled zpool_influxdb\n");
  177                         complained_about_sync++;
  178                 }
  179                 return (1);
  180         }
  181 
  182         switch (ps->pss_func) {
  183         case POOL_SCAN_NONE:
  184                 func = "none_requested";
  185                 break;
  186         case POOL_SCAN_SCRUB:
  187                 func = "scrub";
  188                 break;
  189         case POOL_SCAN_RESILVER:
  190                 func = "resilver";
  191                 break;
  192 #ifdef POOL_SCAN_REBUILD
  193         case POOL_SCAN_REBUILD:
  194                 func = "rebuild";
  195                 break;
  196 #endif
  197         default:
  198                 func = "scan";
  199         }
  200 
  201         /* overall progress */
  202         examined = ps->pss_examined ? ps->pss_examined : 1;
  203         pct_done = 0.0;
  204         if (ps->pss_to_examine > 0)
  205                 pct_done = 100.0 * examined / ps->pss_to_examine;
  206 
  207 #ifdef EZFS_SCRUB_PAUSED
  208         paused_ts = ps->pss_pass_scrub_pause;
  209         paused_time = ps->pss_pass_scrub_spent_paused;
  210 #else
  211         paused_ts = 0;
  212         paused_time = 0;
  213 #endif
  214 
  215         /* calculations for this pass */
  216         if (ps->pss_state == DSS_SCANNING) {
  217                 elapsed = (int64_t)time(NULL) - (int64_t)ps->pss_pass_start -
  218                     (int64_t)paused_time;
  219                 elapsed = (elapsed > 0) ? elapsed : 1;
  220                 pass_exam = ps->pss_pass_exam ? ps->pss_pass_exam : 1;
  221                 rate = pass_exam / elapsed;
  222                 rate = (rate > 0) ? rate : 1;
   223                 remaining_time = (ps->pss_to_examine - examined) / rate;
  224         } else {
  225                 elapsed =
  226                     (int64_t)ps->pss_end_time - (int64_t)ps->pss_pass_start -
  227                     (int64_t)paused_time;
  228                 elapsed = (elapsed > 0) ? elapsed : 1;
  229                 pass_exam = ps->pss_pass_exam ? ps->pss_pass_exam : 1;
  230                 rate = pass_exam / elapsed;
  231                 remaining_time = 0;
  232         }
  233         rate = rate ? rate : 1;
  234 
  235         /* influxdb line protocol format: "tags metrics timestamp" */
  236         printf("%s%s,function=%s,name=%s,state=%s ",
  237             SCAN_MEASUREMENT, tags, func, pool_name, state[ps->pss_state]);
  238         print_kv("end_ts", ps->pss_end_time);
  239         print_kv(",errors", ps->pss_errors);
  240         print_kv(",examined", examined);
  241         print_kv(",issued", ps->pss_issued);
  242         print_kv(",pass_examined", pass_exam);
  243         print_kv(",pass_issued", ps->pss_pass_issued);
  244         print_kv(",paused_ts", paused_ts);
  245         print_kv(",paused_t", paused_time);
  246         printf(",pct_done=%.2f", pct_done);
  247         print_kv(",processed", ps->pss_processed);
  248         print_kv(",rate", rate);
  249         print_kv(",remaining_t", remaining_time);
  250         print_kv(",start_ts", ps->pss_start_time);
  251         print_kv(",to_examine", ps->pss_to_examine);
  252         print_kv(",to_process", ps->pss_to_process);
  253         printf(" %llu\n", (u_longlong_t)timestamp);
  254         return (0);
  255 }
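       /*
        * Illustrative scan-status line (values are hypothetical):
        *
        *   zpool_scan_stats,function=scrub,name=rpool,state=scanning end_ts=0u,errors=0u,examined=1048576u,...,pct_done=42.00,... 1609459200000000000
        */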
  256 
  257 /*
  258  * get a vdev name that corresponds to the top-level vdev names
  259  * printed by `zpool status`
  260  */
  261 static char *
  262 get_vdev_name(nvlist_t *nvroot, const char *parent_name)
  263 {
  264         static char vdev_name[256];
  265         uint64_t vdev_id = 0;
  266 
  267         char *vdev_type = (char *)"unknown";
  268         (void) nvlist_lookup_string(nvroot, ZPOOL_CONFIG_TYPE, &vdev_type);
  269 
  270         if (nvlist_lookup_uint64(
  271             nvroot, ZPOOL_CONFIG_ID, &vdev_id) != 0)
  272                 vdev_id = UINT64_MAX;
  273 
  274         if (parent_name == NULL) {
  275                 (void) snprintf(vdev_name, sizeof (vdev_name), "%s",
  276                     vdev_type);
  277         } else {
  278                 (void) snprintf(vdev_name, sizeof (vdev_name),
  279                     "%.220s/%s-%llu",
  280                     parent_name, vdev_type, (u_longlong_t)vdev_id);
  281         }
  282         return (vdev_name);
  283 }
  284 
  285 /*
  286  * get a string suitable for an influxdb tag that describes this vdev
  287  *
  288  * By default only the vdev hierarchical name is shown, separated by '/'
  289  * If the vdev has an associated path, which is typical of leaf vdevs,
  290  * then the path is added.
  291  * It would be nice to have the devid instead of the path, but under
  292  * Linux we cannot be sure a devid will exist and we'd rather have
  293  * something than nothing, so we'll use path instead.
  294  */
  295 static char *
  296 get_vdev_desc(nvlist_t *nvroot, const char *parent_name)
  297 {
  298         static char vdev_desc[2 * MAXPATHLEN];
  299         char vdev_value[MAXPATHLEN];
  300         char *s, *t;
  301 
  302         char *vdev_type = (char *)"unknown";
  303         uint64_t vdev_id = UINT64_MAX;
  304         char *vdev_path = NULL;
  305         (void) nvlist_lookup_string(nvroot, ZPOOL_CONFIG_TYPE, &vdev_type);
  306         (void) nvlist_lookup_uint64(nvroot, ZPOOL_CONFIG_ID, &vdev_id);
  307         (void) nvlist_lookup_string(nvroot, ZPOOL_CONFIG_PATH, &vdev_path);
  308 
  309         if (parent_name == NULL) {
  310                 s = escape_string(vdev_type);
  311                 (void) snprintf(vdev_value, sizeof (vdev_value), "vdev=%s", s);
  312                 free(s);
  313         } else {
  314                 s = escape_string((char *)parent_name);
  315                 t = escape_string(vdev_type);
  316                 (void) snprintf(vdev_value, sizeof (vdev_value),
  317                     "vdev=%s/%s-%llu", s, t, (u_longlong_t)vdev_id);
  318                 free(s);
  319                 free(t);
  320         }
  321         if (vdev_path == NULL) {
  322                 (void) snprintf(vdev_desc, sizeof (vdev_desc), "%s",
  323                     vdev_value);
  324         } else {
  325                 s = escape_string(vdev_path);
  326                 (void) snprintf(vdev_desc, sizeof (vdev_desc), "path=%s,%s",
  327                     s, vdev_value);
  328                 free(s);
  329         }
  330         return (vdev_desc);
  331 }
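       /*
        * Example (hypothetical devices): the root vdev is described as
        * "vdev=root", while a leaf disk under a raidz child might be described
        * as "path=/dev/sda1,vdev=root/raidz-0/disk-3".
        */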
  332 
  333 /*
  334  * vdev summary stats are a combination of the data shown by
  335  * `zpool status` and `zpool list -v`
  336  */
  337 static int
  338 print_summary_stats(nvlist_t *nvroot, const char *pool_name,
  339     const char *parent_name)
  340 {
  341         uint_t c;
  342         vdev_stat_t *vs;
  343         char *vdev_desc = NULL;
  344         vdev_desc = get_vdev_desc(nvroot, parent_name);
  345         if (nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
  346             (uint64_t **)&vs, &c) != 0) {
  347                 return (1);
  348         }
  349         printf("%s%s,name=%s,state=%s,%s ", POOL_MEASUREMENT, tags,
  350             pool_name, zpool_state_to_name((vdev_state_t)vs->vs_state,
  351             (vdev_aux_t)vs->vs_aux), vdev_desc);
  352         print_kv("alloc", vs->vs_alloc);
  353         print_kv(",free", vs->vs_space - vs->vs_alloc);
  354         print_kv(",size", vs->vs_space);
  355         print_kv(",read_bytes", vs->vs_bytes[ZIO_TYPE_READ]);
  356         print_kv(",read_errors", vs->vs_read_errors);
  357         print_kv(",read_ops", vs->vs_ops[ZIO_TYPE_READ]);
  358         print_kv(",write_bytes", vs->vs_bytes[ZIO_TYPE_WRITE]);
  359         print_kv(",write_errors", vs->vs_write_errors);
  360         print_kv(",write_ops", vs->vs_ops[ZIO_TYPE_WRITE]);
  361         print_kv(",checksum_errors", vs->vs_checksum_errors);
  362         print_kv(",fragmentation", vs->vs_fragmentation);
  363         printf(" %llu\n", (u_longlong_t)timestamp);
  364         return (0);
  365 }
  366 
  367 /*
  368  * vdev latency stats are histograms stored as nvlist arrays of uint64.
  369  * Latency stats include the ZIO scheduler classes plus lower-level
  370  * vdev latencies.
  371  *
  372  * In many cases, the top-level "root" view obscures the underlying
  373  * top-level vdev operations. For example, if a pool has a log, special,
  374  * or cache device, then each can behave very differently. It is useful
  375  * to see how each is responding.
  376  */
  377 static int
  378 print_vdev_latency_stats(nvlist_t *nvroot, const char *pool_name,
  379     const char *parent_name)
  380 {
  381         uint_t c, end = 0;
  382         nvlist_t *nv_ex;
  383         char *vdev_desc = NULL;
  384 
  385         /* short_names become part of the metric name and are influxdb-ready */
  386         struct lat_lookup {
  387             const char *name;
  388             const char *short_name;
  389             uint64_t sum;
  390             uint64_t *array;
  391         };
  392         struct lat_lookup lat_type[] = {
  393             {ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO,   "total_read", 0},
  394             {ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO,   "total_write", 0},
  395             {ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO,  "disk_read", 0},
  396             {ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO,  "disk_write", 0},
  397             {ZPOOL_CONFIG_VDEV_SYNC_R_LAT_HISTO,  "sync_read", 0},
  398             {ZPOOL_CONFIG_VDEV_SYNC_W_LAT_HISTO,  "sync_write", 0},
  399             {ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO, "async_read", 0},
  400             {ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO, "async_write", 0},
  401             {ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO,   "scrub", 0},
  402 #ifdef ZPOOL_CONFIG_VDEV_TRIM_LAT_HISTO
  403             {ZPOOL_CONFIG_VDEV_TRIM_LAT_HISTO,    "trim", 0},
  404 #endif
  405             {ZPOOL_CONFIG_VDEV_REBUILD_LAT_HISTO,    "rebuild", 0},
  406             {NULL,      NULL}
  407         };
  408 
  409         if (nvlist_lookup_nvlist(nvroot,
  410             ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) {
  411                 return (6);
  412         }
  413 
  414         vdev_desc = get_vdev_desc(nvroot, parent_name);
  415 
  416         for (int i = 0; lat_type[i].name; i++) {
  417                 if (nvlist_lookup_uint64_array(nv_ex,
  418                     lat_type[i].name, &lat_type[i].array, &c) != 0) {
  419                         fprintf(stderr, "error: can't get %s\n",
  420                             lat_type[i].name);
  421                         return (3);
  422                 }
   423                 /* record the end index; all of the arrays are the same size */
  424                 end = c - 1;
  425         }
  426 
  427         for (int bucket = 0; bucket <= end; bucket++) {
  428                 if (bucket < MIN_LAT_INDEX) {
  429                         /* don't print, but collect the sum */
  430                         for (int i = 0; lat_type[i].name; i++) {
  431                                 lat_type[i].sum += lat_type[i].array[bucket];
  432                         }
  433                         continue;
  434                 }
  435                 if (bucket < end) {
  436                         printf("%s%s,le=%0.6f,name=%s,%s ",
  437                             POOL_LATENCY_MEASUREMENT, tags,
  438                             (float)(1ULL << bucket) * 1e-9,
  439                             pool_name, vdev_desc);
  440                 } else {
  441                         printf("%s%s,le=+Inf,name=%s,%s ",
  442                             POOL_LATENCY_MEASUREMENT, tags, pool_name,
  443                             vdev_desc);
  444                 }
  445                 for (int i = 0; lat_type[i].name; i++) {
  446                         if (bucket <= MIN_LAT_INDEX || sum_histogram_buckets) {
  447                                 lat_type[i].sum += lat_type[i].array[bucket];
  448                         } else {
  449                                 lat_type[i].sum = lat_type[i].array[bucket];
  450                         }
  451                         print_kv(lat_type[i].short_name, lat_type[i].sum);
  452                         if (lat_type[i + 1].name != NULL) {
  453                                 printf(",");
  454                         }
  455                 }
  456                 printf(" %llu\n", (u_longlong_t)timestamp);
  457         }
  458         return (0);
  459 }
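       /*
        * Illustrative latency line (values are hypothetical): one line is
        * printed per histogram bucket at or above MIN_LAT_INDEX, with "le"
        * expressed in seconds, e.g.
        *
        *   zpool_latency,le=0.000131,name=rpool,vdev=root total_read=12u,total_write=3u,... 1609459200000000000
        */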
  460 
  461 /*
  462  * vdev request size stats are histograms stored as nvlist arrays of uint64.
  463  * Request size stats include the ZIO scheduler classes plus lower-level
  464  * vdev sizes. Both independent (ind) and aggregated (agg) sizes are reported.
  465  *
  466  * In many cases, the top-level "root" view obscures the underlying
  467  * top-level vdev operations. For example, if a pool has a log, special,
  468  * or cache device, then each can behave very differently. It is useful
  469  * to see how each is responding.
  470  */
  471 static int
  472 print_vdev_size_stats(nvlist_t *nvroot, const char *pool_name,
  473     const char *parent_name)
  474 {
  475         uint_t c, end = 0;
  476         nvlist_t *nv_ex;
  477         char *vdev_desc = NULL;
  478 
  479         /* short_names become the field name */
  480         struct size_lookup {
  481             const char *name;
  482             const char *short_name;
  483             uint64_t sum;
  484             uint64_t *array;
  485         };
  486         struct size_lookup size_type[] = {
  487             {ZPOOL_CONFIG_VDEV_SYNC_IND_R_HISTO,   "sync_read_ind"},
  488             {ZPOOL_CONFIG_VDEV_SYNC_IND_W_HISTO,   "sync_write_ind"},
  489             {ZPOOL_CONFIG_VDEV_ASYNC_IND_R_HISTO,  "async_read_ind"},
  490             {ZPOOL_CONFIG_VDEV_ASYNC_IND_W_HISTO,  "async_write_ind"},
  491             {ZPOOL_CONFIG_VDEV_IND_SCRUB_HISTO,    "scrub_read_ind"},
  492             {ZPOOL_CONFIG_VDEV_SYNC_AGG_R_HISTO,   "sync_read_agg"},
  493             {ZPOOL_CONFIG_VDEV_SYNC_AGG_W_HISTO,   "sync_write_agg"},
  494             {ZPOOL_CONFIG_VDEV_ASYNC_AGG_R_HISTO,  "async_read_agg"},
  495             {ZPOOL_CONFIG_VDEV_ASYNC_AGG_W_HISTO,  "async_write_agg"},
  496             {ZPOOL_CONFIG_VDEV_AGG_SCRUB_HISTO,    "scrub_read_agg"},
  497 #ifdef ZPOOL_CONFIG_VDEV_IND_TRIM_HISTO
  498             {ZPOOL_CONFIG_VDEV_IND_TRIM_HISTO,    "trim_write_ind"},
  499             {ZPOOL_CONFIG_VDEV_AGG_TRIM_HISTO,    "trim_write_agg"},
  500 #endif
  501             {ZPOOL_CONFIG_VDEV_IND_REBUILD_HISTO,    "rebuild_write_ind"},
  502             {ZPOOL_CONFIG_VDEV_AGG_REBUILD_HISTO,    "rebuild_write_agg"},
  503             {NULL,      NULL}
  504         };
  505 
  506         if (nvlist_lookup_nvlist(nvroot,
  507             ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) {
  508                 return (6);
  509         }
  510 
  511         vdev_desc = get_vdev_desc(nvroot, parent_name);
  512 
  513         for (int i = 0; size_type[i].name; i++) {
  514                 if (nvlist_lookup_uint64_array(nv_ex, size_type[i].name,
  515                     &size_type[i].array, &c) != 0) {
  516                         fprintf(stderr, "error: can't get %s\n",
  517                             size_type[i].name);
  518                         return (3);
  519                 }
   520                 /* record the end index; all of the arrays are the same size */
  521                 end = c - 1;
  522         }
  523 
  524         for (int bucket = 0; bucket <= end; bucket++) {
  525                 if (bucket < MIN_SIZE_INDEX) {
  526                         /* don't print, but collect the sum */
  527                         for (int i = 0; size_type[i].name; i++) {
  528                                 size_type[i].sum += size_type[i].array[bucket];
  529                         }
  530                         continue;
  531                 }
  532 
  533                 if (bucket < end) {
  534                         printf("%s%s,le=%llu,name=%s,%s ",
  535                             POOL_IO_SIZE_MEASUREMENT, tags, 1ULL << bucket,
  536                             pool_name, vdev_desc);
  537                 } else {
  538                         printf("%s%s,le=+Inf,name=%s,%s ",
  539                             POOL_IO_SIZE_MEASUREMENT, tags, pool_name,
  540                             vdev_desc);
  541                 }
  542                 for (int i = 0; size_type[i].name; i++) {
  543                         if (bucket <= MIN_SIZE_INDEX || sum_histogram_buckets) {
  544                                 size_type[i].sum += size_type[i].array[bucket];
  545                         } else {
  546                                 size_type[i].sum = size_type[i].array[bucket];
  547                         }
  548                         print_kv(size_type[i].short_name, size_type[i].sum);
  549                         if (size_type[i + 1].name != NULL) {
  550                                 printf(",");
  551                         }
  552                 }
  553                 printf(" %llu\n", (u_longlong_t)timestamp);
  554         }
  555         return (0);
  556 }
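       /*
        * The I/O size lines follow the same pattern as the latency lines
        * above, except "le" is a power-of-two size in bytes (e.g. le=4096)
        * and the fields carry the independent (_ind) and aggregated (_agg)
        * request-size counters.
        */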
  557 
  558 /*
  559  * ZIO scheduler queue stats are stored as gauges. This is unfortunate
  560  * because the values can change very rapidly and any point-in-time
  561  * value will quickly be obsoleted. It is also not easy to downsample.
  562  * Thus only the top-level queue stats might be beneficial... maybe.
  563  */
  564 static int
  565 print_queue_stats(nvlist_t *nvroot, const char *pool_name,
  566     const char *parent_name)
  567 {
  568         nvlist_t *nv_ex;
  569         uint64_t value;
  570 
  571         /* short_names are used for the field name */
  572         struct queue_lookup {
  573             const char *name;
  574             const char *short_name;
  575         };
  576         struct queue_lookup queue_type[] = {
  577             {ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE,     "sync_r_active"},
  578             {ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE,     "sync_w_active"},
  579             {ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE,    "async_r_active"},
  580             {ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE,    "async_w_active"},
  581             {ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE,      "async_scrub_active"},
  582             {ZPOOL_CONFIG_VDEV_REBUILD_ACTIVE_QUEUE,    "rebuild_active"},
  583             {ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE,       "sync_r_pend"},
  584             {ZPOOL_CONFIG_VDEV_SYNC_W_PEND_QUEUE,       "sync_w_pend"},
  585             {ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE,      "async_r_pend"},
  586             {ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE,      "async_w_pend"},
  587             {ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE,        "async_scrub_pend"},
  588             {ZPOOL_CONFIG_VDEV_REBUILD_PEND_QUEUE,      "rebuild_pend"},
  589             {NULL,      NULL}
  590         };
  591 
  592         if (nvlist_lookup_nvlist(nvroot,
  593             ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) {
  594                 return (6);
  595         }
  596 
  597         printf("%s%s,name=%s,%s ", POOL_QUEUE_MEASUREMENT, tags, pool_name,
  598             get_vdev_desc(nvroot, parent_name));
  599         for (int i = 0; queue_type[i].name; i++) {
  600                 if (nvlist_lookup_uint64(nv_ex,
  601                     queue_type[i].name, &value) != 0) {
  602                         fprintf(stderr, "error: can't get %s\n",
  603                             queue_type[i].name);
  604                         return (3);
  605                 }
  606                 print_kv(queue_type[i].short_name, value);
  607                 if (queue_type[i + 1].name != NULL) {
  608                         printf(",");
  609                 }
  610         }
  611         printf(" %llu\n", (u_longlong_t)timestamp);
  612         return (0);
  613 }
  614 
  615 /*
  616  * top-level vdev stats are at the pool level
  617  */
  618 static int
  619 print_top_level_vdev_stats(nvlist_t *nvroot, const char *pool_name)
  620 {
  621         nvlist_t *nv_ex;
  622         uint64_t value;
  623 
  624         /* short_names become part of the metric name */
  625         struct queue_lookup {
  626             const char *name;
  627             const char *short_name;
  628         };
  629         struct queue_lookup queue_type[] = {
  630             {ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE, "sync_r_active_queue"},
  631             {ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE, "sync_w_active_queue"},
  632             {ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE, "async_r_active_queue"},
  633             {ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE, "async_w_active_queue"},
  634             {ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE, "async_scrub_active_queue"},
  635             {ZPOOL_CONFIG_VDEV_REBUILD_ACTIVE_QUEUE, "rebuild_active_queue"},
  636             {ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE, "sync_r_pend_queue"},
  637             {ZPOOL_CONFIG_VDEV_SYNC_W_PEND_QUEUE, "sync_w_pend_queue"},
  638             {ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE, "async_r_pend_queue"},
  639             {ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE, "async_w_pend_queue"},
  640             {ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE, "async_scrub_pend_queue"},
  641             {ZPOOL_CONFIG_VDEV_REBUILD_PEND_QUEUE, "rebuild_pend_queue"},
  642             {NULL, NULL}
  643         };
  644 
  645         if (nvlist_lookup_nvlist(nvroot,
  646             ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) {
  647                 return (6);
  648         }
  649 
  650         printf("%s%s,name=%s,vdev=root ", VDEV_MEASUREMENT, tags,
  651             pool_name);
  652         for (int i = 0; queue_type[i].name; i++) {
  653                 if (nvlist_lookup_uint64(nv_ex,
  654                     queue_type[i].name, &value) != 0) {
  655                         fprintf(stderr, "error: can't get %s\n",
  656                             queue_type[i].name);
  657                         return (3);
  658                 }
  659                 if (i > 0)
  660                         printf(",");
  661                 print_kv(queue_type[i].short_name, value);
  662         }
  663 
  664         printf(" %llu\n", (u_longlong_t)timestamp);
  665         return (0);
  666 }
  667 
  668 /*
  669  * recursive stats printer
  670  */
  671 static int
  672 print_recursive_stats(stat_printer_f func, nvlist_t *nvroot,
  673     const char *pool_name, const char *parent_name, int descend)
  674 {
  675         uint_t c, children;
  676         nvlist_t **child;
  677         char vdev_name[256];
  678         int err;
  679 
  680         err = func(nvroot, pool_name, parent_name);
  681         if (err)
  682                 return (err);
  683 
  684         if (descend && nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
  685             &child, &children) == 0) {
  686                 (void) strlcpy(vdev_name, get_vdev_name(nvroot, parent_name),
  687                     sizeof (vdev_name));
  688 
  689                 for (c = 0; c < children; c++) {
  690                         err = print_recursive_stats(func, child[c], pool_name,
  691                             vdev_name, descend);
  692                         if (err)
  693                                 return (err);
  694                 }
  695         }
  696         return (0);
  697 }
  698 
  699 /*
  700  * call-back to print the stats from the pool config
  701  *
  702  * Note: if the pool is broken, this can hang indefinitely and perhaps in an
  703  * unkillable state.
  704  */
  705 static int
  706 print_stats(zpool_handle_t *zhp, void *data)
  707 {
  708         uint_t c;
  709         int err;
  710         boolean_t missing;
  711         nvlist_t *config, *nvroot;
  712         vdev_stat_t *vs;
  713         struct timespec tv;
  714         char *pool_name;
  715 
  716         /* if not this pool return quickly */
  717         if (data &&
  718             strncmp(data, zpool_get_name(zhp), ZFS_MAX_DATASET_NAME_LEN) != 0) {
  719                 zpool_close(zhp);
  720                 return (0);
  721         }
  722 
  723         if (zpool_refresh_stats(zhp, &missing) != 0) {
  724                 zpool_close(zhp);
  725                 return (1);
  726         }
  727 
  728         config = zpool_get_config(zhp, NULL);
  729         if (clock_gettime(CLOCK_REALTIME, &tv) != 0)
  730                 timestamp = (uint64_t)time(NULL) * 1000000000;
  731         else
  732                 timestamp =
  733                     ((uint64_t)tv.tv_sec * 1000000000) + (uint64_t)tv.tv_nsec;
  734 
  735         if (nvlist_lookup_nvlist(
  736             config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) != 0) {
   737                 zpool_close(zhp);
  738                 return (2);
  739         }
  740         if (nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
  741             (uint64_t **)&vs, &c) != 0) {
   742                 zpool_close(zhp);
  743                 return (3);
  744         }
  745 
  746         pool_name = escape_string(zpool_get_name(zhp));
  747         err = print_recursive_stats(print_summary_stats, nvroot,
  748             pool_name, NULL, 1);
  749         /* if any of these return an error, skip the rest */
  750         if (err == 0)
   751                 err = print_top_level_vdev_stats(nvroot, pool_name);
  752 
  753         if (no_histograms == 0) {
   754                 if (err == 0)
   755                         err = print_recursive_stats(print_vdev_latency_stats,
   756                             nvroot, pool_name, NULL, 1);
   757                 if (err == 0)
   758                         err = print_recursive_stats(print_vdev_size_stats,
   759                             nvroot, pool_name, NULL, 1);
   760                 if (err == 0)
   761                         err = print_recursive_stats(print_queue_stats,
   762                             nvroot, pool_name, NULL, 0);
  763         }
  764         if (err == 0)
  765                 err = print_scan_status(nvroot, pool_name);
  766 
  767         free(pool_name);
  768         zpool_close(zhp);
  769         return (err);
  770 }
  771 
  772 static void
  773 usage(char *name)
  774 {
   775         fprintf(stderr, "usage: %s [--execd] [--no-histograms] "
   776             "[--sum-histogram-buckets] [--signed-int] [poolname]\n", name);
  777         exit(EXIT_FAILURE);
  778 }
  779 
  780 int
  781 main(int argc, char *argv[])
  782 {
  783         int opt;
  784         int ret = 8;
  785         char *line = NULL, *ttags = NULL;
   786         size_t len = 0, tagslen = 0;
  787         struct option long_options[] = {
  788             {"execd", no_argument, NULL, 'e'},
  789             {"help", no_argument, NULL, 'h'},
  790             {"no-histograms", no_argument, NULL, 'n'},
  791             {"signed-int", no_argument, NULL, 'i'},
  792             {"sum-histogram-buckets", no_argument, NULL, 's'},
  793             {"tags", required_argument, NULL, 't'},
  794             {0, 0, 0, 0}
  795         };
  796         while ((opt = getopt_long(
  797             argc, argv, "ehinst:", long_options, NULL)) != -1) {
  798                 switch (opt) {
  799                 case 'e':
  800                         execd_mode = 1;
  801                         break;
  802                 case 'i':
  803                         metric_data_type = 'i';
  804                         metric_value_mask = INT64_MAX;
  805                         break;
  806                 case 'n':
  807                         no_histograms = 1;
  808                         break;
  809                 case 's':
  810                         sum_histogram_buckets = 1;
  811                         break;
  812                 case 't':
  813                         free(ttags);
  814                         tagslen = strlen(optarg) + 2;
  815                         ttags = calloc(1, tagslen);
  816                         if (ttags == NULL) {
  817                                 fprintf(stderr,
  818                                     "error: cannot allocate memory "
  819                                     "for tags\n");
  820                                 exit(1);
  821                         }
  822                         (void) snprintf(ttags, tagslen, ",%s", optarg);
  823                         tags = ttags;
  824                         break;
  825                 default:
  826                         usage(argv[0]);
  827                 }
  828         }
  829 
  830         libzfs_handle_t *g_zfs;
  831         if ((g_zfs = libzfs_init()) == NULL) {
  832                 fprintf(stderr,
  833                     "error: cannot initialize libzfs. "
  834                     "Is the zfs module loaded or zrepl running?\n");
  835                 exit(EXIT_FAILURE);
  836         }
  837         if (execd_mode == 0) {
  838                 ret = zpool_iter(g_zfs, print_stats, argv[optind]);
  839                 return (ret);
  840         }
  841         while (getline(&line, &len, stdin) != -1) {
  842                 ret = zpool_iter(g_zfs, print_stats, argv[optind]);
  843                 fflush(stdout);
  844         }
  845         return (ret);
  846 }
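       /*
        * Usage sketch for --execd mode (paths and configuration values are
        * assumptions): each newline received on stdin triggers one sample,
        * which matches telegraf's `inputs.execd` plugin configured with
        * signal = "STDIN", for example:
        *
        *   [[inputs.execd]]
        *     command = ["/usr/libexec/zfs/zpool_influxdb", "--execd"]
        *     signal = "STDIN"
        *     data_format = "influx"
        */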
