The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/subr_stats.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2014-2018 Netflix, Inc.
    3  * All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  *
   14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   17  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   24  * SUCH DAMAGE.
   25  *
   26  * $FreeBSD$
   27  */
   28 
   29 /*
   30  * Author: Lawrence Stewart <lstewart@netflix.com>
   31  */
   32 
   33 #include <sys/cdefs.h>
   34 __FBSDID("$FreeBSD$");
   35 
   36 #include <sys/param.h>
   37 #include <sys/arb.h>
   38 #include <sys/ctype.h>
   39 #include <sys/errno.h>
   40 #include <sys/hash.h>
   41 #include <sys/limits.h>
   42 #include <sys/malloc.h>
   43 #include <sys/qmath.h>
   44 #include <sys/sbuf.h>
   45 #if defined(DIAGNOSTIC)
   46 #include <sys/tree.h>
   47 #endif
   48 #include <sys/stats.h> /* Must come after qmath.h and arb.h */
   49 #include <sys/stddef.h>
   50 #include <sys/stdint.h>
   51 #include <sys/time.h>
   52 
   53 #ifdef _KERNEL
   54 #include <sys/kernel.h>
   55 #include <sys/lock.h>
   56 #include <sys/rwlock.h>
   57 #include <sys/sysctl.h>
   58 #include <sys/systm.h>
   59 #else /* ! _KERNEL */
   60 #include <pthread.h>
   61 #include <stdbool.h>
   62 #include <stdio.h>
   63 #include <stdlib.h>
   64 #include <string.h>
   65 #endif /* _KERNEL */
   66 
   67 struct voistatdata_voistate {
   68         /* Previous VOI value for diff calculation. */
   69         struct voistatdata_numeric prev;
   70 };
   71 
   72 #define VS_VSDVALID     0x0001  /* Stat's voistatdata updated at least once. */
   73 struct voistat {
   74         int8_t          stype;          /* Type of stat e.g. VS_STYPE_SUM. */
   75         enum vsd_dtype  dtype : 8;      /* Data type of this stat's data. */
   76         uint16_t        data_off;       /* Blob offset for this stat's data. */
   77         uint16_t        dsz;            /* Size of stat's data. */
   78 #define VS_EBITS 8
   79         uint16_t        errs : VS_EBITS;/* Non-wrapping error count. */
   80         uint16_t        flags : 16 - VS_EBITS;
   81 };
   82 /* The voistat error count is capped to avoid wrapping. */
   83 #define VS_INCERRS(vs) do {                                             \
   84         if ((vs)->errs < (1U << VS_EBITS) - 1)                          \
   85                 (vs)->errs++;                                           \
   86 } while (0)
   87 
   88 /*
   89  * Ideas for flags:
   90  *   - Global or entity specific (global would imply use of counter(9)?)
   91  *   - Whether to reset stats on read or not
   92  *   - Signal an overflow?
   93  *   - Compressed voistat array
   94  */
   95 #define VOI_REQSTATE    0x0001  /* VOI requires VS_STYPE_VOISTATE. */
   96 struct voi {
   97         int16_t         id;             /* VOI id. */
   98         enum vsd_dtype  dtype : 8;      /* Data type of the VOI itself. */
   99         int8_t          voistatmaxid;   /* Largest allocated voistat index. */
  100         uint16_t        stats_off;      /* Blob offset for this VOIs stats. */
  101         uint16_t        flags;
  102 };
  103 
  104 /*
  105  * Memory for the entire blob is allocated as a slab and then offsets are
  106  * maintained to carve up the slab into sections holding different data types.
  107  *
  108  * Ideas for flags:
  109  * - Compressed voi array (trade off memory usage vs search time)
  110  * - Units of offsets (default bytes, flag for e.g. vm_page/KiB/Mib)
  111  */
  112 struct statsblobv1 {
  113         uint8_t         abi;
  114         uint8_t         endian;
  115         uint16_t        flags;
  116         uint16_t        maxsz;
  117         uint16_t        cursz;
  118         /* Fields from here down are opaque to consumers. */
  119         uint32_t        tplhash;        /* Base template hash ID. */
  120         uint16_t        stats_off;      /* voistat array blob offset. */
  121         uint16_t        statsdata_off;  /* voistatdata array blob offset. */
  122         sbintime_t      created;        /* Blob creation time. */
  123         sbintime_t      lastrst;        /* Time of last reset. */
  124         struct voi      vois[];         /* Array indexed by [voi_id]. */
  125 } __aligned(sizeof(void *));
  126 _Static_assert(offsetof(struct statsblobv1, cursz) +
  127     SIZEOF_MEMBER(struct statsblobv1, cursz) ==
  128     offsetof(struct statsblob, opaque),
  129     "statsblobv1 ABI mismatch");
  130 
  131 struct statsblobv1_tpl {
  132         struct metablob         *mb;
  133         struct statsblobv1      *sb;
  134 };
  135 
  136 /* Context passed to iterator callbacks. */
  137 struct sb_iter_ctx {
  138         void            *usrctx;        /* Caller supplied context. */
  139         uint32_t        flags;          /* Flags for current iteration. */
  140         int16_t         vslot;          /* struct voi slot index. */
  141         int8_t          vsslot;         /* struct voistat slot index. */
  142 };
  143 
  144 struct sb_tostrcb_ctx {
  145         struct sbuf             *buf;
  146         struct statsblob_tpl    *tpl;
  147         enum sb_str_fmt fmt;
  148         uint32_t                flags;
  149 };
  150 
  151 struct sb_visitcb_ctx {
  152         stats_blob_visitcb_t    cb;
  153         void                    *usrctx;
  154 };
  155 
  156 /* Stats blob iterator callback. */
  157 typedef int (*stats_v1_blob_itercb_t)(struct statsblobv1 *sb, struct voi *v,
  158     struct voistat *vs, struct sb_iter_ctx *ctx);
  159 
  160 #ifdef _KERNEL
  161 static struct rwlock tpllistlock;
  162 RW_SYSINIT(stats_tpl_list, &tpllistlock, "Stat template list lock");
  163 #define TPL_LIST_RLOCK() rw_rlock(&tpllistlock)
  164 #define TPL_LIST_RUNLOCK() rw_runlock(&tpllistlock)
  165 #define TPL_LIST_WLOCK() rw_wlock(&tpllistlock)
  166 #define TPL_LIST_WUNLOCK() rw_wunlock(&tpllistlock)
  167 #define TPL_LIST_LOCK_ASSERT() rw_assert(&tpllistlock, RA_LOCKED)
  168 #define TPL_LIST_RLOCK_ASSERT() rw_assert(&tpllistlock, RA_RLOCKED)
  169 #define TPL_LIST_WLOCK_ASSERT() rw_assert(&tpllistlock, RA_WLOCKED)
  170 MALLOC_DEFINE(M_STATS, "stats(9) related memory", "stats(9) related memory");
  171 #define stats_free(ptr) free((ptr), M_STATS)
  172 #else /* ! _KERNEL */
  173 static void stats_constructor(void);
  174 static void stats_destructor(void);
  175 static pthread_rwlock_t tpllistlock;
  176 #define TPL_LIST_UNLOCK() pthread_rwlock_unlock(&tpllistlock)
  177 #define TPL_LIST_RLOCK() pthread_rwlock_rdlock(&tpllistlock)
  178 #define TPL_LIST_RUNLOCK() TPL_LIST_UNLOCK()
  179 #define TPL_LIST_WLOCK() pthread_rwlock_wrlock(&tpllistlock)
  180 #define TPL_LIST_WUNLOCK() TPL_LIST_UNLOCK()
  181 #define TPL_LIST_LOCK_ASSERT() do { } while (0)
  182 #define TPL_LIST_RLOCK_ASSERT() do { } while (0)
  183 #define TPL_LIST_WLOCK_ASSERT() do { } while (0)
  184 #ifdef NDEBUG
  185 #define KASSERT(cond, msg) do {} while (0)
  186 #define stats_abort() do {} while (0)
  187 #else /* ! NDEBUG */
  188 #define KASSERT(cond, msg) do { \
  189         if (!(cond)) { \
  190                 panic msg; \
  191         } \
  192 } while (0)
  193 #define stats_abort() abort()
  194 #endif /* NDEBUG */
  195 #define stats_free(ptr) free(ptr)
  196 #define panic(fmt, ...) do { \
  197         fprintf(stderr, (fmt), ##__VA_ARGS__); \
  198         stats_abort(); \
  199 } while (0)
  200 #endif /* _KERNEL */
  201 
  202 #define SB_V1_MAXSZ 65535
  203 
  204 /* Obtain a blob offset pointer. */
  205 #define BLOB_OFFSET(sb, off) ((void *)(((uint8_t *)(sb)) + (off)))
  206 
  207 /*
  208  * Number of VOIs in the blob's vois[] array. By virtue of struct voi being a
  209  * power of 2 size, we can shift instead of divide. The shift amount must be
  210  * updated if sizeof(struct voi) ever changes, which the assert should catch.
  211  */
  212 #define NVOIS(sb) ((int32_t)((((struct statsblobv1 *)(sb))->stats_off - \
  213     sizeof(struct statsblobv1)) >> 3))
  214 _Static_assert(sizeof(struct voi) == 8, "statsblobv1 voi ABI mismatch");
  215 
  216 /* Try restrict names to alphanumeric and underscore to simplify JSON compat. */
  217 const char *vs_stype2name[VS_NUM_STYPES] = {
  218         [VS_STYPE_VOISTATE] = "VOISTATE",
  219         [VS_STYPE_SUM] = "SUM",
  220         [VS_STYPE_MAX] = "MAX",
  221         [VS_STYPE_MIN] = "MIN",
  222         [VS_STYPE_HIST] = "HIST",
  223         [VS_STYPE_TDGST] = "TDGST",
  224 };
  225 
  226 const char *vs_stype2desc[VS_NUM_STYPES] = {
  227         [VS_STYPE_VOISTATE] = "VOI related state data (not a real stat)",
  228         [VS_STYPE_SUM] = "Simple arithmetic accumulator",
  229         [VS_STYPE_MAX] = "Maximum observed VOI value",
  230         [VS_STYPE_MIN] = "Minimum observed VOI value",
  231         [VS_STYPE_HIST] = "Histogram of observed VOI values",
  232         [VS_STYPE_TDGST] = "t-digest of observed VOI values",
  233 };
  234 
  235 const char *vsd_dtype2name[VSD_NUM_DTYPES] = {
  236         [VSD_DTYPE_VOISTATE] = "VOISTATE",
  237         [VSD_DTYPE_INT_S32] = "INT_S32",
  238         [VSD_DTYPE_INT_U32] = "INT_U32",
  239         [VSD_DTYPE_INT_S64] = "INT_S64",
  240         [VSD_DTYPE_INT_U64] = "INT_U64",
  241         [VSD_DTYPE_INT_SLONG] = "INT_SLONG",
  242         [VSD_DTYPE_INT_ULONG] = "INT_ULONG",
  243         [VSD_DTYPE_Q_S32] = "Q_S32",
  244         [VSD_DTYPE_Q_U32] = "Q_U32",
  245         [VSD_DTYPE_Q_S64] = "Q_S64",
  246         [VSD_DTYPE_Q_U64] = "Q_U64",
  247         [VSD_DTYPE_CRHIST32] = "CRHIST32",
  248         [VSD_DTYPE_DRHIST32] = "DRHIST32",
  249         [VSD_DTYPE_DVHIST32] = "DVHIST32",
  250         [VSD_DTYPE_CRHIST64] = "CRHIST64",
  251         [VSD_DTYPE_DRHIST64] = "DRHIST64",
  252         [VSD_DTYPE_DVHIST64] = "DVHIST64",
  253         [VSD_DTYPE_TDGSTCLUST32] = "TDGSTCLUST32",
  254         [VSD_DTYPE_TDGSTCLUST64] = "TDGSTCLUST64",
  255 };
  256 
  257 const size_t vsd_dtype2size[VSD_NUM_DTYPES] = {
  258         [VSD_DTYPE_VOISTATE] = sizeof(struct voistatdata_voistate),
  259         [VSD_DTYPE_INT_S32] = sizeof(struct voistatdata_int32),
  260         [VSD_DTYPE_INT_U32] = sizeof(struct voistatdata_int32),
  261         [VSD_DTYPE_INT_S64] = sizeof(struct voistatdata_int64),
  262         [VSD_DTYPE_INT_U64] = sizeof(struct voistatdata_int64),
  263         [VSD_DTYPE_INT_SLONG] = sizeof(struct voistatdata_intlong),
  264         [VSD_DTYPE_INT_ULONG] = sizeof(struct voistatdata_intlong),
  265         [VSD_DTYPE_Q_S32] = sizeof(struct voistatdata_q32),
  266         [VSD_DTYPE_Q_U32] = sizeof(struct voistatdata_q32),
  267         [VSD_DTYPE_Q_S64] = sizeof(struct voistatdata_q64),
  268         [VSD_DTYPE_Q_U64] = sizeof(struct voistatdata_q64),
  269         [VSD_DTYPE_CRHIST32] = sizeof(struct voistatdata_crhist32),
  270         [VSD_DTYPE_DRHIST32] = sizeof(struct voistatdata_drhist32),
  271         [VSD_DTYPE_DVHIST32] = sizeof(struct voistatdata_dvhist32),
  272         [VSD_DTYPE_CRHIST64] = sizeof(struct voistatdata_crhist64),
  273         [VSD_DTYPE_DRHIST64] = sizeof(struct voistatdata_drhist64),
  274         [VSD_DTYPE_DVHIST64] = sizeof(struct voistatdata_dvhist64),
  275         [VSD_DTYPE_TDGSTCLUST32] = sizeof(struct voistatdata_tdgstclust32),
  276         [VSD_DTYPE_TDGSTCLUST64] = sizeof(struct voistatdata_tdgstclust64),
  277 };
  278 
  279 static const bool vsd_compoundtype[VSD_NUM_DTYPES] = {
  280         [VSD_DTYPE_VOISTATE] = true,
  281         [VSD_DTYPE_INT_S32] = false,
  282         [VSD_DTYPE_INT_U32] = false,
  283         [VSD_DTYPE_INT_S64] = false,
  284         [VSD_DTYPE_INT_U64] = false,
  285         [VSD_DTYPE_INT_SLONG] = false,
  286         [VSD_DTYPE_INT_ULONG] = false,
  287         [VSD_DTYPE_Q_S32] = false,
  288         [VSD_DTYPE_Q_U32] = false,
  289         [VSD_DTYPE_Q_S64] = false,
  290         [VSD_DTYPE_Q_U64] = false,
  291         [VSD_DTYPE_CRHIST32] = true,
  292         [VSD_DTYPE_DRHIST32] = true,
  293         [VSD_DTYPE_DVHIST32] = true,
  294         [VSD_DTYPE_CRHIST64] = true,
  295         [VSD_DTYPE_DRHIST64] = true,
  296         [VSD_DTYPE_DVHIST64] = true,
  297         [VSD_DTYPE_TDGSTCLUST32] = true,
  298         [VSD_DTYPE_TDGSTCLUST64] = true,
  299 };
  300 
  301 const struct voistatdata_numeric numeric_limits[2][VSD_DTYPE_Q_U64 + 1] = {
  302         [LIM_MIN] = {
  303                 [VSD_DTYPE_VOISTATE] = {0},
  304                 [VSD_DTYPE_INT_S32] = {.int32 = {.s32 = INT32_MIN}},
  305                 [VSD_DTYPE_INT_U32] = {.int32 = {.u32 = 0}},
  306                 [VSD_DTYPE_INT_S64] = {.int64 = {.s64 = INT64_MIN}},
  307                 [VSD_DTYPE_INT_U64] = {.int64 = {.u64 = 0}},
  308                 [VSD_DTYPE_INT_SLONG] = {.intlong = {.slong = LONG_MIN}},
  309                 [VSD_DTYPE_INT_ULONG] = {.intlong = {.ulong = 0}},
  310                 [VSD_DTYPE_Q_S32] = {.q32 = {.sq32 = Q_IFMINVAL(INT32_MIN)}},
  311                 [VSD_DTYPE_Q_U32] = {.q32 = {.uq32 = 0}},
  312                 [VSD_DTYPE_Q_S64] = {.q64 = {.sq64 = Q_IFMINVAL(INT64_MIN)}},
  313                 [VSD_DTYPE_Q_U64] = {.q64 = {.uq64 = 0}},
  314         },
  315         [LIM_MAX] = {
  316                 [VSD_DTYPE_VOISTATE] = {0},
  317                 [VSD_DTYPE_INT_S32] = {.int32 = {.s32 = INT32_MAX}},
  318                 [VSD_DTYPE_INT_U32] = {.int32 = {.u32 = UINT32_MAX}},
  319                 [VSD_DTYPE_INT_S64] = {.int64 = {.s64 = INT64_MAX}},
  320                 [VSD_DTYPE_INT_U64] = {.int64 = {.u64 = UINT64_MAX}},
  321                 [VSD_DTYPE_INT_SLONG] = {.intlong = {.slong = LONG_MAX}},
  322                 [VSD_DTYPE_INT_ULONG] = {.intlong = {.ulong = ULONG_MAX}},
  323                 [VSD_DTYPE_Q_S32] = {.q32 = {.sq32 = Q_IFMAXVAL(INT32_MAX)}},
  324                 [VSD_DTYPE_Q_U32] = {.q32 = {.uq32 = Q_IFMAXVAL(UINT32_MAX)}},
  325                 [VSD_DTYPE_Q_S64] = {.q64 = {.sq64 = Q_IFMAXVAL(INT64_MAX)}},
  326                 [VSD_DTYPE_Q_U64] = {.q64 = {.uq64 = Q_IFMAXVAL(UINT64_MAX)}},
  327         }
  328 };
  329 
  330 /* tpllistlock protects tpllist and ntpl */
  331 static uint32_t ntpl;
  332 static struct statsblob_tpl **tpllist;
  333 
  334 static inline void * stats_realloc(void *ptr, size_t oldsz, size_t newsz,
  335     int flags);
  336 //static void stats_v1_blob_finalise(struct statsblobv1 *sb);
  337 static int stats_v1_blob_init_locked(struct statsblobv1 *sb, uint32_t tpl_id,
  338     uint32_t flags);
  339 static int stats_v1_blob_expand(struct statsblobv1 **sbpp, int newvoibytes,
  340     int newvoistatbytes, int newvoistatdatabytes);
  341 static void stats_v1_blob_iter(struct statsblobv1 *sb,
  342     stats_v1_blob_itercb_t icb, void *usrctx, uint32_t flags);
  343 static inline int stats_v1_vsd_tdgst_add(enum vsd_dtype vs_dtype,
  344     struct voistatdata_tdgst *tdgst, s64q_t x, uint64_t weight, int attempt);
  345 
  346 static inline int
  347 ctd32cmp(const struct voistatdata_tdgstctd32 *c1, const struct voistatdata_tdgstctd32 *c2)
  348 {
  349 
  350         KASSERT(Q_PRECEQ(c1->mu, c2->mu),
  351             ("%s: Q_RELPREC(c1->mu,c2->mu)=%d", __func__,
  352             Q_RELPREC(c1->mu, c2->mu)));
  353 
  354        return (Q_QLTQ(c1->mu, c2->mu) ? -1 : 1);
  355 }
  356 ARB_GENERATE_STATIC(ctdth32, voistatdata_tdgstctd32, ctdlnk, ctd32cmp);
  357 
  358 static inline int
  359 ctd64cmp(const struct voistatdata_tdgstctd64 *c1, const struct voistatdata_tdgstctd64 *c2)
  360 {
  361 
  362         KASSERT(Q_PRECEQ(c1->mu, c2->mu),
  363             ("%s: Q_RELPREC(c1->mu,c2->mu)=%d", __func__,
  364             Q_RELPREC(c1->mu, c2->mu)));
  365 
  366        return (Q_QLTQ(c1->mu, c2->mu) ? -1 : 1);
  367 }
  368 ARB_GENERATE_STATIC(ctdth64, voistatdata_tdgstctd64, ctdlnk, ctd64cmp);
  369 
  370 #ifdef DIAGNOSTIC
  371 RB_GENERATE_STATIC(rbctdth32, voistatdata_tdgstctd32, rblnk, ctd32cmp);
  372 RB_GENERATE_STATIC(rbctdth64, voistatdata_tdgstctd64, rblnk, ctd64cmp);
  373 #endif
  374 
  375 static inline sbintime_t
  376 stats_sbinuptime(void)
  377 {
  378         sbintime_t sbt;
  379 #ifdef _KERNEL
  380 
  381         sbt = sbinuptime();
  382 #else /* ! _KERNEL */
  383         struct timespec tp;
  384 
  385         clock_gettime(CLOCK_MONOTONIC_FAST, &tp);
  386         sbt = tstosbt(tp);
  387 #endif /* _KERNEL */
  388 
  389         return (sbt);
  390 }
  391 
  392 static inline void *
  393 stats_realloc(void *ptr, size_t oldsz, size_t newsz, int flags)
  394 {
  395 
  396 #ifdef _KERNEL
  397         /* Default to M_NOWAIT if neither M_NOWAIT or M_WAITOK are set. */
  398         if (!(flags & (M_WAITOK | M_NOWAIT)))
  399                 flags |= M_NOWAIT;
  400         ptr = realloc(ptr, newsz, M_STATS, flags);
  401 #else /* ! _KERNEL */
  402         ptr = realloc(ptr, newsz);
  403         if ((flags & M_ZERO) && ptr != NULL) {
  404                 if (oldsz == 0)
  405                         memset(ptr, '\0', newsz);
  406                 else if (newsz > oldsz)
  407                         memset(BLOB_OFFSET(ptr, oldsz), '\0', newsz - oldsz);
  408         }
  409 #endif /* _KERNEL */
  410 
  411         return (ptr);
  412 }
  413 
  414 static inline char *
  415 stats_strdup(const char *s,
  416 #ifdef _KERNEL
  417     int flags)
  418 {
  419         char *copy;
  420         size_t len;
  421 
  422         if (!(flags & (M_WAITOK | M_NOWAIT)))
  423                 flags |= M_NOWAIT;
  424 
  425         len = strlen(s) + 1;
  426         if ((copy = malloc(len, M_STATS, flags)) != NULL)
  427                 bcopy(s, copy, len);
  428 
  429         return (copy);
  430 #else
  431     int flags __unused)
  432 {
  433         return (strdup(s));
  434 #endif
  435 }
  436 
  437 static inline void
  438 stats_tpl_update_hash(struct statsblob_tpl *tpl)
  439 {
  440 
  441         TPL_LIST_WLOCK_ASSERT();
  442         tpl->mb->tplhash = hash32_str(tpl->mb->tplname, 0);
  443         for (int voi_id = 0; voi_id < NVOIS(tpl->sb); voi_id++) {
  444                 if (tpl->mb->voi_meta[voi_id].name != NULL)
  445                         tpl->mb->tplhash = hash32_str(
  446                             tpl->mb->voi_meta[voi_id].name, tpl->mb->tplhash);
  447         }
  448         tpl->mb->tplhash = hash32_buf(tpl->sb, tpl->sb->cursz,
  449             tpl->mb->tplhash);
  450 }
  451 
  452 static inline uint64_t
  453 stats_pow_u64(uint64_t base, uint64_t exp)
  454 {
  455         uint64_t result = 1;
  456 
  457         while (exp) {
  458                 if (exp & 1)
  459                         result *= base;
  460                 exp >>= 1;
  461                 base *= base;
  462         }
  463 
  464         return (result);
  465 }
  466 
  467 static inline int
  468 stats_vss_hist_bkt_hlpr(struct vss_hist_hlpr_info *info, uint32_t curbkt,
  469     struct voistatdata_numeric *bkt_lb, struct voistatdata_numeric *bkt_ub)
  470 {
  471         uint64_t step = 0;
  472         int error = 0;
  473 
  474         switch (info->scheme) {
  475         case BKT_LIN:
  476                 step = info->lin.stepinc;
  477                 break;
  478         case BKT_EXP:
  479                 step = stats_pow_u64(info->exp.stepbase,
  480                     info->exp.stepexp + curbkt);
  481                 break;
  482         case BKT_LINEXP:
  483                 {
  484                 uint64_t curstepexp = 1;
  485 
  486                 switch (info->voi_dtype) {
  487                 case VSD_DTYPE_INT_S32:
  488                         while ((int32_t)stats_pow_u64(info->linexp.stepbase,
  489                             curstepexp) <= bkt_lb->int32.s32)
  490                                 curstepexp++;
  491                         break;
  492                 case VSD_DTYPE_INT_U32:
  493                         while ((uint32_t)stats_pow_u64(info->linexp.stepbase,
  494                             curstepexp) <= bkt_lb->int32.u32)
  495                                 curstepexp++;
  496                         break;
  497                 case VSD_DTYPE_INT_S64:
  498                         while ((int64_t)stats_pow_u64(info->linexp.stepbase,
  499                             curstepexp) <= bkt_lb->int64.s64)
  500                                 curstepexp++;
  501                         break;
  502                 case VSD_DTYPE_INT_U64:
  503                         while ((uint64_t)stats_pow_u64(info->linexp.stepbase,
  504                             curstepexp) <= bkt_lb->int64.u64)
  505                                 curstepexp++;
  506                         break;
  507                 case VSD_DTYPE_INT_SLONG:
  508                         while ((long)stats_pow_u64(info->linexp.stepbase,
  509                             curstepexp) <= bkt_lb->intlong.slong)
  510                                 curstepexp++;
  511                         break;
  512                 case VSD_DTYPE_INT_ULONG:
  513                         while ((unsigned long)stats_pow_u64(info->linexp.stepbase,
  514                             curstepexp) <= bkt_lb->intlong.ulong)
  515                                 curstepexp++;
  516                         break;
  517                 case VSD_DTYPE_Q_S32:
  518                         while ((s32q_t)stats_pow_u64(info->linexp.stepbase,
  519                             curstepexp) <= Q_GIVAL(bkt_lb->q32.sq32))
  520                         break;
  521                 case VSD_DTYPE_Q_U32:
  522                         while ((u32q_t)stats_pow_u64(info->linexp.stepbase,
  523                             curstepexp) <= Q_GIVAL(bkt_lb->q32.uq32))
  524                         break;
  525                 case VSD_DTYPE_Q_S64:
  526                         while ((s64q_t)stats_pow_u64(info->linexp.stepbase,
  527                             curstepexp) <= Q_GIVAL(bkt_lb->q64.sq64))
  528                                 curstepexp++;
  529                         break;
  530                 case VSD_DTYPE_Q_U64:
  531                         while ((u64q_t)stats_pow_u64(info->linexp.stepbase,
  532                             curstepexp) <= Q_GIVAL(bkt_lb->q64.uq64))
  533                                 curstepexp++;
  534                         break;
  535                 default:
  536                         break;
  537                 }
  538 
  539                 step = stats_pow_u64(info->linexp.stepbase, curstepexp) /
  540                     info->linexp.linstepdiv;
  541                 if (step == 0)
  542                         step = 1;
  543                 break;
  544                 }
  545         default:
  546                 break;
  547         }
  548 
  549         if (info->scheme == BKT_USR) {
  550                 *bkt_lb = info->usr.bkts[curbkt].lb;
  551                 *bkt_ub = info->usr.bkts[curbkt].ub;
  552         } else if (step != 0) {
  553                 switch (info->voi_dtype) {
  554                 case VSD_DTYPE_INT_S32:
  555                         bkt_ub->int32.s32 += (int32_t)step;
  556                         break;
  557                 case VSD_DTYPE_INT_U32:
  558                         bkt_ub->int32.u32 += (uint32_t)step;
  559                         break;
  560                 case VSD_DTYPE_INT_S64:
  561                         bkt_ub->int64.s64 += (int64_t)step;
  562                         break;
  563                 case VSD_DTYPE_INT_U64:
  564                         bkt_ub->int64.u64 += (uint64_t)step;
  565                         break;
  566                 case VSD_DTYPE_INT_SLONG:
  567                         bkt_ub->intlong.slong += (long)step;
  568                         break;
  569                 case VSD_DTYPE_INT_ULONG:
  570                         bkt_ub->intlong.ulong += (unsigned long)step;
  571                         break;
  572                 case VSD_DTYPE_Q_S32:
  573                         error = Q_QADDI(&bkt_ub->q32.sq32, step);
  574                         break;
  575                 case VSD_DTYPE_Q_U32:
  576                         error = Q_QADDI(&bkt_ub->q32.uq32, step);
  577                         break;
  578                 case VSD_DTYPE_Q_S64:
  579                         error = Q_QADDI(&bkt_ub->q64.sq64, step);
  580                         break;
  581                 case VSD_DTYPE_Q_U64:
  582                         error = Q_QADDI(&bkt_ub->q64.uq64, step);
  583                         break;
  584                 default:
  585                         break;
  586                 }
  587         } else { /* info->scheme != BKT_USR && step == 0 */
  588                 return (EINVAL);
  589         }
  590 
  591         return (error);
  592 }
  593 
  594 static uint32_t
  595 stats_vss_hist_nbkts_hlpr(struct vss_hist_hlpr_info *info)
  596 {
  597         struct voistatdata_numeric bkt_lb, bkt_ub;
  598         uint32_t nbkts;
  599         int done;
  600 
  601         if (info->scheme == BKT_USR) {
  602                 /* XXXLAS: Setting info->{lb,ub} from macro is tricky. */
  603                 info->lb = info->usr.bkts[0].lb;
  604                 info->ub = info->usr.bkts[info->usr.nbkts - 1].lb;
  605         }
  606 
  607         nbkts = 0;
  608         done = 0;
  609         bkt_ub = info->lb;
  610 
  611         do {
  612                 bkt_lb = bkt_ub;
  613                 if (stats_vss_hist_bkt_hlpr(info, nbkts++, &bkt_lb, &bkt_ub))
  614                         return (0);
  615 
  616                 if (info->scheme == BKT_USR)
  617                         done = (nbkts == info->usr.nbkts);
  618                 else {
  619                         switch (info->voi_dtype) {
  620                         case VSD_DTYPE_INT_S32:
  621                                 done = (bkt_ub.int32.s32 > info->ub.int32.s32);
  622                                 break;
  623                         case VSD_DTYPE_INT_U32:
  624                                 done = (bkt_ub.int32.u32 > info->ub.int32.u32);
  625                                 break;
  626                         case VSD_DTYPE_INT_S64:
  627                                 done = (bkt_ub.int64.s64 > info->ub.int64.s64);
  628                                 break;
  629                         case VSD_DTYPE_INT_U64:
  630                                 done = (bkt_ub.int64.u64 > info->ub.int64.u64);
  631                                 break;
  632                         case VSD_DTYPE_INT_SLONG:
  633                                 done = (bkt_ub.intlong.slong >
  634                                     info->ub.intlong.slong);
  635                                 break;
  636                         case VSD_DTYPE_INT_ULONG:
  637                                 done = (bkt_ub.intlong.ulong >
  638                                     info->ub.intlong.ulong);
  639                                 break;
  640                         case VSD_DTYPE_Q_S32:
  641                                 done = Q_QGTQ(bkt_ub.q32.sq32,
  642                                     info->ub.q32.sq32);
  643                                 break;
  644                         case VSD_DTYPE_Q_U32:
  645                                 done = Q_QGTQ(bkt_ub.q32.uq32,
  646                                     info->ub.q32.uq32);
  647                                 break;
  648                         case VSD_DTYPE_Q_S64:
  649                                 done = Q_QGTQ(bkt_ub.q64.sq64,
  650                                     info->ub.q64.sq64);
  651                                 break;
  652                         case VSD_DTYPE_Q_U64:
  653                                 done = Q_QGTQ(bkt_ub.q64.uq64,
  654                                     info->ub.q64.uq64);
  655                                 break;
  656                         default:
  657                                 return (0);
  658                         }
  659                 }
  660         } while (!done);
  661 
  662         if (info->flags & VSD_HIST_LBOUND_INF)
  663                 nbkts++;
  664         if (info->flags & VSD_HIST_UBOUND_INF)
  665                 nbkts++;
  666 
  667         return (nbkts);
  668 }
  669 
  670 int
  671 stats_vss_hist_hlpr(enum vsd_dtype voi_dtype, struct voistatspec *vss,
  672     struct vss_hist_hlpr_info *info)
  673 {
  674         struct voistatdata_hist *hist;
  675         struct voistatdata_numeric bkt_lb, bkt_ub, *lbinfbktlb, *lbinfbktub,
  676             *ubinfbktlb, *ubinfbktub;
  677         uint32_t bkt, nbkts, nloop;
  678 
  679         if (vss == NULL || info == NULL || (info->flags &
  680         (VSD_HIST_LBOUND_INF|VSD_HIST_UBOUND_INF) && (info->hist_dtype ==
  681         VSD_DTYPE_DVHIST32 || info->hist_dtype == VSD_DTYPE_DVHIST64)))
  682                 return (EINVAL);
  683 
  684         info->voi_dtype = voi_dtype;
  685 
  686         if ((nbkts = stats_vss_hist_nbkts_hlpr(info)) == 0)
  687                 return (EINVAL);
  688 
  689         switch (info->hist_dtype) {
  690         case VSD_DTYPE_CRHIST32:
  691                 vss->vsdsz = HIST_NBKTS2VSDSZ(crhist32, nbkts);
  692                 break;
  693         case VSD_DTYPE_DRHIST32:
  694                 vss->vsdsz = HIST_NBKTS2VSDSZ(drhist32, nbkts);
  695                 break;
  696         case VSD_DTYPE_DVHIST32:
  697                 vss->vsdsz = HIST_NBKTS2VSDSZ(dvhist32, nbkts);
  698                 break;
  699         case VSD_DTYPE_CRHIST64:
  700                 vss->vsdsz = HIST_NBKTS2VSDSZ(crhist64, nbkts);
  701                 break;
  702         case VSD_DTYPE_DRHIST64:
  703                 vss->vsdsz = HIST_NBKTS2VSDSZ(drhist64, nbkts);
  704                 break;
  705         case VSD_DTYPE_DVHIST64:
  706                 vss->vsdsz = HIST_NBKTS2VSDSZ(dvhist64, nbkts);
  707                 break;
  708         default:
  709                 return (EINVAL);
  710         }
  711 
  712         vss->iv = stats_realloc(NULL, 0, vss->vsdsz, M_ZERO);
  713         if (vss->iv == NULL)
  714                 return (ENOMEM);
  715 
  716         hist = (struct voistatdata_hist *)vss->iv;
  717         bkt_ub = info->lb;
  718 
  719         for (bkt = (info->flags & VSD_HIST_LBOUND_INF), nloop = 0;
  720             bkt < nbkts;
  721             bkt++, nloop++) {
  722                 bkt_lb = bkt_ub;
  723                 if (stats_vss_hist_bkt_hlpr(info, nloop, &bkt_lb, &bkt_ub))
  724                         return (EINVAL);
  725 
  726                 switch (info->hist_dtype) {
  727                 case VSD_DTYPE_CRHIST32:
  728                         VSD(crhist32, hist)->bkts[bkt].lb = bkt_lb;
  729                         break;
  730                 case VSD_DTYPE_DRHIST32:
  731                         VSD(drhist32, hist)->bkts[bkt].lb = bkt_lb;
  732                         VSD(drhist32, hist)->bkts[bkt].ub = bkt_ub;
  733                         break;
  734                 case VSD_DTYPE_DVHIST32:
  735                         VSD(dvhist32, hist)->bkts[bkt].val = bkt_lb;
  736                         break;
  737                 case VSD_DTYPE_CRHIST64:
  738                         VSD(crhist64, hist)->bkts[bkt].lb = bkt_lb;
  739                         break;
  740                 case VSD_DTYPE_DRHIST64:
  741                         VSD(drhist64, hist)->bkts[bkt].lb = bkt_lb;
  742                         VSD(drhist64, hist)->bkts[bkt].ub = bkt_ub;
  743                         break;
  744                 case VSD_DTYPE_DVHIST64:
  745                         VSD(dvhist64, hist)->bkts[bkt].val = bkt_lb;
  746                         break;
  747                 default:
  748                         return (EINVAL);
  749                 }
  750         }
  751 
  752         lbinfbktlb = lbinfbktub = ubinfbktlb = ubinfbktub = NULL;
  753 
  754         switch (info->hist_dtype) {
  755         case VSD_DTYPE_CRHIST32:
  756                 lbinfbktlb = &VSD(crhist32, hist)->bkts[0].lb;
  757                 ubinfbktlb = &VSD(crhist32, hist)->bkts[nbkts - 1].lb;
  758                 break;
  759         case VSD_DTYPE_DRHIST32:
  760                 lbinfbktlb = &VSD(drhist32, hist)->bkts[0].lb;
  761                 lbinfbktub = &VSD(drhist32, hist)->bkts[0].ub;
  762                 ubinfbktlb = &VSD(drhist32, hist)->bkts[nbkts - 1].lb;
  763                 ubinfbktub = &VSD(drhist32, hist)->bkts[nbkts - 1].ub;
  764                 break;
  765         case VSD_DTYPE_CRHIST64:
  766                 lbinfbktlb = &VSD(crhist64, hist)->bkts[0].lb;
  767                 ubinfbktlb = &VSD(crhist64, hist)->bkts[nbkts - 1].lb;
  768                 break;
  769         case VSD_DTYPE_DRHIST64:
  770                 lbinfbktlb = &VSD(drhist64, hist)->bkts[0].lb;
  771                 lbinfbktub = &VSD(drhist64, hist)->bkts[0].ub;
  772                 ubinfbktlb = &VSD(drhist64, hist)->bkts[nbkts - 1].lb;
  773                 ubinfbktub = &VSD(drhist64, hist)->bkts[nbkts - 1].ub;
  774                 break;
  775         case VSD_DTYPE_DVHIST32:
  776         case VSD_DTYPE_DVHIST64:
  777                 break;
  778         default:
  779                 return (EINVAL);
  780         }
  781 
  782         if ((info->flags & VSD_HIST_LBOUND_INF) && lbinfbktlb) {
  783                 *lbinfbktlb = numeric_limits[LIM_MIN][info->voi_dtype];
  784                 /*
  785                  * Assignment from numeric_limit array for Q types assigns max
  786                  * possible integral/fractional value for underlying data type,
  787                  * but we must set control bits for this specific histogram per
  788                  * the user's choice of fractional bits, which we extract from
  789                  * info->lb.
  790                  */
  791                 if (info->voi_dtype == VSD_DTYPE_Q_S32 ||
  792                     info->voi_dtype == VSD_DTYPE_Q_U32) {
  793                         /* Signedness doesn't matter for setting control bits. */
  794                         Q_SCVAL(lbinfbktlb->q32.sq32,
  795                             Q_GCVAL(info->lb.q32.sq32));
  796                 } else if (info->voi_dtype == VSD_DTYPE_Q_S64 ||
  797                     info->voi_dtype == VSD_DTYPE_Q_U64) {
  798                         /* Signedness doesn't matter for setting control bits. */
  799                         Q_SCVAL(lbinfbktlb->q64.sq64,
  800                             Q_GCVAL(info->lb.q64.sq64));
  801                 }
  802                 if (lbinfbktub)
  803                         *lbinfbktub = info->lb;
  804         }
  805         if ((info->flags & VSD_HIST_UBOUND_INF) && ubinfbktlb) {
  806                 *ubinfbktlb = bkt_lb;
  807                 if (ubinfbktub) {
  808                         *ubinfbktub = numeric_limits[LIM_MAX][info->voi_dtype];
  809                         if (info->voi_dtype == VSD_DTYPE_Q_S32 ||
  810                             info->voi_dtype == VSD_DTYPE_Q_U32) {
  811                                 Q_SCVAL(ubinfbktub->q32.sq32,
  812                                     Q_GCVAL(info->lb.q32.sq32));
  813                         } else if (info->voi_dtype == VSD_DTYPE_Q_S64 ||
  814                             info->voi_dtype == VSD_DTYPE_Q_U64) {
  815                                 Q_SCVAL(ubinfbktub->q64.sq64,
  816                                     Q_GCVAL(info->lb.q64.sq64));
  817                         }
  818                 }
  819         }
  820 
  821         return (0);
  822 }
  823 
  824 int
  825 stats_vss_tdgst_hlpr(enum vsd_dtype voi_dtype, struct voistatspec *vss,
  826     struct vss_tdgst_hlpr_info *info)
  827 {
  828         struct voistatdata_tdgst *tdgst;
  829         struct ctdth32 *ctd32tree;
  830         struct ctdth64 *ctd64tree;
  831         struct voistatdata_tdgstctd32 *ctd32;
  832         struct voistatdata_tdgstctd64 *ctd64;
  833 
  834         info->voi_dtype = voi_dtype;
  835 
  836         switch (info->tdgst_dtype) {
  837         case VSD_DTYPE_TDGSTCLUST32:
  838                 vss->vsdsz = TDGST_NCTRS2VSDSZ(tdgstclust32, info->nctds);
  839                 break;
  840         case VSD_DTYPE_TDGSTCLUST64:
  841                 vss->vsdsz = TDGST_NCTRS2VSDSZ(tdgstclust64, info->nctds);
  842                 break;
  843         default:
  844                 return (EINVAL);
  845         }
  846 
  847         vss->iv = stats_realloc(NULL, 0, vss->vsdsz, M_ZERO);
  848         if (vss->iv == NULL)
  849                 return (ENOMEM);
  850 
  851         tdgst = (struct voistatdata_tdgst *)vss->iv;
  852 
  853         switch (info->tdgst_dtype) {
  854         case VSD_DTYPE_TDGSTCLUST32:
  855                 ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
  856                 ARB_INIT(ctd32, ctdlnk, ctd32tree, info->nctds) {
  857                         Q_INI(&ctd32->mu, 0, 0, info->prec);
  858                 }
  859                 break;
  860         case VSD_DTYPE_TDGSTCLUST64:
  861                 ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
  862                 ARB_INIT(ctd64, ctdlnk, ctd64tree, info->nctds) {
  863                         Q_INI(&ctd64->mu, 0, 0, info->prec);
  864                 }
  865                 break;
  866         default:
  867                 return (EINVAL);
  868         }
  869 
  870         return (0);
  871 }
  872 
  873 int
  874 stats_vss_numeric_hlpr(enum vsd_dtype voi_dtype, struct voistatspec *vss,
  875     struct vss_numeric_hlpr_info *info)
  876 {
  877         struct voistatdata_numeric iv;
  878 
  879         switch (vss->stype) {
  880         case VS_STYPE_SUM:
  881                 iv = stats_ctor_vsd_numeric(0);
  882                 break;
  883         case VS_STYPE_MIN:
  884                 iv = numeric_limits[LIM_MAX][voi_dtype];
  885                 break;
  886         case VS_STYPE_MAX:
  887                 iv = numeric_limits[LIM_MIN][voi_dtype];
  888                 break;
  889         default:
  890                 return (EINVAL);
  891         }
  892 
  893         vss->iv = stats_realloc(NULL, 0, vsd_dtype2size[voi_dtype], 0);
  894         if (vss->iv == NULL)
  895                 return (ENOMEM);
  896 
  897         vss->vs_dtype = voi_dtype;
  898         vss->vsdsz = vsd_dtype2size[voi_dtype];
  899         switch (voi_dtype) {
  900         case VSD_DTYPE_INT_S32:
  901                 *((int32_t *)vss->iv) = iv.int32.s32;
  902                 break;
  903         case VSD_DTYPE_INT_U32:
  904                 *((uint32_t *)vss->iv) = iv.int32.u32;
  905                 break;
  906         case VSD_DTYPE_INT_S64:
  907                 *((int64_t *)vss->iv) = iv.int64.s64;
  908                 break;
  909         case VSD_DTYPE_INT_U64:
  910                 *((uint64_t *)vss->iv) = iv.int64.u64;
  911                 break;
  912         case VSD_DTYPE_INT_SLONG:
  913                 *((long *)vss->iv) = iv.intlong.slong;
  914                 break;
  915         case VSD_DTYPE_INT_ULONG:
  916                 *((unsigned long *)vss->iv) = iv.intlong.ulong;
  917                 break;
  918         case VSD_DTYPE_Q_S32:
  919                 *((s32q_t *)vss->iv) = Q_SCVAL(iv.q32.sq32,
  920                     Q_CTRLINI(info->prec));
  921                 break;
  922         case VSD_DTYPE_Q_U32:
  923                 *((u32q_t *)vss->iv) = Q_SCVAL(iv.q32.uq32,
  924                     Q_CTRLINI(info->prec));
  925                 break;
  926         case VSD_DTYPE_Q_S64:
  927                 *((s64q_t *)vss->iv) = Q_SCVAL(iv.q64.sq64,
  928                     Q_CTRLINI(info->prec));
  929                 break;
  930         case VSD_DTYPE_Q_U64:
  931                 *((u64q_t *)vss->iv) = Q_SCVAL(iv.q64.uq64,
  932                     Q_CTRLINI(info->prec));
  933                 break;
  934         default:
  935                 break;
  936         }
  937 
  938         return (0);
  939 }
  940 
  941 int
  942 stats_vss_hlpr_init(enum vsd_dtype voi_dtype, uint32_t nvss,
  943     struct voistatspec *vss)
  944 {
  945         int i, ret;
  946 
  947         for (i = nvss - 1; i >= 0; i--) {
  948                 if (vss[i].hlpr && (ret = vss[i].hlpr(voi_dtype, &vss[i],
  949                     vss[i].hlprinfo)) != 0)
  950                         return (ret);
  951         }
  952 
  953         return (0);
  954 }
  955 
  956 void
  957 stats_vss_hlpr_cleanup(uint32_t nvss, struct voistatspec *vss)
  958 {
  959         int i;
  960 
  961         for (i = nvss - 1; i >= 0; i--) {
  962                 if (vss[i].hlpr) {
  963                         stats_free((void *)vss[i].iv);
  964                         vss[i].iv = NULL;
  965                 }
  966         }
  967 }
  968 
  969 int
  970 stats_tpl_fetch(int tpl_id, struct statsblob_tpl **tpl)
  971 {
  972         int error;
  973 
  974         error = 0;
  975 
  976         TPL_LIST_WLOCK();
  977         if (tpl_id < 0 || tpl_id >= (int)ntpl) {
  978                 error = ENOENT;
  979         } else {
  980                 *tpl = tpllist[tpl_id];
  981                 /* XXXLAS: Acquire refcount on tpl. */
  982         }
  983         TPL_LIST_WUNLOCK();
  984 
  985         return (error);
  986 }
  987 
  988 int
  989 stats_tpl_fetch_allocid(const char *name, uint32_t hash)
  990 {
  991         int i, tpl_id;
  992 
  993         tpl_id = -ESRCH;
  994 
  995         TPL_LIST_RLOCK();
  996         for (i = ntpl - 1; i >= 0; i--) {
  997                 if (name != NULL) {
  998                         if (strlen(name) == strlen(tpllist[i]->mb->tplname) &&
  999                             strncmp(name, tpllist[i]->mb->tplname,
 1000                             TPL_MAX_NAME_LEN) == 0 && (!hash || hash ==
 1001                             tpllist[i]->mb->tplhash)) {
 1002                                 tpl_id = i;
 1003                                 break;
 1004                         }
 1005                 } else if (hash == tpllist[i]->mb->tplhash) {
 1006                         tpl_id = i;
 1007                         break;
 1008                 }
 1009         }
 1010         TPL_LIST_RUNLOCK();
 1011 
 1012         return (tpl_id);
 1013 }
 1014 
 1015 int
 1016 stats_tpl_id2name(uint32_t tpl_id, char *buf, size_t len)
 1017 {
 1018         int error;
 1019 
 1020         error = 0;
 1021 
 1022         TPL_LIST_RLOCK();
 1023         if (tpl_id < ntpl) {
 1024                 if (buf != NULL && len > strlen(tpllist[tpl_id]->mb->tplname))
 1025                         strlcpy(buf, tpllist[tpl_id]->mb->tplname, len);
 1026                 else
 1027                         error = EOVERFLOW;
 1028         } else
 1029                 error = ENOENT;
 1030         TPL_LIST_RUNLOCK();
 1031 
 1032         return (error);
 1033 }
 1034 
 1035 int
 1036 stats_tpl_sample_rollthedice(struct stats_tpl_sample_rate *rates, int nrates,
 1037     void *seed_bytes, size_t seed_len)
 1038 {
 1039         uint32_t cum_pct, rnd_pct;
 1040         int i;
 1041 
 1042         cum_pct = 0;
 1043 
 1044         /*
 1045          * Choose a pseudorandom or seeded number in range [0,100] and use
 1046          * it to make a sampling decision and template selection where required.
 1047          * If no seed is supplied, a PRNG is used to generate a pseudorandom
 1048          * number so that every selection is independent. If a seed is supplied,
 1049          * the caller desires random selection across different seeds, but
 1050          * deterministic selection given the same seed. This is achieved by
 1051          * hashing the seed and using the hash as the random number source.
 1052          *
 1053          * XXXLAS: Characterise hash function output distribution.
 1054          */
 1055         if (seed_bytes == NULL)
 1056                 rnd_pct = random() / (INT32_MAX / 100);
 1057         else
 1058                 rnd_pct = hash32_buf(seed_bytes, seed_len, 0) /
 1059                     (UINT32_MAX / 100U);
 1060 
 1061         /*
 1062          * We map the randomly selected percentage on to the interval [0,100]
 1063          * consisting of the cumulatively summed template sampling percentages.
 1064          * The difference between the cumulative sum of all template sampling
 1065          * percentages and 100 is treated as a NULL assignment i.e. no stats
 1066          * template will be assigned, and -1 returned instead.
 1067          */
 1068         for (i = 0; i < nrates; i++) {
 1069                 cum_pct += rates[i].tpl_sample_pct;
 1070 
 1071                 KASSERT(cum_pct <= 100, ("%s cum_pct %u > 100", __func__,
 1072                     cum_pct));
 1073                 if (rnd_pct > cum_pct || rates[i].tpl_sample_pct == 0)
 1074                         continue;
 1075 
 1076                 return (rates[i].tpl_slot_id);
 1077         }
 1078 
 1079         return (-1);
 1080 }
 1081 
 1082 int
 1083 stats_v1_blob_clone(struct statsblobv1 **dst, size_t dstmaxsz,
 1084     struct statsblobv1 *src, uint32_t flags)
 1085 {
 1086         int error;
 1087 
 1088         error = 0;
 1089 
 1090         if (src == NULL || dst == NULL ||
 1091             src->cursz < sizeof(struct statsblob) ||
 1092             ((flags & SB_CLONE_ALLOCDST) &&
 1093             (flags & (SB_CLONE_USRDSTNOFAULT | SB_CLONE_USRDST)))) {
 1094                 error = EINVAL;
 1095         } else if (flags & SB_CLONE_ALLOCDST) {
 1096                 *dst = stats_realloc(NULL, 0, src->cursz, 0);
 1097                 if (*dst)
 1098                         (*dst)->maxsz = dstmaxsz = src->cursz;
 1099                 else
 1100                         error = ENOMEM;
 1101         } else if (*dst == NULL || dstmaxsz < sizeof(struct statsblob)) {
 1102                 error = EINVAL;
 1103         }
 1104 
 1105         if (!error) {
 1106                 size_t postcurszlen;
 1107 
 1108                 /*
 1109                  * Clone src into dst except for the maxsz field. If dst is too
 1110                  * small to hold all of src, only copy src's header and return
 1111                  * EOVERFLOW.
 1112                  */
 1113 #ifdef _KERNEL
 1114                 if (flags & SB_CLONE_USRDSTNOFAULT)
 1115                         copyout_nofault(src, *dst,
 1116                             offsetof(struct statsblob, maxsz));
 1117                 else if (flags & SB_CLONE_USRDST)
 1118                         copyout(src, *dst, offsetof(struct statsblob, maxsz));
 1119                 else
 1120 #endif
 1121                         memcpy(*dst, src, offsetof(struct statsblob, maxsz));
 1122 
 1123                 if (dstmaxsz >= src->cursz) {
 1124                         postcurszlen = src->cursz -
 1125                             offsetof(struct statsblob, cursz);
 1126                 } else {
 1127                         error = EOVERFLOW;
 1128                         postcurszlen = sizeof(struct statsblob) -
 1129                             offsetof(struct statsblob, cursz);
 1130                 }
 1131 #ifdef _KERNEL
 1132                 if (flags & SB_CLONE_USRDSTNOFAULT)
 1133                         copyout_nofault(&(src->cursz), &((*dst)->cursz),
 1134                             postcurszlen);
 1135                 else if (flags & SB_CLONE_USRDST)
 1136                         copyout(&(src->cursz), &((*dst)->cursz), postcurszlen);
 1137                 else
 1138 #endif
 1139                         memcpy(&((*dst)->cursz), &(src->cursz), postcurszlen);
 1140         }
 1141 
 1142         return (error);
 1143 }
 1144 
 1145 int
 1146 stats_v1_tpl_alloc(const char *name, uint32_t flags __unused)
 1147 {
 1148         struct statsblobv1_tpl *tpl, **newtpllist;
 1149         struct statsblobv1 *tpl_sb;
 1150         struct metablob *tpl_mb;
 1151         int tpl_id;
 1152 
 1153         if (name != NULL && strlen(name) > TPL_MAX_NAME_LEN)
 1154                 return (-EINVAL);
 1155 
 1156         if (name != NULL && stats_tpl_fetch_allocid(name, 0) >= 0)
 1157                 return (-EEXIST);
 1158 
 1159         tpl = stats_realloc(NULL, 0, sizeof(struct statsblobv1_tpl), M_ZERO);
 1160         tpl_mb = stats_realloc(NULL, 0, sizeof(struct metablob), M_ZERO);
 1161         tpl_sb = stats_realloc(NULL, 0, sizeof(struct statsblobv1), M_ZERO);
 1162 
 1163         if (tpl_mb != NULL && name != NULL)
 1164                 tpl_mb->tplname = stats_strdup(name, 0);
 1165 
 1166         if (tpl == NULL || tpl_sb == NULL || tpl_mb == NULL ||
 1167             tpl_mb->tplname == NULL) {
 1168                 stats_free(tpl);
 1169                 stats_free(tpl_sb);
 1170                 if (tpl_mb != NULL) {
 1171                         stats_free(tpl_mb->tplname);
 1172                         stats_free(tpl_mb);
 1173                 }
 1174                 return (-ENOMEM);
 1175         }
 1176 
 1177         tpl->mb = tpl_mb;
 1178         tpl->sb = tpl_sb;
 1179 
 1180         tpl_sb->abi = STATS_ABI_V1;
 1181         tpl_sb->endian =
 1182 #if BYTE_ORDER == LITTLE_ENDIAN
 1183             SB_LE;
 1184 #elif BYTE_ORDER == BIG_ENDIAN
 1185             SB_BE;
 1186 #else
 1187             SB_UE;
 1188 #endif
 1189         tpl_sb->cursz = tpl_sb->maxsz = sizeof(struct statsblobv1);
 1190         tpl_sb->stats_off = tpl_sb->statsdata_off = sizeof(struct statsblobv1);
 1191 
 1192         TPL_LIST_WLOCK();
 1193         newtpllist = stats_realloc(tpllist, ntpl * sizeof(void *),
 1194             (ntpl + 1) * sizeof(void *), 0);
 1195         if (newtpllist != NULL) {
 1196                 tpl_id = ntpl++;
 1197                 tpllist = (struct statsblob_tpl **)newtpllist;
 1198                 tpllist[tpl_id] = (struct statsblob_tpl *)tpl;
 1199                 stats_tpl_update_hash(tpllist[tpl_id]);
 1200         } else {
 1201                 stats_free(tpl);
 1202                 stats_free(tpl_sb);
 1203                 if (tpl_mb != NULL) {
 1204                         stats_free(tpl_mb->tplname);
 1205                         stats_free(tpl_mb);
 1206                 }
 1207                 tpl_id = -ENOMEM;
 1208         }
 1209         TPL_LIST_WUNLOCK();
 1210 
 1211         return (tpl_id);
 1212 }
 1213 
 1214 int
 1215 stats_v1_tpl_add_voistats(uint32_t tpl_id, int32_t voi_id, const char *voi_name,
 1216     enum vsd_dtype voi_dtype, uint32_t nvss, struct voistatspec *vss,
 1217     uint32_t flags)
 1218 {
 1219         struct voi *voi;
 1220         struct voistat *tmpstat;
 1221         struct statsblobv1 *tpl_sb;
 1222         struct metablob *tpl_mb;
 1223         int error, i, newstatdataidx, newvoibytes, newvoistatbytes,
 1224             newvoistatdatabytes, newvoistatmaxid;
 1225         uint32_t nbytes;
 1226 
 1227         if (voi_id < 0 || voi_dtype == 0 || voi_dtype >= VSD_NUM_DTYPES ||
 1228             nvss == 0 || vss == NULL)
 1229                 return (EINVAL);
 1230 
 1231         error = nbytes = newvoibytes = newvoistatbytes =
 1232             newvoistatdatabytes = 0;
 1233         newvoistatmaxid = -1;
 1234 
 1235         /* Calculate the number of bytes required for the new voistats. */
 1236         for (i = nvss - 1; i >= 0; i--) {
 1237                 if (vss[i].stype == 0 || vss[i].stype >= VS_NUM_STYPES ||
 1238                     vss[i].vs_dtype == 0 || vss[i].vs_dtype >= VSD_NUM_DTYPES ||
 1239                     vss[i].iv == NULL || vss[i].vsdsz == 0)
 1240                         return (EINVAL);
 1241                 if ((int)vss[i].stype > newvoistatmaxid)
 1242                         newvoistatmaxid = vss[i].stype;
 1243                 newvoistatdatabytes += vss[i].vsdsz;
 1244         }
 1245 
 1246         if (flags & SB_VOI_RELUPDATE) {
 1247                 /* XXXLAS: VOI state bytes may need to vary based on stat types. */
 1248                 newvoistatdatabytes += sizeof(struct voistatdata_voistate);
 1249         }
 1250         nbytes += newvoistatdatabytes;
 1251 
 1252         TPL_LIST_WLOCK();
 1253         if (tpl_id < ntpl) {
 1254                 tpl_sb = (struct statsblobv1 *)tpllist[tpl_id]->sb;
 1255                 tpl_mb = tpllist[tpl_id]->mb;
 1256 
 1257                 if (voi_id >= NVOIS(tpl_sb) || tpl_sb->vois[voi_id].id == -1) {
 1258                         /* Adding a new VOI and associated stats. */
 1259                         if (voi_id >= NVOIS(tpl_sb)) {
 1260                                 /* We need to grow the tpl_sb->vois array. */
 1261                                 newvoibytes = (voi_id - (NVOIS(tpl_sb) - 1)) *
 1262                                     sizeof(struct voi);
 1263                                 nbytes += newvoibytes;
 1264                         }
 1265                         newvoistatbytes =
 1266                             (newvoistatmaxid + 1) * sizeof(struct voistat);
 1267                 } else {
 1268                         /* Adding stats to an existing VOI. */
 1269                         if (newvoistatmaxid >
 1270                             tpl_sb->vois[voi_id].voistatmaxid) {
 1271                                 newvoistatbytes = (newvoistatmaxid -
 1272                                     tpl_sb->vois[voi_id].voistatmaxid) *
 1273                                     sizeof(struct voistat);
 1274                         }
 1275                         /* XXXLAS: KPI does not yet support expanding VOIs. */
 1276                         error = EOPNOTSUPP;
 1277                 }
 1278                 nbytes += newvoistatbytes;
 1279 
 1280                 if (!error && newvoibytes > 0) {
 1281                         struct voi_meta *voi_meta = tpl_mb->voi_meta;
 1282 
 1283                         voi_meta = stats_realloc(voi_meta, voi_meta == NULL ?
 1284                             0 : NVOIS(tpl_sb) * sizeof(struct voi_meta),
 1285                             (1 + voi_id) * sizeof(struct voi_meta),
 1286                             M_ZERO);
 1287 
 1288                         if (voi_meta == NULL)
 1289                                 error = ENOMEM;
 1290                         else
 1291                                 tpl_mb->voi_meta = voi_meta;
 1292                 }
 1293 
 1294                 if (!error) {
 1295                         /* NB: Resizing can change where tpl_sb points. */
 1296                         error = stats_v1_blob_expand(&tpl_sb, newvoibytes,
 1297                             newvoistatbytes, newvoistatdatabytes);
 1298                 }
 1299 
 1300                 if (!error) {
 1301                         tpl_mb->voi_meta[voi_id].name = stats_strdup(voi_name,
 1302                             0);
 1303                         if (tpl_mb->voi_meta[voi_id].name == NULL)
 1304                                 error = ENOMEM;
 1305                 }
 1306 
 1307                 if (!error) {
 1308                         /* Update the template list with the resized pointer. */
 1309                         tpllist[tpl_id]->sb = (struct statsblob *)tpl_sb;
 1310 
 1311                         /* Update the template. */
 1312                         voi = &tpl_sb->vois[voi_id];
 1313 
 1314                         if (voi->id < 0) {
 1315                                 /* VOI is new and needs to be initialised. */
 1316                                 voi->id = voi_id;
 1317                                 voi->dtype = voi_dtype;
 1318                                 voi->stats_off = tpl_sb->stats_off;
 1319                                 if (flags & SB_VOI_RELUPDATE)
 1320                                         voi->flags |= VOI_REQSTATE;
 1321                         } else {
 1322                                 /*
 1323                                  * XXXLAS: When this else block is written, the
 1324                                  * "KPI does not yet support expanding VOIs"
 1325                                  * error earlier in this function can be
 1326                                  * removed. What is required here is to shuffle
 1327                                  * the voistat array such that the new stats for
 1328                                  * the voi are contiguous, which will displace
 1329                                  * stats for other vois that reside after the
 1330                                  * voi being updated. The other vois then need
 1331                                  * to have their stats_off adjusted post
 1332                                  * shuffle.
 1333                                  */
 1334                         }
 1335 
 1336                         voi->voistatmaxid = newvoistatmaxid;
 1337                         newstatdataidx = 0;
 1338 
 1339                         if (voi->flags & VOI_REQSTATE) {
 1340                                 /* Initialise the voistate stat in slot 0. */
 1341                                 tmpstat = BLOB_OFFSET(tpl_sb, voi->stats_off);
 1342                                 tmpstat->stype = VS_STYPE_VOISTATE;
 1343                                 tmpstat->flags = 0;
 1344                                 tmpstat->dtype = VSD_DTYPE_VOISTATE;
 1345                                 newstatdataidx = tmpstat->dsz =
 1346                                     sizeof(struct voistatdata_numeric);
 1347                                 tmpstat->data_off = tpl_sb->statsdata_off;
 1348                         }
 1349 
 1350                         for (i = 0; (uint32_t)i < nvss; i++) {
 1351                                 tmpstat = BLOB_OFFSET(tpl_sb, voi->stats_off +
 1352                                     (vss[i].stype * sizeof(struct voistat)));
 1353                                 KASSERT(tmpstat->stype < 0, ("voistat %p "
 1354                                     "already initialised", tmpstat));
 1355                                 tmpstat->stype = vss[i].stype;
 1356                                 tmpstat->flags = vss[i].flags;
 1357                                 tmpstat->dtype = vss[i].vs_dtype;
 1358                                 tmpstat->dsz = vss[i].vsdsz;
 1359                                 tmpstat->data_off = tpl_sb->statsdata_off +
 1360                                     newstatdataidx;
 1361                                 memcpy(BLOB_OFFSET(tpl_sb, tmpstat->data_off),
 1362                                     vss[i].iv, vss[i].vsdsz);
 1363                                 newstatdataidx += vss[i].vsdsz;
 1364                         }
 1365 
 1366                         /* Update the template version hash. */
 1367                         stats_tpl_update_hash(tpllist[tpl_id]);
 1368                         /* XXXLAS: Confirm tpl name/hash pair remains unique. */
 1369                 }
 1370         } else
 1371                 error = EINVAL;
 1372         TPL_LIST_WUNLOCK();
 1373 
 1374         return (error);
 1375 }
 1376 
 1377 struct statsblobv1 *
 1378 stats_v1_blob_alloc(uint32_t tpl_id, uint32_t flags __unused)
 1379 {
 1380         struct statsblobv1 *sb;
 1381         int error;
 1382 
 1383         sb = NULL;
 1384 
 1385         TPL_LIST_RLOCK();
 1386         if (tpl_id < ntpl) {
 1387                 sb = stats_realloc(NULL, 0, tpllist[tpl_id]->sb->maxsz, 0);
 1388                 if (sb != NULL) {
 1389                         sb->maxsz = tpllist[tpl_id]->sb->maxsz;
 1390                         error = stats_v1_blob_init_locked(sb, tpl_id, 0);
 1391                 } else
 1392                         error = ENOMEM;
 1393 
 1394                 if (error) {
 1395                         stats_free(sb);
 1396                         sb = NULL;
 1397                 }
 1398         }
 1399         TPL_LIST_RUNLOCK();
 1400 
 1401         return (sb);
 1402 }
 1403 
 1404 void
 1405 stats_v1_blob_destroy(struct statsblobv1 *sb)
 1406 {
 1407 
 1408         stats_free(sb);
 1409 }
 1410 
 1411 int
 1412 stats_v1_voistat_fetch_dptr(struct statsblobv1 *sb, int32_t voi_id,
 1413     enum voi_stype stype, enum vsd_dtype *retdtype, struct voistatdata **retvsd,
 1414     size_t *retvsdsz)
 1415 {
 1416         struct voi *v;
 1417         struct voistat *vs;
 1418 
 1419         if (retvsd == NULL || sb == NULL || sb->abi != STATS_ABI_V1 ||
 1420             voi_id >= NVOIS(sb))
 1421                 return (EINVAL);
 1422 
 1423         v = &sb->vois[voi_id];
 1424         if ((__typeof(v->voistatmaxid))stype > v->voistatmaxid)
 1425                 return (EINVAL);
 1426 
 1427         vs = BLOB_OFFSET(sb, v->stats_off + (stype * sizeof(struct voistat)));
 1428         *retvsd = BLOB_OFFSET(sb, vs->data_off);
 1429         if (retdtype != NULL)
 1430                 *retdtype = vs->dtype;
 1431         if (retvsdsz != NULL)
 1432                 *retvsdsz = vs->dsz;
 1433 
 1434         return (0);
 1435 }
 1436 
 1437 int
 1438 stats_v1_blob_init(struct statsblobv1 *sb, uint32_t tpl_id, uint32_t flags)
 1439 {
 1440         int error;
 1441 
 1442         error = 0;
 1443 
 1444         TPL_LIST_RLOCK();
 1445         if (sb == NULL || tpl_id >= ntpl) {
 1446                 error = EINVAL;
 1447         } else {
 1448                 error = stats_v1_blob_init_locked(sb, tpl_id, flags);
 1449         }
 1450         TPL_LIST_RUNLOCK();
 1451 
 1452         return (error);
 1453 }
 1454 
 1455 static inline int
 1456 stats_v1_blob_init_locked(struct statsblobv1 *sb, uint32_t tpl_id,
 1457     uint32_t flags __unused)
 1458 {
 1459         int error;
 1460 
 1461         TPL_LIST_RLOCK_ASSERT();
 1462         error = (sb->maxsz >= tpllist[tpl_id]->sb->cursz) ? 0 : EOVERFLOW;
 1463         KASSERT(!error,
 1464             ("sb %d instead of %d bytes", sb->maxsz, tpllist[tpl_id]->sb->cursz));
 1465 
 1466         if (!error) {
 1467                 memcpy(sb, tpllist[tpl_id]->sb, tpllist[tpl_id]->sb->cursz);
 1468                 sb->created = sb->lastrst = stats_sbinuptime();
 1469                 sb->tplhash = tpllist[tpl_id]->mb->tplhash;
 1470         }
 1471 
 1472         return (error);
 1473 }
 1474 
 1475 static int
 1476 stats_v1_blob_expand(struct statsblobv1 **sbpp, int newvoibytes,
 1477     int newvoistatbytes, int newvoistatdatabytes)
 1478 {
 1479         struct statsblobv1 *sb;
 1480         struct voi *tmpvoi;
 1481         struct voistat *tmpvoistat, *voistat_array;
 1482         int error, i, idxnewvois, idxnewvoistats, nbytes, nvoistats;
 1483 
 1484         KASSERT(newvoibytes % sizeof(struct voi) == 0,
 1485             ("Bad newvoibytes %d", newvoibytes));
 1486         KASSERT(newvoistatbytes % sizeof(struct voistat) == 0,
 1487             ("Bad newvoistatbytes %d", newvoistatbytes));
 1488 
 1489         error = ((newvoibytes % sizeof(struct voi) == 0) &&
 1490             (newvoistatbytes % sizeof(struct voistat) == 0)) ? 0 : EINVAL;
 1491         sb = *sbpp;
 1492         nbytes = newvoibytes + newvoistatbytes + newvoistatdatabytes;
 1493 
 1494         /*
 1495          * XXXLAS: Required until we gain support for flags which alter the
 1496          * units of size/offset fields in key structs.
 1497          */
 1498         if (!error && ((((int)sb->cursz) + nbytes) > SB_V1_MAXSZ))
 1499                 error = EFBIG;
 1500 
 1501         if (!error && (sb->cursz + nbytes > sb->maxsz)) {
 1502                 /* Need to expand our blob. */
 1503                 sb = stats_realloc(sb, sb->maxsz, sb->cursz + nbytes, M_ZERO);
 1504                 if (sb != NULL) {
 1505                         sb->maxsz = sb->cursz + nbytes;
 1506                         *sbpp = sb;
 1507                 } else
 1508                     error = ENOMEM;
 1509         }
 1510 
 1511         if (!error) {
 1512                 /*
 1513                  * Shuffle memory within the expanded blob working from the end
 1514                  * backwards, leaving gaps for the new voistat and voistatdata
 1515                  * structs at the beginning of their respective blob regions,
 1516                  * and for the new voi structs at the end of their blob region.
 1517                  */
 1518                 memmove(BLOB_OFFSET(sb, sb->statsdata_off + nbytes),
 1519                     BLOB_OFFSET(sb, sb->statsdata_off),
 1520                     sb->cursz - sb->statsdata_off);
 1521                 memmove(BLOB_OFFSET(sb, sb->stats_off + newvoibytes +
 1522                     newvoistatbytes), BLOB_OFFSET(sb, sb->stats_off),
 1523                     sb->statsdata_off - sb->stats_off);
 1524 
 1525                 /* First index of new voi/voistat structs to be initialised. */
 1526                 idxnewvois = NVOIS(sb);
 1527                 idxnewvoistats = (newvoistatbytes / sizeof(struct voistat)) - 1;
 1528 
 1529                 /* Update housekeeping variables and offsets. */
 1530                 sb->cursz += nbytes;
 1531                 sb->stats_off += newvoibytes;
 1532                 sb->statsdata_off += newvoibytes + newvoistatbytes;
 1533 
 1534                 /* XXXLAS: Zeroing not strictly needed but aids debugging. */
 1535                 memset(&sb->vois[idxnewvois], '\0', newvoibytes);
 1536                 memset(BLOB_OFFSET(sb, sb->stats_off), '\0',
 1537                     newvoistatbytes);
 1538                 memset(BLOB_OFFSET(sb, sb->statsdata_off), '\0',
 1539                     newvoistatdatabytes);
 1540 
 1541                 /* Initialise new voi array members and update offsets. */
 1542                 for (i = 0; i < NVOIS(sb); i++) {
 1543                         tmpvoi = &sb->vois[i];
 1544                         if (i >= idxnewvois) {
 1545                                 tmpvoi->id = tmpvoi->voistatmaxid = -1;
 1546                         } else if (tmpvoi->id > -1) {
 1547                                 tmpvoi->stats_off += newvoibytes +
 1548                                     newvoistatbytes;
 1549                         }
 1550                 }
 1551 
 1552                 /* Initialise new voistat array members and update offsets. */
 1553                 nvoistats = (sb->statsdata_off - sb->stats_off) /
 1554                     sizeof(struct voistat);
 1555                 voistat_array = BLOB_OFFSET(sb, sb->stats_off);
 1556                 for (i = 0; i < nvoistats; i++) {
 1557                         tmpvoistat = &voistat_array[i];
 1558                         if (i <= idxnewvoistats) {
 1559                                 tmpvoistat->stype = -1;
 1560                         } else if (tmpvoistat->stype > -1) {
 1561                                 tmpvoistat->data_off += nbytes;
 1562                         }
 1563                 }
 1564         }
 1565 
 1566         return (error);
 1567 }
 1568 
 1569 static void
 1570 stats_v1_blob_finalise(struct statsblobv1 *sb __unused)
 1571 {
 1572 
 1573         /* XXXLAS: Fill this in. */
 1574 }
 1575 
 1576 static void
 1577 stats_v1_blob_iter(struct statsblobv1 *sb, stats_v1_blob_itercb_t icb,
 1578     void *usrctx, uint32_t flags)
 1579 {
 1580         struct voi *v;
 1581         struct voistat *vs;
 1582         struct sb_iter_ctx ctx;
 1583         int i, j, firstvoi;
 1584 
 1585         ctx.usrctx = usrctx;
 1586         ctx.flags = SB_IT_FIRST_CB;
 1587         firstvoi = 1;
 1588 
 1589         for (i = 0; i < NVOIS(sb); i++) {
 1590                 v = &sb->vois[i];
 1591                 ctx.vslot = i;
 1592                 ctx.vsslot = -1;
 1593                 ctx.flags |= SB_IT_FIRST_VOISTAT;
 1594 
 1595                 if (firstvoi)
 1596                         ctx.flags |= SB_IT_FIRST_VOI;
 1597                 else if (i == (NVOIS(sb) - 1))
 1598                         ctx.flags |= SB_IT_LAST_VOI | SB_IT_LAST_CB;
 1599 
 1600                 if (v->id < 0 && (flags & SB_IT_NULLVOI)) {
 1601                         if (icb(sb, v, NULL, &ctx))
 1602                                 return;
 1603                         firstvoi = 0;
 1604                         ctx.flags &= ~SB_IT_FIRST_CB;
 1605                 }
 1606 
 1607                 /* If NULL voi, v->voistatmaxid == -1 */
 1608                 for (j = 0; j <= v->voistatmaxid; j++) {
 1609                         vs = &((struct voistat *)BLOB_OFFSET(sb,
 1610                             v->stats_off))[j];
 1611                         if (vs->stype < 0 &&
 1612                             !(flags & SB_IT_NULLVOISTAT))
 1613                                 continue;
 1614 
 1615                         if (j == v->voistatmaxid) {
 1616                                 ctx.flags |= SB_IT_LAST_VOISTAT;
 1617                                 if (i == (NVOIS(sb) - 1))
 1618                                         ctx.flags |=
 1619                                             SB_IT_LAST_CB;
 1620                         } else
 1621                                 ctx.flags &= ~SB_IT_LAST_CB;
 1622 
 1623                         ctx.vsslot = j;
 1624                         if (icb(sb, v, vs, &ctx))
 1625                                 return;
 1626 
 1627                         ctx.flags &= ~(SB_IT_FIRST_CB | SB_IT_FIRST_VOISTAT |
 1628                             SB_IT_LAST_VOISTAT);
 1629                 }
 1630                 ctx.flags &= ~(SB_IT_FIRST_VOI | SB_IT_LAST_VOI);
 1631         }
 1632 }
 1633 
 1634 static inline void
 1635 stats_voistatdata_tdgst_tostr(enum vsd_dtype voi_dtype __unused,
 1636     const struct voistatdata_tdgst *tdgst, enum vsd_dtype tdgst_dtype,
 1637     size_t tdgst_dsz __unused, enum sb_str_fmt fmt, struct sbuf *buf, int objdump)
 1638 {
 1639         const struct ctdth32 *ctd32tree;
 1640         const struct ctdth64 *ctd64tree;
 1641         const struct voistatdata_tdgstctd32 *ctd32;
 1642         const struct voistatdata_tdgstctd64 *ctd64;
 1643         const char *fmtstr;
 1644         uint64_t smplcnt, compcnt;
 1645         int is32bit, qmaxstrlen;
 1646         uint16_t maxctds, curctds;
 1647 
 1648         switch (tdgst_dtype) {
 1649         case VSD_DTYPE_TDGSTCLUST32:
 1650                 smplcnt = CONSTVSD(tdgstclust32, tdgst)->smplcnt;
 1651                 compcnt = CONSTVSD(tdgstclust32, tdgst)->compcnt;
 1652                 maxctds = ARB_MAXNODES(&CONSTVSD(tdgstclust32, tdgst)->ctdtree);
 1653                 curctds = ARB_CURNODES(&CONSTVSD(tdgstclust32, tdgst)->ctdtree);
 1654                 ctd32tree = &CONSTVSD(tdgstclust32, tdgst)->ctdtree;
 1655                 ctd32 = (objdump ? ARB_CNODE(ctd32tree, 0) :
 1656                     ARB_CMIN(ctdth32, ctd32tree));
 1657                 qmaxstrlen = (ctd32 == NULL) ? 1 : Q_MAXSTRLEN(ctd32->mu, 10);
 1658                 is32bit = 1;
 1659                 ctd64tree = NULL;
 1660                 ctd64 = NULL;
 1661                 break;
 1662         case VSD_DTYPE_TDGSTCLUST64:
 1663                 smplcnt = CONSTVSD(tdgstclust64, tdgst)->smplcnt;
 1664                 compcnt = CONSTVSD(tdgstclust64, tdgst)->compcnt;
 1665                 maxctds = ARB_MAXNODES(&CONSTVSD(tdgstclust64, tdgst)->ctdtree);
 1666                 curctds = ARB_CURNODES(&CONSTVSD(tdgstclust64, tdgst)->ctdtree);
 1667                 ctd64tree = &CONSTVSD(tdgstclust64, tdgst)->ctdtree;
 1668                 ctd64 = (objdump ? ARB_CNODE(ctd64tree, 0) :
 1669                     ARB_CMIN(ctdth64, ctd64tree));
 1670                 qmaxstrlen = (ctd64 == NULL) ? 1 : Q_MAXSTRLEN(ctd64->mu, 10);
 1671                 is32bit = 0;
 1672                 ctd32tree = NULL;
 1673                 ctd32 = NULL;
 1674                 break;
 1675         default:
 1676                 return;
 1677         }
 1678 
 1679         switch (fmt) {
 1680         case SB_STRFMT_FREEFORM:
 1681                 fmtstr = "smplcnt=%ju, compcnt=%ju, maxctds=%hu, nctds=%hu";
 1682                 break;
 1683         case SB_STRFMT_JSON:
 1684         default:
 1685                 fmtstr =
 1686                     "\"smplcnt\":%ju,\"compcnt\":%ju,\"maxctds\":%hu,"
 1687                     "\"nctds\":%hu,\"ctds\":[";
 1688                 break;
 1689         }
 1690         sbuf_printf(buf, fmtstr, (uintmax_t)smplcnt, (uintmax_t)compcnt,
 1691             maxctds, curctds);
 1692 
 1693         while ((is32bit ? NULL != ctd32 : NULL != ctd64)) {
 1694                 char qstr[qmaxstrlen];
 1695 
 1696                 switch (fmt) {
 1697                 case SB_STRFMT_FREEFORM:
 1698                         fmtstr = "\n\t\t\t\t";
 1699                         break;
 1700                 case SB_STRFMT_JSON:
 1701                 default:
 1702                         fmtstr = "{";
 1703                         break;
 1704                 }
 1705                 sbuf_cat(buf, fmtstr);
 1706 
 1707                 if (objdump) {
 1708                         switch (fmt) {
 1709                         case SB_STRFMT_FREEFORM:
 1710                                 fmtstr = "ctd[%hu].";
 1711                                 break;
 1712                         case SB_STRFMT_JSON:
 1713                         default:
 1714                                 fmtstr = "\"ctd\":%hu,";
 1715                                 break;
 1716                         }
 1717                         sbuf_printf(buf, fmtstr, is32bit ?
 1718                             ARB_SELFIDX(ctd32tree, ctd32) :
 1719                             ARB_SELFIDX(ctd64tree, ctd64));
 1720                 }
 1721 
 1722                 switch (fmt) {
 1723                 case SB_STRFMT_FREEFORM:
 1724                         fmtstr = "{mu=";
 1725                         break;
 1726                 case SB_STRFMT_JSON:
 1727                 default:
 1728                         fmtstr = "\"mu\":";
 1729                         break;
 1730                 }
 1731                 sbuf_cat(buf, fmtstr);
 1732                 Q_TOSTR((is32bit ? ctd32->mu : ctd64->mu), -1, 10, qstr,
 1733                     sizeof(qstr));
 1734                 sbuf_cat(buf, qstr);
 1735 
 1736                 switch (fmt) {
 1737                 case SB_STRFMT_FREEFORM:
 1738                         fmtstr = is32bit ? ",cnt=%u}" : ",cnt=%ju}";
 1739                         break;
 1740                 case SB_STRFMT_JSON:
 1741                 default:
 1742                         fmtstr = is32bit ? ",\"cnt\":%u}" : ",\"cnt\":%ju}";
 1743                         break;
 1744                 }
 1745                 sbuf_printf(buf, fmtstr,
 1746                     is32bit ? ctd32->cnt : (uintmax_t)ctd64->cnt);
 1747 
 1748                 if (is32bit)
 1749                         ctd32 = (objdump ? ARB_CNODE(ctd32tree,
 1750                             ARB_SELFIDX(ctd32tree, ctd32) + 1) :
 1751                             ARB_CNEXT(ctdth32, ctd32tree, ctd32));
 1752                 else
 1753                         ctd64 = (objdump ? ARB_CNODE(ctd64tree,
 1754                             ARB_SELFIDX(ctd64tree, ctd64) + 1) :
 1755                             ARB_CNEXT(ctdth64, ctd64tree, ctd64));
 1756 
 1757                 if (fmt == SB_STRFMT_JSON &&
 1758                     (is32bit ? NULL != ctd32 : NULL != ctd64))
 1759                         sbuf_putc(buf, ',');
 1760         }
 1761         if (fmt == SB_STRFMT_JSON)
 1762                 sbuf_cat(buf, "]");
 1763 }
 1764 
 1765 static inline void
 1766 stats_voistatdata_hist_tostr(enum vsd_dtype voi_dtype,
 1767     const struct voistatdata_hist *hist, enum vsd_dtype hist_dtype,
 1768     size_t hist_dsz, enum sb_str_fmt fmt, struct sbuf *buf, int objdump)
 1769 {
 1770         const struct voistatdata_numeric *bkt_lb, *bkt_ub;
 1771         const char *fmtstr;
 1772         int is32bit;
 1773         uint16_t i, nbkts;
 1774 
 1775         switch (hist_dtype) {
 1776         case VSD_DTYPE_CRHIST32:
 1777                 nbkts = HIST_VSDSZ2NBKTS(crhist32, hist_dsz);
 1778                 is32bit = 1;
 1779                 break;
 1780         case VSD_DTYPE_DRHIST32:
 1781                 nbkts = HIST_VSDSZ2NBKTS(drhist32, hist_dsz);
 1782                 is32bit = 1;
 1783                 break;
 1784         case VSD_DTYPE_DVHIST32:
 1785                 nbkts = HIST_VSDSZ2NBKTS(dvhist32, hist_dsz);
 1786                 is32bit = 1;
 1787                 break;
 1788         case VSD_DTYPE_CRHIST64:
 1789                 nbkts = HIST_VSDSZ2NBKTS(crhist64, hist_dsz);
 1790                 is32bit = 0;
 1791                 break;
 1792         case VSD_DTYPE_DRHIST64:
 1793                 nbkts = HIST_VSDSZ2NBKTS(drhist64, hist_dsz);
 1794                 is32bit = 0;
 1795                 break;
 1796         case VSD_DTYPE_DVHIST64:
 1797                 nbkts = HIST_VSDSZ2NBKTS(dvhist64, hist_dsz);
 1798                 is32bit = 0;
 1799                 break;
 1800         default:
 1801                 return;
 1802         }
 1803 
 1804         switch (fmt) {
 1805         case SB_STRFMT_FREEFORM:
 1806                 fmtstr = "nbkts=%hu, ";
 1807                 break;
 1808         case SB_STRFMT_JSON:
 1809         default:
 1810                 fmtstr = "\"nbkts\":%hu,";
 1811                 break;
 1812         }
 1813         sbuf_printf(buf, fmtstr, nbkts);
 1814 
 1815         switch (fmt) {
 1816                 case SB_STRFMT_FREEFORM:
 1817                         fmtstr = (is32bit ? "oob=%u" : "oob=%ju");
 1818                         break;
 1819                 case SB_STRFMT_JSON:
 1820                 default:
 1821                         fmtstr = (is32bit ? "\"oob\":%u,\"bkts\":[" :
 1822                             "\"oob\":%ju,\"bkts\":[");
 1823                         break;
 1824         }
 1825         sbuf_printf(buf, fmtstr, is32bit ? VSD_CONSTHIST_FIELDVAL(hist,
 1826             hist_dtype, oob) : (uintmax_t)VSD_CONSTHIST_FIELDVAL(hist,
 1827             hist_dtype, oob));
 1828 
 1829         for (i = 0; i < nbkts; i++) {
 1830                 switch (hist_dtype) {
 1831                 case VSD_DTYPE_CRHIST32:
 1832                 case VSD_DTYPE_CRHIST64:
 1833                         bkt_lb = VSD_CONSTCRHIST_FIELDPTR(hist, hist_dtype,
 1834                             bkts[i].lb);
 1835                         if (i < nbkts - 1)
 1836                                 bkt_ub = VSD_CONSTCRHIST_FIELDPTR(hist,
 1837                                     hist_dtype, bkts[i + 1].lb);
 1838                         else
 1839                                 bkt_ub = &numeric_limits[LIM_MAX][voi_dtype];
 1840                         break;
 1841                 case VSD_DTYPE_DRHIST32:
 1842                 case VSD_DTYPE_DRHIST64:
 1843                         bkt_lb = VSD_CONSTDRHIST_FIELDPTR(hist, hist_dtype,
 1844                             bkts[i].lb);
 1845                         bkt_ub = VSD_CONSTDRHIST_FIELDPTR(hist, hist_dtype,
 1846                             bkts[i].ub);
 1847                         break;
 1848                 case VSD_DTYPE_DVHIST32:
 1849                 case VSD_DTYPE_DVHIST64:
 1850                         bkt_lb = bkt_ub = VSD_CONSTDVHIST_FIELDPTR(hist,
 1851                             hist_dtype, bkts[i].val);
 1852                         break;
 1853                 default:
 1854                         break;
 1855                 }
 1856 
 1857                 switch (fmt) {
 1858                 case SB_STRFMT_FREEFORM:
 1859                         fmtstr = "\n\t\t\t\t";
 1860                         break;
 1861                 case SB_STRFMT_JSON:
 1862                 default:
 1863                         fmtstr = "{";
 1864                         break;
 1865                 }
 1866                 sbuf_cat(buf, fmtstr);
 1867 
 1868                 if (objdump) {
 1869                         switch (fmt) {
 1870                         case SB_STRFMT_FREEFORM:
 1871                                 fmtstr = "bkt[%hu].";
 1872                                 break;
 1873                         case SB_STRFMT_JSON:
 1874                         default:
 1875                                 fmtstr = "\"bkt\":%hu,";
 1876                                 break;
 1877                         }
 1878                         sbuf_printf(buf, fmtstr, i);
 1879                 }
 1880 
 1881                 switch (fmt) {
 1882                 case SB_STRFMT_FREEFORM:
 1883                         fmtstr = "{lb=";
 1884                         break;
 1885                 case SB_STRFMT_JSON:
 1886                 default:
 1887                         fmtstr = "\"lb\":";
 1888                         break;
 1889                 }
 1890                 sbuf_cat(buf, fmtstr);
 1891                 stats_voistatdata_tostr((const struct voistatdata *)bkt_lb,
 1892                     voi_dtype, voi_dtype, sizeof(struct voistatdata_numeric),
 1893                     fmt, buf, objdump);
 1894 
 1895                 switch (fmt) {
 1896                 case SB_STRFMT_FREEFORM:
 1897                         fmtstr = ",ub=";
 1898                         break;
 1899                 case SB_STRFMT_JSON:
 1900                 default:
 1901                         fmtstr = ",\"ub\":";
 1902                         break;
 1903                 }
 1904                 sbuf_cat(buf, fmtstr);
 1905                 stats_voistatdata_tostr((const struct voistatdata *)bkt_ub,
 1906                     voi_dtype, voi_dtype, sizeof(struct voistatdata_numeric),
 1907                     fmt, buf, objdump);
 1908 
 1909                 switch (fmt) {
 1910                 case SB_STRFMT_FREEFORM:
 1911                         fmtstr = is32bit ? ",cnt=%u}" : ",cnt=%ju}";
 1912                         break;
 1913                 case SB_STRFMT_JSON:
 1914                 default:
 1915                         fmtstr = is32bit ? ",\"cnt\":%u}" : ",\"cnt\":%ju}";
 1916                         break;
 1917                 }
 1918                 sbuf_printf(buf, fmtstr, is32bit ?
 1919                     VSD_CONSTHIST_FIELDVAL(hist, hist_dtype, bkts[i].cnt) :
 1920                     (uintmax_t)VSD_CONSTHIST_FIELDVAL(hist, hist_dtype,
 1921                     bkts[i].cnt));
 1922 
 1923                 if (fmt == SB_STRFMT_JSON && i < nbkts - 1)
 1924                         sbuf_putc(buf, ',');
 1925         }
 1926         if (fmt == SB_STRFMT_JSON)
 1927                 sbuf_cat(buf, "]");
 1928 }
 1929 
 1930 int
 1931 stats_voistatdata_tostr(const struct voistatdata *vsd, enum vsd_dtype voi_dtype,
 1932     enum vsd_dtype vsd_dtype, size_t vsd_sz, enum sb_str_fmt fmt,
 1933     struct sbuf *buf, int objdump)
 1934 {
 1935         const char *fmtstr;
 1936 
 1937         if (vsd == NULL || buf == NULL || voi_dtype >= VSD_NUM_DTYPES ||
 1938             vsd_dtype >= VSD_NUM_DTYPES || fmt >= SB_STRFMT_NUM_FMTS)
 1939                 return (EINVAL);
 1940 
 1941         switch (vsd_dtype) {
 1942         case VSD_DTYPE_VOISTATE:
 1943                 switch (fmt) {
 1944                 case SB_STRFMT_FREEFORM:
 1945                         fmtstr = "prev=";
 1946                         break;
 1947                 case SB_STRFMT_JSON:
 1948                 default:
 1949                         fmtstr = "\"prev\":";
 1950                         break;
 1951                 }
 1952                 sbuf_cat(buf, fmtstr);
 1953                 /*
 1954                  * Render prev by passing it as *vsd and voi_dtype as vsd_dtype.
 1955                  */
 1956                 stats_voistatdata_tostr(
 1957                     (const struct voistatdata *)&CONSTVSD(voistate, vsd)->prev,
 1958                     voi_dtype, voi_dtype, vsd_sz, fmt, buf, objdump);
 1959                 break;
 1960         case VSD_DTYPE_INT_S32:
 1961                 sbuf_printf(buf, "%d", vsd->int32.s32);
 1962                 break;
 1963         case VSD_DTYPE_INT_U32:
 1964                 sbuf_printf(buf, "%u", vsd->int32.u32);
 1965                 break;
 1966         case VSD_DTYPE_INT_S64:
 1967                 sbuf_printf(buf, "%jd", (intmax_t)vsd->int64.s64);
 1968                 break;
 1969         case VSD_DTYPE_INT_U64:
 1970                 sbuf_printf(buf, "%ju", (uintmax_t)vsd->int64.u64);
 1971                 break;
 1972         case VSD_DTYPE_INT_SLONG:
 1973                 sbuf_printf(buf, "%ld", vsd->intlong.slong);
 1974                 break;
 1975         case VSD_DTYPE_INT_ULONG:
 1976                 sbuf_printf(buf, "%lu", vsd->intlong.ulong);
 1977                 break;
 1978         case VSD_DTYPE_Q_S32:
 1979                 {
 1980                 char qstr[Q_MAXSTRLEN(vsd->q32.sq32, 10)];
 1981                 Q_TOSTR((s32q_t)vsd->q32.sq32, -1, 10, qstr, sizeof(qstr));
 1982                 sbuf_cat(buf, qstr);
 1983                 }
 1984                 break;
 1985         case VSD_DTYPE_Q_U32:
 1986                 {
 1987                 char qstr[Q_MAXSTRLEN(vsd->q32.uq32, 10)];
 1988                 Q_TOSTR((u32q_t)vsd->q32.uq32, -1, 10, qstr, sizeof(qstr));
 1989                 sbuf_cat(buf, qstr);
 1990                 }
 1991                 break;
 1992         case VSD_DTYPE_Q_S64:
 1993                 {
 1994                 char qstr[Q_MAXSTRLEN(vsd->q64.sq64, 10)];
 1995                 Q_TOSTR((s64q_t)vsd->q64.sq64, -1, 10, qstr, sizeof(qstr));
 1996                 sbuf_cat(buf, qstr);
 1997                 }
 1998                 break;
 1999         case VSD_DTYPE_Q_U64:
 2000                 {
 2001                 char qstr[Q_MAXSTRLEN(vsd->q64.uq64, 10)];
 2002                 Q_TOSTR((u64q_t)vsd->q64.uq64, -1, 10, qstr, sizeof(qstr));
 2003                 sbuf_cat(buf, qstr);
 2004                 }
 2005                 break;
 2006         case VSD_DTYPE_CRHIST32:
 2007         case VSD_DTYPE_DRHIST32:
 2008         case VSD_DTYPE_DVHIST32:
 2009         case VSD_DTYPE_CRHIST64:
 2010         case VSD_DTYPE_DRHIST64:
 2011         case VSD_DTYPE_DVHIST64:
 2012                 stats_voistatdata_hist_tostr(voi_dtype, CONSTVSD(hist, vsd),
 2013                     vsd_dtype, vsd_sz, fmt, buf, objdump);
 2014                 break;
 2015         case VSD_DTYPE_TDGSTCLUST32:
 2016         case VSD_DTYPE_TDGSTCLUST64:
 2017                 stats_voistatdata_tdgst_tostr(voi_dtype,
 2018                     CONSTVSD(tdgst, vsd), vsd_dtype, vsd_sz, fmt, buf,
 2019                     objdump);
 2020                 break;
 2021         default:
 2022                 break;
 2023         }
 2024 
 2025         return (sbuf_error(buf));
 2026 }
 2027 
 2028 static void
 2029 stats_v1_itercb_tostr_freeform(struct statsblobv1 *sb, struct voi *v,
 2030     struct voistat *vs, struct sb_iter_ctx *ctx)
 2031 {
 2032         struct sb_tostrcb_ctx *sctx;
 2033         struct metablob *tpl_mb;
 2034         struct sbuf *buf;
 2035         void *vsd;
 2036         uint8_t dump;
 2037 
 2038         sctx = ctx->usrctx;
 2039         buf = sctx->buf;
 2040         tpl_mb = sctx->tpl ? sctx->tpl->mb : NULL;
 2041         dump = ((sctx->flags & SB_TOSTR_OBJDUMP) != 0);
 2042 
 2043         if (ctx->flags & SB_IT_FIRST_CB) {
 2044                 sbuf_printf(buf, "struct statsblobv1@%p", sb);
 2045                 if (dump) {
 2046                         sbuf_printf(buf, ", abi=%hhu, endian=%hhu, maxsz=%hu, "
 2047                             "cursz=%hu, created=%jd, lastrst=%jd, flags=0x%04hx, "
 2048                             "stats_off=%hu, statsdata_off=%hu",
 2049                             sb->abi, sb->endian, sb->maxsz, sb->cursz,
 2050                             sb->created, sb->lastrst, sb->flags, sb->stats_off,
 2051                             sb->statsdata_off);
 2052                 }
 2053                 sbuf_printf(buf, ", tplhash=%u", sb->tplhash);
 2054         }
 2055 
 2056         if (ctx->flags & SB_IT_FIRST_VOISTAT) {
 2057                 sbuf_printf(buf, "\n\tvois[%hd]: id=%hd", ctx->vslot, v->id);
 2058                 if (v->id < 0)
 2059                         return;
 2060                 sbuf_printf(buf, ", name=\"%s\"", (tpl_mb == NULL) ? "" :
 2061                     tpl_mb->voi_meta[v->id].name);
 2062                 if (dump)
 2063                     sbuf_printf(buf, ", flags=0x%04hx, dtype=%s, "
 2064                     "voistatmaxid=%hhd, stats_off=%hu", v->flags,
 2065                     vsd_dtype2name[v->dtype], v->voistatmaxid, v->stats_off);
 2066         }
 2067 
 2068         if (!dump && vs->stype <= 0)
 2069                 return;
 2070 
 2071         sbuf_printf(buf, "\n\t\tvois[%hd]stat[%hhd]: stype=", v->id, ctx->vsslot);
 2072         if (vs->stype < 0) {
 2073                 sbuf_printf(buf, "%hhd", vs->stype);
 2074                 return;
 2075         } else
 2076                 sbuf_printf(buf, "%s, errs=%hu", vs_stype2name[vs->stype],
 2077                     vs->errs);
 2078         vsd = BLOB_OFFSET(sb, vs->data_off);
 2079         if (dump)
 2080                 sbuf_printf(buf, ", flags=0x%04x, dtype=%s, dsz=%hu, "
 2081                     "data_off=%hu", vs->flags, vsd_dtype2name[vs->dtype],
 2082                     vs->dsz, vs->data_off);
 2083 
 2084         sbuf_printf(buf, "\n\t\t\tvoistatdata: ");
 2085         stats_voistatdata_tostr(vsd, v->dtype, vs->dtype, vs->dsz,
 2086             sctx->fmt, buf, dump);
 2087 }
 2088 
 2089 static void
 2090 stats_v1_itercb_tostr_json(struct statsblobv1 *sb, struct voi *v, struct voistat *vs,
 2091     struct sb_iter_ctx *ctx)
 2092 {
 2093         struct sb_tostrcb_ctx *sctx;
 2094         struct metablob *tpl_mb;
 2095         struct sbuf *buf;
 2096         const char *fmtstr;
 2097         void *vsd;
 2098         uint8_t dump;
 2099 
 2100         sctx = ctx->usrctx;
 2101         buf = sctx->buf;
 2102         tpl_mb = sctx->tpl ? sctx->tpl->mb : NULL;
 2103         dump = ((sctx->flags & SB_TOSTR_OBJDUMP) != 0);
 2104 
 2105         if (ctx->flags & SB_IT_FIRST_CB) {
 2106                 sbuf_putc(buf, '{');
 2107                 if (dump) {
 2108                         sbuf_printf(buf, "\"abi\":%hhu,\"endian\":%hhu,"
 2109                             "\"maxsz\":%hu,\"cursz\":%hu,\"created\":%jd,"
 2110                             "\"lastrst\":%jd,\"flags\":%hu,\"stats_off\":%hu,"
 2111                             "\"statsdata_off\":%hu,", sb->abi,
 2112                             sb->endian, sb->maxsz, sb->cursz, sb->created,
 2113                             sb->lastrst, sb->flags, sb->stats_off,
 2114                             sb->statsdata_off);
 2115                 }
 2116 
 2117                 if (tpl_mb == NULL)
 2118                         fmtstr = "\"tplname\":%s,\"tplhash\":%u,\"vois\":{";
 2119                 else
 2120                         fmtstr = "\"tplname\":\"%s\",\"tplhash\":%u,\"vois\":{";
 2121 
 2122                 sbuf_printf(buf, fmtstr, tpl_mb ? tpl_mb->tplname : "null",
 2123                     sb->tplhash);
 2124         }
 2125 
 2126         if (ctx->flags & SB_IT_FIRST_VOISTAT) {
 2127                 if (dump) {
 2128                         sbuf_printf(buf, "\"[%d]\":{\"id\":%d", ctx->vslot,
 2129                             v->id);
 2130                         if (v->id < 0) {
 2131                                 sbuf_printf(buf, "},");
 2132                                 return;
 2133                         }
 2134                         
 2135                         if (tpl_mb == NULL)
 2136                                 fmtstr = ",\"name\":%s,\"flags\":%hu,"
 2137                                     "\"dtype\":\"%s\",\"voistatmaxid\":%hhd,"
 2138                                     "\"stats_off\":%hu,";
 2139                         else
 2140                                 fmtstr = ",\"name\":\"%s\",\"flags\":%hu,"
 2141                                     "\"dtype\":\"%s\",\"voistatmaxid\":%hhd,"
 2142                                     "\"stats_off\":%hu,";
 2143 
 2144                         sbuf_printf(buf, fmtstr, tpl_mb ?
 2145                             tpl_mb->voi_meta[v->id].name : "null", v->flags,
 2146                             vsd_dtype2name[v->dtype], v->voistatmaxid,
 2147                             v->stats_off);
 2148                 } else {
 2149                         if (tpl_mb == NULL) {
 2150                                 sbuf_printf(buf, "\"[%hd]\":{", v->id);
 2151                         } else {
 2152                                 sbuf_printf(buf, "\"%s\":{",
 2153                                     tpl_mb->voi_meta[v->id].name);
 2154                         }
 2155                 }
 2156                 sbuf_cat(buf, "\"stats\":{");
 2157         }
 2158 
 2159         vsd = BLOB_OFFSET(sb, vs->data_off);
 2160         if (dump) {
 2161                 sbuf_printf(buf, "\"[%hhd]\":", ctx->vsslot);
 2162                 if (vs->stype < 0) {
 2163                         sbuf_printf(buf, "{\"stype\":-1},");
 2164                         return;
 2165                 }
 2166                 sbuf_printf(buf, "{\"stype\":\"%s\",\"errs\":%hu,\"flags\":%hu,"
 2167                     "\"dtype\":\"%s\",\"data_off\":%hu,\"voistatdata\":{",
 2168                     vs_stype2name[vs->stype], vs->errs, vs->flags,
 2169                     vsd_dtype2name[vs->dtype], vs->data_off);
 2170         } else if (vs->stype > 0) {
 2171                 if (tpl_mb == NULL)
 2172                         sbuf_printf(buf, "\"[%hhd]\":", vs->stype);
 2173                 else
 2174                         sbuf_printf(buf, "\"%s\":", vs_stype2name[vs->stype]);
 2175         } else
 2176                 return;
 2177 
 2178         if ((vs->flags & VS_VSDVALID) || dump) {
 2179                 if (!dump)
 2180                         sbuf_printf(buf, "{\"errs\":%hu,", vs->errs);
 2181                 /* Simple non-compound VSD types need a key. */
 2182                 if (!vsd_compoundtype[vs->dtype])
 2183                         sbuf_cat(buf, "\"val\":");
 2184                 stats_voistatdata_tostr(vsd, v->dtype, vs->dtype, vs->dsz,
 2185                     sctx->fmt, buf, dump);
 2186                 sbuf_cat(buf, dump ? "}}" : "}");
 2187         } else
 2188                 sbuf_cat(buf, dump ? "null}" : "null");
 2189 
 2190         if (ctx->flags & SB_IT_LAST_VOISTAT)
 2191                 sbuf_cat(buf, "}}");
 2192 
 2193         if (ctx->flags & SB_IT_LAST_CB)
 2194                 sbuf_cat(buf, "}}");
 2195         else
 2196                 sbuf_putc(buf, ',');
 2197 }
 2198 
 2199 static int
 2200 stats_v1_itercb_tostr(struct statsblobv1 *sb, struct voi *v, struct voistat *vs,
 2201     struct sb_iter_ctx *ctx)
 2202 {
 2203         struct sb_tostrcb_ctx *sctx;
 2204 
 2205         sctx = ctx->usrctx;
 2206 
 2207         switch (sctx->fmt) {
 2208         case SB_STRFMT_FREEFORM:
 2209                 stats_v1_itercb_tostr_freeform(sb, v, vs, ctx);
 2210                 break;
 2211         case SB_STRFMT_JSON:
 2212                 stats_v1_itercb_tostr_json(sb, v, vs, ctx);
 2213                 break;
 2214         default:
 2215                 break;
 2216         }
 2217 
 2218         return (sbuf_error(sctx->buf));
 2219 }
 2220 
 2221 int
 2222 stats_v1_blob_tostr(struct statsblobv1 *sb, struct sbuf *buf,
 2223     enum sb_str_fmt fmt, uint32_t flags)
 2224 {
 2225         struct sb_tostrcb_ctx sctx;
 2226         uint32_t iflags;
 2227 
 2228         if (sb == NULL || sb->abi != STATS_ABI_V1 || buf == NULL ||
 2229             fmt >= SB_STRFMT_NUM_FMTS)
 2230                 return (EINVAL);
 2231 
 2232         sctx.buf = buf;
 2233         sctx.fmt = fmt;
 2234         sctx.flags = flags;
 2235 
 2236         if (flags & SB_TOSTR_META) {
 2237                 if (stats_tpl_fetch(stats_tpl_fetch_allocid(NULL, sb->tplhash),
 2238                     &sctx.tpl))
 2239                         return (EINVAL);
 2240         } else
 2241                 sctx.tpl = NULL;
 2242 
 2243         iflags = 0;
 2244         if (flags & SB_TOSTR_OBJDUMP)
 2245                 iflags |= (SB_IT_NULLVOI | SB_IT_NULLVOISTAT);
 2246         stats_v1_blob_iter(sb, stats_v1_itercb_tostr, &sctx, iflags);
 2247 
 2248         return (sbuf_error(buf));
 2249 }
 2250 
 2251 static int
 2252 stats_v1_itercb_visit(struct statsblobv1 *sb, struct voi *v,
 2253     struct voistat *vs, struct sb_iter_ctx *ctx)
 2254 {
 2255         struct sb_visitcb_ctx *vctx;
 2256         struct sb_visit sbv;
 2257 
 2258         vctx = ctx->usrctx;
 2259 
 2260         sbv.tplhash = sb->tplhash;
 2261         sbv.voi_id = v->id;
 2262         sbv.voi_dtype = v->dtype;
 2263         sbv.vs_stype = vs->stype;
 2264         sbv.vs_dtype = vs->dtype;
 2265         sbv.vs_dsz = vs->dsz;
 2266         sbv.vs_data = BLOB_OFFSET(sb, vs->data_off);
 2267         sbv.vs_errs = vs->errs;
 2268         sbv.flags = ctx->flags & (SB_IT_FIRST_CB | SB_IT_LAST_CB |
 2269             SB_IT_FIRST_VOI | SB_IT_LAST_VOI | SB_IT_FIRST_VOISTAT |
 2270             SB_IT_LAST_VOISTAT);
 2271 
 2272         return (vctx->cb(&sbv, vctx->usrctx));
 2273 }
 2274 
 2275 int
 2276 stats_v1_blob_visit(struct statsblobv1 *sb, stats_blob_visitcb_t func,
 2277     void *usrctx)
 2278 {
 2279         struct sb_visitcb_ctx vctx;
 2280 
 2281         if (sb == NULL || sb->abi != STATS_ABI_V1 || func == NULL)
 2282                 return (EINVAL);
 2283 
 2284         vctx.cb = func;
 2285         vctx.usrctx = usrctx;
 2286 
 2287         stats_v1_blob_iter(sb, stats_v1_itercb_visit, &vctx, 0);
 2288 
 2289         return (0);
 2290 }
 2291 
 2292 static int
 2293 stats_v1_icb_reset_voistat(struct statsblobv1 *sb, struct voi *v __unused,
 2294     struct voistat *vs, struct sb_iter_ctx *ctx __unused)
 2295 {
 2296         void *vsd;
 2297 
 2298         if (vs->stype == VS_STYPE_VOISTATE)
 2299                 return (0);
 2300 
 2301         vsd = BLOB_OFFSET(sb, vs->data_off);
 2302 
 2303         /* Perform the stat type's default reset action. */
 2304         switch (vs->stype) {
 2305         case VS_STYPE_SUM:
 2306                 switch (vs->dtype) {
 2307                 case VSD_DTYPE_Q_S32:
 2308                         Q_SIFVAL(VSD(q32, vsd)->sq32, 0);
 2309                         break;
 2310                 case VSD_DTYPE_Q_U32:
 2311                         Q_SIFVAL(VSD(q32, vsd)->uq32, 0);
 2312                         break;
 2313                 case VSD_DTYPE_Q_S64:
 2314                         Q_SIFVAL(VSD(q64, vsd)->sq64, 0);
 2315                         break;
 2316                 case VSD_DTYPE_Q_U64:
 2317                         Q_SIFVAL(VSD(q64, vsd)->uq64, 0);
 2318                         break;
 2319                 default:
 2320                         bzero(vsd, vs->dsz);
 2321                         break;
 2322                 }
 2323                 break;
 2324         case VS_STYPE_MAX:
 2325                 switch (vs->dtype) {
 2326                 case VSD_DTYPE_Q_S32:
 2327                         Q_SIFVAL(VSD(q32, vsd)->sq32,
 2328                             Q_IFMINVAL(VSD(q32, vsd)->sq32));
 2329                         break;
 2330                 case VSD_DTYPE_Q_U32:
 2331                         Q_SIFVAL(VSD(q32, vsd)->uq32,
 2332                             Q_IFMINVAL(VSD(q32, vsd)->uq32));
 2333                         break;
 2334                 case VSD_DTYPE_Q_S64:
 2335                         Q_SIFVAL(VSD(q64, vsd)->sq64,
 2336                             Q_IFMINVAL(VSD(q64, vsd)->sq64));
 2337                         break;
 2338                 case VSD_DTYPE_Q_U64:
 2339                         Q_SIFVAL(VSD(q64, vsd)->uq64,
 2340                             Q_IFMINVAL(VSD(q64, vsd)->uq64));
 2341                         break;
 2342                 default:
 2343                         memcpy(vsd, &numeric_limits[LIM_MIN][vs->dtype],
 2344                             vs->dsz);
 2345                         break;
 2346                 }
 2347                 break;
 2348         case VS_STYPE_MIN:
 2349                 switch (vs->dtype) {
 2350                 case VSD_DTYPE_Q_S32:
 2351                         Q_SIFVAL(VSD(q32, vsd)->sq32,
 2352                             Q_IFMAXVAL(VSD(q32, vsd)->sq32));
 2353                         break;
 2354                 case VSD_DTYPE_Q_U32:
 2355                         Q_SIFVAL(VSD(q32, vsd)->uq32,
 2356                             Q_IFMAXVAL(VSD(q32, vsd)->uq32));
 2357                         break;
 2358                 case VSD_DTYPE_Q_S64:
 2359                         Q_SIFVAL(VSD(q64, vsd)->sq64,
 2360                             Q_IFMAXVAL(VSD(q64, vsd)->sq64));
 2361                         break;
 2362                 case VSD_DTYPE_Q_U64:
 2363                         Q_SIFVAL(VSD(q64, vsd)->uq64,
 2364                             Q_IFMAXVAL(VSD(q64, vsd)->uq64));
 2365                         break;
 2366                 default:
 2367                         memcpy(vsd, &numeric_limits[LIM_MAX][vs->dtype],
 2368                             vs->dsz);
 2369                         break;
 2370                 }
 2371                 break;
 2372         case VS_STYPE_HIST:
 2373                 {
 2374                 /* Reset bucket counts. */
 2375                 struct voistatdata_hist *hist;
 2376                 int i, is32bit;
 2377                 uint16_t nbkts;
 2378 
 2379                 hist = VSD(hist, vsd);
 2380                 switch (vs->dtype) {
 2381                 case VSD_DTYPE_CRHIST32:
 2382                         nbkts = HIST_VSDSZ2NBKTS(crhist32, vs->dsz);
 2383                         is32bit = 1;
 2384                         break;
 2385                 case VSD_DTYPE_DRHIST32:
 2386                         nbkts = HIST_VSDSZ2NBKTS(drhist32, vs->dsz);
 2387                         is32bit = 1;
 2388                         break;
 2389                 case VSD_DTYPE_DVHIST32:
 2390                         nbkts = HIST_VSDSZ2NBKTS(dvhist32, vs->dsz);
 2391                         is32bit = 1;
 2392                         break;
 2393                 case VSD_DTYPE_CRHIST64:
 2394                         nbkts = HIST_VSDSZ2NBKTS(crhist64, vs->dsz);
 2395                         is32bit = 0;
 2396                         break;
 2397                 case VSD_DTYPE_DRHIST64:
 2398                         nbkts = HIST_VSDSZ2NBKTS(drhist64, vs->dsz);
 2399                         is32bit = 0;
 2400                         break;
 2401                 case VSD_DTYPE_DVHIST64:
 2402                         nbkts = HIST_VSDSZ2NBKTS(dvhist64, vs->dsz);
 2403                         is32bit = 0;
 2404                         break;
 2405                 default:
 2406                         return (0);
 2407                 }
 2408 
 2409                 bzero(VSD_HIST_FIELDPTR(hist, vs->dtype, oob),
 2410                     is32bit ? sizeof(uint32_t) : sizeof(uint64_t));
 2411                 for (i = nbkts - 1; i >= 0; i--) {
 2412                         bzero(VSD_HIST_FIELDPTR(hist, vs->dtype,
 2413                             bkts[i].cnt), is32bit ? sizeof(uint32_t) :
 2414                             sizeof(uint64_t));
 2415                 }
 2416                 break;
 2417                 }
 2418         case VS_STYPE_TDGST:
 2419                 {
 2420                 /* Reset sample count centroids array/tree. */
 2421                 struct voistatdata_tdgst *tdgst;
 2422                 struct ctdth32 *ctd32tree;
 2423                 struct ctdth64 *ctd64tree;
 2424                 struct voistatdata_tdgstctd32 *ctd32;
 2425                 struct voistatdata_tdgstctd64 *ctd64;
 2426 
 2427                 tdgst = VSD(tdgst, vsd);
 2428                 switch (vs->dtype) {
 2429                 case VSD_DTYPE_TDGSTCLUST32:
 2430                         VSD(tdgstclust32, tdgst)->smplcnt = 0;
 2431                         VSD(tdgstclust32, tdgst)->compcnt = 0;
 2432                         ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
 2433                         ARB_INIT(ctd32, ctdlnk, ctd32tree,
 2434                             ARB_MAXNODES(ctd32tree)) {
 2435                                 ctd32->cnt = 0;
 2436                                 Q_SIFVAL(ctd32->mu, 0);
 2437                         }
 2438 #ifdef DIAGNOSTIC
 2439                         RB_INIT(&VSD(tdgstclust32, tdgst)->rbctdtree);
 2440 #endif
 2441                 break;
 2442                 case VSD_DTYPE_TDGSTCLUST64:
 2443                         VSD(tdgstclust64, tdgst)->smplcnt = 0;
 2444                         VSD(tdgstclust64, tdgst)->compcnt = 0;
 2445                         ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
 2446                         ARB_INIT(ctd64, ctdlnk, ctd64tree,
 2447                             ARB_MAXNODES(ctd64tree)) {
 2448                                 ctd64->cnt = 0;
 2449                                 Q_SIFVAL(ctd64->mu, 0);
 2450                         }
 2451 #ifdef DIAGNOSTIC
 2452                         RB_INIT(&VSD(tdgstclust64, tdgst)->rbctdtree);
 2453 #endif
 2454                 break;
 2455                 default:
 2456                         return (0);
 2457                 }
 2458                 break;
 2459                 }
 2460         default:
 2461                 KASSERT(0, ("Unknown VOI stat type %d", vs->stype));
 2462                 break;
 2463         }
 2464 
 2465         vs->errs = 0;
 2466         vs->flags &= ~VS_VSDVALID;
 2467 
 2468         return (0);
 2469 }
 2470 
 2471 int
 2472 stats_v1_blob_snapshot(struct statsblobv1 **dst, size_t dstmaxsz,
 2473     struct statsblobv1 *src, uint32_t flags)
 2474 {
 2475         int error;
 2476 
 2477         if (src != NULL && src->abi == STATS_ABI_V1) {
 2478                 error = stats_v1_blob_clone(dst, dstmaxsz, src, flags);
 2479                 if (!error) {
 2480                         if (flags & SB_CLONE_RSTSRC) {
 2481                                 stats_v1_blob_iter(src,
 2482                                     stats_v1_icb_reset_voistat, NULL, 0);
 2483                                 src->lastrst = stats_sbinuptime();
 2484                         }
 2485                         stats_v1_blob_finalise(*dst);
 2486                 }
 2487         } else
 2488                 error = EINVAL;
 2489 
 2490         return (error);
 2491 }
 2492 
 2493 static inline int
 2494 stats_v1_voi_update_max(enum vsd_dtype voi_dtype __unused,
 2495     struct voistatdata *voival, struct voistat *vs, void *vsd)
 2496 {
 2497         int error;
 2498 
 2499         KASSERT(vs->dtype < VSD_NUM_DTYPES,
 2500             ("Unknown VSD dtype %d", vs->dtype));
 2501 
 2502         error = 0;
 2503 
 2504         switch (vs->dtype) {
 2505         case VSD_DTYPE_INT_S32:
 2506                 if (VSD(int32, vsd)->s32 < voival->int32.s32) {
 2507                         VSD(int32, vsd)->s32 = voival->int32.s32;
 2508                         vs->flags |= VS_VSDVALID;
 2509                 }
 2510                 break;
 2511         case VSD_DTYPE_INT_U32:
 2512                 if (VSD(int32, vsd)->u32 < voival->int32.u32) {
 2513                         VSD(int32, vsd)->u32 = voival->int32.u32;
 2514                         vs->flags |= VS_VSDVALID;
 2515                 }
 2516                 break;
 2517         case VSD_DTYPE_INT_S64:
 2518                 if (VSD(int64, vsd)->s64 < voival->int64.s64) {
 2519                         VSD(int64, vsd)->s64 = voival->int64.s64;
 2520                         vs->flags |= VS_VSDVALID;
 2521                 }
 2522                 break;
 2523         case VSD_DTYPE_INT_U64:
 2524                 if (VSD(int64, vsd)->u64 < voival->int64.u64) {
 2525                         VSD(int64, vsd)->u64 = voival->int64.u64;
 2526                         vs->flags |= VS_VSDVALID;
 2527                 }
 2528                 break;
 2529         case VSD_DTYPE_INT_SLONG:
 2530                 if (VSD(intlong, vsd)->slong < voival->intlong.slong) {
 2531                         VSD(intlong, vsd)->slong = voival->intlong.slong;
 2532                         vs->flags |= VS_VSDVALID;
 2533                 }
 2534                 break;
 2535         case VSD_DTYPE_INT_ULONG:
 2536                 if (VSD(intlong, vsd)->ulong < voival->intlong.ulong) {
 2537                         VSD(intlong, vsd)->ulong = voival->intlong.ulong;
 2538                         vs->flags |= VS_VSDVALID;
 2539                 }
 2540                 break;
 2541         case VSD_DTYPE_Q_S32:
 2542                 if (Q_QLTQ(VSD(q32, vsd)->sq32, voival->q32.sq32) &&
 2543                     (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->sq32,
 2544                     voival->q32.sq32)))) {
 2545                         vs->flags |= VS_VSDVALID;
 2546                 }
 2547                 break;
 2548         case VSD_DTYPE_Q_U32:
 2549                 if (Q_QLTQ(VSD(q32, vsd)->uq32, voival->q32.uq32) &&
 2550                     (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->uq32,
 2551                     voival->q32.uq32)))) {
 2552                         vs->flags |= VS_VSDVALID;
 2553                 }
 2554                 break;
 2555         case VSD_DTYPE_Q_S64:
 2556                 if (Q_QLTQ(VSD(q64, vsd)->sq64, voival->q64.sq64) &&
 2557                     (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->sq64,
 2558                     voival->q64.sq64)))) {
 2559                         vs->flags |= VS_VSDVALID;
 2560                 }
 2561                 break;
 2562         case VSD_DTYPE_Q_U64:
 2563                 if (Q_QLTQ(VSD(q64, vsd)->uq64, voival->q64.uq64) &&
 2564                     (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->uq64,
 2565                     voival->q64.uq64)))) {
 2566                         vs->flags |= VS_VSDVALID;
 2567                 }
 2568                 break;
 2569         default:
 2570                 error = EINVAL;
 2571                 break;
 2572         }
 2573 
 2574         return (error);
 2575 }
 2576 
 2577 static inline int
 2578 stats_v1_voi_update_min(enum vsd_dtype voi_dtype __unused,
 2579     struct voistatdata *voival, struct voistat *vs, void *vsd)
 2580 {
 2581         int error;
 2582 
 2583         KASSERT(vs->dtype < VSD_NUM_DTYPES,
 2584             ("Unknown VSD dtype %d", vs->dtype));
 2585 
 2586         error = 0;
 2587 
 2588         switch (vs->dtype) {
 2589         case VSD_DTYPE_INT_S32:
 2590                 if (VSD(int32, vsd)->s32 > voival->int32.s32) {
 2591                         VSD(int32, vsd)->s32 = voival->int32.s32;
 2592                         vs->flags |= VS_VSDVALID;
 2593                 }
 2594                 break;
 2595         case VSD_DTYPE_INT_U32:
 2596                 if (VSD(int32, vsd)->u32 > voival->int32.u32) {
 2597                         VSD(int32, vsd)->u32 = voival->int32.u32;
 2598                         vs->flags |= VS_VSDVALID;
 2599                 }
 2600                 break;
 2601         case VSD_DTYPE_INT_S64:
 2602                 if (VSD(int64, vsd)->s64 > voival->int64.s64) {
 2603                         VSD(int64, vsd)->s64 = voival->int64.s64;
 2604                         vs->flags |= VS_VSDVALID;
 2605                 }
 2606                 break;
 2607         case VSD_DTYPE_INT_U64:
 2608                 if (VSD(int64, vsd)->u64 > voival->int64.u64) {
 2609                         VSD(int64, vsd)->u64 = voival->int64.u64;
 2610                         vs->flags |= VS_VSDVALID;
 2611                 }
 2612                 break;
 2613         case VSD_DTYPE_INT_SLONG:
 2614                 if (VSD(intlong, vsd)->slong > voival->intlong.slong) {
 2615                         VSD(intlong, vsd)->slong = voival->intlong.slong;
 2616                         vs->flags |= VS_VSDVALID;
 2617                 }
 2618                 break;
 2619         case VSD_DTYPE_INT_ULONG:
 2620                 if (VSD(intlong, vsd)->ulong > voival->intlong.ulong) {
 2621                         VSD(intlong, vsd)->ulong = voival->intlong.ulong;
 2622                         vs->flags |= VS_VSDVALID;
 2623                 }
 2624                 break;
 2625         case VSD_DTYPE_Q_S32:
 2626                 if (Q_QGTQ(VSD(q32, vsd)->sq32, voival->q32.sq32) &&
 2627                     (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->sq32,
 2628                     voival->q32.sq32)))) {
 2629                         vs->flags |= VS_VSDVALID;
 2630                 }
 2631                 break;
 2632         case VSD_DTYPE_Q_U32:
 2633                 if (Q_QGTQ(VSD(q32, vsd)->uq32, voival->q32.uq32) &&
 2634                     (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->uq32,
 2635                     voival->q32.uq32)))) {
 2636                         vs->flags |= VS_VSDVALID;
 2637                 }
 2638                 break;
 2639         case VSD_DTYPE_Q_S64:
 2640                 if (Q_QGTQ(VSD(q64, vsd)->sq64, voival->q64.sq64) &&
 2641                     (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->sq64,
 2642                     voival->q64.sq64)))) {
 2643                         vs->flags |= VS_VSDVALID;
 2644                 }
 2645                 break;
 2646         case VSD_DTYPE_Q_U64:
 2647                 if (Q_QGTQ(VSD(q64, vsd)->uq64, voival->q64.uq64) &&
 2648                     (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->uq64,
 2649                     voival->q64.uq64)))) {
 2650                         vs->flags |= VS_VSDVALID;
 2651                 }
 2652                 break;
 2653         default:
 2654                 error = EINVAL;
 2655                 break;
 2656         }
 2657 
 2658         return (error);
 2659 }
 2660 
 2661 static inline int
 2662 stats_v1_voi_update_sum(enum vsd_dtype voi_dtype __unused,
 2663     struct voistatdata *voival, struct voistat *vs, void *vsd)
 2664 {
 2665         int error;
 2666 
 2667         KASSERT(vs->dtype < VSD_NUM_DTYPES,
 2668             ("Unknown VSD dtype %d", vs->dtype));
 2669 
 2670         error = 0;
 2671 
 2672         switch (vs->dtype) {
 2673         case VSD_DTYPE_INT_S32:
 2674                 VSD(int32, vsd)->s32 += voival->int32.s32;
 2675                 break;
 2676         case VSD_DTYPE_INT_U32:
 2677                 VSD(int32, vsd)->u32 += voival->int32.u32;
 2678                 break;
 2679         case VSD_DTYPE_INT_S64:
 2680                 VSD(int64, vsd)->s64 += voival->int64.s64;
 2681                 break;
 2682         case VSD_DTYPE_INT_U64:
 2683                 VSD(int64, vsd)->u64 += voival->int64.u64;
 2684                 break;
 2685         case VSD_DTYPE_INT_SLONG:
 2686                 VSD(intlong, vsd)->slong += voival->intlong.slong;
 2687                 break;
 2688         case VSD_DTYPE_INT_ULONG:
 2689                 VSD(intlong, vsd)->ulong += voival->intlong.ulong;
 2690                 break;
 2691         case VSD_DTYPE_Q_S32:
 2692                 error = Q_QADDQ(&VSD(q32, vsd)->sq32, voival->q32.sq32);
 2693                 break;
 2694         case VSD_DTYPE_Q_U32:
 2695                 error = Q_QADDQ(&VSD(q32, vsd)->uq32, voival->q32.uq32);
 2696                 break;
 2697         case VSD_DTYPE_Q_S64:
 2698                 error = Q_QADDQ(&VSD(q64, vsd)->sq64, voival->q64.sq64);
 2699                 break;
 2700         case VSD_DTYPE_Q_U64:
 2701                 error = Q_QADDQ(&VSD(q64, vsd)->uq64, voival->q64.uq64);
 2702                 break;
 2703         default:
 2704                 error = EINVAL;
 2705                 break;
 2706         }
 2707 
 2708         if (!error)
 2709                 vs->flags |= VS_VSDVALID;
 2710 
 2711         return (error);
 2712 }
 2713 
 2714 static inline int
 2715 stats_v1_voi_update_hist(enum vsd_dtype voi_dtype, struct voistatdata *voival,
 2716     struct voistat *vs, struct voistatdata_hist *hist)
 2717 {
 2718         struct voistatdata_numeric *bkt_lb, *bkt_ub;
 2719         uint64_t *oob64, *cnt64;
 2720         uint32_t *oob32, *cnt32;
 2721         int error, i, found, is32bit, has_ub, eq_only;
 2722 
 2723         error = 0;
 2724 
 2725         switch (vs->dtype) {
 2726         case VSD_DTYPE_CRHIST32:
 2727                 i = HIST_VSDSZ2NBKTS(crhist32, vs->dsz);
 2728                 is32bit = 1;
 2729                 has_ub = eq_only = 0;
 2730                 oob32 = &VSD(crhist32, hist)->oob;
 2731                 break;
 2732         case VSD_DTYPE_DRHIST32:
 2733                 i = HIST_VSDSZ2NBKTS(drhist32, vs->dsz);
 2734                 is32bit = has_ub = 1;
 2735                 eq_only = 0;
 2736                 oob32 = &VSD(drhist32, hist)->oob;
 2737                 break;
 2738         case VSD_DTYPE_DVHIST32:
 2739                 i = HIST_VSDSZ2NBKTS(dvhist32, vs->dsz);
 2740                 is32bit = eq_only = 1;
 2741                 has_ub = 0;
 2742                 oob32 = &VSD(dvhist32, hist)->oob;
 2743                 break;
 2744         case VSD_DTYPE_CRHIST64:
 2745                 i = HIST_VSDSZ2NBKTS(crhist64, vs->dsz);
 2746                 is32bit = has_ub = eq_only = 0;
 2747                 oob64 = &VSD(crhist64, hist)->oob;
 2748                 break;
 2749         case VSD_DTYPE_DRHIST64:
 2750                 i = HIST_VSDSZ2NBKTS(drhist64, vs->dsz);
 2751                 is32bit = eq_only = 0;
 2752                 has_ub = 1;
 2753                 oob64 = &VSD(drhist64, hist)->oob;
 2754                 break;
 2755         case VSD_DTYPE_DVHIST64:
 2756                 i = HIST_VSDSZ2NBKTS(dvhist64, vs->dsz);
 2757                 is32bit = has_ub = 0;
 2758                 eq_only = 1;
 2759                 oob64 = &VSD(dvhist64, hist)->oob;
 2760                 break;
 2761         default:
 2762                 return (EINVAL);
 2763         }
 2764         i--; /* Adjust for 0-based array index. */
 2765 
 2766         /* XXXLAS: Should probably use a better bucket search algorithm. ARB? */
 2767         for (found = 0; i >= 0 && !found; i--) {
 2768                 switch (vs->dtype) {
 2769                 case VSD_DTYPE_CRHIST32:
 2770                         bkt_lb = &VSD(crhist32, hist)->bkts[i].lb;
 2771                         cnt32 = &VSD(crhist32, hist)->bkts[i].cnt;
 2772                         break;
 2773                 case VSD_DTYPE_DRHIST32:
 2774                         bkt_lb = &VSD(drhist32, hist)->bkts[i].lb;
 2775                         bkt_ub = &VSD(drhist32, hist)->bkts[i].ub;
 2776                         cnt32 = &VSD(drhist32, hist)->bkts[i].cnt;
 2777                         break;
 2778                 case VSD_DTYPE_DVHIST32:
 2779                         bkt_lb = &VSD(dvhist32, hist)->bkts[i].val;
 2780                         cnt32 = &VSD(dvhist32, hist)->bkts[i].cnt;
 2781                         break;
 2782                 case VSD_DTYPE_CRHIST64:
 2783                         bkt_lb = &VSD(crhist64, hist)->bkts[i].lb;
 2784                         cnt64 = &VSD(crhist64, hist)->bkts[i].cnt;
 2785                         break;
 2786                 case VSD_DTYPE_DRHIST64:
 2787                         bkt_lb = &VSD(drhist64, hist)->bkts[i].lb;
 2788                         bkt_ub = &VSD(drhist64, hist)->bkts[i].ub;
 2789                         cnt64 = &VSD(drhist64, hist)->bkts[i].cnt;
 2790                         break;
 2791                 case VSD_DTYPE_DVHIST64:
 2792                         bkt_lb = &VSD(dvhist64, hist)->bkts[i].val;
 2793                         cnt64 = &VSD(dvhist64, hist)->bkts[i].cnt;
 2794                         break;
 2795                 default:
 2796                         return (EINVAL);
 2797                 }
 2798 
 2799                 switch (voi_dtype) {
 2800                 case VSD_DTYPE_INT_S32:
 2801                         if (voival->int32.s32 >= bkt_lb->int32.s32) {
 2802                                 if ((eq_only && voival->int32.s32 ==
 2803                                     bkt_lb->int32.s32) ||
 2804                                     (!eq_only && (!has_ub ||
 2805                                     voival->int32.s32 < bkt_ub->int32.s32)))
 2806                                         found = 1;
 2807                         }
 2808                         break;
 2809                 case VSD_DTYPE_INT_U32:
 2810                         if (voival->int32.u32 >= bkt_lb->int32.u32) {
 2811                                 if ((eq_only && voival->int32.u32 ==
 2812                                     bkt_lb->int32.u32) ||
 2813                                     (!eq_only && (!has_ub ||
 2814                                     voival->int32.u32 < bkt_ub->int32.u32)))
 2815                                         found = 1;
 2816                         }
 2817                         break;
 2818                 case VSD_DTYPE_INT_S64:
 2819                         if (voival->int64.s64 >= bkt_lb->int64.s64)
 2820                                 if ((eq_only && voival->int64.s64 ==
 2821                                     bkt_lb->int64.s64) ||
 2822                                     (!eq_only && (!has_ub ||
 2823                                     voival->int64.s64 < bkt_ub->int64.s64)))
 2824                                         found = 1;
 2825                         break;
 2826                 case VSD_DTYPE_INT_U64:
 2827                         if (voival->int64.u64 >= bkt_lb->int64.u64)
 2828                                 if ((eq_only && voival->int64.u64 ==
 2829                                     bkt_lb->int64.u64) ||
 2830                                     (!eq_only && (!has_ub ||
 2831                                     voival->int64.u64 < bkt_ub->int64.u64)))
 2832                                         found = 1;
 2833                         break;
 2834                 case VSD_DTYPE_INT_SLONG:
 2835                         if (voival->intlong.slong >= bkt_lb->intlong.slong)
 2836                                 if ((eq_only && voival->intlong.slong ==
 2837                                     bkt_lb->intlong.slong) ||
 2838                                     (!eq_only && (!has_ub ||
 2839                                     voival->intlong.slong <
 2840                                     bkt_ub->intlong.slong)))
 2841                                         found = 1;
 2842                         break;
 2843                 case VSD_DTYPE_INT_ULONG:
 2844                         if (voival->intlong.ulong >= bkt_lb->intlong.ulong)
 2845                                 if ((eq_only && voival->intlong.ulong ==
 2846                                     bkt_lb->intlong.ulong) ||
 2847                                     (!eq_only && (!has_ub ||
 2848                                     voival->intlong.ulong <
 2849                                     bkt_ub->intlong.ulong)))
 2850                                         found = 1;
 2851                         break;
 2852                 case VSD_DTYPE_Q_S32:
 2853                         if (Q_QGEQ(voival->q32.sq32, bkt_lb->q32.sq32))
 2854                                 if ((eq_only && Q_QEQ(voival->q32.sq32,
 2855                                     bkt_lb->q32.sq32)) ||
 2856                                     (!eq_only && (!has_ub ||
 2857                                     Q_QLTQ(voival->q32.sq32,
 2858                                     bkt_ub->q32.sq32))))
 2859                                         found = 1;
 2860                         break;
 2861                 case VSD_DTYPE_Q_U32:
 2862                         if (Q_QGEQ(voival->q32.uq32, bkt_lb->q32.uq32))
 2863                                 if ((eq_only && Q_QEQ(voival->q32.uq32,
 2864                                     bkt_lb->q32.uq32)) ||
 2865                                     (!eq_only && (!has_ub ||
 2866                                     Q_QLTQ(voival->q32.uq32,
 2867                                     bkt_ub->q32.uq32))))
 2868                                         found = 1;
 2869                         break;
 2870                 case VSD_DTYPE_Q_S64:
 2871                         if (Q_QGEQ(voival->q64.sq64, bkt_lb->q64.sq64))
 2872                                 if ((eq_only && Q_QEQ(voival->q64.sq64,
 2873                                     bkt_lb->q64.sq64)) ||
 2874                                     (!eq_only && (!has_ub ||
 2875                                     Q_QLTQ(voival->q64.sq64,
 2876                                     bkt_ub->q64.sq64))))
 2877                                         found = 1;
 2878                         break;
 2879                 case VSD_DTYPE_Q_U64:
 2880                         if (Q_QGEQ(voival->q64.uq64, bkt_lb->q64.uq64))
 2881                                 if ((eq_only && Q_QEQ(voival->q64.uq64,
 2882                                     bkt_lb->q64.uq64)) ||
 2883                                     (!eq_only && (!has_ub ||
 2884                                     Q_QLTQ(voival->q64.uq64,
 2885                                     bkt_ub->q64.uq64))))
 2886                                         found = 1;
 2887                         break;
 2888                 default:
 2889                         break;
 2890                 }
 2891         }
 2892 
 2893         if (found) {
 2894                 if (is32bit)
 2895                         *cnt32 += 1;
 2896                 else
 2897                         *cnt64 += 1;
 2898         } else {
 2899                 if (is32bit)
 2900                         *oob32 += 1;
 2901                 else
 2902                         *oob64 += 1;
 2903         }
 2904 
 2905         vs->flags |= VS_VSDVALID;
 2906         return (error);
 2907 }
 2908 
 2909 static inline int
 2910 stats_v1_vsd_tdgst_compress(enum vsd_dtype vs_dtype,
 2911     struct voistatdata_tdgst *tdgst, int attempt)
 2912 {
              /*
               * Compress a t-digest whose centroid ARB is full: reset the tree,
               * place every centroid on the ARB free list in pseudorandom order,
               * then walk that list and re-add each centroid's mu/cnt through
               * stats_v1_vsd_tdgst_add().
               *
               * vs_dtype selects the 32 or 64 bit centroid layout of 'tdgst'.
               * 'attempt' is handed unchanged to stats_v1_vsd_tdgst_add().
               *
               * Returns 0 if the tree is not full (nothing is touched) or if the
               * rebuild completed without error, EINVAL for any other vs_dtype,
               * otherwise the most recent non-zero error seen while re-adding.
               */
 2913         struct ctdth32 *ctd32tree;
 2914         struct ctdth64 *ctd64tree;
 2915         struct voistatdata_tdgstctd32 *ctd32;
 2916         struct voistatdata_tdgstctd64 *ctd64;
 2917         uint64_t ebits, idxmask;
 2918         uint32_t bitsperidx, nebits;
 2919         int error, idx, is32bit, maxctds, remctds, tmperr;
 2920 
 2921         error = 0;
 2922 
              /*
               * Per-width setup. compcnt is bumped once per compression and
               * smplcnt restarts at 0; the re-add pass below goes through
               * stats_v1_vsd_tdgst_add(), which adds each weight back to smplcnt.
               */
 2923         switch (vs_dtype) {
 2924         case VSD_DTYPE_TDGSTCLUST32:
 2925                 ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
 2926                 if (!ARB_FULL(ctd32tree))
 2927                         return (0);
 2928                 VSD(tdgstclust32, tdgst)->compcnt++;
 2929                 maxctds = remctds = ARB_MAXNODES(ctd32tree);
 2930                 ARB_RESET_TREE(ctd32tree, ctdth32, maxctds);
 2931                 VSD(tdgstclust32, tdgst)->smplcnt = 0;
 2932                 is32bit = 1;
 2933                 ctd64tree = NULL;
 2934                 ctd64 = NULL;
 2935 #ifdef DIAGNOSTIC
                      /* Reset the shadow RB tree kept alongside the ARB as well. */
 2936                 RB_INIT(&VSD(tdgstclust32, tdgst)->rbctdtree);
 2937 #endif
 2938                 break;
 2939         case VSD_DTYPE_TDGSTCLUST64:
 2940                 ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
 2941                 if (!ARB_FULL(ctd64tree))
 2942                         return (0);
 2943                 VSD(tdgstclust64, tdgst)->compcnt++;
 2944                 maxctds = remctds = ARB_MAXNODES(ctd64tree);
 2945                 ARB_RESET_TREE(ctd64tree, ctdth64, maxctds);
 2946                 VSD(tdgstclust64, tdgst)->smplcnt = 0;
 2947                 is32bit = 0;
 2948                 ctd32tree = NULL;
 2949                 ctd32 = NULL;
 2950 #ifdef DIAGNOSTIC
                      /* Reset the shadow RB tree kept alongside the ARB as well. */
 2951                 RB_INIT(&VSD(tdgstclust64, tdgst)->rbctdtree);
 2952 #endif
 2953                 break;
 2954         default:
 2955                 return (EINVAL);
 2956         }
 2957 
 2958         /*
 2959          * Rebuild the t-digest ARB by pseudorandomly selecting centroids and
 2960          * re-inserting the mu/cnt of each as a value and corresponding weight.
 2961          */
 2962 
 2963         /*
 2964          * XXXCEM: random(9) is currently rand(3), not random(3).  rand(3)
 2965          * RAND_MAX happens to be approximately 31 bits (range [0,
 2966          * 0x7ffffffd]), so the math kinda works out.  When/if this portion of
 2967          * the code is compiled in userspace, it gets the random(3) behavior,
 2968          * which has expected range [0, 0x7fffffff].
 2969          */
              /*
               * NOTE(review): bitsperrand is #defined mid-function and no matching
               * #undef is visible in this function.
               */
 2970 #define bitsperrand 31
 2971         ebits = 0;
 2972         nebits = 0;
              /*
               * fls() yields the bit length of maxctds, so an index drawn with
               * idxmask can be >= maxctds; such picks are folded back with a
               * modulo in the selection loop below.
               */
 2973         bitsperidx = fls(maxctds);
 2974         KASSERT(bitsperidx <= sizeof(ebits) << 3,
 2975             ("%s: bitsperidx=%d, ebits=%d",
 2976             __func__, bitsperidx, (int)(sizeof(ebits) << 3)));
 2977         idxmask = (UINT64_C(1) << bitsperidx) - 1;
 2978 
 2979         /* Initialise the free list with randomised centroid indices. */
 2980         for (; remctds > 0; remctds--) {
                      /*
                       * Top up the entropy pool 'ebits' with random() output until
                       * it holds at least bitsperidx bits. nebits tracks how many
                       * pool bits are counted as valid and is capped at the width
                       * of ebits.
                       */
 2981                 while (nebits < bitsperidx) {
 2982                         ebits |= ((uint64_t)random()) << nebits;
 2983                         nebits += bitsperrand;
 2984                         if (nebits > (sizeof(ebits) << 3))
 2985                                 nebits = sizeof(ebits) << 3;
 2986                 }
                      /* Consume bitsperidx bits of the pool as the starting index. */
 2987                 idx = ebits & idxmask;
 2988                 nebits -= bitsperidx;
 2989                 ebits >>= bitsperidx;
 2990 
 2991                 /*
 2992                  * Select the next centroid to put on the ARB free list. We
 2993                  * start with the centroid at our randomly selected array index,
 2994                  * and work our way forwards until finding one (the latter
 2995                  * aspect reduces re-insertion randomness, but is good enough).
 2996                  */
                      /*
                       * ++idx in the loop condition advances the probe to the next
                       * slot. It is non-zero for the non-negative indices used
                       * here, so the loop ends only when a centroid that is not yet
                       * on the free list is found.
                       */
 2997                 do {
 2998                         if (idx >= maxctds)
 2999                                 idx %= maxctds;
 3000 
 3001                         if (is32bit)
 3002                                 ctd32 = ARB_NODE(ctd32tree, idx);
 3003                         else
 3004                                 ctd64 = ARB_NODE(ctd64tree, idx);
 3005                 } while ((is32bit ? ARB_ISFREE(ctd32, ctdlnk) :
 3006                     ARB_ISFREE(ctd64, ctdlnk)) && ++idx);
 3007 
 3008                 /* Put the centroid on the ARB free list. */
 3009                 if (is32bit)
 3010                         ARB_RETURNFREE(ctd32tree, ctd32, ctdlnk);
 3011                 else
 3012                         ARB_RETURNFREE(ctd64tree, ctd64, ctdlnk);
 3013         }
 3014 
 3015         /*
 3016          * The free list now contains the randomised indices of every centroid.
 3017          * Walk the free list from start to end, re-inserting each centroid's
 3018          * mu/cnt. The tdgst_add() call may or may not consume the free centroid
 3019          * we re-insert values from during each loop iteration, so we must latch
 3020          * the index of the next free list centroid before the re-insertion
 3021          * call. The previous loop above should have left the centroid pointer
 3022          * pointing to the element at the head of the free list.
 3023          */
 3024         KASSERT((is32bit ?
 3025             ARB_FREEIDX(ctd32tree) == ARB_SELFIDX(ctd32tree, ctd32) :
 3026             ARB_FREEIDX(ctd64tree) == ARB_SELFIDX(ctd64tree, ctd64)),
 3027             ("%s: t-digest ARB@%p free list bug", __func__,
 3028             (is32bit ? (void *)ctd32tree : (void *)ctd64tree)));
              /* remctds now counts down re-insertions; it must reach 0 below. */
 3029         remctds = maxctds;
 3030         while ((is32bit ? ctd32 != NULL : ctd64 != NULL)) {
 3031                 tmperr = 0;
 3032                 if (is32bit) {
 3033                         s64q_t x;
 3034 
 3035                         idx = ARB_NEXTFREEIDX(ctd32, ctdlnk);
 3036                         /* Cloning a s32q_t into a s64q_t should never fail. */
 3037                         tmperr = Q_QCLONEQ(&x, ctd32->mu);
                              /* Only attempt the add if the clone succeeded. */
 3038                         tmperr = tmperr ? tmperr : stats_v1_vsd_tdgst_add(
 3039                             vs_dtype, tdgst, x, ctd32->cnt, attempt);
 3040                         ctd32 = ARB_NODE(ctd32tree, idx);
 3041                         KASSERT(ctd32 == NULL || ARB_ISFREE(ctd32, ctdlnk),
 3042                             ("%s: t-digest ARB@%p free list bug", __func__,
 3043                             ctd32tree));
 3044                 } else {
 3045                         idx = ARB_NEXTFREEIDX(ctd64, ctdlnk);
 3046                         tmperr = stats_v1_vsd_tdgst_add(vs_dtype, tdgst,
 3047                             ctd64->mu, ctd64->cnt, attempt);
 3048                         ctd64 = ARB_NODE(ctd64tree, idx);
 3049                         KASSERT(ctd64 == NULL || ARB_ISFREE(ctd64, ctdlnk),
 3050                             ("%s: t-digest ARB@%p free list bug", __func__,
 3051                             ctd64tree));
 3052                 }
 3053                 /*
 3054                  * This process should not produce errors, bugs notwithstanding.
 3055                  * Just in case, latch any errors and attempt all re-insertions.
 3056                  */
 3057                 error = tmperr ? tmperr : error;
 3058                 remctds--;
 3059         }
 3060 
 3061         KASSERT(remctds == 0, ("%s: t-digest ARB@%p free list bug", __func__,
 3062             (is32bit ? (void *)ctd32tree : (void *)ctd64tree)));
 3063 
 3064         return (error);
 3065 }
 3066 
 3067 static inline int
 3068 stats_v1_vsd_tdgst_add(enum vsd_dtype vs_dtype, struct voistatdata_tdgst *tdgst,
 3069     s64q_t x, uint64_t weight, int attempt)
 3070 {
              /*
               * Add the value 'x' with the given 'weight' to a t-digest.
               *
               * The centroids at minimum distance from x are located. Among those
               * able to absorb 'weight' without exceeding the size bound k, one is
               * picked pseudorandomly and x is merged into it. If there is none, a
               * new centroid is inserted instead; when the ARB is full the digest
               * is first compressed via stats_v1_vsd_tdgst_compress() and the
               * search is retried. 'attempt' scales k and is incremented after
               * each compression.
               *
               * Returns 0 on success, else:
               *   EINVAL    - vs_dtype is not VSD_DTYPE_TDGSTCLUST<32|64>.
               *   EOVERFLOW - smplcnt + weight would exceed the counter's maximum
               *               (UINT32_MAX or UINT64_MAX).
               *   EAGAIN    - 'attempt' reached 5, or no free centroid was found.
               *   EALREADY  - ARB_INSERT()/ARB_REINSERT() returned non-NULL; note
               *               that smplcnt is still increased in this case.
               *   other     - an error from the Q_*() arithmetic. The k computation
               *               ORs its error codes together, so the value may be a
               *               combination of several codes.
               */
 3071 #ifdef DIAGNOSTIC
 3072         char qstr[Q_MAXSTRLEN(x, 10)];
 3073 #endif
 3074         struct ctdth32 *ctd32tree;
 3075         struct ctdth64 *ctd64tree;
 3076         void *closest, *cur, *lb, *ub;
 3077         struct voistatdata_tdgstctd32 *ctd32;
 3078         struct voistatdata_tdgstctd64 *ctd64;
 3079         uint64_t cnt, smplcnt, sum, tmpsum;
 3080         s64q_t k, minz, q, z;
 3081         int error, is32bit, n;
 3082 
 3083         error = 0;
              /* Set up z and minz using the fractional bit count of x. */
 3084         minz = Q_INI(&z, 0, 0, Q_NFBITS(x));
 3085 
 3086         switch (vs_dtype) {
 3087         case VSD_DTYPE_TDGSTCLUST32:
                      /*
                       * 'weight' is 64 bits wide, so UINT32_MAX - weight is computed
                       * in 64 bit arithmetic and wraps for weight > UINT32_MAX.
                       * Reject such weights explicitly so the check cannot be
                       * defeated.
                       */
 3088                 if (weight > UINT32_MAX ||
                          (UINT32_MAX - weight) < VSD(tdgstclust32, tdgst)->smplcnt)
 3089                         error = EOVERFLOW;
 3090                 smplcnt = VSD(tdgstclust32, tdgst)->smplcnt;
 3091                 ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
 3092                 is32bit = 1;
 3093                 ctd64tree = NULL;
 3094                 ctd64 = NULL;
 3095                 break;
 3096         case VSD_DTYPE_TDGSTCLUST64:
 3097                 if ((UINT64_MAX - weight) < VSD(tdgstclust64, tdgst)->smplcnt)
 3098                         error = EOVERFLOW;
 3099                 smplcnt = VSD(tdgstclust64, tdgst)->smplcnt;
 3100                 ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
 3101                 is32bit = 0;
 3102                 ctd32tree = NULL;
 3103                 ctd32 = NULL;
 3104                 break;
 3105         default:
 3106                 error = EINVAL;
 3107                 break;
 3108         }
 3109 
 3110         if (error)
 3111                 return (error);
 3112 
 3113         /*
 3114          * Inspired by Ted Dunning's AVLTreeDigest.java
 3115          */
 3116         do {
 3117 #if defined(DIAGNOSTIC)
 3118                 KASSERT(attempt < 5,
 3119                     ("%s: Too many attempts", __func__));
 3120 #endif
 3121                 if (attempt >= 5)
 3122                         return (EAGAIN);
 3123 
                      /*
                       * Reset the search state for this pass. minz is set to
                       * Q_IFMAXVAL(minz), presumably its largest representable
                       * value, so that the first centroid compares as closer.
                       */
 3124                 Q_SIFVAL(minz, Q_IFMAXVAL(minz));
 3125                 closest = ub = NULL;
 3126                 sum = tmpsum = 0;
 3127 
 3128                 if (is32bit)
 3129                         lb = cur = (void *)(ctd32 = ARB_MIN(ctdth32, ctd32tree));
 3130                 else
 3131                         lb = cur = (void *)(ctd64 = ARB_MIN(ctdth64, ctd64tree));
 3132 
 3133                 if (lb == NULL) /* Empty tree. */
 3134                         lb = (is32bit ? (void *)ARB_ROOT(ctd32tree) :
 3135                             (void *)ARB_ROOT(ctd64tree));
 3136 
 3137                 /*
 3138                  * Find the set of centroids with minimum distance to x and
 3139                  * compute the sum of counts for all centroids with mean less
 3140                  * than the first centroid in the set.
 3141                  */
 3142                 for (; cur != NULL;
 3143                     cur = (is32bit ?
 3144                     (void *)(ctd32 = ARB_NEXT(ctdth32, ctd32tree, ctd32)) :
 3145                     (void *)(ctd64 = ARB_NEXT(ctdth64, ctd64tree, ctd64)))) {
 3146                         if (is32bit) {
 3147                                 cnt = ctd32->cnt;
 3148                                 KASSERT(Q_PRECEQ(ctd32->mu, x),
 3149                                     ("%s: Q_RELPREC(mu,x)=%d", __func__,
 3150                                     Q_RELPREC(ctd32->mu, x)));
 3151                                 /* Ok to assign as both have same precision. */
 3152                                 z = ctd32->mu;
 3153                         } else {
 3154                                 cnt = ctd64->cnt;
 3155                                 KASSERT(Q_PRECEQ(ctd64->mu, x),
 3156                                     ("%s: Q_RELPREC(mu,x)=%d", __func__,
 3157                                     Q_RELPREC(ctd64->mu, x)));
 3158                                 /* Ok to assign as both have same precision. */
 3159                                 z = ctd64->mu;
 3160                         }
 3161 
                              /* z = |mu - x|, the distance from this centroid to x. */
 3162                         error = Q_QSUBQ(&z, x);
 3163 #if defined(DIAGNOSTIC)
 3164                         KASSERT(!error, ("%s: unexpected error %d", __func__,
 3165                             error));
 3166 #endif
 3167                         if (error)
 3168                                 return (error);
 3169 
 3170                         z = Q_QABS(z);
                              /*
                               * A strictly closer centroid starts a new candidate
                               * run at 'cur'; 'sum' snapshots the counts gathered
                               * ahead of it. The first centroid that is strictly
                               * further away ends the run and becomes 'ub'. Equal
                               * distances leave lb, sum and tmpsum untouched.
                               */
 3171                         if (Q_QLTQ(z, minz)) {
 3172                                 minz = z;
 3173                                 lb = cur;
 3174                                 sum = tmpsum;
 3175                                 tmpsum += cnt;
 3176                         } else if (Q_QGTQ(z, minz)) {
 3177                                 ub = cur;
 3178                                 break;
 3179                         }
 3180                 }
 3181 
                      /* Restart from lb and examine each candidate in [lb, ub). */
 3182                 cur = (is32bit ?
 3183                     (void *)(ctd32 = (struct voistatdata_tdgstctd32 *)lb) :
 3184                     (void *)(ctd64 = (struct voistatdata_tdgstctd64 *)lb));
 3185 
 3186                 for (n = 0; cur != ub; cur = (is32bit ?
 3187                     (void *)(ctd32 = ARB_NEXT(ctdth32, ctd32tree, ctd32)) :
 3188                     (void *)(ctd64 = ARB_NEXT(ctdth64, ctd64tree, ctd64)))) {
 3189                         if (is32bit)
 3190                                 cnt = ctd32->cnt;
 3191                         else
 3192                                 cnt = ctd64->cnt;
 3193 
 3194                         q = Q_CTRLINI(16);
                              /* smplcnt == 1 would make the divisor below zero. */
 3195                         if (smplcnt == 1)
 3196                                 error = Q_QFRACI(&q, 1, 2);
 3197                         else
 3198                                 /* [ sum + ((cnt - 1) / 2) ] / (smplcnt - 1) */
 3199                                 error = Q_QFRACI(&q, (sum << 1) + cnt - 1,
 3200                                     (smplcnt - 1) << 1);
 3201                         k = q;
                              /*
                               * Errors from the following steps are OR-ed into
                               * 'error' rather than latched individually.
                               */
 3202                         /* k = q x 4 x samplcnt x attempt */
 3203                         error |= Q_QMULI(&k, 4 * smplcnt * attempt);
 3204                         /* k = k x (1 - q) */
 3205                         error |= Q_QSUBI(&q, 1);
 3206                         q = Q_QABS(q);
 3207                         error |= Q_QMULQ(&k, q);
 3208 #if defined(DIAGNOSTIC)
 3209 #if !defined(_KERNEL)
 3210                         double q_dbl, k_dbl, q2d, k2d;
 3211                         q2d = Q_Q2D(q);
 3212                         k2d = Q_Q2D(k);
 3213                         q_dbl = smplcnt == 1 ? 0.5 :
 3214                             (sum + ((cnt - 1)  / 2.0)) / (double)(smplcnt - 1);
 3215                         k_dbl = 4 * smplcnt * q_dbl * (1.0 - q_dbl) * attempt;
 3216                         /*
 3217                          * If the difference between q and q_dbl is greater than
 3218                          * the fractional precision of q, something is off.
 3219                          * NB: q is holding the value of 1 - q
 3220                          */
 3221                         q_dbl = 1.0 - q_dbl;
 3222                         KASSERT((q_dbl > q2d ? q_dbl - q2d : q2d - q_dbl) <
 3223                             (1.05 * ((double)1 / (double)(1ULL << Q_NFBITS(q)))),
 3224                             ("Q-type q bad precision"));
 3225                         KASSERT((k_dbl > k2d ? k_dbl - k2d : k2d - k_dbl) <
 3226                             1.0 + (0.01 * smplcnt),
 3227                             ("Q-type k bad precision"));
 3228 #endif /* !_KERNEL */
 3229                         KASSERT(!error, ("%s: unexpected error %d", __func__,
 3230                             error));
 3231 #endif /* DIAGNOSTIC */
 3232                         if (error)
 3233                                 return (error);
                              /*
                               * A centroid is eligible if it stays within the integer
                               * part of k after absorbing 'weight'. The n-th eligible
                               * centroid replaces the current pick when random() is
                               * below INT32_MAX / n, i.e. with a probability of
                               * about 1/n.
                               */
 3234                         if ((is32bit && ((ctd32->cnt + weight) <=
 3235                             (uint64_t)Q_GIVAL(k))) ||
 3236                             (!is32bit && ((ctd64->cnt + weight) <=
 3237                             (uint64_t)Q_GIVAL(k)))) {
 3238                                 n++;
 3239                                 /* random() produces 31 bits. */
 3240                                 if (random() < (INT32_MAX / n))
 3241                                         closest = cur;
 3242                         }
 3243                         sum += cnt;
 3244                 }
              /*
               * Go around again only if nothing could absorb the sample, the tree
               * is full and compressing it succeeded. attempt++ hands the current
               * attempt number to the compression and raises it for the next pass.
               */
 3245         } while (closest == NULL &&
 3246             (is32bit ? ARB_FULL(ctd32tree) : ARB_FULL(ctd64tree)) &&
 3247             (error = stats_v1_vsd_tdgst_compress(vs_dtype, tdgst,
 3248             attempt++)) == 0);
 3249 
 3250         if (error)
 3251                 return (error);
 3252 
 3253         if (closest != NULL) {
 3254                 /* Merge with an existing centroid. */
 3255                 if (is32bit) {
 3256                         ctd32 = (struct voistatdata_tdgstctd32 *)closest;
 3257                         error = Q_QSUBQ(&x, ctd32->mu);
 3258                         /*
 3259                          * The following calculation "x / (cnt + weight)"
 3260                          * computes the amount by which to adjust the centroid's
 3261                          * mu value in order to merge in the VOI sample.
 3262                          *
 3263                          * It can underflow (Q_QDIVI() returns ERANGE) when the
 3264                          * user centroids' fractional precision (which is
 3265                          * inherited by 'x') is too low to represent the result.
 3266                          *
 3267                          * A sophisticated approach to dealing with this issue
 3268                          * would minimise accumulation of error by tracking
 3269                          * underflow per centroid and making an adjustment when
 3270                          * a LSB's worth of underflow has accumulated.
 3271                          *
 3272                          * A simpler approach is to let the result underflow
 3273                          * i.e. merge the VOI sample into the centroid without
 3274                          * adjusting the centroid's mu, and rely on the user to
 3275                          * specify their t-digest with sufficient centroid
 3276                          * fractional precision such that the accumulation of
 3277                          * error from multiple underflows is of no material
 3278                          * consequence to the centroid's final value of mu.
 3279                          *
 3280                          * For the moment, the latter approach is employed by
 3281                          * simply ignoring ERANGE here.
 3282                          *
 3283                          * XXXLAS: Per-centroid underflow tracking is likely too
 3284                          * onerous, but it probably makes sense to accumulate a
 3285                          * single underflow error variable across all centroids
 3286                          * and report it as part of the digest to provide
 3287                          * additional visibility into the digest's fidelity.
 3288                          */
 3289                         error = error ? error :
 3290                             Q_QDIVI(&x, ctd32->cnt + weight);
 3291                         if ((error && error != ERANGE)
 3292                             || (error = Q_QADDQ(&ctd32->mu, x))) {
 3293 #ifdef DIAGNOSTIC
 3294                                 KASSERT(!error, ("%s: unexpected error %d",
 3295                                     __func__, error));
 3296 #endif
 3297                                 return (error);
 3298                         }
 3299                         ctd32->cnt += weight;
                              /* mu was adjusted above, so re-insert the centroid. */
 3300                         error = ARB_REINSERT(ctdth32, ctd32tree, ctd32) ==
 3301                             NULL ? 0 : EALREADY;
 3302 #ifdef DIAGNOSTIC
 3303                         RB_REINSERT(rbctdth32,
 3304                             &VSD(tdgstclust32, tdgst)->rbctdtree, ctd32);
 3305 #endif
 3306                 } else {
 3307                         ctd64 = (struct voistatdata_tdgstctd64 *)closest;
 3308                         error = Q_QSUBQ(&x, ctd64->mu);
 3309                         error = error ? error :
 3310                             Q_QDIVI(&x, ctd64->cnt + weight);
 3311                         /* Refer to is32bit ERANGE discussion above. */
 3312                         if ((error && error != ERANGE)
 3313                             || (error = Q_QADDQ(&ctd64->mu, x))) {
 3314                                 KASSERT(!error, ("%s: unexpected error %d",
 3315                                     __func__, error));
 3316                                 return (error);
 3317                         }
 3318                         ctd64->cnt += weight;
                              /* mu was adjusted above, so re-insert the centroid. */
 3319                         error = ARB_REINSERT(ctdth64, ctd64tree, ctd64) ==
 3320                             NULL ? 0 : EALREADY;
 3321 #ifdef DIAGNOSTIC
 3322                         RB_REINSERT(rbctdth64,
 3323                             &VSD(tdgstclust64, tdgst)->rbctdtree, ctd64);
 3324 #endif
 3325                 }
 3326         } else {
 3327                 /*
 3328                  * Add a new centroid. If digest compression is working
 3329                  * correctly, there should always be at least one free.
 3330                  */
 3331                 if (is32bit) {
 3332                         ctd32 = ARB_GETFREE(ctd32tree, ctdlnk);
 3333 #ifdef DIAGNOSTIC
 3334                         KASSERT(ctd32 != NULL,
 3335                             ("%s: t-digest@%p has no free centroids",
 3336                             __func__, tdgst));
 3337 #endif
 3338                         if (ctd32 == NULL)
 3339                                 return (EAGAIN);
                              /* x is a s64q_t; copy its value into the centroid's mu. */
 3340                         if ((error = Q_QCPYVALQ(&ctd32->mu, x)))
 3341                                 return (error);
 3342                         ctd32->cnt = weight;
 3343                         error = ARB_INSERT(ctdth32, ctd32tree, ctd32) == NULL ?
 3344                             0 : EALREADY;
 3345 #ifdef DIAGNOSTIC
 3346                         RB_INSERT(rbctdth32,
 3347                             &VSD(tdgstclust32, tdgst)->rbctdtree, ctd32);
 3348 #endif
 3349                 } else {
 3350                         ctd64 = ARB_GETFREE(ctd64tree, ctdlnk);
 3351 #ifdef DIAGNOSTIC
 3352                         KASSERT(ctd64 != NULL,
 3353                             ("%s: t-digest@%p has no free centroids",
 3354                             __func__, tdgst));
 3355 #endif
 3356                         if (ctd64 == NULL) /* Should not happen. */
 3357                                 return (EAGAIN);
 3358                         /* Direct assignment ok as both have same type/prec. */
 3359                         ctd64->mu = x;
 3360                         ctd64->cnt = weight;
 3361                         error = ARB_INSERT(ctdth64, ctd64tree, ctd64) == NULL ?
 3362                             0 : EALREADY;
 3363 #ifdef DIAGNOSTIC
 3364                         RB_INSERT(rbctdth64, &VSD(tdgstclust64,
 3365                             tdgst)->rbctdtree, ctd64);
 3366 #endif
 3367                 }
 3368         }
 3369 
              /* Account for the new sample(s) in the digest's running total. */
 3370         if (is32bit)
 3371                 VSD(tdgstclust32, tdgst)->smplcnt += weight;
 3372         else {
 3373                 VSD(tdgstclust64, tdgst)->smplcnt += weight;
 3374 
 3375 #ifdef DIAGNOSTIC
                      /*
                       * Cross-check the 64 bit ARB against its shadow RB tree node
                       * by node. Any structural mismatch is printed and then
                       * panics. No equivalent check is made for the 32 bit digest.
                       */
 3376                 struct rbctdth64 *rbctdtree =
 3377                     &VSD(tdgstclust64, tdgst)->rbctdtree;
 3378                 struct voistatdata_tdgstctd64 *rbctd64;
 3379                 int i = 0;
 3380                 ARB_FOREACH(ctd64, ctdth64, ctd64tree) {
 3381                         rbctd64 = (i == 0 ? RB_MIN(rbctdth64, rbctdtree) :
 3382                             RB_NEXT(rbctdth64, rbctdtree, rbctd64));
 3383 
 3384                         if (i >= ARB_CURNODES(ctd64tree)
 3385                             || ctd64 != rbctd64
 3386                             || ARB_MIN(ctdth64, ctd64tree) !=
 3387                                RB_MIN(rbctdth64, rbctdtree)
 3388                             || ARB_MAX(ctdth64, ctd64tree) !=
 3389                                RB_MAX(rbctdth64, rbctdtree)
 3390                             || ARB_LEFTIDX(ctd64, ctdlnk) !=
 3391                                ARB_SELFIDX(ctd64tree, RB_LEFT(rbctd64, rblnk))
 3392                             || ARB_RIGHTIDX(ctd64, ctdlnk) !=
 3393                                ARB_SELFIDX(ctd64tree, RB_RIGHT(rbctd64, rblnk))
 3394                             || ARB_PARENTIDX(ctd64, ctdlnk) !=
 3395                                ARB_SELFIDX(ctd64tree,
 3396                                RB_PARENT(rbctd64, rblnk))) {
 3397                                 Q_TOSTR(ctd64->mu, -1, 10, qstr, sizeof(qstr));
 3398                                 printf("ARB ctd=%3d p=%3d l=%3d r=%3d c=%2d "
 3399                                     "mu=%s\n",
 3400                                     (int)ARB_SELFIDX(ctd64tree, ctd64),
 3401                                     ARB_PARENTIDX(ctd64, ctdlnk),
 3402                                     ARB_LEFTIDX(ctd64, ctdlnk),
 3403                                     ARB_RIGHTIDX(ctd64, ctdlnk),
 3404                                     ARB_COLOR(ctd64, ctdlnk),
 3405                                     qstr);
 3406 
 3407                                 Q_TOSTR(rbctd64->mu, -1, 10, qstr,
 3408                                     sizeof(qstr));
 3409                                 struct voistatdata_tdgstctd64 *parent;
 3410                                 parent = RB_PARENT(rbctd64, rblnk);
 3411                                 int rb_color =
 3412                                         parent == NULL ? 0 :
 3413                                         RB_LEFT(parent, rblnk) == rbctd64 ?
 3414                                         (_RB_BITSUP(parent, rblnk) & _RB_L) != 0 :
 3415                                         (_RB_BITSUP(parent, rblnk) & _RB_R) != 0;
 3416                                 printf(" RB ctd=%3d p=%3d l=%3d r=%3d c=%2d "
 3417                                     "mu=%s\n",
 3418                                     (int)ARB_SELFIDX(ctd64tree, rbctd64),
 3419                                     (int)ARB_SELFIDX(ctd64tree,
 3420                                       RB_PARENT(rbctd64, rblnk)),
 3421                                     (int)ARB_SELFIDX(ctd64tree,
 3422                                       RB_LEFT(rbctd64, rblnk)),
 3423                                     (int)ARB_SELFIDX(ctd64tree,
 3424                                       RB_RIGHT(rbctd64, rblnk)),
 3425                                     rb_color,
 3426                                     qstr);
 3427 
 3428                                 panic("RB@%p and ARB@%p trees differ\n",
 3429                                     rbctdtree, ctd64tree);
 3430                         }
 3431                         i++;
 3432                 }
 3433 #endif /* DIAGNOSTIC */
 3434         }
 3435 
 3436         return (error);
 3437 }
 3438 
 3439 static inline int
 3440 stats_v1_voi_update_tdgst(enum vsd_dtype voi_dtype, struct voistatdata *voival,
 3441     struct voistat *vs, struct voistatdata_tdgst *tdgst)
 3442 {
 3443         s64q_t smpl;
 3444         int ret;
 3445 
 3446         /*
 3447          * Convert the VOI value to a s64q_t and add it to the t-digest with a
 3448          * weight of 1. The temporary takes its fractional precision from the
 3449          * user's centroids (the node at index 0 of the digest's tree).
 3450          */
 3451         if (vs->dtype == VSD_DTYPE_TDGSTCLUST32) {
 3452                 Q_INI(&smpl, 0, 0, Q_NFBITS(
 3453                     ARB_CNODE(&VSD(tdgstclust32, tdgst)->ctdtree, 0)->mu));
 3454         } else if (vs->dtype == VSD_DTYPE_TDGSTCLUST64) {
 3455                 Q_INI(&smpl, 0, 0, Q_NFBITS(
 3456                     ARB_CNODE(&VSD(tdgstclust64, tdgst)->ctdtree, 0)->mu));
 3457         } else {
 3458                 KASSERT(vs->dtype == VSD_DTYPE_TDGSTCLUST32 ||
 3459                     vs->dtype == VSD_DTYPE_TDGSTCLUST64,
 3460                     ("%s: vs->dtype(%d) != VSD_DTYPE_TDGSTCLUST<32|64>",
 3461                     __func__, vs->dtype));
 3462                 return (EINVAL);
 3463         }
 3464 
 3465         /*
 3466          * XXXLAS: Keeping both a signed and an unsigned temporary would avoid
 3467          * returning EOVERFLOW when the voival would have fit in a u64q_t.
 3468          */
 3469         switch (voi_dtype) {
 3470         case VSD_DTYPE_INT_S32:
 3471                 ret = Q_QCPYVALI(&smpl, voival->int32.s32);
 3472                 break;
 3473         case VSD_DTYPE_INT_U32:
 3474                 ret = Q_QCPYVALI(&smpl, voival->int32.u32);
 3475                 break;
 3476         case VSD_DTYPE_INT_S64:
 3477                 ret = Q_QCPYVALI(&smpl, voival->int64.s64);
 3478                 break;
 3479         case VSD_DTYPE_INT_U64:
 3480                 ret = Q_QCPYVALI(&smpl, voival->int64.u64);
 3481                 break;
 3482         case VSD_DTYPE_INT_SLONG:
 3483                 ret = Q_QCPYVALI(&smpl, voival->intlong.slong);
 3484                 break;
 3485         case VSD_DTYPE_INT_ULONG:
 3486                 ret = Q_QCPYVALI(&smpl, voival->intlong.ulong);
 3487                 break;
 3488         case VSD_DTYPE_Q_S32:
 3489                 ret = Q_QCPYVALQ(&smpl, voival->q32.sq32);
 3490                 break;
 3491         case VSD_DTYPE_Q_U32:
 3492                 ret = Q_QCPYVALQ(&smpl, voival->q32.uq32);
 3493                 break;
 3494         case VSD_DTYPE_Q_S64:
 3495                 ret = Q_QCPYVALQ(&smpl, voival->q64.sq64);
 3496                 break;
 3497         case VSD_DTYPE_Q_U64:
 3498                 ret = Q_QCPYVALQ(&smpl, voival->q64.uq64);
 3499                 break;
 3500         default:
 3501                 return (EINVAL);
 3502         }
 3503 
 3504         /* One sample per update, starting at add attempt 1. */
 3505         if (ret == 0)
 3506                 ret = stats_v1_vsd_tdgst_add(vs->dtype, tdgst, smpl, 1, 1);
 3507         if (ret != 0)
 3508                 return (ret);
 3509 
 3510         /* Set VS_VSDVALID only once the sample has been added. */
 3511         vs->flags |= VS_VSDVALID;
 3512         return (0);
 3513 }
 3514 
 3515 int
 3516 stats_v1_voi_update(struct statsblobv1 *sb, int32_t voi_id,
 3517     enum vsd_dtype voi_dtype, struct voistatdata *voival, uint32_t flags)
 3518 {
 3519         struct voi *v;
 3520         struct voistat *vs;
 3521         void *statevsd, *vsd;
 3522         int error, i, tmperr;
 3523 
 3524         error = 0;
 3525 
 3526         if (sb == NULL || sb->abi != STATS_ABI_V1 || voi_id >= NVOIS(sb) ||
 3527             voi_dtype == 0 || voi_dtype >= VSD_NUM_DTYPES || voival == NULL)
 3528                 return (EINVAL);
 3529         v = &sb->vois[voi_id];
 3530         if (voi_dtype != v->dtype || v->id < 0 ||
 3531             ((flags & SB_VOI_RELUPDATE) && !(v->flags & VOI_REQSTATE)))
 3532                 return (EINVAL);
 3533 
 3534         vs = BLOB_OFFSET(sb, v->stats_off);
 3535         if (v->flags & VOI_REQSTATE)
 3536                 statevsd = BLOB_OFFSET(sb, vs->data_off);
 3537         else
 3538                 statevsd = NULL;
 3539 
 3540         if (flags & SB_VOI_RELUPDATE) {
 3541                 switch (voi_dtype) {
 3542                 case VSD_DTYPE_INT_S32:
 3543                         voival->int32.s32 +=
 3544                             VSD(voistate, statevsd)->prev.int32.s32;
 3545                         break;
 3546                 case VSD_DTYPE_INT_U32:
 3547                         voival->int32.u32 +=
 3548                             VSD(voistate, statevsd)->prev.int32.u32;
 3549                         break;
 3550                 case VSD_DTYPE_INT_S64:
 3551                         voival->int64.s64 +=
 3552                             VSD(voistate, statevsd)->prev.int64.s64;
 3553                         break;
 3554                 case VSD_DTYPE_INT_U64:
 3555                         voival->int64.u64 +=
 3556                             VSD(voistate, statevsd)->prev.int64.u64;
 3557                         break;
 3558                 case VSD_DTYPE_INT_SLONG:
 3559                         voival->intlong.slong +=
 3560                             VSD(voistate, statevsd)->prev.intlong.slong;
 3561                         break;
 3562                 case VSD_DTYPE_INT_ULONG:
 3563                         voival->intlong.ulong +=
 3564                             VSD(voistate, statevsd)->prev.intlong.ulong;
 3565                         break;
 3566                 case VSD_DTYPE_Q_S32:
 3567                         error = Q_QADDQ(&voival->q32.sq32,
 3568                             VSD(voistate, statevsd)->prev.q32.sq32);
 3569                         break;
 3570                 case VSD_DTYPE_Q_U32:
 3571                         error = Q_QADDQ(&voival->q32.uq32,
 3572                             VSD(voistate, statevsd)->prev.q32.uq32);
 3573                         break;
 3574                 case VSD_DTYPE_Q_S64:
 3575                         error = Q_QADDQ(&voival->q64.sq64,
 3576                             VSD(voistate, statevsd)->prev.q64.sq64);
 3577                         break;
 3578                 case VSD_DTYPE_Q_U64:
 3579                         error = Q_QADDQ(&voival->q64.uq64,
 3580                             VSD(voistate, statevsd)->prev.q64.uq64);
 3581                         break;
 3582                 default:
 3583                         KASSERT(0, ("Unknown VOI data type %d", voi_dtype));
 3584                         break;
 3585                 }
 3586         }
 3587 
 3588         if (error)
 3589                 return (error);
 3590 
 3591         for (i = v->voistatmaxid; i > 0; i--) {
 3592                 vs = &((struct voistat *)BLOB_OFFSET(sb, v->stats_off))[i];
 3593                 if (vs->stype < 0)
 3594                         continue;
 3595 
 3596                 vsd = BLOB_OFFSET(sb, vs->data_off);
 3597 
 3598                 switch (vs->stype) {
 3599                 case VS_STYPE_MAX:
 3600                         tmperr = stats_v1_voi_update_max(voi_dtype, voival,
 3601                             vs, vsd);
 3602                         break;
 3603                 case VS_STYPE_MIN:
 3604                         tmperr = stats_v1_voi_update_min(voi_dtype, voival,
 3605                             vs, vsd);
 3606                         break;
 3607                 case VS_STYPE_SUM:
 3608                         tmperr = stats_v1_voi_update_sum(voi_dtype, voival,
 3609                             vs, vsd);
 3610                         break;
 3611                 case VS_STYPE_HIST:
 3612                         tmperr = stats_v1_voi_update_hist(voi_dtype, voival,
 3613                             vs, vsd);
 3614                         break;
 3615                 case VS_STYPE_TDGST:
 3616                         tmperr = stats_v1_voi_update_tdgst(voi_dtype, voival,
 3617                             vs, vsd);
 3618                         break;
 3619                 default:
 3620                         KASSERT(0, ("Unknown VOI stat type %d", vs->stype));
 3621                         break;
 3622                 }
 3623 
 3624                 if (tmperr) {
 3625                         error = tmperr;
 3626                         VS_INCERRS(vs);
 3627                 }
 3628         }
 3629 
 3630         if (statevsd) {
 3631                 switch (voi_dtype) {
 3632                 case VSD_DTYPE_INT_S32:
 3633                         VSD(voistate, statevsd)->prev.int32.s32 =
 3634                             voival->int32.s32;
 3635                         break;
 3636                 case VSD_DTYPE_INT_U32:
 3637                         VSD(voistate, statevsd)->prev.int32.u32 =
 3638                             voival->int32.u32;
 3639                         break;
 3640                 case VSD_DTYPE_INT_S64:
 3641                         VSD(voistate, statevsd)->prev.int64.s64 =
 3642                             voival->int64.s64;
 3643                         break;
 3644                 case VSD_DTYPE_INT_U64:
 3645                         VSD(voistate, statevsd)->prev.int64.u64 =
 3646                             voival->int64.u64;
 3647                         break;
 3648                 case VSD_DTYPE_INT_SLONG:
 3649                         VSD(voistate, statevsd)->prev.intlong.slong =
 3650                             voival->intlong.slong;
 3651                         break;
 3652                 case VSD_DTYPE_INT_ULONG:
 3653                         VSD(voistate, statevsd)->prev.intlong.ulong =
 3654                             voival->intlong.ulong;
 3655                         break;
 3656                 case VSD_DTYPE_Q_S32:
 3657                         error = Q_QCPYVALQ(
 3658                             &VSD(voistate, statevsd)->prev.q32.sq32,
 3659                             voival->q32.sq32);
 3660                         break;
 3661                 case VSD_DTYPE_Q_U32:
 3662                         error = Q_QCPYVALQ(
 3663                             &VSD(voistate, statevsd)->prev.q32.uq32,
 3664                             voival->q32.uq32);
 3665                         break;
 3666                 case VSD_DTYPE_Q_S64:
 3667                         error = Q_QCPYVALQ(
 3668                             &VSD(voistate, statevsd)->prev.q64.sq64,
 3669                             voival->q64.sq64);
 3670                         break;
 3671                 case VSD_DTYPE_Q_U64:
 3672                         error = Q_QCPYVALQ(
 3673                             &VSD(voistate, statevsd)->prev.q64.uq64,
 3674                             voival->q64.uq64);
 3675                         break;
 3676                 default:
 3677                         KASSERT(0, ("Unknown VOI data type %d", voi_dtype));
 3678                         break;
 3679                 }
 3680         }
 3681 
 3682         return (error);
 3683 }
 3684 
 3685 #ifdef _KERNEL
 3686 
 3687 static void
 3688 stats_init(void *arg)
 3689 {
 3690 
 3691 }
 3692 SYSINIT(stats, SI_SUB_KDTRACE, SI_ORDER_FIRST, stats_init, NULL);
 3693 
 3694 /*
 3695  * Sysctl handler to display the list of available stats templates.
 3696  */
 3697 static int
 3698 stats_tpl_list_available(SYSCTL_HANDLER_ARGS)
 3699 {
 3700         struct sbuf *s;
 3701         int err, i;
 3702 
 3703         err = 0;
 3704 
 3705         /* We can tolerate ntpl being stale, so do not take the lock. */
 3706         s = sbuf_new(NULL, NULL, /* +1 per tpl for , */
 3707             ntpl * (STATS_TPL_MAX_STR_SPEC_LEN + 1), SBUF_FIXEDLEN);
 3708         if (s == NULL)
 3709                 return (ENOMEM);
 3710 
 3711         TPL_LIST_RLOCK();
 3712         for (i = 0; i < ntpl; i++) {
 3713                 err = sbuf_printf(s, "%s\"%s\":%u", i ? "," : "",
 3714                     tpllist[i]->mb->tplname, tpllist[i]->mb->tplhash);
 3715                 if (err) {
 3716                         /* Sbuf overflow condition. */
 3717                         err = EOVERFLOW;
 3718                         break;
 3719                 }
 3720         }
 3721         TPL_LIST_RUNLOCK();
 3722 
 3723         if (!err) {
 3724                 sbuf_finish(s);
 3725                 err = sysctl_handle_string(oidp, sbuf_data(s), 0, req);
 3726         }
 3727 
 3728         sbuf_delete(s);
 3729         return (err);
 3730 }
 3731 
 3732 /*
 3733  * Called by subsystem-specific sysctls to report and/or parse the list of
 3734  * templates being sampled and their sampling rates. A stats_tpl_sr_cb_t
 3735  * conformant function pointer must be passed in as arg1, which is used to
 3736  * interact with the subsystem's stats template sample rates list. If arg2 > 0,
 3737  * a zero-initialised allocation of arg2-sized contextual memory is
 3738  * heap-allocated and passed in to all subsystem callbacks made during the
 3739  * operation of stats_tpl_sample_rates().
 3740  *
 3741  * XXXLAS: Assumes templates are never removed, which is currently true but may
 3742  * need to be reworked in future if dynamic template management becomes a
 3743  * requirement e.g. to support kernel module based templates.
 3744  */
 3745 int
 3746 stats_tpl_sample_rates(SYSCTL_HANDLER_ARGS)
 3747 {
 3748         char kvpair_fmt[16], tplspec_fmt[16];
 3749         char tpl_spec[STATS_TPL_MAX_STR_SPEC_LEN];
 3750         char tpl_name[TPL_MAX_NAME_LEN + 2]; /* +2 for "" */
 3751         stats_tpl_sr_cb_t subsys_cb;
 3752         void *subsys_ctx;
 3753         char *buf, *new_rates_usr_str, *tpl_name_p;
 3754         struct stats_tpl_sample_rate *rates;
 3755         struct sbuf *s, _s;
 3756         uint32_t cum_pct, pct, tpl_hash;
 3757         int err, i, off, len, newlen, nrates;
 3758 
 3759         buf = NULL;
 3760         rates = NULL;
 3761         err = nrates = 0;
 3762         subsys_cb = (stats_tpl_sr_cb_t)arg1;
 3763         KASSERT(subsys_cb != NULL, ("%s: subsys_cb == arg1 == NULL", __func__));
 3764         if (arg2 > 0)
 3765                 subsys_ctx = malloc(arg2, M_TEMP, M_WAITOK | M_ZERO);
 3766         else
 3767                 subsys_ctx = NULL;
 3768 
 3769         /* Grab current count of subsystem rates. */
 3770         err = subsys_cb(TPL_SR_UNLOCKED_GET, NULL, &nrates, subsys_ctx);
 3771         if (err)
 3772                 goto done;
 3773 
 3774         /* +1 to ensure we can append '\0' post copyin, +5 per rate for =nnn, */
 3775         len = max(req->newlen + 1, nrates * (STATS_TPL_MAX_STR_SPEC_LEN + 5));
 3776 
 3777         if (req->oldptr != NULL || req->newptr != NULL)
 3778                 buf = malloc(len, M_TEMP, M_WAITOK);
 3779 
 3780         if (req->oldptr != NULL) {
 3781                 if (nrates == 0) {
 3782                         /* No rates, so return an empty string via oldptr. */
 3783                         err = SYSCTL_OUT(req, "", 1);
 3784                         if (err)
 3785                                 goto done;
 3786                         goto process_new;
 3787                 }
 3788 
 3789                 s = sbuf_new(&_s, buf, len, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
 3790 
 3791                 /* Grab locked count of, and ptr to, subsystem rates. */
 3792                 err = subsys_cb(TPL_SR_RLOCKED_GET, &rates, &nrates,
 3793                     subsys_ctx);
 3794                 if (err)
 3795                         goto done;
 3796                 TPL_LIST_RLOCK();
 3797                 for (i = 0; i < nrates && !err; i++) {
 3798                         err = sbuf_printf(s, "%s\"%s\":%u=%u", i ? "," : "",
 3799                             tpllist[rates[i].tpl_slot_id]->mb->tplname,
 3800                             tpllist[rates[i].tpl_slot_id]->mb->tplhash,
 3801                             rates[i].tpl_sample_pct);
 3802                 }
 3803                 TPL_LIST_RUNLOCK();
 3804                 /* Tell subsystem that we're done with its rates list. */
 3805                 err = subsys_cb(TPL_SR_RUNLOCK, &rates, &nrates, subsys_ctx);
 3806                 if (err)
 3807                         goto done;
 3808 
 3809                 err = sbuf_finish(s);
 3810                 if (err)
 3811                         goto done; /* We lost a race for buf to be too small. */
 3812 
 3813                 /* Return the rendered string data via oldptr. */
 3814                 err = SYSCTL_OUT(req, sbuf_data(s), sbuf_len(s));
 3815         } else {
 3816                 /* Return the upper bound size for buffer sizing requests. */
 3817                 err = SYSCTL_OUT(req, NULL, len);
 3818         }
 3819 
 3820 process_new:
 3821         if (err || req->newptr == NULL)
 3822                 goto done;
 3823 
 3824         newlen = req->newlen - req->newidx;
 3825         err = SYSCTL_IN(req, buf, newlen);
 3826         if (err)
 3827                 goto done;
 3828 
 3829         /*
 3830          * Initialise format strings at run time.
 3831          *
 3832          * Write the max template spec string length into the
 3833          * template_spec=percent key-value pair parsing format string as:
 3834          *     " %<width>[^=]=%u %n"
 3835          *
 3836          * Write the max template name string length into the tplname:tplhash
 3837          * parsing format string as:
 3838          *     "%<width>[^:]:%u"
 3839          *
 3840          * Subtract 1 for \0 appended by sscanf().
 3841          */
 3842         sprintf(kvpair_fmt, " %%%zu[^=]=%%u %%n", sizeof(tpl_spec) - 1);
 3843         sprintf(tplspec_fmt, "%%%zu[^:]:%%u", sizeof(tpl_name) - 1);
 3844 
 3845         /*
 3846          * Parse each CSV key-value pair specifying a template and its sample
 3847          * percentage. Whitespace either side of a key-value pair is ignored.
 3848          * Templates can be specified by name, hash, or name and hash per the
 3849          * following formats (chars in [] are optional):
 3850          *    ["]<tplname>["]=<percent>
 3851          *    :hash=pct
 3852          *    ["]<tplname>["]:hash=<percent>
 3853          */
 3854         cum_pct = nrates = 0;
 3855         rates = NULL;
 3856         buf[newlen] = '\0'; /* buf is at least newlen+1 in size. */
 3857         new_rates_usr_str = buf;
 3858         while (isspace(*new_rates_usr_str))
 3859                 new_rates_usr_str++; /* Skip leading whitespace. */
 3860         while (*new_rates_usr_str != '\0') {
 3861                 tpl_name_p = tpl_name;
 3862                 tpl_name[0] = '\0';
 3863                 tpl_hash = 0;
 3864                 off = 0;
 3865 
 3866                 /*
 3867                  * Parse key-value pair which must perform 2 conversions, then
 3868                  * parse the template spec to extract either name, hash, or name
 3869                  * and hash depending on the three possible spec formats. The
 3870                  * tplspec_fmt format specifier parses name or name and hash
 3871                  * template specs, while the ":%u" format specifier parses
 3872                  * hash-only template specs. If parsing is successfull, ensure
 3873                  * the cumulative sampling percentage does not exceed 100.
 3874                  */
 3875                 err = EINVAL;
 3876                 if (2 != sscanf(new_rates_usr_str, kvpair_fmt, tpl_spec, &pct,
 3877                     &off))
 3878                         break;
 3879                 if ((1 > sscanf(tpl_spec, tplspec_fmt, tpl_name, &tpl_hash)) &&
 3880                     (1 != sscanf(tpl_spec, ":%u", &tpl_hash)))
 3881                         break;
 3882                 if ((cum_pct += pct) > 100)
 3883                         break;
 3884                 err = 0;
 3885 
 3886                 /* Strip surrounding "" from template name if present. */
 3887                 len = strlen(tpl_name);
 3888                 if (len > 0) {
 3889                         if (tpl_name[len - 1] == '"')
 3890                                 tpl_name[--len] = '\0';
 3891                         if (tpl_name[0] == '"') {
 3892                                 tpl_name_p++;
 3893                                 len--;
 3894                         }
 3895                 }
 3896 
 3897                 rates = stats_realloc(rates, 0, /* oldsz is unused in kernel. */
 3898                     (nrates + 1) * sizeof(*rates), M_WAITOK);
 3899                 rates[nrates].tpl_slot_id =
 3900                     stats_tpl_fetch_allocid(len ? tpl_name_p : NULL, tpl_hash);
 3901                 if (rates[nrates].tpl_slot_id < 0) {
 3902                         err = -rates[nrates].tpl_slot_id;
 3903                         break;
 3904                 }
 3905                 rates[nrates].tpl_sample_pct = pct;
 3906                 nrates++;
 3907                 new_rates_usr_str += off;
 3908                 if (*new_rates_usr_str != ',')
 3909                         break; /* End-of-input or malformed. */
 3910                 new_rates_usr_str++; /* Move past comma to next pair. */
 3911         }
 3912 
 3913         if (!err) {
 3914                 if ((new_rates_usr_str - buf) < newlen) {
 3915                         /* Entire input has not been consumed. */
 3916                         err = EINVAL;
 3917                 } else {
 3918                         /*
 3919                          * Give subsystem the new rates. They'll return the
 3920                          * appropriate rates pointer for us to garbage collect.
 3921                          */
 3922                         err = subsys_cb(TPL_SR_PUT, &rates, &nrates,
 3923                             subsys_ctx);
 3924                 }
 3925         }
 3926         stats_free(rates);
 3927 
 3928 done:
 3929         free(buf, M_TEMP);
 3930         free(subsys_ctx, M_TEMP);
 3931         return (err);
 3932 }
 3933 
 3934 SYSCTL_NODE(_kern, OID_AUTO, stats, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
 3935     "stats(9) MIB");
 3936 
 3937 SYSCTL_PROC(_kern_stats, OID_AUTO, templates,
 3938     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
 3939     stats_tpl_list_available, "A",
 3940     "list the name/hash of all available stats(9) templates");
 3941 
 3942 #else /* ! _KERNEL */
 3943 
 3944 static void __attribute__ ((constructor))
 3945 stats_constructor(void)
 3946 {
 3947 
 3948         pthread_rwlock_init(&tpllistlock, NULL);
 3949 }
 3950 
 3951 static void __attribute__ ((destructor))
 3952 stats_destructor(void)
 3953 {
 3954 
 3955         pthread_rwlock_destroy(&tpllistlock);
 3956 }
 3957 
 3958 #endif /* _KERNEL */

Cache object: 8a3cc7c2229e890fa57ebe5e81c259e8


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.