FreeBSD/Linux Kernel Cross Reference
sys/dev/lockstat.c

/*      $NetBSD: lockstat.c,v 1.30 2022/04/08 10:17:54 andvar Exp $     */

/*-
 * Copyright (c) 2006, 2007, 2019 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Lock statistics driver, providing kernel support for the lockstat(8)
 * command.
 *
 * We use a global lock word (lockstat_lock) to track device opens.
 * Only one thread can hold the device at a time, providing a global lock.
 *
 * XXX Timings for contention on sleep locks are currently incorrect.
 * XXX Convert this to use timecounters!
 */
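
/*
 * Rough lifecycle, as implemented below: a single consumer (normally
 * lockstat(8)) opens /dev/lockstat, issues IOC_LOCKSTAT_ENABLE to allocate
 * buffers and start tracing, issues IOC_LOCKSTAT_DISABLE to stop, and then
 * read()s the raw lsbuf_t records back out.  Closing the device stops
 * tracing and releases the buffers if the consumer has not already done so.
 */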

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lockstat.c,v 1.30 2022/04/08 10:17:54 andvar Exp $");

#include <sys/types.h>
#include <sys/param.h>

#include <sys/atomic.h>
#include <sys/conf.h>
#include <sys/cpu.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/xcall.h>

#include <dev/lockstat.h>

#include "ioconf.h"

#ifndef __HAVE_CPU_COUNTER
#error CPU counters not available
#endif

#if LONG_BIT == 64
#define LOCKSTAT_HASH_SHIFT     3
#elif LONG_BIT == 32
#define LOCKSTAT_HASH_SHIFT     2
#endif

#define LOCKSTAT_MINBUFS        1000
#define LOCKSTAT_DEFBUFS        20000
#define LOCKSTAT_MAXBUFS        1000000

#define LOCKSTAT_HASH_SIZE      128
#define LOCKSTAT_HASH_MASK      (LOCKSTAT_HASH_SIZE - 1)
#define LOCKSTAT_HASH(key)      \
        ((key >> LOCKSTAT_HASH_SHIFT) & LOCKSTAT_HASH_MASK)
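
/*
 * Buffers are hashed on the XOR of the lock and call-site addresses.  The
 * shift discards the low bits, which are mostly zero because of pointer
 * alignment (hence the word-size dependent LOCKSTAT_HASH_SHIFT), and the
 * mask folds the key into one of the LOCKSTAT_HASH_SIZE per-CPU chains.
 */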

typedef struct lscpu {
        SLIST_HEAD(, lsbuf)     lc_free;
        u_int                   lc_overflow;
        LIST_HEAD(lslist, lsbuf) lc_hash[LOCKSTAT_HASH_SIZE];
} lscpu_t;
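
/*
 * One of these exists per CPU: lc_free holds the CPU's share of unused
 * buffers, lc_hash holds the buffers currently accumulating counts (keyed
 * on the lock/call-site pair), and lc_overflow counts events dropped
 * because the free list was empty.
 */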

typedef struct lslist lslist_t;

void    lockstat_start(lsenable_t *);
int     lockstat_alloc(lsenable_t *);
void    lockstat_init_tables(lsenable_t *);
int     lockstat_stop(lsdisable_t *);
void    lockstat_free(void);

dev_type_open(lockstat_open);
dev_type_close(lockstat_close);
dev_type_read(lockstat_read);
dev_type_ioctl(lockstat_ioctl);

volatile u_int  lockstat_enabled;
volatile u_int  lockstat_dev_enabled;
__cpu_simple_lock_t lockstat_enabled_lock;
uintptr_t       lockstat_csstart;
uintptr_t       lockstat_csend;
uintptr_t       lockstat_csmask;
uintptr_t       lockstat_lamask;
uintptr_t       lockstat_lockstart;
uintptr_t       lockstat_lockend;
__cpu_simple_lock_t lockstat_lock;
lwp_t           *lockstat_lwp;
lsbuf_t         *lockstat_baseb;
size_t          lockstat_sizeb;
int             lockstat_busy;
struct timespec lockstat_stime;

#ifdef KDTRACE_HOOKS
volatile u_int lockstat_dtrace_enabled;
CTASSERT(LB_NEVENT <= 3);
CTASSERT(LB_NLOCK <= (7 << LB_LOCK_SHIFT));
void
lockstat_probe_stub(uint32_t id, uintptr_t lock, uintptr_t callsite,
    uintptr_t flags, uintptr_t count, uintptr_t cycles)
{
}

uint32_t        lockstat_probemap[LS_NPROBES];
void            (*lockstat_probe_func)(uint32_t, uintptr_t, uintptr_t,
                    uintptr_t, uintptr_t, uintptr_t) = &lockstat_probe_stub;
#endif
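
/*
 * With KDTRACE_HOOKS, the dtrace lockstat provider is expected to fill in
 * lockstat_probemap[] and repoint lockstat_probe_func at its own handler
 * while probes are enabled; lockstat_probe_stub above is the do-nothing
 * default used when no probes are active.
 */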

const struct cdevsw lockstat_cdevsw = {
        .d_open = lockstat_open,
        .d_close = lockstat_close,
        .d_read = lockstat_read,
        .d_write = nowrite,
        .d_ioctl = lockstat_ioctl,
        .d_stop = nostop,
        .d_tty = notty,
        .d_poll = nopoll,
        .d_mmap = nommap,
        .d_kqfilter = nokqfilter,
        .d_discard = nodiscard,
        .d_flag = D_OTHER | D_MPSAFE
};

/*
 * Called when the pseudo-driver is attached.
 */
void
lockstatattach(int nunits)
{

        (void)nunits;

        __cpu_simple_lock_init(&lockstat_lock);
        __cpu_simple_lock_init(&lockstat_enabled_lock);
}

/*
 * Prepare the per-CPU tables for use, or clear down tables when tracing is
 * stopped.
 */
void
lockstat_init_tables(lsenable_t *le)
{
        int i, per, slop, cpuno;
        CPU_INFO_ITERATOR cii;
        struct cpu_info *ci;
        lscpu_t *lc;
        lsbuf_t *lb;

        /* coverity[assert_side_effect] */
        KASSERT(!lockstat_dev_enabled);

        for (CPU_INFO_FOREACH(cii, ci)) {
                if (ci->ci_lockstat != NULL) {
                        kmem_free(ci->ci_lockstat, sizeof(lscpu_t));
                        ci->ci_lockstat = NULL;
                }
        }

        if (le == NULL)
                return;

        lb = lockstat_baseb;
        per = le->le_nbufs / ncpu;
        slop = le->le_nbufs - (per * ncpu);
        cpuno = 0;
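        /*
         * Carve the preallocated buffer array up between the CPUs: each CPU
         * gets an equal share ("per") from its own contiguous slice, and the
         * leftover buffers ("slop") are handed out one at a time to the
         * leading CPUs.
         */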
        for (CPU_INFO_FOREACH(cii, ci)) {
                lc = kmem_alloc(sizeof(*lc), KM_SLEEP);
                lc->lc_overflow = 0;
                ci->ci_lockstat = lc;

                SLIST_INIT(&lc->lc_free);
                for (i = 0; i < LOCKSTAT_HASH_SIZE; i++)
                        LIST_INIT(&lc->lc_hash[i]);

                for (i = per; i != 0; i--, lb++) {
                        lb->lb_cpu = (uint16_t)cpuno;
                        SLIST_INSERT_HEAD(&lc->lc_free, lb, lb_chain.slist);
                }
                if (--slop > 0) {
                        lb->lb_cpu = (uint16_t)cpuno;
                        SLIST_INSERT_HEAD(&lc->lc_free, lb, lb_chain.slist);
                        lb++;
                }
                cpuno++;
        }
}

/*
 * Start collecting lock statistics.
 */
void
lockstat_start(lsenable_t *le)
{

        /* coverity[assert_side_effect] */
        KASSERT(!lockstat_dev_enabled);

        lockstat_init_tables(le);

        if ((le->le_flags & LE_CALLSITE) != 0)
                lockstat_csmask = (uintptr_t)-1LL;
        else
                lockstat_csmask = 0;

        if ((le->le_flags & LE_LOCK) != 0)
                lockstat_lamask = (uintptr_t)-1LL;
        else
                lockstat_lamask = 0;

        lockstat_csstart = le->le_csstart;
        lockstat_csend = le->le_csend;
        lockstat_lockstart = le->le_lockstart;
        lockstat_lockend = le->le_lockend;

        /*
         * Ensure everything is initialized on all CPUs, by issuing a
         * null xcall with the side effect of a release barrier on this
         * CPU and an acquire barrier on all other CPUs, before they
         * can witness any flags set in lockstat_dev_enabled -- this
         * way we don't need to add any barriers in lockstat_event.
         */
        xc_barrier(0);

        /*
         * Start timing after the xcall, so we don't spuriously count
         * xcall communication time, but before flipping the switch, so
         * we don't dirty sample with locks taken in the timecounter.
         */
        getnanotime(&lockstat_stime);

        LOCKSTAT_ENABLED_UPDATE_BEGIN();
        atomic_store_relaxed(&lockstat_dev_enabled, le->le_mask);
        LOCKSTAT_ENABLED_UPDATE_END();
}

/*
 * Stop collecting lock statistics.
 */
int
lockstat_stop(lsdisable_t *ld)
{
        CPU_INFO_ITERATOR cii;
        struct cpu_info *ci;
        u_int cpuno, overflow;
        struct timespec ts;
        int error;
        lwp_t *l;

        /* coverity[assert_side_effect] */
        KASSERT(lockstat_dev_enabled);

        /*
         * Disable and wait for other CPUs to exit lockstat_event().
         */
        LOCKSTAT_ENABLED_UPDATE_BEGIN();
        atomic_store_relaxed(&lockstat_dev_enabled, 0);
        LOCKSTAT_ENABLED_UPDATE_END();
        getnanotime(&ts);
        xc_barrier(0);

        /*
         * Did we run out of buffers while tracing?
         */
        overflow = 0;
        for (CPU_INFO_FOREACH(cii, ci))
                overflow += ((lscpu_t *)ci->ci_lockstat)->lc_overflow;

        if (overflow != 0) {
                error = EOVERFLOW;
                log(LOG_NOTICE, "lockstat: %d buffer allocations failed\n",
                    overflow);
        } else
                error = 0;

        lockstat_init_tables(NULL);

        /* Run through all LWPs and clear the slate for the next run. */
        mutex_enter(&proc_lock);
        LIST_FOREACH(l, &alllwp, l_list) {
                l->l_pfailaddr = 0;
                l->l_pfailtime = 0;
                l->l_pfaillock = 0;
        }
        mutex_exit(&proc_lock);

        if (ld == NULL)
                return error;

        /*
         * Fill out the disable struct for the caller.
         */
        timespecsub(&ts, &lockstat_stime, &ld->ld_time);
        ld->ld_size = lockstat_sizeb;

        cpuno = 0;
        for (CPU_INFO_FOREACH(cii, ci)) {
                if (cpuno >= sizeof(ld->ld_freq) / sizeof(ld->ld_freq[0])) {
                        log(LOG_WARNING, "lockstat: too many CPUs\n");
                        break;
                }
                ld->ld_freq[cpuno++] = cpu_frequency(ci);
        }

        return error;
}

/*
 * Allocate buffers for lockstat_start().
 */
int
lockstat_alloc(lsenable_t *le)
{
        lsbuf_t *lb;
        size_t sz;

        /* coverity[assert_side_effect] */
        KASSERT(!lockstat_dev_enabled);
        lockstat_free();

        sz = sizeof(*lb) * le->le_nbufs;

        lb = kmem_zalloc(sz, KM_SLEEP);

        /* coverity[assert_side_effect] */
        KASSERT(!lockstat_dev_enabled);
        KASSERT(lockstat_baseb == NULL);
        lockstat_sizeb = sz;
        lockstat_baseb = lb;

        return (0);
}

/*
 * Free allocated buffers after tracing has stopped.
 */
void
lockstat_free(void)
{

        /* coverity[assert_side_effect] */
        KASSERT(!lockstat_dev_enabled);

        if (lockstat_baseb != NULL) {
                kmem_free(lockstat_baseb, lockstat_sizeb);
                lockstat_baseb = NULL;
        }
}

/*
 * Main entry point from lock primitives.
 */
void
lockstat_event(uintptr_t lock, uintptr_t callsite, u_int flags, u_int count,
               uint64_t cycles)
{
        lslist_t *ll;
        lscpu_t *lc;
        lsbuf_t *lb;
        u_int event;
        int s;

#ifdef KDTRACE_HOOKS
        uint32_t id;
        CTASSERT((LS_NPROBES & (LS_NPROBES - 1)) == 0);
        if ((id = atomic_load_relaxed(&lockstat_probemap[LS_COMPRESS(flags)]))
            != 0)
                (*lockstat_probe_func)(id, lock, callsite, flags, count,
                    cycles);
#endif

        if ((flags & atomic_load_relaxed(&lockstat_dev_enabled)) != flags ||
            count == 0)
                return;
        if (lock < lockstat_lockstart || lock > lockstat_lockend)
                return;
        if (callsite < lockstat_csstart || callsite > lockstat_csend)
                return;

        callsite &= lockstat_csmask;
        lock &= lockstat_lamask;

        /*
         * Find the table for this lock+callsite pair, and try to locate a
         * buffer with the same key.
         */
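        /*
         * Raising the IPL keeps us from being preempted, or re-entered by a
         * lock event taken from interrupt context on this CPU, while the
         * per-CPU lists are manipulated.  The event index selects the
         * lb_counts[]/lb_times[] slot; the LB_* event codes start at 1,
         * hence the "- 1" below.
         */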
        s = splhigh();
        lc = curcpu()->ci_lockstat;
        ll = &lc->lc_hash[LOCKSTAT_HASH(lock ^ callsite)];
        event = (flags & LB_EVENT_MASK) - 1;

        LIST_FOREACH(lb, ll, lb_chain.list) {
                if (lb->lb_lock == lock && lb->lb_callsite == callsite)
                        break;
        }

        if (lb != NULL) {
                /*
                 * We found a record.  Move it to the front of the list, as
                 * we're likely to hit it again soon.
                 */
                if (lb != LIST_FIRST(ll)) {
                        LIST_REMOVE(lb, lb_chain.list);
                        LIST_INSERT_HEAD(ll, lb, lb_chain.list);
                }
                lb->lb_counts[event] += count;
                lb->lb_times[event] += cycles;
        } else if ((lb = SLIST_FIRST(&lc->lc_free)) != NULL) {
                /*
                 * Pinch a new buffer and fill it out.
                 */
                SLIST_REMOVE_HEAD(&lc->lc_free, lb_chain.slist);
                LIST_INSERT_HEAD(ll, lb, lb_chain.list);
                lb->lb_flags = (uint16_t)flags;
                lb->lb_lock = lock;
                lb->lb_callsite = callsite;
                lb->lb_counts[event] = count;
                lb->lb_times[event] = cycles;
        } else {
                /*
                 * We didn't find a buffer and there were none free.
                 * lockstat_stop() will notice later on and report the
                 * error.
                 */
                lc->lc_overflow++;
        }

        splx(s);
}

/*
 * Accept an open() on /dev/lockstat.
 */
int
lockstat_open(dev_t dev, int flag, int mode, lwp_t *l)
{

        if (!__cpu_simple_lock_try(&lockstat_lock))
                return EBUSY;
        lockstat_lwp = curlwp;
        return 0;
}

/*
 * Accept the last close() on /dev/lockstat.
 */
int
lockstat_close(dev_t dev, int flag, int mode, lwp_t *l)
{

        lockstat_lwp = NULL;
        if (lockstat_dev_enabled) {
                lockstat_stop(NULL);
                lockstat_free();
        }
        __cpu_simple_unlock(&lockstat_lock);
        return 0;
}

/*
 * Handle control operations.
 */
int
lockstat_ioctl(dev_t dev, u_long cmd, void *data, int flag, lwp_t *l)
{
        lsenable_t *le;
        int error;

        if (lockstat_lwp != curlwp)
                return EBUSY;

        switch (cmd) {
        case IOC_LOCKSTAT_GVERSION:
                *(int *)data = LS_VERSION;
                error = 0;
                break;

        case IOC_LOCKSTAT_ENABLE:
                le = (lsenable_t *)data;

                if (!cpu_hascounter()) {
                        error = ENODEV;
                        break;
                }
                if (atomic_load_relaxed(&lockstat_dev_enabled)) {
                        error = EBUSY;
                        break;
                }

                /*
                 * Sanitize the arguments passed in and set up filtering.
                 */
                if (le->le_nbufs == 0) {
                        le->le_nbufs = MIN(LOCKSTAT_DEFBUFS * ncpu,
                            LOCKSTAT_MAXBUFS);
                } else if (le->le_nbufs > LOCKSTAT_MAXBUFS ||
                    le->le_nbufs < LOCKSTAT_MINBUFS) {
                        error = EINVAL;
                        break;
                }
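                /*
                 * If no single-call-site or single-lock filter was
                 * requested, widen the range to start = 0, end = start - 1:
                 * stored in the uintptr_t lockstat_csend and
                 * lockstat_lockend globals, the end wraps to the maximum
                 * address, so the range checks in lockstat_event() match
                 * everything.
                 */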
                if ((le->le_flags & LE_ONE_CALLSITE) == 0) {
                        le->le_csstart = 0;
                        le->le_csend = le->le_csstart - 1;
                }
                if ((le->le_flags & LE_ONE_LOCK) == 0) {
                        le->le_lockstart = 0;
                        le->le_lockend = le->le_lockstart - 1;
                }
                if ((le->le_mask & LB_EVENT_MASK) == 0)
                        return EINVAL;
                if ((le->le_mask & LB_LOCK_MASK) == 0)
                        return EINVAL;

                /*
                 * Start tracing.
                 */
                if ((error = lockstat_alloc(le)) == 0)
                        lockstat_start(le);
                break;

        case IOC_LOCKSTAT_DISABLE:
                if (!atomic_load_relaxed(&lockstat_dev_enabled))
                        error = EINVAL;
                else
                        error = lockstat_stop((lsdisable_t *)data);
                break;

        default:
                error = ENOTTY;
                break;
        }

        return error;
}

/*
 * Copy buffers out to user-space.
 */
int
lockstat_read(dev_t dev, struct uio *uio, int flag)
{

        if (curlwp != lockstat_lwp || lockstat_dev_enabled)
                return EBUSY;
        return uiomove(lockstat_baseb, lockstat_sizeb, uio);
}
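
A minimal sketch of a userspace consumer of the ioctl interface above; lockstat(8) is the real consumer. The control flow follows lockstat_ioctl() and lockstat_read() as implemented in this file, but the userland header path <dev/lockstat.h> and the LB_ADAPTIVE_MUTEX/LB_SPIN mask bits are assumptions about what that header exports, and error handling is trimmed.

/* lockstat_demo.c -- illustrative sketch only, not part of the driver. */
#include <sys/ioctl.h>
#include <dev/lockstat.h>       /* assumed install path for lsenable_t etc. */
#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
        lsenable_t le;
        lsdisable_t ld;
        int fd, version;

        /* Only one consumer may hold the device; a second open gets EBUSY. */
        if ((fd = open("/dev/lockstat", O_RDONLY)) == -1)
                err(1, "open");

        if (ioctl(fd, IOC_LOCKSTAT_GVERSION, &version) == -1 ||
            version != LS_VERSION)
                errx(1, "kernel/userland version mismatch");

        memset(&le, 0, sizeof(le));
        le.le_nbufs = 0;                        /* 0 = let the kernel choose */
        le.le_flags = LE_CALLSITE | LE_LOCK;    /* record call site and lock */
        le.le_mask = LB_ADAPTIVE_MUTEX | LB_SPIN; /* assumed LB_* names: any
                                                     lock type + event bits */

        if (ioctl(fd, IOC_LOCKSTAT_ENABLE, &le) == -1)
                err(1, "enable");

        sleep(10);                              /* trace for a while */

        /* EOVERFLOW here means the kernel ran out of buffers while tracing. */
        if (ioctl(fd, IOC_LOCKSTAT_DISABLE, &ld) == -1)
                err(1, "disable");

        printf("traced %lld.%09ld s, %zu bytes of buffers\n",
            (long long)ld.ld_time.tv_sec, (long)ld.ld_time.tv_nsec,
            (size_t)ld.ld_size);

        /* The raw lsbuf_t records can now be read() and post-processed. */
        close(fd);
        return 0;
}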
