The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/dev/lockstat.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: lockstat.c,v 1.15 2008/04/28 20:23:46 martin Exp $     */
    2 
    3 /*-
    4  * Copyright (c) 2006, 2007 The NetBSD Foundation, Inc.
    5  * All rights reserved.
    6  *
    7  * This code is derived from software contributed to The NetBSD Foundation
    8  * by Andrew Doran.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   29  * POSSIBILITY OF SUCH DAMAGE.
   30  */
   31 
   32 /*
   33  * Lock statistics driver, providing kernel support for the lockstat(8)
   34  * command.
   35  *
   36  * We use a global lock word (lockstat_lock) to track device opens.
   37  * Only one thread can hold the device at a time, providing a global lock.
   38  *
   39  * XXX Timings for contention on sleep locks are currently incorrect.
   40  */
   41 
   42 #include <sys/cdefs.h>
   43 __KERNEL_RCSID(0, "$NetBSD: lockstat.c,v 1.15 2008/04/28 20:23:46 martin Exp $");
   44 
   45 #include <sys/types.h>
   46 #include <sys/param.h>
   47 #include <sys/proc.h> 
   48 #include <sys/resourcevar.h>
   49 #include <sys/systm.h>
   50 #include <sys/kernel.h>
   51 #include <sys/kmem.h>
   52 #include <sys/conf.h>
   53 #include <sys/syslog.h>
   54 #include <sys/atomic.h>
   55 
   56 #include <dev/lockstat.h>
   57 
   58 #include <machine/lock.h>
   59 
   60 #ifndef __HAVE_CPU_COUNTER
   61 #error CPU counters not available
   62 #endif
   63 
   64 #if LONG_BIT == 64
   65 #define LOCKSTAT_HASH_SHIFT     3
   66 #elif LONG_BIT == 32
   67 #define LOCKSTAT_HASH_SHIFT     2
   68 #endif
   69 
   70 #define LOCKSTAT_MINBUFS        1000
   71 #define LOCKSTAT_DEFBUFS        10000
   72 #define LOCKSTAT_MAXBUFS        50000
   73 
   74 #define LOCKSTAT_HASH_SIZE      128
   75 #define LOCKSTAT_HASH_MASK      (LOCKSTAT_HASH_SIZE - 1)
   76 #define LOCKSTAT_HASH(key)      \
   77         ((key >> LOCKSTAT_HASH_SHIFT) & LOCKSTAT_HASH_MASK)
   78 
/*
 * Per-CPU tracing state: a free list of event buffers, and a hash table
 * (keyed on lock address XOR call site) of buffers currently in use.
 */
typedef struct lscpu {
	SLIST_HEAD(, lsbuf)	lc_free;	/* unused buffers for this CPU */
	u_int			lc_overflow;	/* events dropped: free list empty */
	LIST_HEAD(lslist, lsbuf) lc_hash[LOCKSTAT_HASH_SIZE]; /* active buffers */
} lscpu_t;

typedef struct lslist lslist_t;

void	lockstatattach(int);
void	lockstat_start(lsenable_t *);
int	lockstat_alloc(lsenable_t *);
void	lockstat_init_tables(lsenable_t *);
int	lockstat_stop(lsdisable_t *);
void	lockstat_free(void);

dev_type_open(lockstat_open);
dev_type_close(lockstat_close);
dev_type_read(lockstat_read);
dev_type_ioctl(lockstat_ioctl);

volatile u_int	lockstat_enabled;	/* LB_* event mask; zero = tracing off */
uintptr_t	lockstat_csstart;	/* call-site filter: lower bound */
uintptr_t	lockstat_csend;		/* call-site filter: upper bound */
uintptr_t	lockstat_csmask;	/* all-ones to record call sites, else 0 */
uintptr_t	lockstat_lamask;	/* all-ones to record lock addrs, else 0 */
uintptr_t	lockstat_lockstart;	/* lock-address filter: lower bound */
uintptr_t	lockstat_lockend;	/* lock-address filter: upper bound */
__cpu_simple_lock_t lockstat_lock;	/* interlock: one device opener at a time */
lwp_t		*lockstat_lwp;		/* the LWP holding the device open */
lsbuf_t		*lockstat_baseb;	/* base of the shared buffer arena */
size_t		lockstat_sizeb;		/* size of the arena in bytes */
int		lockstat_busy;		/* NOTE(review): unused in this file -- confirm */
struct timespec lockstat_stime;		/* wall time when tracing was enabled */

/* Character device switch for /dev/lockstat (MP-safe, no write/poll/mmap). */
const struct cdevsw lockstat_cdevsw = {
	lockstat_open, lockstat_close, lockstat_read, nowrite, lockstat_ioctl,
	nostop, notty, nopoll, nommap, nokqfilter, D_OTHER | D_MPSAFE
};
  117 
/*
 * Called when the pseudo-driver is attached.
 *
 * Only initializes the open/close interlock; per-CPU tables and buffers
 * are set up lazily when tracing is enabled via ioctl.
 */
void
lockstatattach(int nunits)
{

	(void)nunits;	/* unit count is irrelevant for this pseudo-device */

	__cpu_simple_lock_init(&lockstat_lock);
}
  129 
  130 /*
  131  * Prepare the per-CPU tables for use, or clear down tables when tracing is
  132  * stopped.
  133  */
  134 void
  135 lockstat_init_tables(lsenable_t *le)
  136 {
  137         int i, per, slop, cpuno;
  138         CPU_INFO_ITERATOR cii;
  139         struct cpu_info *ci;
  140         lscpu_t *lc;
  141         lsbuf_t *lb;
  142 
  143         KASSERT(!lockstat_enabled);
  144 
  145         for (CPU_INFO_FOREACH(cii, ci)) {
  146                 if (ci->ci_lockstat != NULL) {
  147                         kmem_free(ci->ci_lockstat, sizeof(lscpu_t));
  148                         ci->ci_lockstat = NULL;
  149                 }
  150         }
  151 
  152         if (le == NULL)
  153                 return;
  154 
  155         lb = lockstat_baseb;
  156         per = le->le_nbufs / ncpu;
  157         slop = le->le_nbufs - (per * ncpu);
  158         cpuno = 0;
  159         for (CPU_INFO_FOREACH(cii, ci)) {
  160                 lc = kmem_alloc(sizeof(*lc), KM_SLEEP);
  161                 lc->lc_overflow = 0;
  162                 ci->ci_lockstat = lc;
  163 
  164                 SLIST_INIT(&lc->lc_free);
  165                 for (i = 0; i < LOCKSTAT_HASH_SIZE; i++)
  166                         LIST_INIT(&lc->lc_hash[i]);
  167 
  168                 for (i = per; i != 0; i--, lb++) {
  169                         lb->lb_cpu = (uint16_t)cpuno;
  170                         SLIST_INSERT_HEAD(&lc->lc_free, lb, lb_chain.slist);
  171                 }
  172                 if (--slop > 0) {
  173                         lb->lb_cpu = (uint16_t)cpuno;
  174                         SLIST_INSERT_HEAD(&lc->lc_free, lb, lb_chain.slist);
  175                         lb++;
  176                 }
  177                 cpuno++;
  178         }
  179 }
  180 
  181 /*
  182  * Start collecting lock statistics.
  183  */
  184 void
  185 lockstat_start(lsenable_t *le)
  186 {
  187 
  188         KASSERT(!lockstat_enabled);
  189 
  190         lockstat_init_tables(le);
  191 
  192         if ((le->le_flags & LE_CALLSITE) != 0)
  193                 lockstat_csmask = (uintptr_t)-1LL;
  194         else
  195                 lockstat_csmask = 0;
  196 
  197         if ((le->le_flags & LE_LOCK) != 0)
  198                 lockstat_lamask = (uintptr_t)-1LL;
  199         else
  200                 lockstat_lamask = 0;
  201 
  202         lockstat_csstart = le->le_csstart;
  203         lockstat_csend = le->le_csend;
  204         lockstat_lockstart = le->le_lockstart;
  205         lockstat_lockstart = le->le_lockstart;
  206         lockstat_lockend = le->le_lockend;
  207         membar_sync();
  208         getnanotime(&lockstat_stime);
  209         lockstat_enabled = le->le_mask;
  210         membar_producer();
  211 }
  212 
  213 /*
  214  * Stop collecting lock statistics.
  215  */
  216 int
  217 lockstat_stop(lsdisable_t *ld)
  218 {
  219         CPU_INFO_ITERATOR cii;
  220         struct cpu_info *ci;
  221         u_int cpuno, overflow;
  222         struct timespec ts;
  223         int error;
  224         lwp_t *l;
  225 
  226         KASSERT(lockstat_enabled);
  227 
  228         /*
  229          * Set enabled false, force a write barrier, and wait for other CPUs
  230          * to exit lockstat_event().
  231          */
  232         lockstat_enabled = 0;
  233         membar_producer();
  234         getnanotime(&ts);
  235         tsleep(&lockstat_stop, PPAUSE, "lockstat", mstohz(10));
  236 
  237         /*
  238          * Did we run out of buffers while tracing?
  239          */
  240         overflow = 0;
  241         for (CPU_INFO_FOREACH(cii, ci))
  242                 overflow += ((lscpu_t *)ci->ci_lockstat)->lc_overflow;
  243 
  244         if (overflow != 0) {
  245                 error = EOVERFLOW;
  246                 log(LOG_NOTICE, "lockstat: %d buffer allocations failed\n",
  247                     overflow);
  248         } else
  249                 error = 0;
  250 
  251         lockstat_init_tables(NULL);
  252 
  253         /* Run through all LWPs and clear the slate for the next run. */
  254         mutex_enter(proc_lock);
  255         LIST_FOREACH(l, &alllwp, l_list) {
  256                 l->l_pfailaddr = 0;
  257                 l->l_pfailtime = 0;
  258                 l->l_pfaillock = 0;
  259         }
  260         mutex_exit(proc_lock);
  261 
  262         if (ld == NULL)
  263                 return error;
  264 
  265         /*
  266          * Fill out the disable struct for the caller.
  267          */
  268         timespecsub(&ts, &lockstat_stime, &ld->ld_time);
  269         ld->ld_size = lockstat_sizeb;
  270 
  271         cpuno = 0;
  272         for (CPU_INFO_FOREACH(cii, ci)) {
  273                 if (cpuno > sizeof(ld->ld_freq) / sizeof(ld->ld_freq[0])) {
  274                         log(LOG_WARNING, "lockstat: too many CPUs\n");
  275                         break;
  276                 }
  277                 ld->ld_freq[cpuno++] = cpu_frequency(ci);
  278         }
  279 
  280         return error;
  281 }
  282 
  283 /*
  284  * Allocate buffers for lockstat_start().
  285  */
  286 int
  287 lockstat_alloc(lsenable_t *le)
  288 {
  289         lsbuf_t *lb;
  290         size_t sz;
  291 
  292         KASSERT(!lockstat_enabled);
  293         lockstat_free();
  294 
  295         sz = sizeof(*lb) * le->le_nbufs;
  296 
  297         lb = kmem_zalloc(sz, KM_SLEEP);
  298         if (lb == NULL)
  299                 return (ENOMEM);
  300 
  301         KASSERT(!lockstat_enabled);
  302         KASSERT(lockstat_baseb == NULL);
  303         lockstat_sizeb = sz;
  304         lockstat_baseb = lb;
  305                 
  306         return (0);
  307 }
  308 
  309 /*
  310  * Free allocated buffers after tracing has stopped.
  311  */
  312 void
  313 lockstat_free(void)
  314 {
  315 
  316         KASSERT(!lockstat_enabled);
  317 
  318         if (lockstat_baseb != NULL) {
  319                 kmem_free(lockstat_baseb, lockstat_sizeb);
  320                 lockstat_baseb = NULL;
  321         }
  322 }
  323 
/*
 * Main entry point from lock primitives.
 *
 * Records one contention/hold event of "cycles" duration for the given
 * lock and call site.  Cheap when tracing is disabled: the first test
 * against lockstat_enabled rejects the call.  Runs at splhigh() so the
 * per-CPU tables need no further locking.
 */
void
lockstat_event(uintptr_t lock, uintptr_t callsite, u_int flags, u_int count,
	       uint64_t cycles)
{
	lslist_t *ll;
	lscpu_t *lc;
	lsbuf_t *lb;
	u_int event;
	int s;

	/* Reject unless every requested event bit is enabled. */
	if ((flags & lockstat_enabled) != flags || count == 0)
		return;
	/* Apply the lock-address and call-site range filters. */
	if (lock < lockstat_lockstart || lock > lockstat_lockend)
		return;
	if (callsite < lockstat_csstart || callsite > lockstat_csend)
		return;

	/* Masks are all-ones or zero: keep or discard each key half. */
	callsite &= lockstat_csmask;
	lock &= lockstat_lamask;

	/*
	 * Find the table for this lock+callsite pair, and try to locate a
	 * buffer with the same key.
	 */
	s = splhigh();
	lc = curcpu()->ci_lockstat;
	ll = &lc->lc_hash[LOCKSTAT_HASH(lock ^ callsite)];
	event = (flags & LB_EVENT_MASK) - 1;

	LIST_FOREACH(lb, ll, lb_chain.list) {
		if (lb->lb_lock == lock && lb->lb_callsite == callsite)
			break;
	}

	if (lb != NULL) {
		/*
		 * We found a record.  Move it to the front of the list, as
		 * we're likely to hit it again soon.
		 */
		if (lb != LIST_FIRST(ll)) {
			LIST_REMOVE(lb, lb_chain.list);
			LIST_INSERT_HEAD(ll, lb, lb_chain.list);
		}
		lb->lb_counts[event] += count;
		lb->lb_times[event] += cycles;
	} else if ((lb = SLIST_FIRST(&lc->lc_free)) != NULL) {
		/*
		 * Pinch a new buffer and fill it out.
		 */
		SLIST_REMOVE_HEAD(&lc->lc_free, lb_chain.slist);
		LIST_INSERT_HEAD(ll, lb, lb_chain.list);
		lb->lb_flags = (uint16_t)flags;
		lb->lb_lock = lock;
		lb->lb_callsite = callsite;
		lb->lb_counts[event] = count;
		lb->lb_times[event] = cycles;
	} else {
		/*
		 * We didn't find a buffer and there were none free.
		 * lockstat_stop() will notice later on and report the
		 * error.
		 */
		 lc->lc_overflow++;
	}

	splx(s);
}
  394 
  395 /*
  396  * Accept an open() on /dev/lockstat.
  397  */
  398 int
  399 lockstat_open(dev_t dev, int flag, int mode, lwp_t *l)
  400 {
  401 
  402         if (!__cpu_simple_lock_try(&lockstat_lock))
  403                 return EBUSY;
  404         lockstat_lwp = curlwp;
  405         return 0;
  406 }
  407 
/*
 * Accept the last close() on /dev/lockstat.
 *
 * Clears the owning LWP before dropping the interlock so a new opener
 * never observes the previous owner.
 */
int
lockstat_close(dev_t dev, int flag, int mode, lwp_t *l)
{

	lockstat_lwp = NULL;
	__cpu_simple_unlock(&lockstat_lock);
	return 0;
}
  419 
  420 /*
  421  * Handle control operations.
  422  */
  423 int
  424 lockstat_ioctl(dev_t dev, u_long cmd, void *data, int flag, lwp_t *l)
  425 {
  426         lsenable_t *le;
  427         int error;
  428 
  429         if (lockstat_lwp != curlwp)
  430                 return EBUSY;
  431 
  432         switch (cmd) {
  433         case IOC_LOCKSTAT_GVERSION:
  434                 *(int *)data = LS_VERSION;
  435                 error = 0;
  436                 break;
  437 
  438         case IOC_LOCKSTAT_ENABLE:
  439                 le = (lsenable_t *)data;
  440 
  441                 if (!cpu_hascounter()) {
  442                         error = ENODEV;
  443                         break;
  444                 }
  445                 if (lockstat_enabled) {
  446                         error = EBUSY;
  447                         break;
  448                 }
  449 
  450                 /*
  451                  * Sanitize the arguments passed in and set up filtering.
  452                  */
  453                 if (le->le_nbufs == 0)
  454                         le->le_nbufs = LOCKSTAT_DEFBUFS;
  455                 else if (le->le_nbufs > LOCKSTAT_MAXBUFS ||
  456                     le->le_nbufs < LOCKSTAT_MINBUFS) {
  457                         error = EINVAL;
  458                         break;
  459                 }
  460                 if ((le->le_flags & LE_ONE_CALLSITE) == 0) {
  461                         le->le_csstart = 0;
  462                         le->le_csend = le->le_csstart - 1;
  463                 }
  464                 if ((le->le_flags & LE_ONE_LOCK) == 0) {
  465                         le->le_lockstart = 0;
  466                         le->le_lockend = le->le_lockstart - 1;
  467                 }
  468                 if ((le->le_mask & LB_EVENT_MASK) == 0)
  469                         return EINVAL;
  470                 if ((le->le_mask & LB_LOCK_MASK) == 0)
  471                         return EINVAL;
  472 
  473                 /*
  474                  * Start tracing.
  475                  */
  476                 if ((error = lockstat_alloc(le)) == 0)
  477                         lockstat_start(le);
  478                 break;
  479 
  480         case IOC_LOCKSTAT_DISABLE:
  481                 if (!lockstat_enabled)
  482                         error = EINVAL;
  483                 else
  484                         error = lockstat_stop((lsdisable_t *)data);
  485                 break;
  486 
  487         default:
  488                 error = ENOTTY;
  489                 break;
  490         }
  491 
  492         return error;
  493 }
  494 
/*
 * Copy buffers out to user-space.
 *
 * Only the owning LWP may read, and only while tracing is stopped
 * (the buffers are live and unlocked while tracing runs).
 */
int
lockstat_read(dev_t dev, struct uio *uio, int flag)
{

	if (curlwp != lockstat_lwp || lockstat_enabled)
		return EBUSY;
	return uiomove(lockstat_baseb, lockstat_sizeb, uio);
}

Cache object: 8b9bd857a15caca247c11d0d589c420b


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.