FreeBSD/Linux Kernel Cross Reference
sys/dev/hwpmc/hwpmc_mod.c


    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2003-2008 Joseph Koshy
    5  * Copyright (c) 2007 The FreeBSD Foundation
    6  * Copyright (c) 2018 Matthew Macy
    7  * All rights reserved.
    8  *
    9  * Portions of this software were developed by A. Joseph Koshy under
   10  * sponsorship from the FreeBSD Foundation and Google, Inc.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice, this list of conditions and the following disclaimer.
   17  * 2. Redistributions in binary form must reproduce the above copyright
   18  *    notice, this list of conditions and the following disclaimer in the
   19  *    documentation and/or other materials provided with the distribution.
   20  *
   21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   31  * SUCH DAMAGE.
   32  *
   33  */
   34 
   35 #include <sys/cdefs.h>
   36 __FBSDID("$FreeBSD$");
   37 
   38 #include <sys/param.h>
   39 #include <sys/systm.h>
   40 #include <sys/domainset.h>
   41 #include <sys/eventhandler.h>
   42 #include <sys/jail.h>
   43 #include <sys/kernel.h>
   44 #include <sys/kthread.h>
   45 #include <sys/limits.h>
   46 #include <sys/lock.h>
   47 #include <sys/malloc.h>
   48 #include <sys/module.h>
   49 #include <sys/mount.h>
   50 #include <sys/mutex.h>
   51 #include <sys/pmc.h>
   52 #include <sys/pmckern.h>
   53 #include <sys/pmclog.h>
   54 #include <sys/priv.h>
   55 #include <sys/proc.h>
   56 #include <sys/queue.h>
   57 #include <sys/resourcevar.h>
   58 #include <sys/rwlock.h>
   59 #include <sys/sched.h>
   60 #include <sys/signalvar.h>
   61 #include <sys/smp.h>
   62 #include <sys/sx.h>
   63 #include <sys/sysctl.h>
   64 #include <sys/sysent.h>
   65 #include <sys/syslog.h>
   66 #include <sys/taskqueue.h>
   67 #include <sys/vnode.h>
   68 
   69 #include <sys/linker.h>         /* needs to be after <sys/malloc.h> */
   70 
   71 #include <machine/atomic.h>
   72 #include <machine/md_var.h>
   73 
   74 #include <vm/vm.h>
   75 #include <vm/vm_extern.h>
   76 #include <vm/pmap.h>
   77 #include <vm/vm_map.h>
   78 #include <vm/vm_object.h>
   79 
   80 #include "hwpmc_soft.h"
   81 
   82 #define PMC_EPOCH_ENTER() struct epoch_tracker pmc_et; epoch_enter_preempt(global_epoch_preempt, &pmc_et)
   83 #define PMC_EPOCH_EXIT() epoch_exit_preempt(global_epoch_preempt, &pmc_et)
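
/*
 * Editorial note: the two wrappers above must be paired in the same
 * lexical scope, because PMC_EPOCH_ENTER() declares the epoch_tracker
 * that PMC_EPOCH_EXIT() references.  A hedged usage sketch, with names
 * mirroring their uses later in this file:
 *
 *	PMC_EPOCH_ENTER();
 *	CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
 *		pmclog_process_map_in(po, pid, start, path);
 *	PMC_EPOCH_EXIT();
 */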
   84 
   85 /*
   86  * Types
   87  */
   88 
   89 enum pmc_flags {
   90         PMC_FLAG_NONE     = 0x00, /* do nothing */
   91         PMC_FLAG_REMOVE   = 0x01, /* atomically remove entry from hash */
   92         PMC_FLAG_ALLOCATE = 0x02, /* add entry to hash if not found */
   93         PMC_FLAG_NOWAIT   = 0x04, /* do not wait for mallocs */
   94 };
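
/*
 * Editorial sketch of how the descriptor-lookup helpers declared below
 * (e.g. pmc_find_process_descriptor()) consume these flags; a
 * simplified rendering, not the driver's exact code:
 *
 *	pp = <hash lookup of 'p'>;
 *	if (pp == NULL && (mode & PMC_FLAG_ALLOCATE) != 0)
 *		pp = malloc(size, M_PMC, (mode & PMC_FLAG_NOWAIT) ?
 *		    M_NOWAIT | M_ZERO : M_WAITOK | M_ZERO);
 *	if (pp != NULL && (mode & PMC_FLAG_REMOVE) != 0)
 *		LIST_REMOVE(pp, pp_next);
 */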
   95 
   96 /*
   97  * The offset in sysent where the syscall is allocated.
   98  */
   99 
  100 static int pmc_syscall_num = NO_SYSCALL;
  101 struct pmc_cpu          **pmc_pcpu;      /* per-cpu state */
  102 pmc_value_t             *pmc_pcpu_saved; /* saved PMC values: CSW handling */
  103 
  104 #define PMC_PCPU_SAVED(C,R)     pmc_pcpu_saved[(R) + md->pmd_npmc*(C)]
  105 
  106 struct mtx_pool         *pmc_mtxpool;
  107 static int              *pmc_pmcdisp;    /* PMC row dispositions */
  108 
  109 #define PMC_ROW_DISP_IS_FREE(R)         (pmc_pmcdisp[(R)] == 0)
  110 #define PMC_ROW_DISP_IS_THREAD(R)       (pmc_pmcdisp[(R)] > 0)
  111 #define PMC_ROW_DISP_IS_STANDALONE(R)   (pmc_pmcdisp[(R)] < 0)
  112 
  113 #define PMC_MARK_ROW_FREE(R) do {                                         \
  114         pmc_pmcdisp[(R)] = 0;                                             \
  115 } while (0)
  116 
  117 #define PMC_MARK_ROW_STANDALONE(R) do {                                   \
  118         KASSERT(pmc_pmcdisp[(R)] <= 0, ("[pmc,%d] row disposition error", \
  119                     __LINE__));                                           \
  120         atomic_add_int(&pmc_pmcdisp[(R)], -1);                            \
  121         KASSERT(pmc_pmcdisp[(R)] >= (-pmc_cpu_max_active()),              \
  122                 ("[pmc,%d] row disposition error", __LINE__));            \
  123 } while (0)
  124 
  125 #define PMC_UNMARK_ROW_STANDALONE(R) do {                                 \
  126         atomic_add_int(&pmc_pmcdisp[(R)], 1);                             \
  127         KASSERT(pmc_pmcdisp[(R)] <= 0, ("[pmc,%d] row disposition error", \
  128                     __LINE__));                                           \
  129 } while (0)
  130 
  131 #define PMC_MARK_ROW_THREAD(R) do {                                       \
  132         KASSERT(pmc_pmcdisp[(R)] >= 0, ("[pmc,%d] row disposition error", \
  133                     __LINE__));                                           \
  134         atomic_add_int(&pmc_pmcdisp[(R)], 1);                             \
  135 } while (0)
  136 
  137 #define PMC_UNMARK_ROW_THREAD(R) do {                                     \
  138         atomic_add_int(&pmc_pmcdisp[(R)], -1);                            \
  139         KASSERT(pmc_pmcdisp[(R)] >= 0, ("[pmc,%d] row disposition error", \
  140                     __LINE__));                                           \
  141 } while (0)
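
/*
 * Editorial sketch: pmc_pmcdisp[] is a signed per-row count, so the
 * macros above encode mutually exclusive uses of a row.  A hypothetical
 * compatibility check built from them (the driver performs the real
 * check at PMC allocation time):
 */
#if 0
static int
pmc_row_disposition_ok(int ri, enum pmc_mode mode)
{
	/* System-scope PMCs need the row free or already standalone. */
	if (PMC_IS_SYSTEM_MODE(mode))
		return (PMC_ROW_DISP_IS_FREE(ri) ||
		    PMC_ROW_DISP_IS_STANDALONE(ri));
	/* Process-scope PMCs need it free or already in thread use. */
	return (PMC_ROW_DISP_IS_FREE(ri) || PMC_ROW_DISP_IS_THREAD(ri));
}
#endif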
  142 
  143 
  144 /* various event handlers */
  145 static eventhandler_tag pmc_exit_tag, pmc_fork_tag, pmc_kld_load_tag,
  146     pmc_kld_unload_tag;
  147 
  148 /* Module statistics */
  149 struct pmc_driverstats pmc_stats;
  150 
  151 
  152 /* Machine/processor dependent operations */
  153 static struct pmc_mdep  *md;
  154 
  155 /*
   156  * Hash tables mapping owner processes and target processes to PMCs.
  157  */
  158 
  159 struct mtx pmc_processhash_mtx;         /* spin mutex */
  160 static u_long pmc_processhashmask;
  161 static LIST_HEAD(pmc_processhash, pmc_process)  *pmc_processhash;
  162 
  163 /*
  164  * Hash table of PMC owner descriptors.  This table is protected by
  165  * the shared PMC "sx" lock.
  166  */
  167 
  168 static u_long pmc_ownerhashmask;
  169 static LIST_HEAD(pmc_ownerhash, pmc_owner)      *pmc_ownerhash;
  170 
  171 /*
  172  * List of PMC owners with system-wide sampling PMCs.
  173  */
  174 
  175 static CK_LIST_HEAD(, pmc_owner)                        pmc_ss_owners;
  176 
  177 /*
  178  * List of free thread entries. This is protected by the spin
  179  * mutex.
  180  */
  181 static struct mtx pmc_threadfreelist_mtx;       /* spin mutex */
  182 static LIST_HEAD(, pmc_thread)                  pmc_threadfreelist;
   183 static int pmc_threadfreelist_entries = 0;
  184 #define THREADENTRY_SIZE                                                \
  185 (sizeof(struct pmc_thread) + (md->pmd_npmc * sizeof(struct pmc_threadpmcstate)))
  186 
  187 /*
  188  * Task to free thread descriptors
  189  */
  190 static struct task free_task;
  191 
  192 /*
  193  * A map of row indices to classdep structures.
  194  */
  195 static struct pmc_classdep **pmc_rowindex_to_classdep;
  196 
  197 /*
  198  * Prototypes
  199  */
  200 
  201 #ifdef  HWPMC_DEBUG
  202 static int      pmc_debugflags_sysctl_handler(SYSCTL_HANDLER_ARGS);
  203 static int      pmc_debugflags_parse(char *newstr, char *fence);
  204 #endif
  205 
  206 static int      load(struct module *module, int cmd, void *arg);
  207 static int      pmc_add_sample(ring_type_t ring, struct pmc *pm, struct trapframe *tf);
  208 static void     pmc_add_thread_descriptors_from_proc(struct proc *p,
  209     struct pmc_process *pp);
  210 static int      pmc_attach_process(struct proc *p, struct pmc *pm);
  211 static struct pmc *pmc_allocate_pmc_descriptor(void);
  212 static struct pmc_owner *pmc_allocate_owner_descriptor(struct proc *p);
  213 static int      pmc_attach_one_process(struct proc *p, struct pmc *pm);
  214 static int      pmc_can_allocate_rowindex(struct proc *p, unsigned int ri,
  215     int cpu);
  216 static int      pmc_can_attach(struct pmc *pm, struct proc *p);
  217 static void     pmc_capture_user_callchain(int cpu, int soft, struct trapframe *tf);
  218 static void     pmc_cleanup(void);
  219 static int      pmc_detach_process(struct proc *p, struct pmc *pm);
  220 static int      pmc_detach_one_process(struct proc *p, struct pmc *pm,
  221     int flags);
  222 static void     pmc_destroy_owner_descriptor(struct pmc_owner *po);
  223 static void     pmc_destroy_pmc_descriptor(struct pmc *pm);
  224 static void     pmc_destroy_process_descriptor(struct pmc_process *pp);
  225 static struct pmc_owner *pmc_find_owner_descriptor(struct proc *p);
  226 static int      pmc_find_pmc(pmc_id_t pmcid, struct pmc **pm);
  227 static struct pmc *pmc_find_pmc_descriptor_in_process(struct pmc_owner *po,
  228     pmc_id_t pmc);
  229 static struct pmc_process *pmc_find_process_descriptor(struct proc *p,
  230     uint32_t mode);
  231 static struct pmc_thread *pmc_find_thread_descriptor(struct pmc_process *pp,
  232     struct thread *td, uint32_t mode);
  233 static void     pmc_force_context_switch(void);
  234 static void     pmc_link_target_process(struct pmc *pm,
  235     struct pmc_process *pp);
  236 static void     pmc_log_all_process_mappings(struct pmc_owner *po);
  237 static void     pmc_log_kernel_mappings(struct pmc *pm);
  238 static void     pmc_log_process_mappings(struct pmc_owner *po, struct proc *p);
  239 static void     pmc_maybe_remove_owner(struct pmc_owner *po);
  240 static void     pmc_process_csw_in(struct thread *td);
  241 static void     pmc_process_csw_out(struct thread *td);
  242 static void     pmc_process_exit(void *arg, struct proc *p);
  243 static void     pmc_process_fork(void *arg, struct proc *p1,
  244     struct proc *p2, int n);
  245 static void     pmc_process_samples(int cpu, ring_type_t soft);
  246 static void     pmc_release_pmc_descriptor(struct pmc *pmc);
  247 static void     pmc_process_thread_add(struct thread *td);
  248 static void     pmc_process_thread_delete(struct thread *td);
  249 static void     pmc_process_thread_userret(struct thread *td);
  250 static void     pmc_remove_owner(struct pmc_owner *po);
  251 static void     pmc_remove_process_descriptor(struct pmc_process *pp);
  252 static int      pmc_start(struct pmc *pm);
  253 static int      pmc_stop(struct pmc *pm);
  254 static int      pmc_syscall_handler(struct thread *td, void *syscall_args);
  255 static struct pmc_thread *pmc_thread_descriptor_pool_alloc(void);
  256 static void     pmc_thread_descriptor_pool_drain(void);
  257 static void     pmc_thread_descriptor_pool_free(struct pmc_thread *pt);
  258 static void     pmc_unlink_target_process(struct pmc *pmc,
  259     struct pmc_process *pp);
  260 static int generic_switch_in(struct pmc_cpu *pc, struct pmc_process *pp);
  261 static int generic_switch_out(struct pmc_cpu *pc, struct pmc_process *pp);
  262 static struct pmc_mdep *pmc_generic_cpu_initialize(void);
  263 static void pmc_generic_cpu_finalize(struct pmc_mdep *md);
  264 static void pmc_post_callchain_callback(void);
  265 static void pmc_process_threadcreate(struct thread *td);
  266 static void pmc_process_threadexit(struct thread *td);
  267 static void pmc_process_proccreate(struct proc *p);
  268 static void pmc_process_allproc(struct pmc *pm);
  269 
  270 /*
  271  * Kernel tunables and sysctl(8) interface.
  272  */
  273 
  274 SYSCTL_DECL(_kern_hwpmc);
  275 SYSCTL_NODE(_kern_hwpmc, OID_AUTO, stats, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
  276     "HWPMC stats");
  277 
  278 
  279 /* Stats. */
   280 SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, intr_ignored, CTLFLAG_RW,
   281     &pmc_stats.pm_intr_ignored, "# of interrupts ignored");
   282 SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, intr_processed, CTLFLAG_RW,
   283     &pmc_stats.pm_intr_processed, "# of interrupts processed");
   284 SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, intr_bufferfull, CTLFLAG_RW,
   285     &pmc_stats.pm_intr_bufferfull, "# of interrupts where buffer was full");
   286 SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, syscalls, CTLFLAG_RW,
   287     &pmc_stats.pm_syscalls, "# of syscalls");
   288 SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, syscall_errors, CTLFLAG_RW,
   289     &pmc_stats.pm_syscall_errors, "# of syscall errors");
   290 SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, buffer_requests, CTLFLAG_RW,
   291     &pmc_stats.pm_buffer_requests, "# of buffer requests");
   292 SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, buffer_requests_failed, CTLFLAG_RW,
   293     &pmc_stats.pm_buffer_requests_failed, "# of buffer requests which failed");
   294 SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, log_sweeps, CTLFLAG_RW,
   295     &pmc_stats.pm_log_sweeps, "# of times samples were processed");
   296 SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, merges, CTLFLAG_RW,
   297     &pmc_stats.pm_merges, "# of times kernel stack was found for user trace");
   298 SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, overwrites, CTLFLAG_RW,
   299     &pmc_stats.pm_overwrites, "# of times a sample was overwritten before being logged");
  300 
  301 static int pmc_callchaindepth = PMC_CALLCHAIN_DEPTH;
  302 SYSCTL_INT(_kern_hwpmc, OID_AUTO, callchaindepth, CTLFLAG_RDTUN,
  303     &pmc_callchaindepth, 0, "depth of call chain records");
  304 
  305 char pmc_cpuid[PMC_CPUID_LEN];
  306 SYSCTL_STRING(_kern_hwpmc, OID_AUTO, cpuid, CTLFLAG_RD,
  307         pmc_cpuid, 0, "cpu version string");
  308 #ifdef  HWPMC_DEBUG
  309 struct pmc_debugflags pmc_debugflags = PMC_DEBUG_DEFAULT_FLAGS;
  310 char    pmc_debugstr[PMC_DEBUG_STRSIZE];
  311 TUNABLE_STR(PMC_SYSCTL_NAME_PREFIX "debugflags", pmc_debugstr,
  312     sizeof(pmc_debugstr));
  313 SYSCTL_PROC(_kern_hwpmc, OID_AUTO, debugflags,
  314     CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE,
  315     0, 0, pmc_debugflags_sysctl_handler, "A",
  316     "debug flags");
  317 #endif
  318 
  319 
  320 /*
   321  * kern.hwpmc.hashsize -- determines the number of rows in the
   322  * hash tables used to look up owner and target processes
  323  */
  324 
  325 static int pmc_hashsize = PMC_HASH_SIZE;
  326 SYSCTL_INT(_kern_hwpmc, OID_AUTO, hashsize, CTLFLAG_RDTUN,
  327     &pmc_hashsize, 0, "rows in hash tables");
  328 
  329 /*
   330  * kern.hwpmc.nsamples -- number of PC samples/callchain stacks per CPU
  331  */
  332 
  333 static int pmc_nsamples = PMC_NSAMPLES;
  334 SYSCTL_INT(_kern_hwpmc, OID_AUTO, nsamples, CTLFLAG_RDTUN,
  335     &pmc_nsamples, 0, "number of PC samples per CPU");
  336 
  337 static uint64_t pmc_sample_mask = PMC_NSAMPLES-1;
  338 
  339 /*
  340  * kern.hwpmc.mtxpoolsize -- number of mutexes in the mutex pool.
  341  */
  342 
  343 static int pmc_mtxpool_size = PMC_MTXPOOL_SIZE;
  344 SYSCTL_INT(_kern_hwpmc, OID_AUTO, mtxpoolsize, CTLFLAG_RDTUN,
  345     &pmc_mtxpool_size, 0, "size of spin mutex pool");
  346 
  347 
  348 /*
  349  * kern.hwpmc.threadfreelist_entries -- number of free entries
  350  */
  351 
  352 SYSCTL_INT(_kern_hwpmc, OID_AUTO, threadfreelist_entries, CTLFLAG_RD,
  353     &pmc_threadfreelist_entries, 0, "number of available thread entries");
  354 
  355 
  356 /*
  357  * kern.hwpmc.threadfreelist_max -- maximum number of free entries
  358  */
  359 
  360 static int pmc_threadfreelist_max = PMC_THREADLIST_MAX;
  361 SYSCTL_INT(_kern_hwpmc, OID_AUTO, threadfreelist_max, CTLFLAG_RW,
  362     &pmc_threadfreelist_max, 0,
  363     "maximum number of available thread entries before freeing some");
  364 
  365 
  366 /*
  367  * kern.hwpmc.mincount -- minimum sample count
  368  */
  369 static u_int pmc_mincount = 1000;
  370 SYSCTL_INT(_kern_hwpmc, OID_AUTO, mincount, CTLFLAG_RWTUN,
  371     &pmc_mincount, 0,
  372     "minimum count for sampling counters");
  373 
  374 /*
  375  * security.bsd.unprivileged_syspmcs -- allow non-root processes to
  376  * allocate system-wide PMCs.
  377  *
  378  * Allowing unprivileged processes to allocate system PMCs is convenient
  379  * if system-wide measurements need to be taken concurrently with other
  380  * per-process measurements.  This feature is turned off by default.
  381  */
  382 
  383 static int pmc_unprivileged_syspmcs = 0;
  384 SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_syspmcs, CTLFLAG_RWTUN,
  385     &pmc_unprivileged_syspmcs, 0,
  386     "allow unprivileged process to allocate system PMCs");
  387 
  388 /*
  389  * Hash function.  Discard the lower 2 bits of the pointer since
  390  * these are always zero for our uses.  The hash multiplier is
  391  * round((2^LONG_BIT) * ((sqrt(5)-1)/2)).
  392  */
  393 
  394 #if     LONG_BIT == 64
  395 #define _PMC_HM         11400714819323198486u
  396 #elif   LONG_BIT == 32
  397 #define _PMC_HM         2654435769u
  398 #else
  399 #error  Must know the size of 'long' to compile
  400 #endif
  401 
  402 #define PMC_HASH_PTR(P,M)       ((((unsigned long) (P) >> 2) * _PMC_HM) & (M))
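
/*
 * Editorial check of the constants above: (sqrt(5)-1)/2 ~= 0.6180339887,
 * and 0.6180339887 * 2^64 = 11400714819323198485.95..., which rounds to
 * the 64-bit multiplier used here; 2654435769 is the same fraction of
 * 2^32 (Fibonacci hashing).  The mask 'M' must be a power of two minus
 * one; e.g. PMC_HASH_PTR(p, 1023) yields an index into a 1024-bucket
 * table such as pmc_processhash[] above.
 */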
  403 
  404 /*
  405  * Syscall structures
  406  */
  407 
  408 /* The `sysent' for the new syscall */
  409 static struct sysent pmc_sysent = {
  410         .sy_narg =      2,
  411         .sy_call =      pmc_syscall_handler,
  412 };
  413 
  414 static struct syscall_module_data pmc_syscall_mod = {
  415         .chainevh =     load,
  416         .chainarg =     NULL,
  417         .offset =       &pmc_syscall_num,
  418         .new_sysent =   &pmc_sysent,
  419         .old_sysent =   { .sy_narg = 0, .sy_call = NULL },
  420         .flags =        SY_THR_STATIC_KLD,
  421 };
  422 
  423 static moduledata_t pmc_mod = {
  424         .name =         PMC_MODULE_NAME,
  425         .evhand =       syscall_module_handler,
  426         .priv =         &pmc_syscall_mod,
  427 };
  428 
  429 #ifdef EARLY_AP_STARTUP
  430 DECLARE_MODULE(pmc, pmc_mod, SI_SUB_SYSCALLS, SI_ORDER_ANY);
  431 #else
  432 DECLARE_MODULE(pmc, pmc_mod, SI_SUB_SMP, SI_ORDER_ANY);
  433 #endif
  434 MODULE_VERSION(pmc, PMC_VERSION);
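
/*
 * Editorial sketch: because the syscall slot is allocated dynamically
 * (see NO_SYSCALL above), userland must discover the number at run
 * time through the module system.  This is roughly what libpmc's
 * initialization does; a hedged reconstruction with error handling
 * simplified, not code copied from libpmc:
 */
#if 0
#include <sys/types.h>
#include <sys/module.h>

static int
pmc_find_syscall_num(void)
{
	struct module_stat ms;
	int modid;

	ms.version = sizeof(ms);
	modid = modfind(PMC_MODULE_NAME);	/* id of the hwpmc module */
	if (modid < 0 || modstat(modid, &ms) < 0)
		return (-1);
	return (ms.data.intval);	/* the sysent slot allocated above */
}
#endif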
  435 
  436 #ifdef  HWPMC_DEBUG
  437 enum pmc_dbgparse_state {
  438         PMCDS_WS,               /* in whitespace */
  439         PMCDS_MAJOR,            /* seen a major keyword */
  440         PMCDS_MINOR
  441 };
  442 
  443 static int
  444 pmc_debugflags_parse(char *newstr, char *fence)
  445 {
  446         char c, *p, *q;
  447         struct pmc_debugflags *tmpflags;
  448         int error, found, *newbits, tmp;
  449         size_t kwlen;
  450 
  451         tmpflags = malloc(sizeof(*tmpflags), M_PMC, M_WAITOK|M_ZERO);
  452 
  453         p = newstr;
  454         error = 0;
  455 
  456         for (; p < fence && (c = *p); p++) {
  457 
  458                 /* skip white space */
  459                 if (c == ' ' || c == '\t')
  460                         continue;
  461 
  462                 /* look for a keyword followed by "=" */
  463                 for (q = p; p < fence && (c = *p) && c != '='; p++)
  464                         ;
  465                 if (c != '=') {
  466                         error = EINVAL;
  467                         goto done;
  468                 }
  469 
  470                 kwlen = p - q;
  471                 newbits = NULL;
  472 
  473                 /* lookup flag group name */
  474 #define DBG_SET_FLAG_MAJ(S,F)                                           \
  475                 if (kwlen == sizeof(S)-1 && strncmp(q, S, kwlen) == 0)  \
  476                         newbits = &tmpflags->pdb_ ## F;
  477 
  478                 DBG_SET_FLAG_MAJ("cpu",         CPU);
  479                 DBG_SET_FLAG_MAJ("csw",         CSW);
  480                 DBG_SET_FLAG_MAJ("logging",     LOG);
  481                 DBG_SET_FLAG_MAJ("module",      MOD);
  482                 DBG_SET_FLAG_MAJ("md",          MDP);
  483                 DBG_SET_FLAG_MAJ("owner",       OWN);
  484                 DBG_SET_FLAG_MAJ("pmc",         PMC);
  485                 DBG_SET_FLAG_MAJ("process",     PRC);
  486                 DBG_SET_FLAG_MAJ("sampling",    SAM);
  487 
  488                 if (newbits == NULL) {
  489                         error = EINVAL;
  490                         goto done;
  491                 }
  492 
  493                 p++;            /* skip the '=' */
  494 
  495                 /* Now parse the individual flags */
  496                 tmp = 0;
  497         newflag:
  498                 for (q = p; p < fence && (c = *p); p++)
  499                         if (c == ' ' || c == '\t' || c == ',')
  500                                 break;
  501 
  502                 /* p == fence or c == ws or c == "," or c == 0 */
  503 
  504                 if ((kwlen = p - q) == 0) {
  505                         *newbits = tmp;
  506                         continue;
  507                 }
  508 
  509                 found = 0;
  510 #define DBG_SET_FLAG_MIN(S,F)                                           \
  511                 if (kwlen == sizeof(S)-1 && strncmp(q, S, kwlen) == 0)  \
  512                         tmp |= found = (1 << PMC_DEBUG_MIN_ ## F)
  513 
  514                 /* a '*' denotes all possible flags in the group */
  515                 if (kwlen == 1 && *q == '*')
  516                         tmp = found = ~0;
  517                 /* look for individual flag names */
  518                 DBG_SET_FLAG_MIN("allocaterow", ALR);
  519                 DBG_SET_FLAG_MIN("allocate",    ALL);
  520                 DBG_SET_FLAG_MIN("attach",      ATT);
  521                 DBG_SET_FLAG_MIN("bind",        BND);
  522                 DBG_SET_FLAG_MIN("config",      CFG);
  523                 DBG_SET_FLAG_MIN("exec",        EXC);
  524                 DBG_SET_FLAG_MIN("exit",        EXT);
  525                 DBG_SET_FLAG_MIN("find",        FND);
  526                 DBG_SET_FLAG_MIN("flush",       FLS);
  527                 DBG_SET_FLAG_MIN("fork",        FRK);
  528                 DBG_SET_FLAG_MIN("getbuf",      GTB);
  529                 DBG_SET_FLAG_MIN("hook",        PMH);
  530                 DBG_SET_FLAG_MIN("init",        INI);
  531                 DBG_SET_FLAG_MIN("intr",        INT);
  532                 DBG_SET_FLAG_MIN("linktarget",  TLK);
  533                 DBG_SET_FLAG_MIN("mayberemove", OMR);
  534                 DBG_SET_FLAG_MIN("ops",         OPS);
  535                 DBG_SET_FLAG_MIN("read",        REA);
  536                 DBG_SET_FLAG_MIN("register",    REG);
  537                 DBG_SET_FLAG_MIN("release",     REL);
  538                 DBG_SET_FLAG_MIN("remove",      ORM);
  539                 DBG_SET_FLAG_MIN("sample",      SAM);
  540                 DBG_SET_FLAG_MIN("scheduleio",  SIO);
  541                 DBG_SET_FLAG_MIN("select",      SEL);
  542                 DBG_SET_FLAG_MIN("signal",      SIG);
  543                 DBG_SET_FLAG_MIN("swi",         SWI);
  544                 DBG_SET_FLAG_MIN("swo",         SWO);
  545                 DBG_SET_FLAG_MIN("start",       STA);
  546                 DBG_SET_FLAG_MIN("stop",        STO);
  547                 DBG_SET_FLAG_MIN("syscall",     PMS);
  548                 DBG_SET_FLAG_MIN("unlinktarget", TUL);
  549                 DBG_SET_FLAG_MIN("write",       WRI);
  550                 if (found == 0) {
  551                         /* unrecognized flag name */
  552                         error = EINVAL;
  553                         goto done;
  554                 }
  555 
  556                 if (c == 0 || c == ' ' || c == '\t') {  /* end of flag group */
  557                         *newbits = tmp;
  558                         continue;
  559                 }
  560 
  561                 p++;
  562                 goto newflag;
  563         }
  564 
  565         /* save the new flag set */
  566         bcopy(tmpflags, &pmc_debugflags, sizeof(pmc_debugflags));
  567 
  568  done:
  569         free(tmpflags, M_PMC);
  570         return error;
  571 }
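
/*
 * Editorial note on the syntax accepted above, reconstructed from the
 * parse loop: the string is a whitespace-separated list of groups of
 * the form "major=minor[,minor...]", where '*' selects every flag in a
 * group.  An illustrative setting:
 *
 *	kern.hwpmc.debugflags="pmc=allocate,release process=exec,fork cpu=*"
 */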
  572 
  573 static int
  574 pmc_debugflags_sysctl_handler(SYSCTL_HANDLER_ARGS)
  575 {
  576         char *fence, *newstr;
  577         int error;
  578         unsigned int n;
  579 
  580         (void) arg1; (void) arg2; /* unused parameters */
  581 
  582         n = sizeof(pmc_debugstr);
  583         newstr = malloc(n, M_PMC, M_WAITOK|M_ZERO);
  584         (void) strlcpy(newstr, pmc_debugstr, n);
  585 
  586         error = sysctl_handle_string(oidp, newstr, n, req);
  587 
  588         /* if there is a new string, parse and copy it */
  589         if (error == 0 && req->newptr != NULL) {
  590                 fence = newstr + (n < req->newlen ? n : req->newlen + 1);
  591                 if ((error = pmc_debugflags_parse(newstr, fence)) == 0)
  592                         (void) strlcpy(pmc_debugstr, newstr,
  593                             sizeof(pmc_debugstr));
  594         }
  595 
  596         free(newstr, M_PMC);
  597 
  598         return error;
  599 }
  600 #endif
  601 
  602 /*
  603  * Map a row index to a classdep structure and return the adjusted row
  604  * index for the PMC class index.
  605  */
  606 static struct pmc_classdep *
  607 pmc_ri_to_classdep(struct pmc_mdep *md, int ri, int *adjri)
  608 {
  609         struct pmc_classdep *pcd;
  610 
  611         (void) md;
  612 
  613         KASSERT(ri >= 0 && ri < md->pmd_npmc,
  614             ("[pmc,%d] illegal row-index %d", __LINE__, ri));
  615 
  616         pcd = pmc_rowindex_to_classdep[ri];
  617 
  618         KASSERT(pcd != NULL,
  619             ("[pmc,%d] ri %d null pcd", __LINE__, ri));
  620 
  621         *adjri = ri - pcd->pcd_ri;
  622 
  623         KASSERT(*adjri >= 0 && *adjri < pcd->pcd_num,
  624             ("[pmc,%d] adjusted row-index %d", __LINE__, *adjri));
  625 
  626         return (pcd);
  627 }
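
/*
 * Editorial illustration: callers use the returned classdep together
 * with the adjusted row index to invoke class-specific methods, as in
 * the context-switch path later in this file:
 *
 *	pcd = pmc_ri_to_classdep(md, ri, &adjri);
 *	pcd->pcd_config_pmc(cpu, adjri, pm);
 */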
  628 
  629 /*
  630  * Concurrency Control
  631  *
  632  * The driver manages the following data structures:
  633  *
  634  *   - target process descriptors, one per target process
  635  *   - owner process descriptors (and attached lists), one per owner process
  636  *   - lookup hash tables for owner and target processes
  637  *   - PMC descriptors (and attached lists)
  638  *   - per-cpu hardware state
  639  *   - the 'hook' variable through which the kernel calls into
  640  *     this module
  641  *   - the machine hardware state (managed by the MD layer)
  642  *
  643  * These data structures are accessed from:
  644  *
  645  * - thread context-switch code
  646  * - interrupt handlers (possibly on multiple cpus)
  647  * - kernel threads on multiple cpus running on behalf of user
  648  *   processes doing system calls
  649  * - this driver's private kernel threads
  650  *
  651  * = Locks and Locking strategy =
  652  *
  653  * The driver uses four locking strategies for its operation:
  654  *
  655  * - The global SX lock "pmc_sx" is used to protect internal
  656  *   data structures.
  657  *
  658  *   Calls into the module by syscall() start with this lock being
  659  *   held in exclusive mode.  Depending on the requested operation,
  660  *   the lock may be downgraded to 'shared' mode to allow more
  661  *   concurrent readers into the module.  Calls into the module from
  662  *   other parts of the kernel acquire the lock in shared mode.
  663  *
  664  *   This SX lock is held in exclusive mode for any operations that
  665  *   modify the linkages between the driver's internal data structures.
  666  *
  667  *   The 'pmc_hook' function pointer is also protected by this lock.
  668  *   It is only examined with the sx lock held in exclusive mode.  The
  669  *   kernel module is allowed to be unloaded only with the sx lock held
  670  *   in exclusive mode.  In normal syscall handling, after acquiring the
  671  *   pmc_sx lock we first check that 'pmc_hook' is non-null before
  672  *   proceeding.  This prevents races between the thread unloading the module
  673  *   and other threads seeking to use the module.
  674  *
   675  * - Lookups of target process structures cannot use the global
   676  *   "pmc_sx" SX lock because these lookups need to happen during
   677  *   context switches and in other critical sections where sleeping
   678  *   is not allowed.  The process hash table is therefore protected
   679  *   by its own private spin mutex, "pmc_processhash_mtx"; the owner
   680  *   hash table is only walked under "pmc_sx" (see above).
  681  *
   682  * - Interrupt handlers work in a lock-free manner.  At interrupt
  683  *   time, handlers look at the PMC pointer (phw->phw_pmc) configured
  684  *   when the PMC was started.  If this pointer is NULL, the interrupt
  685  *   is ignored after updating driver statistics.  We ensure that this
  686  *   pointer is set (using an atomic operation if necessary) before the
  687  *   PMC hardware is started.  Conversely, this pointer is unset atomically
  688  *   only after the PMC hardware is stopped.
  689  *
  690  *   We ensure that everything needed for the operation of an
  691  *   interrupt handler is available without it needing to acquire any
  692  *   locks.  We also ensure that a PMC's software state is destroyed only
  693  *   after the PMC is taken off hardware (on all CPUs).
  694  *
  695  * - Context-switch handling with process-private PMCs needs more
  696  *   care.
  697  *
  698  *   A given process may be the target of multiple PMCs.  For example,
  699  *   PMCATTACH and PMCDETACH may be requested by a process on one CPU
  700  *   while the target process is running on another.  A PMC could also
  701  *   be getting released because its owner is exiting.  We tackle
  702  *   these situations in the following manner:
  703  *
  704  *   - each target process structure 'pmc_process' has an array
  705  *     of 'struct pmc *' pointers, one for each hardware PMC.
  706  *
  707  *   - At context switch IN time, each "target" PMC in RUNNING state
  708  *     gets started on hardware and a pointer to each PMC is copied into
  709  *     the per-cpu phw array.  The 'runcount' for the PMC is
  710  *     incremented.
  711  *
   712  *   - At context switch OUT time, all process-virtual PMCs are stopped
   713  *     on hardware.  The saved value is added to the PMC's value field
   714  *     only if the PMC is in a non-deleted state (the PMC's state could
   715  *     have changed during the current time slice).
   716  *
   717  *     Note that in between a switch IN on a processor and the matching
   718  *     switch OUT, the PMC could have been released on another CPU.
   719  *     Therefore context switch OUT always looks at the hardware state
   720  *     to turn OFF PMCs and will update a PMC's saved value only if it
   721  *     is reachable from the target process record.
   722  *
   723  *   - OP PMCRELEASE could be called on a PMC at any time (the PMC could
   724  *     be attached to many processes at the time of the call and could
   725  *     be active on multiple CPUs).
   726  *
   727  *     We prevent further scheduling of the PMC by marking it as in
   728  *     state 'DELETED'.  If the runcount of the PMC is non-zero then
   729  *     the PMC is currently running on some CPU.  The thread performing
   730  *     the PMCRELEASE operation waits by repeatedly calling pause()
   731  *     until the runcount drops to zero.
  732  *
  733  * The contents of a PMC descriptor (struct pmc) are protected using
  734  * a spin-mutex.  In order to save space, we use a mutex pool.
  735  *
  736  * In terms of lock types used by witness(4), we use:
  737  * - Type "pmc-sx", used by the global SX lock.
  738  * - Type "pmc-sleep", for sleep mutexes used by logger threads.
  739  * - Type "pmc-per-proc", for protecting PMC owner descriptors.
  740  * - Type "pmc-leaf", used for all other spin mutexes.
  741  */
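
/*
 * Editorial sketch of the PMCRELEASE wait described above; a simplified
 * rendering, not the driver's exact code:
 */
#if 0
	pm->pm_state = PMC_STATE_DELETED;	/* no further scheduling */
	while (counter_u64_fetch(pm->pm_runcount) > 0)
		pause("pmcrel", 1);	/* sleep one tick, then re-test */
#endif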
  742 
  743 /*
  744  * save the cpu binding of the current kthread
  745  */
  746 
  747 void
  748 pmc_save_cpu_binding(struct pmc_binding *pb)
  749 {
  750         PMCDBG0(CPU,BND,2, "save-cpu");
  751         thread_lock(curthread);
  752         pb->pb_bound = sched_is_bound(curthread);
  753         pb->pb_cpu   = curthread->td_oncpu;
  754         pb->pb_priority = curthread->td_priority;
  755         thread_unlock(curthread);
  756         PMCDBG1(CPU,BND,2, "save-cpu cpu=%d", pb->pb_cpu);
  757 }
  758 
  759 /*
  760  * restore the cpu binding of the current thread
  761  */
  762 
  763 void
  764 pmc_restore_cpu_binding(struct pmc_binding *pb)
  765 {
  766         PMCDBG2(CPU,BND,2, "restore-cpu curcpu=%d restore=%d",
  767             curthread->td_oncpu, pb->pb_cpu);
  768         thread_lock(curthread);
  769         sched_bind(curthread, pb->pb_cpu);
  770         if (!pb->pb_bound)
  771                 sched_unbind(curthread);
  772         sched_prio(curthread, pb->pb_priority);
  773         thread_unlock(curthread);
  774         PMCDBG0(CPU,BND,2, "restore-cpu done");
  775 }
  776 
  777 /*
   778  * Move execution to the specified CPU and bind it there.
  779  */
  780 
  781 void
  782 pmc_select_cpu(int cpu)
  783 {
  784         KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
  785             ("[pmc,%d] bad cpu number %d", __LINE__, cpu));
  786 
  787         /* Never move to an inactive CPU. */
  788         KASSERT(pmc_cpu_is_active(cpu), ("[pmc,%d] selecting inactive "
  789             "CPU %d", __LINE__, cpu));
  790 
  791         PMCDBG1(CPU,SEL,2, "select-cpu cpu=%d", cpu);
  792         thread_lock(curthread);
  793         sched_prio(curthread, PRI_MIN);
  794         sched_bind(curthread, cpu);
  795         thread_unlock(curthread);
  796 
  797         KASSERT(curthread->td_oncpu == cpu,
  798             ("[pmc,%d] CPU not bound [cpu=%d, curr=%d]", __LINE__,
  799                 cpu, curthread->td_oncpu));
  800 
  801         PMCDBG1(CPU,SEL,2, "select-cpu cpu=%d ok", cpu);
  802 }
  803 
  804 /*
  805  * Force a context switch.
  806  *
  807  * We do this by pause'ing for 1 tick -- invoking mi_switch() is not
  808  * guaranteed to force a context switch.
  809  */
  810 
  811 static void
  812 pmc_force_context_switch(void)
  813 {
  814 
  815         pause("pmcctx", 1);
  816 }
  817 
  818 uint64_t
  819 pmc_rdtsc(void)
  820 {
  821 #if defined(__i386__) || defined(__amd64__)
  822         if (__predict_true(amd_feature & AMDID_RDTSCP))
  823                 return rdtscp();
  824         else
  825                 return rdtsc();
  826 #else
  827         return get_cyclecount();
  828 #endif
  829 }
  830 
  831 /*
  832  * Get the file name for an executable.  This is a simple wrapper
  833  * around vn_fullpath(9).
  834  */
  835 
  836 static void
  837 pmc_getfilename(struct vnode *v, char **fullpath, char **freepath)
  838 {
  839 
  840         *fullpath = "unknown";
  841         *freepath = NULL;
  842         vn_fullpath(v, fullpath, freepath);
  843 }
  844 
  845 /*
   846  * Remove a process owning PMCs.
  847  */
  848 
  849 void
  850 pmc_remove_owner(struct pmc_owner *po)
  851 {
  852         struct pmc *pm, *tmp;
  853 
  854         sx_assert(&pmc_sx, SX_XLOCKED);
  855 
  856         PMCDBG1(OWN,ORM,1, "remove-owner po=%p", po);
  857 
  858         /* Remove descriptor from the owner hash table */
  859         LIST_REMOVE(po, po_next);
  860 
  861         /* release all owned PMC descriptors */
  862         LIST_FOREACH_SAFE(pm, &po->po_pmcs, pm_next, tmp) {
  863                 PMCDBG1(OWN,ORM,2, "pmc=%p", pm);
  864                 KASSERT(pm->pm_owner == po,
  865                     ("[pmc,%d] owner %p != po %p", __LINE__, pm->pm_owner, po));
  866 
  867                 pmc_release_pmc_descriptor(pm); /* will unlink from the list */
  868                 pmc_destroy_pmc_descriptor(pm);
  869         }
  870 
  871         KASSERT(po->po_sscount == 0,
  872             ("[pmc,%d] SS count not zero", __LINE__));
  873         KASSERT(LIST_EMPTY(&po->po_pmcs),
  874             ("[pmc,%d] PMC list not empty", __LINE__));
  875 
  876         /* de-configure the log file if present */
  877         if (po->po_flags & PMC_PO_OWNS_LOGFILE)
  878                 pmclog_deconfigure_log(po);
  879 }
  880 
  881 /*
  882  * remove an owner process record if all conditions are met.
  883  */
  884 
  885 static void
  886 pmc_maybe_remove_owner(struct pmc_owner *po)
  887 {
  888 
  889         PMCDBG1(OWN,OMR,1, "maybe-remove-owner po=%p", po);
  890 
  891         /*
  892          * Remove owner record if
  893          * - this process does not own any PMCs
  894          * - this process has not allocated a system-wide sampling buffer
  895          */
  896 
  897         if (LIST_EMPTY(&po->po_pmcs) &&
  898             ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)) {
  899                 pmc_remove_owner(po);
  900                 pmc_destroy_owner_descriptor(po);
  901         }
  902 }
  903 
  904 /*
  905  * Add an association between a target process and a PMC.
  906  */
  907 
  908 static void
  909 pmc_link_target_process(struct pmc *pm, struct pmc_process *pp)
  910 {
  911         int ri;
  912         struct pmc_target *pt;
  913 #ifdef INVARIANTS
  914         struct pmc_thread *pt_td;
  915 #endif
  916 
  917         sx_assert(&pmc_sx, SX_XLOCKED);
  918 
  919         KASSERT(pm != NULL && pp != NULL,
  920             ("[pmc,%d] Null pm %p or pp %p", __LINE__, pm, pp));
  921         KASSERT(PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)),
  922             ("[pmc,%d] Attaching a non-process-virtual pmc=%p to pid=%d",
  923                 __LINE__, pm, pp->pp_proc->p_pid));
  924         KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt <= ((int) md->pmd_npmc - 1),
  925             ("[pmc,%d] Illegal reference count %d for process record %p",
  926                 __LINE__, pp->pp_refcnt, (void *) pp));
  927 
  928         ri = PMC_TO_ROWINDEX(pm);
  929 
  930         PMCDBG3(PRC,TLK,1, "link-target pmc=%p ri=%d pmc-process=%p",
  931             pm, ri, pp);
  932 
  933 #ifdef  HWPMC_DEBUG
  934         LIST_FOREACH(pt, &pm->pm_targets, pt_next)
  935             if (pt->pt_process == pp)
  936                     KASSERT(0, ("[pmc,%d] pp %p already in pmc %p targets",
  937                                 __LINE__, pp, pm));
  938 #endif
  939 
  940         pt = malloc(sizeof(struct pmc_target), M_PMC, M_WAITOK|M_ZERO);
  941         pt->pt_process = pp;
  942 
  943         LIST_INSERT_HEAD(&pm->pm_targets, pt, pt_next);
  944 
  945         atomic_store_rel_ptr((uintptr_t *)&pp->pp_pmcs[ri].pp_pmc,
  946             (uintptr_t)pm);
  947 
  948         if (pm->pm_owner->po_owner == pp->pp_proc)
  949                 pm->pm_flags |= PMC_F_ATTACHED_TO_OWNER;
  950 
  951         /*
  952          * Initialize the per-process values at this row index.
  953          */
  954         pp->pp_pmcs[ri].pp_pmcval = PMC_TO_MODE(pm) == PMC_MODE_TS ?
  955             pm->pm_sc.pm_reloadcount : 0;
  956 
  957         pp->pp_refcnt++;
  958 
  959 #ifdef INVARIANTS
  960         /* Confirm that the per-thread values at this row index are cleared. */
  961         if (PMC_TO_MODE(pm) == PMC_MODE_TS) {
  962                 mtx_lock_spin(pp->pp_tdslock);
  963                 LIST_FOREACH(pt_td, &pp->pp_tds, pt_next) {
  964                         KASSERT(pt_td->pt_pmcs[ri].pt_pmcval == (pmc_value_t) 0,
  965                             ("[pmc,%d] pt_pmcval not cleared for pid=%d at "
  966                             "ri=%d", __LINE__, pp->pp_proc->p_pid, ri));
  967                 }
  968                 mtx_unlock_spin(pp->pp_tdslock);
  969         }
  970 #endif
  971 }
  972 
  973 /*
  974  * Removes the association between a target process and a PMC.
  975  */
  976 
  977 static void
  978 pmc_unlink_target_process(struct pmc *pm, struct pmc_process *pp)
  979 {
  980         int ri;
  981         struct proc *p;
  982         struct pmc_target *ptgt;
  983         struct pmc_thread *pt;
  984 
  985         sx_assert(&pmc_sx, SX_XLOCKED);
  986 
  987         KASSERT(pm != NULL && pp != NULL,
  988             ("[pmc,%d] Null pm %p or pp %p", __LINE__, pm, pp));
  989 
  990         KASSERT(pp->pp_refcnt >= 1 && pp->pp_refcnt <= (int) md->pmd_npmc,
  991             ("[pmc,%d] Illegal ref count %d on process record %p",
  992                 __LINE__, pp->pp_refcnt, (void *) pp));
  993 
  994         ri = PMC_TO_ROWINDEX(pm);
  995 
  996         PMCDBG3(PRC,TUL,1, "unlink-target pmc=%p ri=%d pmc-process=%p",
  997             pm, ri, pp);
  998 
  999         KASSERT(pp->pp_pmcs[ri].pp_pmc == pm,
 1000             ("[pmc,%d] PMC ri %d mismatch pmc %p pp->[ri] %p", __LINE__,
 1001                 ri, pm, pp->pp_pmcs[ri].pp_pmc));
 1002 
 1003         pp->pp_pmcs[ri].pp_pmc = NULL;
 1004         pp->pp_pmcs[ri].pp_pmcval = (pmc_value_t) 0;
 1005 
 1006         /* Clear the per-thread values at this row index. */
 1007         if (PMC_TO_MODE(pm) == PMC_MODE_TS) {
 1008                 mtx_lock_spin(pp->pp_tdslock);
 1009                 LIST_FOREACH(pt, &pp->pp_tds, pt_next)
 1010                         pt->pt_pmcs[ri].pt_pmcval = (pmc_value_t) 0;
 1011                 mtx_unlock_spin(pp->pp_tdslock);
 1012         }
 1013 
 1014         /* Remove owner-specific flags */
 1015         if (pm->pm_owner->po_owner == pp->pp_proc) {
 1016                 pp->pp_flags &= ~PMC_PP_ENABLE_MSR_ACCESS;
 1017                 pm->pm_flags &= ~PMC_F_ATTACHED_TO_OWNER;
 1018         }
 1019 
 1020         pp->pp_refcnt--;
 1021 
 1022         /* Remove the target process from the PMC structure */
 1023         LIST_FOREACH(ptgt, &pm->pm_targets, pt_next)
 1024                 if (ptgt->pt_process == pp)
 1025                         break;
 1026 
 1027         KASSERT(ptgt != NULL, ("[pmc,%d] process %p (pp: %p) not found "
 1028                     "in pmc %p", __LINE__, pp->pp_proc, pp, pm));
 1029 
 1030         LIST_REMOVE(ptgt, pt_next);
 1031         free(ptgt, M_PMC);
 1032 
 1033         /* if the PMC now lacks targets, send the owner a SIGIO */
 1034         if (LIST_EMPTY(&pm->pm_targets)) {
 1035                 p = pm->pm_owner->po_owner;
 1036                 PROC_LOCK(p);
 1037                 kern_psignal(p, SIGIO);
 1038                 PROC_UNLOCK(p);
 1039 
 1040                 PMCDBG2(PRC,SIG,2, "signalling proc=%p signal=%d", p,
 1041                     SIGIO);
 1042         }
 1043 }
 1044 
 1045 /*
 1046  * Check if PMC 'pm' may be attached to target process 't'.
 1047  */
 1048 
 1049 static int
 1050 pmc_can_attach(struct pmc *pm, struct proc *t)
 1051 {
 1052         struct proc *o;         /* pmc owner */
 1053         struct ucred *oc, *tc;  /* owner, target credentials */
 1054         int decline_attach, i;
 1055 
 1056         /*
 1057          * A PMC's owner can always attach that PMC to itself.
 1058          */
 1059 
 1060         if ((o = pm->pm_owner->po_owner) == t)
 1061                 return 0;
 1062 
 1063         PROC_LOCK(o);
 1064         oc = o->p_ucred;
 1065         crhold(oc);
 1066         PROC_UNLOCK(o);
 1067 
 1068         PROC_LOCK(t);
 1069         tc = t->p_ucred;
 1070         crhold(tc);
 1071         PROC_UNLOCK(t);
 1072 
 1073         /*
 1074          * The effective uid of the PMC owner should match at least one
 1075          * of the {effective,real,saved} uids of the target process.
 1076          */
 1077 
 1078         decline_attach = oc->cr_uid != tc->cr_uid &&
 1079             oc->cr_uid != tc->cr_svuid &&
 1080             oc->cr_uid != tc->cr_ruid;
 1081 
 1082         /*
  1083  * Every one of the target's group ids must be in the owner's
 1084          * group list.
 1085          */
 1086         for (i = 0; !decline_attach && i < tc->cr_ngroups; i++)
 1087                 decline_attach = !groupmember(tc->cr_groups[i], oc);
 1088 
  1089         /* check the real and saved gids too */
 1090         if (decline_attach == 0)
 1091                 decline_attach = !groupmember(tc->cr_rgid, oc) ||
 1092                     !groupmember(tc->cr_svgid, oc);
 1093 
 1094         crfree(tc);
 1095         crfree(oc);
 1096 
 1097         return !decline_attach;
 1098 }
 1099 
 1100 /*
 1101  * Attach a process to a PMC.
 1102  */
 1103 
 1104 static int
 1105 pmc_attach_one_process(struct proc *p, struct pmc *pm)
 1106 {
 1107         int ri, error;
 1108         char *fullpath, *freepath;
 1109         struct pmc_process      *pp;
 1110 
 1111         sx_assert(&pmc_sx, SX_XLOCKED);
 1112 
 1113         PMCDBG5(PRC,ATT,2, "attach-one pm=%p ri=%d proc=%p (%d, %s)", pm,
 1114             PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm);
 1115 
  1116         /*
  1117          * Locate the process descriptor corresponding to process 'p',
  1118          * allocating space as needed.
  1119          *
  1120          * Verify that rowindex 'pm_rowindex' is free in the process
  1121          * descriptor.
  1122          *
  1123          * If it is, link the process descriptor with the PMC; the
  1124          * linkage is done by pmc_link_target_process() below.
  1125          */
 1126         ri = PMC_TO_ROWINDEX(pm);
 1127 
 1128         /* mark process as using HWPMCs */
 1129         PROC_LOCK(p);
 1130         p->p_flag |= P_HWPMC;
 1131         PROC_UNLOCK(p);
 1132 
 1133         if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_ALLOCATE)) == NULL) {
 1134                 error = ENOMEM;
 1135                 goto fail;
 1136         }
 1137 
  1138         if (pp->pp_pmcs[ri].pp_pmc == pm) { /* already present at slot [ri] */
 1139                 error = EEXIST;
 1140                 goto fail;
 1141         }
 1142 
 1143         if (pp->pp_pmcs[ri].pp_pmc != NULL) {
 1144                 error = EBUSY;
 1145                 goto fail;
 1146         }
 1147 
 1148         pmc_link_target_process(pm, pp);
 1149 
 1150         if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) &&
 1151             (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) == 0)
 1152                 pm->pm_flags |= PMC_F_NEEDS_LOGFILE;
 1153 
 1154         pm->pm_flags |= PMC_F_ATTACH_DONE; /* mark as attached */
 1155 
 1156         /* issue an attach event to a configured log file */
 1157         if (pm->pm_owner->po_flags & PMC_PO_OWNS_LOGFILE) {
 1158                 if (p->p_flag & P_KPROC) {
 1159                         fullpath = kernelname;
 1160                         freepath = NULL;
 1161                 } else {
 1162                         pmc_getfilename(p->p_textvp, &fullpath, &freepath);
 1163                         pmclog_process_pmcattach(pm, p->p_pid, fullpath);
 1164                 }
 1165                 free(freepath, M_TEMP);
 1166                 if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
 1167                         pmc_log_process_mappings(pm->pm_owner, p);
 1168         }
 1169 
 1170         return (0);
 1171  fail:
 1172         PROC_LOCK(p);
 1173         p->p_flag &= ~P_HWPMC;
 1174         PROC_UNLOCK(p);
 1175         return (error);
 1176 }
 1177 
 1178 /*
 1179  * Attach a process and optionally its children
 1180  */
 1181 
 1182 static int
 1183 pmc_attach_process(struct proc *p, struct pmc *pm)
 1184 {
 1185         int error;
 1186         struct proc *top;
 1187 
 1188         sx_assert(&pmc_sx, SX_XLOCKED);
 1189 
 1190         PMCDBG5(PRC,ATT,1, "attach pm=%p ri=%d proc=%p (%d, %s)", pm,
 1191             PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm);
 1192 
 1193 
 1194         /*
 1195          * If this PMC successfully allowed a GETMSR operation
 1196          * in the past, disallow further ATTACHes.
 1197          */
 1198 
 1199         if ((pm->pm_flags & PMC_PP_ENABLE_MSR_ACCESS) != 0)
 1200                 return EPERM;
 1201 
 1202         if ((pm->pm_flags & PMC_F_DESCENDANTS) == 0)
 1203                 return pmc_attach_one_process(p, pm);
 1204 
  1205         /*
  1206          * Traverse the tree of processes rooted at 'p', attaching
  1207          * each one to this PMC.
  1208          */
 1209 
 1210         sx_slock(&proctree_lock);
 1211 
 1212         top = p;
 1213 
 1214         for (;;) {
 1215                 if ((error = pmc_attach_one_process(p, pm)) != 0)
 1216                         break;
 1217                 if (!LIST_EMPTY(&p->p_children))
 1218                         p = LIST_FIRST(&p->p_children);
 1219                 else for (;;) {
 1220                         if (p == top)
 1221                                 goto done;
 1222                         if (LIST_NEXT(p, p_sibling)) {
 1223                                 p = LIST_NEXT(p, p_sibling);
 1224                                 break;
 1225                         }
 1226                         p = p->p_pptr;
 1227                 }
 1228         }
 1229 
 1230         if (error)
 1231                 (void) pmc_detach_process(top, pm);
 1232 
 1233  done:
 1234         sx_sunlock(&proctree_lock);
 1235         return error;
 1236 }
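
/*
 * Editorial note: the loop above is an iterative pre-order walk of the
 * subtree rooted at 'top': descend to the first child when one exists,
 * otherwise advance to the next sibling, climbing back through p_pptr
 * until a sibling is found or the walk returns to 'top'.  The same
 * idiom appears again in pmc_detach_process() below.
 */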
 1237 
 1238 /*
 1239  * Detach a process from a PMC.  If there are no other PMCs tracking
 1240  * this process, remove the process structure from its hash table.  If
 1241  * 'flags' contains PMC_FLAG_REMOVE, then free the process structure.
 1242  */
 1243 
 1244 static int
 1245 pmc_detach_one_process(struct proc *p, struct pmc *pm, int flags)
 1246 {
 1247         int ri;
 1248         struct pmc_process *pp;
 1249 
 1250         sx_assert(&pmc_sx, SX_XLOCKED);
 1251 
 1252         KASSERT(pm != NULL,
 1253             ("[pmc,%d] null pm pointer", __LINE__));
 1254 
 1255         ri = PMC_TO_ROWINDEX(pm);
 1256 
 1257         PMCDBG6(PRC,ATT,2, "detach-one pm=%p ri=%d proc=%p (%d, %s) flags=0x%x",
 1258             pm, ri, p, p->p_pid, p->p_comm, flags);
 1259 
 1260         if ((pp = pmc_find_process_descriptor(p, 0)) == NULL)
 1261                 return ESRCH;
 1262 
 1263         if (pp->pp_pmcs[ri].pp_pmc != pm)
 1264                 return EINVAL;
 1265 
 1266         pmc_unlink_target_process(pm, pp);
 1267 
 1268         /* Issue a detach entry if a log file is configured */
 1269         if (pm->pm_owner->po_flags & PMC_PO_OWNS_LOGFILE)
 1270                 pmclog_process_pmcdetach(pm, p->p_pid);
 1271 
 1272         /*
 1273          * If there are no PMCs targeting this process, we remove its
 1274          * descriptor from the target hash table and unset the P_HWPMC
 1275          * flag in the struct proc.
 1276          */
 1277         KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt <= (int) md->pmd_npmc,
 1278             ("[pmc,%d] Illegal refcnt %d for process struct %p",
 1279                 __LINE__, pp->pp_refcnt, pp));
 1280 
 1281         if (pp->pp_refcnt != 0) /* still a target of some PMC */
 1282                 return 0;
 1283 
 1284         pmc_remove_process_descriptor(pp);
 1285 
 1286         if (flags & PMC_FLAG_REMOVE)
 1287                 pmc_destroy_process_descriptor(pp);
 1288 
 1289         PROC_LOCK(p);
 1290         p->p_flag &= ~P_HWPMC;
 1291         PROC_UNLOCK(p);
 1292 
 1293         return 0;
 1294 }
 1295 
 1296 /*
 1297  * Detach a process and optionally its descendants from a PMC.
 1298  */
 1299 
 1300 static int
 1301 pmc_detach_process(struct proc *p, struct pmc *pm)
 1302 {
 1303         struct proc *top;
 1304 
 1305         sx_assert(&pmc_sx, SX_XLOCKED);
 1306 
 1307         PMCDBG5(PRC,ATT,1, "detach pm=%p ri=%d proc=%p (%d, %s)", pm,
 1308             PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm);
 1309 
 1310         if ((pm->pm_flags & PMC_F_DESCENDANTS) == 0)
 1311                 return pmc_detach_one_process(p, pm, PMC_FLAG_REMOVE);
 1312 
 1313         /*
 1314          * Traverse all children, detaching them from this PMC.  We
 1315          * ignore errors since we could be detaching a PMC from a
 1316          * partially attached proc tree.
 1317          */
 1318 
 1319         sx_slock(&proctree_lock);
 1320 
 1321         top = p;
 1322 
 1323         for (;;) {
 1324                 (void) pmc_detach_one_process(p, pm, PMC_FLAG_REMOVE);
 1325 
 1326                 if (!LIST_EMPTY(&p->p_children))
 1327                         p = LIST_FIRST(&p->p_children);
 1328                 else for (;;) {
 1329                         if (p == top)
 1330                                 goto done;
 1331                         if (LIST_NEXT(p, p_sibling)) {
 1332                                 p = LIST_NEXT(p, p_sibling);
 1333                                 break;
 1334                         }
 1335                         p = p->p_pptr;
 1336                 }
 1337         }
 1338 
 1339  done:
 1340         sx_sunlock(&proctree_lock);
 1341 
 1342         if (LIST_EMPTY(&pm->pm_targets))
 1343                 pm->pm_flags &= ~PMC_F_ATTACH_DONE;
 1344 
 1345         return 0;
 1346 }
 1347 
 1348 
 1349 /*
 1350  * Thread context switch IN
 1351  */
 1352 
 1353 static void
 1354 pmc_process_csw_in(struct thread *td)
 1355 {
 1356         int cpu;
 1357         unsigned int adjri, ri;
 1358         struct pmc *pm;
 1359         struct proc *p;
 1360         struct pmc_cpu *pc;
 1361         struct pmc_hw *phw __diagused;
 1362         pmc_value_t newvalue;
 1363         struct pmc_process *pp;
 1364         struct pmc_thread *pt;
 1365         struct pmc_classdep *pcd;
 1366 
 1367         p = td->td_proc;
 1368         pt = NULL;
 1369         if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_NONE)) == NULL)
 1370                 return;
 1371 
 1372         KASSERT(pp->pp_proc == td->td_proc,
 1373             ("[pmc,%d] not my thread state", __LINE__));
 1374 
 1375         critical_enter(); /* no preemption from this point */
 1376 
 1377         cpu = PCPU_GET(cpuid); /* td->td_oncpu is invalid */
 1378 
 1379         PMCDBG5(CSW,SWI,1, "cpu=%d proc=%p (%d, %s) pp=%p", cpu, p,
 1380             p->p_pid, p->p_comm, pp);
 1381 
 1382         KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
 1383             ("[pmc,%d] weird CPU id %d", __LINE__, cpu));
 1384 
 1385         pc = pmc_pcpu[cpu];
 1386 
 1387         for (ri = 0; ri < md->pmd_npmc; ri++) {
 1388 
 1389                 if ((pm = pp->pp_pmcs[ri].pp_pmc) == NULL)
 1390                         continue;
 1391 
 1392                 KASSERT(PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)),
 1393                     ("[pmc,%d] Target PMC in non-virtual mode (%d)",
 1394                         __LINE__, PMC_TO_MODE(pm)));
 1395 
 1396                 KASSERT(PMC_TO_ROWINDEX(pm) == ri,
 1397                     ("[pmc,%d] Row index mismatch pmc %d != ri %d",
 1398                         __LINE__, PMC_TO_ROWINDEX(pm), ri));
 1399 
 1400                 /*
 1401                  * Only PMCs that are marked as 'RUNNING' need
 1402                  * be placed on hardware.
 1403                  */
 1404 
 1405                 if (pm->pm_state != PMC_STATE_RUNNING)
 1406                         continue;
 1407 
  1408                 KASSERT(counter_u64_fetch(pm->pm_runcount) >= 0,
  1409                     ("[pmc,%d] pm=%p runcount %ld", __LINE__, (void *) pm,
  1410                     (unsigned long)counter_u64_fetch(pm->pm_runcount)));
 1411 
 1412                 /* increment PMC runcount */
 1413                 counter_u64_add(pm->pm_runcount, 1);
 1414 
 1415                 /* configure the HWPMC we are going to use. */
 1416                 pcd = pmc_ri_to_classdep(md, ri, &adjri);
 1417                 pcd->pcd_config_pmc(cpu, adjri, pm);
 1418 
 1419                 phw = pc->pc_hwpmcs[ri];
 1420 
 1421                 KASSERT(phw != NULL,
 1422                     ("[pmc,%d] null hw pointer", __LINE__));
 1423 
 1424                 KASSERT(phw->phw_pmc == pm,
 1425                     ("[pmc,%d] hw->pmc %p != pmc %p", __LINE__,
 1426                         phw->phw_pmc, pm));
 1427 
 1428                 /*
 1429                  * Write out saved value and start the PMC.
 1430                  *
 1431                  * Sampling PMCs use a per-thread value, while
 1432                  * counting mode PMCs use a per-pmc value that is
 1433                  * inherited across descendants.
 1434                  */
 1435                 if (PMC_TO_MODE(pm) == PMC_MODE_TS) {
 1436                         if (pt == NULL)
 1437                                 pt = pmc_find_thread_descriptor(pp, td,
 1438                                     PMC_FLAG_NONE);
 1439 
 1440                         KASSERT(pt != NULL,
 1441                             ("[pmc,%d] No thread found for td=%p", __LINE__,
 1442                             td));
 1443 
 1444                         mtx_pool_lock_spin(pmc_mtxpool, pm);
 1445 
 1446                         /*
 1447                          * If we have a thread descriptor, use the per-thread
 1448                          * counter in the descriptor. If not, we will use
 1449                          * a per-process counter. 
 1450                          *
 1451                          * TODO: Remove the per-process "safety net" once
 1452                          * we have thoroughly tested that we don't hit the
 1453                          * above assert.
 1454                          */
 1455                         if (pt != NULL) {
 1456                                 if (pt->pt_pmcs[ri].pt_pmcval > 0)
 1457                                         newvalue = pt->pt_pmcs[ri].pt_pmcval;
 1458                                 else
 1459                                         newvalue = pm->pm_sc.pm_reloadcount;
 1460                         } else {
 1461                                 /*
 1462                                  * Use the saved value calculated after the most
 1463                                  * recent time a thread using the shared counter
 1464                                  * switched out. Reset the saved count in case
 1465                                  * another thread from this process switches in
 1466                                  * before any threads switch out.
 1467                                  */
 1468 
 1469                                 newvalue = pp->pp_pmcs[ri].pp_pmcval;
 1470                                 pp->pp_pmcs[ri].pp_pmcval =
 1471                                     pm->pm_sc.pm_reloadcount;
 1472                         }
 1473                         mtx_pool_unlock_spin(pmc_mtxpool, pm);
 1474                         KASSERT(newvalue > 0 && newvalue <=
 1475                             pm->pm_sc.pm_reloadcount,
 1476                             ("[pmc,%d] pmcval outside of expected range cpu=%d "
 1477                             "ri=%d pmcval=%jx pm_reloadcount=%jx", __LINE__,
 1478                             cpu, ri, newvalue, pm->pm_sc.pm_reloadcount));
 1479                 } else {
 1480                         KASSERT(PMC_TO_MODE(pm) == PMC_MODE_TC,
 1481                             ("[pmc,%d] illegal mode=%d", __LINE__,
 1482                             PMC_TO_MODE(pm)));
 1483                         mtx_pool_lock_spin(pmc_mtxpool, pm);
 1484                         newvalue = PMC_PCPU_SAVED(cpu, ri) =
 1485                             pm->pm_gv.pm_savedvalue;
 1486                         mtx_pool_unlock_spin(pmc_mtxpool, pm);
 1487                 }
 1488 
 1489                 PMCDBG3(CSW,SWI,1,"cpu=%d ri=%d new=%jd", cpu, ri, newvalue);
 1490 
 1491                 pcd->pcd_write_pmc(cpu, adjri, newvalue);
 1492 
 1493                 /* If a sampling mode PMC, reset stalled state. */
 1494                 if (PMC_TO_MODE(pm) == PMC_MODE_TS)
 1495                         pm->pm_pcpu_state[cpu].pps_stalled = 0;
 1496 
 1497                 /* Indicate that we desire this to run. */
 1498                 pm->pm_pcpu_state[cpu].pps_cpustate = 1;
 1499 
 1500                 /* Start the PMC. */
 1501                 pcd->pcd_start_pmc(cpu, adjri);
 1502         }
 1503 
 1504         /*
 1505          * perform any other architecture/cpu dependent thread
 1506          * switch-in actions.
 1507          */
 1508 
 1509         (void) (*md->pmd_switch_in)(pc, pp);
 1510 
 1511         critical_exit();
 1512 
 1513 }
 1514 
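/*
 * Worked example (illustrative): consider a sampling-mode (TS) PMC with
 * pm_reloadcount = 100000.  If the incoming thread's descriptor carries
 * pt_pmcval = 25000, the hardware is programmed so that the next
 * sampling interrupt arrives after 25000 more events; a thread with no
 * saved value (pt_pmcval == 0) starts from the full 100000.  A
 * counting-mode (TC) PMC instead resumes from the inherited
 * pm_gv.pm_savedvalue, and the value written is remembered in
 * PMC_PCPU_SAVED(cpu, ri) so that the delta can be computed at
 * switch-out.
 */
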
 1515 /*
 1516  * Thread context switch OUT.
 1517  */
 1518 
 1519 static void
 1520 pmc_process_csw_out(struct thread *td)
 1521 {
 1522         int cpu;
 1523         int64_t tmp;
 1524         struct pmc *pm;
 1525         struct proc *p;
 1526         enum pmc_mode mode;
 1527         struct pmc_cpu *pc;
 1528         pmc_value_t newvalue;
 1529         unsigned int adjri, ri;
 1530         struct pmc_process *pp;
 1531         struct pmc_thread *pt = NULL;
 1532         struct pmc_classdep *pcd;
 1533 
 1534 
 1535         /*
 1536          * Locate our process descriptor; this may be NULL if
 1537          * this process is exiting and we have already removed
 1538          * the process from the target process table.
 1539          *
 1540          * Note that due to kernel preemption, multiple
 1541          * context switches may happen while the process is
 1542          * exiting.
 1543          *
 1544          * Note also that if the target process cannot be
 1545          * found we still need to deconfigure any PMCs that
 1546          * are currently running on hardware.
 1547          */
 1548 
 1549         p = td->td_proc;
 1550         pp = pmc_find_process_descriptor(p, PMC_FLAG_NONE);
 1551 
 1552         /*
 1553          * save PMCs
 1554          */
 1555 
 1556         critical_enter();
 1557 
 1558         cpu = PCPU_GET(cpuid); /* td->td_oncpu is invalid */
 1559 
 1560         PMCDBG5(CSW,SWO,1, "cpu=%d proc=%p (%d, %s) pp=%p", cpu, p,
 1561             p->p_pid, p->p_comm, pp);
 1562 
 1563         KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
 1564             ("[pmc,%d] weird CPU id %d", __LINE__, cpu));
 1565 
 1566         pc = pmc_pcpu[cpu];
 1567 
 1568         /*
 1569          * When a PMC gets unlinked from a target PMC, it will
 1570          * be removed from the target's pp_pmc[] array.
 1571          *
 1572          * However, on a MP system, the target could have been
 1573          * executing on another CPU at the time of the unlink.
 1574          * So, at context switch OUT time, we need to look at
 1575          * the hardware to determine if a PMC is scheduled on
 1576          * it.
 1577          */
 1578 
 1579         for (ri = 0; ri < md->pmd_npmc; ri++) {
 1580 
 1581                 pcd = pmc_ri_to_classdep(md, ri, &adjri);
 1582                 pm  = NULL;
 1583                 (void) (*pcd->pcd_get_config)(cpu, adjri, &pm);
 1584 
 1585                 if (pm == NULL) /* nothing at this row index */
 1586                         continue;
 1587 
 1588                 mode = PMC_TO_MODE(pm);
 1589                 if (!PMC_IS_VIRTUAL_MODE(mode))
 1590                         continue; /* not a process virtual PMC */
 1591 
 1592                 KASSERT(PMC_TO_ROWINDEX(pm) == ri,
 1593                     ("[pmc,%d] ri mismatch pmc(%d) ri(%d)",
 1594                         __LINE__, PMC_TO_ROWINDEX(pm), ri));
 1595 
 1596                 /*
 1597                  * Change desired state, and then stop if not stalled.
 1598                  * This two-step dance should avoid race conditions where
 1599                  * an interrupt re-enables the PMC after this code has
 1600                  * already checked the pm_stalled flag.
 1601                  */
 1602                 pm->pm_pcpu_state[cpu].pps_cpustate = 0;
 1603                 if (pm->pm_pcpu_state[cpu].pps_stalled == 0)
 1604                         pcd->pcd_stop_pmc(cpu, adjri);
 1605 
 1606                 KASSERT(counter_u64_fetch(pm->pm_runcount) > 0,
 1607                     ("[pmc,%d] pm=%p runcount %ld", __LINE__, (void *) pm,
 1608                     (unsigned long)counter_u64_fetch(pm->pm_runcount)));
 1609 
 1610                 /* reduce this PMC's runcount */
 1611                 counter_u64_add(pm->pm_runcount, -1);
 1612 
 1613                 /*
 1614                  * If this PMC is associated with this process,
 1615                  * save the reading.
 1616                  */
 1617 
 1618                 if (pm->pm_state != PMC_STATE_DELETED && pp != NULL &&
 1619                     pp->pp_pmcs[ri].pp_pmc != NULL) {
 1620                         KASSERT(pm == pp->pp_pmcs[ri].pp_pmc,
 1621                             ("[pmc,%d] pm %p != pp_pmcs[%d] %p", __LINE__,
 1622                                 pm, ri, pp->pp_pmcs[ri].pp_pmc));
 1623 
 1624                         KASSERT(pp->pp_refcnt > 0,
 1625                             ("[pmc,%d] pp refcnt = %d", __LINE__,
 1626                                 pp->pp_refcnt));
 1627 
 1628                         pcd->pcd_read_pmc(cpu, adjri, &newvalue);
 1629 
 1630                         if (mode == PMC_MODE_TS) {
 1631                                 PMCDBG3(CSW,SWO,1,"cpu=%d ri=%d val=%jd (samp)",
 1632                                     cpu, ri, newvalue);
 1633 
 1634                                 if (pt == NULL)
 1635                                         pt = pmc_find_thread_descriptor(pp, td,
 1636                                             PMC_FLAG_NONE);
 1637 
 1638                                 KASSERT(pt != NULL,
 1639                                     ("[pmc,%d] No thread found for td=%p",
 1640                                     __LINE__, td));
 1641 
 1642                                 mtx_pool_lock_spin(pmc_mtxpool, pm);
 1643 
 1644                                 /*
 1645                                  * If we have a thread descriptor, save the
 1646                                  * per-thread counter in the descriptor. If not,
 1647                                  * we will update the per-process counter.
 1648                                  *
 1649                                  * TODO: Remove the per-process "safety net"
 1650                                  * once we have thoroughly tested that we
 1651                                  * don't hit the above assert.
 1652                                  */
 1653                                 if (pt != NULL)
 1654                                         pt->pt_pmcs[ri].pt_pmcval = newvalue;
 1655                                 else {
 1656                                         /*
 1657                                          * For sampling process-virtual PMCs,
 1658                                          * newvalue is the number of events to
 1659                                          * be seen until the next sampling
 1660                                          * interrupt. We can just add the events
 1661                                          * left from this invocation to the
 1662                                          * counter, then adjust in case we
 1663                                          * overflow our range.
 1664                                          *
 1665                                          * (Recall that we reload the counter
 1666                                          * every time we use it.)
 1667                                          */
 1668                                         pp->pp_pmcs[ri].pp_pmcval += newvalue;
 1669                                         if (pp->pp_pmcs[ri].pp_pmcval >
 1670                                             pm->pm_sc.pm_reloadcount)
 1671                                                 pp->pp_pmcs[ri].pp_pmcval -=
 1672                                                     pm->pm_sc.pm_reloadcount;
 1673                                 }
 1674                                 mtx_pool_unlock_spin(pmc_mtxpool, pm);
 1675                         } else {
 1676                                 tmp = newvalue - PMC_PCPU_SAVED(cpu,ri);
 1677 
 1678                                 PMCDBG3(CSW,SWO,1,"cpu=%d ri=%d tmp=%jd (count)",
 1679                                     cpu, ri, tmp);
 1680 
 1681                                 /*
 1682                                  * For counting process-virtual PMCs,
 1683                                  * we expect the count to be
 1684                                  * increasing monotonically, modulo a 64
 1685                                  * bit wraparound.
 1686                                  */
 1687                                 KASSERT(tmp >= 0,
 1688                                     ("[pmc,%d] negative increment cpu=%d "
 1689                                      "ri=%d newvalue=%jx saved=%jx "
 1690                                      "incr=%jx", __LINE__, cpu, ri,
 1691                                      newvalue, PMC_PCPU_SAVED(cpu,ri), tmp));
 1692 
 1693                                 mtx_pool_lock_spin(pmc_mtxpool, pm);
 1694                                 pm->pm_gv.pm_savedvalue += tmp;
 1695                                 pp->pp_pmcs[ri].pp_pmcval += tmp;
 1696                                 mtx_pool_unlock_spin(pmc_mtxpool, pm);
 1697 
 1698                                 if (pm->pm_flags & PMC_F_LOG_PROCCSW)
 1699                                         pmclog_process_proccsw(pm, pp, tmp, td);
 1700                         }
 1701                 }
 1702 
 1703                 /* mark hardware as free */
 1704                 pcd->pcd_config_pmc(cpu, adjri, NULL);
 1705         }
 1706 
 1707         /*
 1708          * perform any other architecture/cpu dependent thread
 1709          * switch out functions.
 1710          */
 1711 
 1712         (void) (*md->pmd_switch_out)(pc, pp);
 1713 
 1714         critical_exit();
 1715 }
 1716 
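/*
 * Worked example (illustrative): continuing the sampling case above
 * with pm_reloadcount = 100000 and no thread descriptor.  Switch-in
 * reset pp_pmcval to 100000; if the hardware now reads back
 * newvalue = 40000 events remaining, pp_pmcval becomes 140000, which
 * exceeds the reload count and is reduced to 40000 -- exactly the
 * events left to program at the next switch-in.  For a counting (TC)
 * PMC the increment is instead
 *
 *	tmp = newvalue - PMC_PCPU_SAVED(cpu, ri);
 *
 * i.e., the events accumulated since switch-in, which is added to both
 * the global (pm_savedvalue) and the per-process (pp_pmcval) totals.
 */
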
 1717 /*
 1718  * A new thread for a process.
 1719  */
 1720 static void
 1721 pmc_process_thread_add(struct thread *td)
 1722 {
 1723         struct pmc_process *pp;
 1724 
 1725         pp = pmc_find_process_descriptor(td->td_proc, PMC_FLAG_NONE);
 1726         if (pp != NULL)
 1727                 pmc_find_thread_descriptor(pp, td, PMC_FLAG_ALLOCATE);
 1728 }
 1729 
 1730 /*
 1731  * A thread is being deleted from a process.
 1732  */
 1733 static void
 1734 pmc_process_thread_delete(struct thread *td)
 1735 {
 1736         struct pmc_process *pp;
 1737 
 1738         pp = pmc_find_process_descriptor(td->td_proc, PMC_FLAG_NONE);
 1739         if (pp != NULL)
 1740                 pmc_thread_descriptor_pool_free(pmc_find_thread_descriptor(pp,
 1741                     td, PMC_FLAG_REMOVE));
 1742 }
 1743 
 1744 /*
 1745  * A userret() call for a thread.
 1746  */
 1747 static void
 1748 pmc_process_thread_userret(struct thread *td)
 1749 {
 1750         sched_pin();
 1751         pmc_capture_user_callchain(curcpu, PMC_UR, td->td_frame);
 1752         sched_unpin();
 1753 }
 1754 
 1755 /*
 1756  * A mapping change for a process.
 1757  */
 1758 
 1759 static void
 1760 pmc_process_mmap(struct thread *td, struct pmckern_map_in *pkm)
 1761 {
 1762         int ri;
 1763         pid_t pid;
 1764         char *fullpath, *freepath;
 1765         const struct pmc *pm;
 1766         struct pmc_owner *po;
 1767         const struct pmc_process *pp;
 1768 
 1769         freepath = fullpath = NULL;
 1770         MPASS(!in_epoch(global_epoch_preempt));
 1771         pmc_getfilename((struct vnode *) pkm->pm_file, &fullpath, &freepath);
 1772 
 1773         pid = td->td_proc->p_pid;
 1774 
 1775         PMC_EPOCH_ENTER();
 1776         /* Inform owners of all system-wide sampling PMCs. */
 1777         CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
 1778             if (po->po_flags & PMC_PO_OWNS_LOGFILE)
 1779                     pmclog_process_map_in(po, pid, pkm->pm_address, fullpath);
 1780 
 1781         if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL)
 1782                 goto done;
 1783 
 1784         /*
 1785          * Inform sampling PMC owners tracking this process.
 1786          */
 1787         for (ri = 0; ri < md->pmd_npmc; ri++)
 1788                 if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL &&
 1789                     PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
 1790                         pmclog_process_map_in(pm->pm_owner,
 1791                             pid, pkm->pm_address, fullpath);
 1792 
 1793   done:
 1794         if (freepath)
 1795                 free(freepath, M_TEMP);
 1796         PMC_EPOCH_EXIT();
 1797 }
 1798 
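/*
 * Illustrative note: the pmc_ss_owners list is read under an epoch
 * section rather than a conventional lock, so traversals take the form
 *
 *	PMC_EPOCH_ENTER();
 *	CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
 *		...read-only use of 'po'...
 *	PMC_EPOCH_EXIT();
 *
 * while writers unlink entries with CK_LIST_REMOVE() and then call
 * epoch_wait_preempt() before freeing, as pmc_release_pmc_descriptor()
 * does further below.
 */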
 1799 
 1800 /*
 1801  * Log an munmap request.
 1802  */
 1803 
 1804 static void
 1805 pmc_process_munmap(struct thread *td, struct pmckern_map_out *pkm)
 1806 {
 1807         int ri;
 1808         pid_t pid;
 1809         struct pmc_owner *po;
 1810         const struct pmc *pm;
 1811         const struct pmc_process *pp;
 1812 
 1813         pid = td->td_proc->p_pid;
 1814 
 1815         PMC_EPOCH_ENTER();
 1816         CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
 1817             if (po->po_flags & PMC_PO_OWNS_LOGFILE)
 1818                     pmclog_process_map_out(po, pid, pkm->pm_address,
 1819                         pkm->pm_address + pkm->pm_size);
 1820         PMC_EPOCH_EXIT();
 1821 
 1822         if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL)
 1823                 return;
 1824 
 1825         for (ri = 0; ri < md->pmd_npmc; ri++)
 1826                 if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL &&
 1827                     PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
 1828                         pmclog_process_map_out(pm->pm_owner, pid,
 1829                             pkm->pm_address, pkm->pm_address + pkm->pm_size);
 1830 }
 1831 
 1832 /*
 1833  * Log mapping information about the kernel.
 1834  */
 1835 
 1836 static void
 1837 pmc_log_kernel_mappings(struct pmc *pm)
 1838 {
 1839         struct pmc_owner *po;
 1840         struct pmckern_map_in *km, *kmbase;
 1841 
 1842         MPASS(in_epoch(global_epoch_preempt) || sx_xlocked(&pmc_sx));
 1843         KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)),
 1844             ("[pmc,%d] non-sampling PMC (%p) desires mapping information",
 1845                 __LINE__, (void *) pm));
 1846 
 1847         po = pm->pm_owner;
 1848 
 1849         if (po->po_flags & PMC_PO_INITIAL_MAPPINGS_DONE)
 1850                 return;
 1851         if (PMC_TO_MODE(pm) == PMC_MODE_SS)
 1852                 pmc_process_allproc(pm);
 1853         /*
 1854          * Log the current set of kernel modules.
 1855          */
 1856         kmbase = linker_hwpmc_list_objects();
 1857         for (km = kmbase; km->pm_file != NULL; km++) {
 1858                 PMCDBG2(LOG,REG,1,"%s %p", (char *) km->pm_file,
 1859                     (void *) km->pm_address);
 1860                 pmclog_process_map_in(po, (pid_t) -1, km->pm_address,
 1861                     km->pm_file);
 1862         }
 1863         free(kmbase, M_LINKER);
 1864 
 1865         po->po_flags |= PMC_PO_INITIAL_MAPPINGS_DONE;
 1866 }
 1867 
 1868 /*
 1869  * Log the mappings for a single process.
 1870  */
 1871 
 1872 static void
 1873 pmc_log_process_mappings(struct pmc_owner *po, struct proc *p)
 1874 {
 1875         vm_map_t map;
 1876         struct vnode *vp;
 1877         struct vmspace *vm;
 1878         vm_map_entry_t entry;
 1879         vm_offset_t last_end;
 1880         u_int last_timestamp;
 1881         struct vnode *last_vp;
 1882         vm_offset_t start_addr;
 1883         vm_object_t obj, lobj, tobj;
 1884         char *fullpath, *freepath;
 1885 
 1886         last_vp = NULL;
 1887         last_end = (vm_offset_t) 0;
 1888         fullpath = freepath = NULL;
 1889 
 1890         if ((vm = vmspace_acquire_ref(p)) == NULL)
 1891                 return;
 1892 
 1893         map = &vm->vm_map;
 1894         vm_map_lock_read(map);
 1895 
 1896         VM_MAP_ENTRY_FOREACH(entry, map) {
 1897 
 1898                 if (entry == NULL) {
 1899                         PMCDBG2(LOG,OPS,2, "hwpmc: vm_map entry unexpectedly "
 1900                             "NULL! pid=%d vm_map=%p\n", p->p_pid, map);
 1901                         break;
 1902                 }
 1903 
 1904                 /*
 1905                  * We only care about executable map entries.
 1906                  */
 1907                 if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) ||
 1908                     !(entry->protection & VM_PROT_EXECUTE) ||
 1909                     (entry->object.vm_object == NULL)) {
 1910                         continue;
 1911                 }
 1912 
 1913                 obj = entry->object.vm_object;
 1914                 VM_OBJECT_RLOCK(obj);
 1915 
 1916                 /* 
 1917                  * Walk the backing_object list to find the base
 1918                  * (non-shadowed) vm_object.
 1919                  */
 1920                 for (lobj = tobj = obj; tobj != NULL; tobj = tobj->backing_object) {
 1921                         if (tobj != obj)
 1922                                 VM_OBJECT_RLOCK(tobj);
 1923                         if (lobj != obj)
 1924                                 VM_OBJECT_RUNLOCK(lobj);
 1925                         lobj = tobj;
 1926                 }
 1927 
 1928                 /*
 1929                  * At this point lobj is the base vm_object and it is locked.
 1930                  */
 1931                 if (lobj == NULL) {
 1932                         PMCDBG3(LOG,OPS,2, "hwpmc: lobj unexpectedly NULL! pid=%d "
 1933                             "vm_map=%p vm_obj=%p\n", p->p_pid, map, obj);
 1934                         VM_OBJECT_RUNLOCK(obj);
 1935                         continue;
 1936                 }
 1937 
 1938                 vp = vm_object_vnode(lobj);
 1939                 if (vp == NULL) {
 1940                         if (lobj != obj)
 1941                                 VM_OBJECT_RUNLOCK(lobj);
 1942                         VM_OBJECT_RUNLOCK(obj);
 1943                         continue;
 1944                 }
 1945 
 1946                 /*
 1947                  * Skip contiguous regions that point to the same
 1948                  * vnode, so we don't emit redundant MAP-IN
 1949                  * directives.
 1950                  */
 1951                 if (entry->start == last_end && vp == last_vp) {
 1952                         last_end = entry->end;
 1953                         if (lobj != obj)
 1954                                 VM_OBJECT_RUNLOCK(lobj);
 1955                         VM_OBJECT_RUNLOCK(obj);
 1956                         continue;
 1957                 }
 1958 
 1959                 /* 
 1960                  * We don't want to keep the proc's vm_map or this
 1961                  * vm_object locked while we walk the pathname, since
 1962                  * vn_fullpath() can sleep.  However, if we drop the
 1963                  * lock, it's possible for concurrent activity to
 1964                  * modify the vm_map list.  To protect against this,
 1965                  * we save the vm_map timestamp before we release the
 1966                  * lock, and check it after we reacquire the lock
 1967                  * below.
 1968                  */
 1969                 start_addr = entry->start;
 1970                 last_end = entry->end;
 1971                 last_timestamp = map->timestamp;
 1972                 vm_map_unlock_read(map);
 1973 
 1974                 vref(vp);
 1975                 if (lobj != obj)
 1976                         VM_OBJECT_RUNLOCK(lobj);
 1977 
 1978                 VM_OBJECT_RUNLOCK(obj);
 1979 
 1980                 freepath = NULL;
 1981                 pmc_getfilename(vp, &fullpath, &freepath);
 1982                 last_vp = vp;
 1983 
 1984                 vrele(vp);
 1985 
 1986                 vp = NULL;
 1987                 pmclog_process_map_in(po, p->p_pid, start_addr, fullpath);
 1988                 if (freepath)
 1989                         free(freepath, M_TEMP);
 1990 
 1991                 vm_map_lock_read(map);
 1992 
 1993                 /*
 1994                  * If our saved timestamp doesn't match, this means
 1995                  * that the vm_map was modified out from under us and
 1996                  * we can't trust our current "entry" pointer.  Do a
 1997                  * new lookup for this entry.  If there is no entry
 1998                  * for this address range, vm_map_lookup_entry() will
 1999                  * return the previous one, so we always want to go to
 2000                  * the next entry on the next loop iteration.
 2001                  * 
 2002                  * There is an edge condition here that can occur if
 2003                  * there is no entry at or before this address.  In
 2004                  * this situation, vm_map_lookup_entry returns
 2005                  * &map->header, which would cause our loop to abort
 2006                  * without processing the rest of the map.  However,
 2007                  * in practice this will never happen for process
 2008                  * vm_map.  This is because the executable's text
 2009                  * segment is the first mapping in the proc's address
 2010                  * space, and this mapping is never removed until the
 2011                  * process exits, so there will always be a non-header
 2012                  * entry at or before the requested address for
 2013                  * vm_map_lookup_entry to return.
 2014                  */
 2015                 if (map->timestamp != last_timestamp)
 2016                         vm_map_lookup_entry(map, last_end - 1, &entry);
 2017         }
 2018 
 2019         vm_map_unlock_read(map);
 2020         vmspace_free(vm);
 2021         return;
 2022 }
 2023 
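/*
 * Illustrative sketch (not part of this file) of the lock/timestamp
 * dance used above, with the per-entry work elided:
 *
 *	last_timestamp = map->timestamp;
 *	vm_map_unlock_read(map);
 *	...				(sleepable work, e.g. vn_fullpath())
 *	vm_map_lock_read(map);
 *	if (map->timestamp != last_timestamp)
 *		vm_map_lookup_entry(map, last_end - 1, &entry);
 *
 * An unchanged timestamp means no writer modified the map while it was
 * unlocked, so the cached 'entry' pointer is still valid.
 */
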
 2024 /*
 2025  * Log mappings for all processes in the system.
 2026  */
 2027 
 2028 static void
 2029 pmc_log_all_process_mappings(struct pmc_owner *po)
 2030 {
 2031         struct proc *p, *top;
 2032 
 2033         sx_assert(&pmc_sx, SX_XLOCKED);
 2034 
 2035         if ((p = pfind(1)) == NULL)
 2036                 panic("[pmc,%d] Cannot find init", __LINE__);
 2037 
 2038         PROC_UNLOCK(p);
 2039 
 2040         sx_slock(&proctree_lock);
 2041 
 2042         top = p;
 2043 
 2044         for (;;) {
 2045                 pmc_log_process_mappings(po, p);
 2046                 if (!LIST_EMPTY(&p->p_children))
 2047                         p = LIST_FIRST(&p->p_children);
 2048                 else for (;;) {
 2049                         if (p == top)
 2050                                 goto done;
 2051                         if (LIST_NEXT(p, p_sibling)) {
 2052                                 p = LIST_NEXT(p, p_sibling);
 2053                                 break;
 2054                         }
 2055                         p = p->p_pptr;
 2056                 }
 2057         }
 2058  done:
 2059         sx_sunlock(&proctree_lock);
 2060 }
 2061 
 2062 /*
 2063  * The 'hook' invoked from the kernel proper
 2064  */
 2065 
 2066 
 2067 #ifdef  HWPMC_DEBUG
 2068 const char *pmc_hooknames[] = {
 2069         /* these strings correspond to PMC_FN_* in <sys/pmckern.h> */
 2070         "",
 2071         "EXEC",
 2072         "CSW-IN",
 2073         "CSW-OUT",
 2074         "SAMPLE",
 2075         "UNUSED1",
 2076         "UNUSED2",
 2077         "MMAP",
 2078         "MUNMAP",
 2079         "CALLCHAIN-NMI",
 2080         "CALLCHAIN-SOFT",
 2081         "SOFTSAMPLING",
 2082         "THR-CREATE",
 2083         "THR-EXIT",
 2084         "THR-USERRET",
 2085         "THR-CREATE-LOG",
 2086         "THR-EXIT-LOG",
 2087         "PROC-CREATE-LOG"
 2088 };
 2089 #endif
 2090 
 2091 static int
 2092 pmc_hook_handler(struct thread *td, int function, void *arg)
 2093 {
 2094         int cpu;
 2095 
 2096         PMCDBG4(MOD,PMH,1, "hook td=%p func=%d \"%s\" arg=%p", td, function,
 2097             pmc_hooknames[function], arg);
 2098 
 2099         switch (function)
 2100         {
 2101 
 2102         /*
 2103          * Process exec()
 2104          */
 2105 
 2106         case PMC_FN_PROCESS_EXEC:
 2107         {
 2108                 char *fullpath, *freepath;
 2109                 unsigned int ri;
 2110                 int is_using_hwpmcs;
 2111                 struct pmc *pm;
 2112                 struct proc *p;
 2113                 struct pmc_owner *po;
 2114                 struct pmc_process *pp;
 2115                 struct pmckern_procexec *pk;
 2116 
 2117                 sx_assert(&pmc_sx, SX_XLOCKED);
 2118 
 2119                 p = td->td_proc;
 2120                 pmc_getfilename(p->p_textvp, &fullpath, &freepath);
 2121 
 2122                 pk = (struct pmckern_procexec *) arg;
 2123 
 2124                 PMC_EPOCH_ENTER();
 2125                 /* Inform owners of SS mode PMCs of the exec event. */
 2126                 CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
 2127                     if (po->po_flags & PMC_PO_OWNS_LOGFILE)
 2128                             pmclog_process_procexec(po, PMC_ID_INVALID,
 2129                                 p->p_pid, pk->pm_entryaddr, fullpath);
 2130                 PMC_EPOCH_EXIT();
 2131 
 2132                 PROC_LOCK(p);
 2133                 is_using_hwpmcs = p->p_flag & P_HWPMC;
 2134                 PROC_UNLOCK(p);
 2135 
 2136                 if (!is_using_hwpmcs) {
 2137                         if (freepath)
 2138                                 free(freepath, M_TEMP);
 2139                         break;
 2140                 }
 2141 
 2142                 /*
 2143                  * PMCs are not inherited across an exec():  remove any
 2144                  * PMCs that this process is the owner of.
 2145                  */
 2146 
 2147                 if ((po = pmc_find_owner_descriptor(p)) != NULL) {
 2148                         pmc_remove_owner(po);
 2149                         pmc_destroy_owner_descriptor(po);
 2150                 }
 2151 
 2152                 /*
 2153                  * If the process being exec'ed is not the target of any
 2154                  * PMC, we are done.
 2155                  */
 2156                 if ((pp = pmc_find_process_descriptor(p, 0)) == NULL) {
 2157                         if (freepath)
 2158                                 free(freepath, M_TEMP);
 2159                         break;
 2160                 }
 2161 
 2162                 /*
 2163                  * Log the exec event to all monitoring owners.  Skip
 2164                  * owners who have already received the event because
 2165                  * they had system sampling PMCs active.
 2166                  */
 2167                 for (ri = 0; ri < md->pmd_npmc; ri++)
 2168                         if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL) {
 2169                                 po = pm->pm_owner;
 2170                                 if (po->po_sscount == 0 &&
 2171                                     po->po_flags & PMC_PO_OWNS_LOGFILE)
 2172                                         pmclog_process_procexec(po, pm->pm_id,
 2173                                             p->p_pid, pk->pm_entryaddr,
 2174                                             fullpath);
 2175                         }
 2176 
 2177                 if (freepath)
 2178                         free(freepath, M_TEMP);
 2179 
 2180 
 2181                 PMCDBG4(PRC,EXC,1, "exec proc=%p (%d, %s) cred-changed=%d",
 2182                     p, p->p_pid, p->p_comm, pk->pm_credentialschanged);
 2183 
 2184                 if (pk->pm_credentialschanged == 0) /* no change */
 2185                         break;
 2186 
 2187                 /*
 2188                  * If the newly exec()'ed process has a different credential
 2189                  * than before, allow it to be the target of a PMC only if
 2190                  * the PMC's owner has sufficient privilege.
 2191                  */
 2192 
 2193                 for (ri = 0; ri < md->pmd_npmc; ri++)
 2194                         if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL)
 2195                                 if (pmc_can_attach(pm, td->td_proc) != 0)
 2196                                         pmc_detach_one_process(td->td_proc,
 2197                                             pm, PMC_FLAG_NONE);
 2198 
 2199                 KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt <= (int) md->pmd_npmc,
 2200                     ("[pmc,%d] Illegal ref count %d on pp %p", __LINE__,
 2201                         pp->pp_refcnt, pp));
 2202 
 2203                 /*
 2204                  * If this process is no longer the target of any
 2205                  * PMCs, we can remove the process entry and free
 2206                  * up space.
 2207                  */
 2208 
 2209                 if (pp->pp_refcnt == 0) {
 2210                         pmc_remove_process_descriptor(pp);
 2211                         pmc_destroy_process_descriptor(pp);
 2212                         break;
 2213                 }
 2214 
 2215         }
 2216         break;
 2217 
 2218         case PMC_FN_CSW_IN:
 2219                 pmc_process_csw_in(td);
 2220                 break;
 2221 
 2222         case PMC_FN_CSW_OUT:
 2223                 pmc_process_csw_out(td);
 2224                 break;
 2225 
 2226         /*
 2227          * Process accumulated PC samples.
 2228          *
 2229          * This function is expected to be called by hardclock() for
 2230          * each CPU that has accumulated PC samples.
 2231          *
 2232          * This function is to be executed on the CPU whose samples
 2233          * are being processed.
 2234          */
 2235         case PMC_FN_DO_SAMPLES:
 2236 
 2237                 /*
 2238                  * Clear the CPU-specific bit in the CPU mask before
 2239                  * doing the rest of the processing.  If the NMI handler
 2240                  * gets invoked after the "atomic_clear_int()" call
 2241                  * below but before "pmc_process_samples()" gets
 2242                  * around to processing the interrupt, then we will
 2243                  * come back here at the next hardclock() tick (and
 2244                  * may find nothing to do if "pmc_process_samples()"
 2245                  * had already processed the interrupt).  We don't
 2246                  * lose the interrupt sample.
 2247                  */
 2248                 DPCPU_SET(pmc_sampled, 0);
 2249                 cpu = PCPU_GET(cpuid);
 2250                 pmc_process_samples(cpu, PMC_HR);
 2251                 pmc_process_samples(cpu, PMC_SR);
 2252                 pmc_process_samples(cpu, PMC_UR);
 2253                 break;
 2254 
 2255         case PMC_FN_MMAP:
 2256                 pmc_process_mmap(td, (struct pmckern_map_in *) arg);
 2257                 break;
 2258 
 2259         case PMC_FN_MUNMAP:
 2260                 MPASS(in_epoch(global_epoch_preempt) || sx_xlocked(&pmc_sx));
 2261                 pmc_process_munmap(td, (struct pmckern_map_out *) arg);
 2262                 break;
 2263 
 2264         case PMC_FN_PROC_CREATE_LOG:
 2265                 pmc_process_proccreate((struct proc *)arg);
 2266                 break;
 2267 
 2268         case PMC_FN_USER_CALLCHAIN:
 2269                 /*
 2270                  * Record a call chain.
 2271                  */
 2272                 KASSERT(td == curthread, ("[pmc,%d] td != curthread",
 2273                     __LINE__));
 2274 
 2275                 pmc_capture_user_callchain(PCPU_GET(cpuid), PMC_HR,
 2276                     (struct trapframe *) arg);
 2277 
 2278                 KASSERT(td->td_pinned == 1,
 2279                         ("[pmc,%d] invalid td_pinned value", __LINE__));
 2280                 sched_unpin();  /* Can migrate safely now. */
 2281 
 2282                 td->td_pflags &= ~TDP_CALLCHAIN;
 2283                 break;
 2284 
 2285         case PMC_FN_USER_CALLCHAIN_SOFT:
 2286                 /*
 2287                  * Record a call chain.
 2288                  */
 2289                 KASSERT(td == curthread, ("[pmc,%d] td != curthread",
 2290                     __LINE__));
 2291 
 2292                 cpu = PCPU_GET(cpuid);
 2293                 pmc_capture_user_callchain(cpu, PMC_SR,
 2294                     (struct trapframe *) arg);
 2295 
 2296                 KASSERT(td->td_pinned == 1,
 2297                     ("[pmc,%d] invalid td_pinned value", __LINE__));
 2298 
 2299                 sched_unpin();  /* Can migrate safely now. */
 2300 
 2301                 td->td_pflags &= ~TDP_CALLCHAIN;
 2302                 break;
 2303 
 2304         case PMC_FN_SOFT_SAMPLING:
 2305                 /*
 2306                  * Call soft PMC sampling intr.
 2307                  */
 2308                 pmc_soft_intr((struct pmckern_soft *) arg);
 2309                 break;
 2310 
 2311         case PMC_FN_THR_CREATE:
 2312                 pmc_process_thread_add(td);
 2313                 pmc_process_threadcreate(td);
 2314                 break;
 2315 
 2316         case PMC_FN_THR_CREATE_LOG:
 2317                 pmc_process_threadcreate(td);
 2318                 break;
 2319 
 2320         case PMC_FN_THR_EXIT:
 2321                 KASSERT(td == curthread, ("[pmc,%d] td != curthread",
 2322                     __LINE__));
 2323                 pmc_process_thread_delete(td);
 2324                 pmc_process_threadexit(td);
 2325                 break;
 2326         case PMC_FN_THR_EXIT_LOG:
 2327                 pmc_process_threadexit(td);
 2328                 break;
 2329         case PMC_FN_THR_USERRET:
 2330                 KASSERT(td == curthread, ("[pmc,%d] td != curthread",
 2331                     __LINE__));
 2332                 pmc_process_thread_userret(td);
 2333                 break;
 2334 
 2335         default:
 2336 #ifdef  HWPMC_DEBUG
 2337                 KASSERT(0, ("[pmc,%d] unknown hook %d\n", __LINE__, function));
 2338 #endif
 2339                 break;
 2340 
 2341         }
 2342 
 2343         return 0;
 2344 }
 2345 
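/*
 * Illustrative sketch (not part of this file): the kernel proper
 * reaches pmc_hook_handler() through the pmc_hook function pointer
 * installed by this module at load time.  A call site in the context
 * switch path looks roughly like
 *
 *	if (PMC_PROC_IS_USING_PMCS(td->td_proc))
 *		PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN);
 *
 * see <sys/pmckern.h> for the exact macro definitions.
 */
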
 2346 /*
 2347  * allocate a 'struct pmc_owner' descriptor in the owner hash table.
 2348  */
 2349 
 2350 static struct pmc_owner *
 2351 pmc_allocate_owner_descriptor(struct proc *p)
 2352 {
 2353         uint32_t hindex;
 2354         struct pmc_owner *po;
 2355         struct pmc_ownerhash *poh;
 2356 
 2357         hindex = PMC_HASH_PTR(p, pmc_ownerhashmask);
 2358         poh = &pmc_ownerhash[hindex];
 2359 
 2360         /* Allocate and zero the owner descriptor. */
 2361         po = malloc(sizeof(struct pmc_owner), M_PMC, M_WAITOK|M_ZERO);
 2362         po->po_owner = p;
 2363         LIST_INSERT_HEAD(poh, po, po_next); /* insert into hash table */
 2364 
 2365         TAILQ_INIT(&po->po_logbuffers);
 2366         mtx_init(&po->po_mtx, "pmc-owner-mtx", "pmc-per-proc", MTX_SPIN);
 2367 
 2368         PMCDBG4(OWN,ALL,1, "allocate-owner proc=%p (%d, %s) pmc-owner=%p",
 2369             p, p->p_pid, p->p_comm, po);
 2370 
 2371         return po;
 2372 }
 2373 
 2374 static void
 2375 pmc_destroy_owner_descriptor(struct pmc_owner *po)
 2376 {
 2377 
 2378         PMCDBG4(OWN,REL,1, "destroy-owner po=%p proc=%p (%d, %s)",
 2379             po, po->po_owner, po->po_owner->p_pid, po->po_owner->p_comm);
 2380 
 2381         mtx_destroy(&po->po_mtx);
 2382         free(po, M_PMC);
 2383 }
 2384 
 2385 /*
 2386  * Allocate a thread descriptor from the free pool.
 2387  *
 2388  * NOTE: This *can* return NULL.
 2389  */
 2390 static struct pmc_thread *
 2391 pmc_thread_descriptor_pool_alloc(void)
 2392 {
 2393         struct pmc_thread *pt;
 2394 
 2395         mtx_lock_spin(&pmc_threadfreelist_mtx);
 2396         if ((pt = LIST_FIRST(&pmc_threadfreelist)) != NULL) {
 2397                 LIST_REMOVE(pt, pt_next);
 2398                 pmc_threadfreelist_entries--;
 2399         }
 2400         mtx_unlock_spin(&pmc_threadfreelist_mtx);
 2401 
 2402         return (pt);
 2403 }
 2404 
 2405 /*
 2406  * Add a thread descriptor to the free pool. We use this instead of free()
 2407  * to maintain a cache of free entries. Additionally, we can safely call
 2408  * this function when we cannot call free(), such as in a critical section.
 2409  * 
 2410  */
 2411 static void
 2412 pmc_thread_descriptor_pool_free(struct pmc_thread *pt)
 2413 {
 2414 
 2415         if (pt == NULL)
 2416                 return;
 2417 
 2418         memset(pt, 0, THREADENTRY_SIZE);
 2419         mtx_lock_spin(&pmc_threadfreelist_mtx);
 2420         LIST_INSERT_HEAD(&pmc_threadfreelist, pt, pt_next);
 2421         pmc_threadfreelist_entries++;
 2422         if (pmc_threadfreelist_entries > pmc_threadfreelist_max)
 2423                 taskqueue_enqueue(taskqueue_fast, &free_task);
 2424         mtx_unlock_spin(&pmc_threadfreelist_mtx);
 2425 }
 2426 
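/*
 * Usage sketch (illustrative): pmc_find_thread_descriptor() below first
 * tries the pool and only falls back to malloc(9) on a miss, roughly:
 *
 *	if ((pt = pmc_thread_descriptor_pool_alloc()) == NULL)
 *		pt = malloc(THREADENTRY_SIZE, M_PMC, M_NOWAIT | M_ZERO);
 *
 * Entries are returned with pmc_thread_descriptor_pool_free(), which
 * takes only a spin mutex and is therefore usable where free(9) is not.
 */
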
 2427 /*
 2428  * An asynchronous task to manage the free list.
 2429  */
 2430 static void
 2431 pmc_thread_descriptor_pool_free_task(void *arg __unused, int pending __unused)
 2432 {
 2433         struct pmc_thread *pt;
 2434         LIST_HEAD(, pmc_thread) tmplist;
 2435         int delta;
 2436 
 2437         LIST_INIT(&tmplist);
 2438 
 2439         /* Determine what changes, if any, we need to make. */
 2440         mtx_lock_spin(&pmc_threadfreelist_mtx);
 2441         delta = pmc_threadfreelist_entries - pmc_threadfreelist_max;
 2442         while (delta > 0 && (pt = LIST_FIRST(&pmc_threadfreelist)) != NULL) {
 2443                 delta--;
 2444                 pmc_threadfreelist_entries--;
 2445                 LIST_REMOVE(pt, pt_next);
 2446                 LIST_INSERT_HEAD(&tmplist, pt, pt_next);
 2447         }
 2448         mtx_unlock_spin(&pmc_threadfreelist_mtx);
 2449 
 2450         /* If there are entries to free, free them. */
 2451         while (!LIST_EMPTY(&tmplist)) {
 2452                 pt = LIST_FIRST(&tmplist);
 2453                 LIST_REMOVE(pt, pt_next);
 2454                 free(pt, M_PMC);
 2455         }
 2456 }
 2457 
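/*
 * Illustrative note: the task above moves surplus entries onto a
 * private list while holding the spin mutex and calls free(9) only
 * after dropping it, since free(9) must not be called with a spin
 * lock held.
 */
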
 2458 /*
 2459  * Drain the thread free pool, freeing all allocations.
 2460  */
 2461 static void
 2462 pmc_thread_descriptor_pool_drain(void)
 2463 {
 2464         struct pmc_thread *pt, *next;
 2465 
 2466         LIST_FOREACH_SAFE(pt, &pmc_threadfreelist, pt_next, next) {
 2467                 LIST_REMOVE(pt, pt_next);
 2468                 free(pt, M_PMC);
 2469         }
 2470 }
 2471 
 2472 /*
 2473  * find the descriptor corresponding to thread 'td', adding or removing it
 2474  * as specified by 'mode'.
 2475  *
 2476  * In addition to the mode flags accepted by
 2477  * pmc_find_process_descriptor(), this function supports:
 2478  * PMC_FLAG_NOWAIT: Do not sleep in malloc(9).
 2479  *     This makes it safe to call while holding certain other locks.
 2480  */
 2481 
 2482 static struct pmc_thread *
 2483 pmc_find_thread_descriptor(struct pmc_process *pp, struct thread *td,
 2484     uint32_t mode)
 2485 {
 2486         struct pmc_thread *pt = NULL, *ptnew = NULL;
 2487         int wait_flag;
 2488 
 2489         KASSERT(td != NULL, ("[pmc,%d] called to add NULL td", __LINE__));
 2490 
 2491         /*
 2492          * Pre-allocate memory in the PMC_FLAG_ALLOCATE case prior to
 2493          * acquiring the lock.
 2494          */
 2495         if (mode & PMC_FLAG_ALLOCATE) {
 2496                 if ((ptnew = pmc_thread_descriptor_pool_alloc()) == NULL) {
 2497                         wait_flag = M_WAITOK;
 2498                         if ((mode & PMC_FLAG_NOWAIT) || in_epoch(global_epoch_preempt))
 2499                                 wait_flag = M_NOWAIT;
 2500 
 2501                         ptnew = malloc(THREADENTRY_SIZE, M_PMC,
 2502                             wait_flag|M_ZERO);
 2503                 }
 2504         }
 2505 
 2506         mtx_lock_spin(pp->pp_tdslock);
 2507 
 2508         LIST_FOREACH(pt, &pp->pp_tds, pt_next)
 2509                 if (pt->pt_td == td)
 2510                         break;
 2511 
 2512         if ((mode & PMC_FLAG_REMOVE) && pt != NULL)
 2513                 LIST_REMOVE(pt, pt_next);
 2514 
 2515         if ((mode & PMC_FLAG_ALLOCATE) && pt == NULL && ptnew != NULL) {
 2516                 pt = ptnew;
 2517                 ptnew = NULL;
 2518                 pt->pt_td = td;
 2519                 LIST_INSERT_HEAD(&pp->pp_tds, pt, pt_next);
 2520         }
 2521 
 2522         mtx_unlock_spin(pp->pp_tdslock);
 2523 
 2524         if (ptnew != NULL) {
 2525                 free(ptnew, M_PMC);
 2526         }
 2527 
 2528         return pt;
 2529 }
 2530 
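/*
 * Illustrative sketch (not part of this file) of the allocate-outside-
 * the-lock pattern used above and in pmc_find_process_descriptor():
 *
 *	new = malloc(..., M_WAITOK);	(may sleep: no locks held yet)
 *	mtx_lock_spin(lock);
 *	if (lookup() == NULL)
 *		insert(new), new = NULL;	(consumed)
 *	mtx_unlock_spin(lock);
 *	if (new != NULL)
 *		free(new, ...);			(raced, or not needed)
 *
 * malloc(9) cannot be called with a spin mutex held, hence the
 * pre-allocation and the conditional free afterwards.
 */
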
 2531 /*
 2532  * Try to add thread descriptors for each thread in a process.
 2533  */
 2534 
 2535 static void
 2536 pmc_add_thread_descriptors_from_proc(struct proc *p, struct pmc_process *pp)
 2537 {
 2538         struct thread *curtd;
 2539         struct pmc_thread **tdlist;
 2540         int i, tdcnt, tdlistsz;
 2541 
 2542         KASSERT(!PROC_LOCKED(p), ("[pmc,%d] proc unexpectedly locked",
 2543             __LINE__));
 2544         tdcnt = 32;
 2545  restart:
 2546         tdlistsz = roundup2(tdcnt, 32);
 2547 
 2548         tdcnt = 0;
 2549         tdlist = malloc(sizeof(struct pmc_thread*) * tdlistsz, M_TEMP, M_WAITOK);
 2550 
 2551         PROC_LOCK(p);
 2552         FOREACH_THREAD_IN_PROC(p, curtd)
 2553                 tdcnt++;
 2554         if (tdcnt >= tdlistsz) {
 2555                 PROC_UNLOCK(p);
 2556                 free(tdlist, M_TEMP);
 2557                 goto restart;
 2558         }
 2559         /*
 2560          * Try to add each thread to the list without sleeping. If that
 2561          * fails, drop the process lock, free everything allocated, and retry.
 2562          */
 2563         tdcnt = 0;
 2564         FOREACH_THREAD_IN_PROC(p, curtd) {
 2565                 tdlist[tdcnt] = pmc_find_thread_descriptor(pp, curtd,
 2566                     PMC_FLAG_ALLOCATE|PMC_FLAG_NOWAIT);
 2567                 if (tdlist[tdcnt] == NULL) {
 2568                         PROC_UNLOCK(p);
 2569                         for (i = 0; i <= tdcnt; i++)
 2570                                 pmc_thread_descriptor_pool_free(tdlist[i]);
 2571                         free(tdlist, M_TEMP);
 2572                         goto restart;
 2573                 }
 2574                 tdcnt++;
 2575         }
 2576         PROC_UNLOCK(p);
 2577         free(tdlist, M_TEMP);
 2578 }
 2579 
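/*
 * Worked example (illustrative): for a process with 40 threads, the
 * first pass sizes the array at roundup2(32, 32) == 32 entries, counts
 * 40 >= 32 threads under the process lock, and restarts; the second
 * pass allocates roundup2(40, 32) == 64 entries, which suffices unless
 * more threads were created in the window between the two passes.
 */
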
 2580 /*
 2581  * find the descriptor corresponding to process 'p', adding or removing it
 2582  * as specified by 'mode'.
 2583  */
 2584 
 2585 static struct pmc_process *
 2586 pmc_find_process_descriptor(struct proc *p, uint32_t mode)
 2587 {
 2588         uint32_t hindex;
 2589         struct pmc_process *pp, *ppnew;
 2590         struct pmc_processhash *pph;
 2591 
 2592         hindex = PMC_HASH_PTR(p, pmc_processhashmask);
 2593         pph = &pmc_processhash[hindex];
 2594 
 2595         ppnew = NULL;
 2596 
 2597         /*
 2598          * Pre-allocate memory in the PMC_FLAG_ALLOCATE case since we
 2599          * cannot call malloc(9) once we hold a spin lock.
 2600          */
 2601         if (mode & PMC_FLAG_ALLOCATE)
 2602                 ppnew = malloc(sizeof(struct pmc_process) + md->pmd_npmc *
 2603                     sizeof(struct pmc_targetstate), M_PMC, M_WAITOK|M_ZERO);
 2604 
 2605         mtx_lock_spin(&pmc_processhash_mtx);
 2606         LIST_FOREACH(pp, pph, pp_next)
 2607             if (pp->pp_proc == p)
 2608                     break;
 2609 
 2610         if ((mode & PMC_FLAG_REMOVE) && pp != NULL)
 2611                 LIST_REMOVE(pp, pp_next);
 2612 
 2613         if ((mode & PMC_FLAG_ALLOCATE) && pp == NULL &&
 2614             ppnew != NULL) {
 2615                 ppnew->pp_proc = p;
 2616                 LIST_INIT(&ppnew->pp_tds);
 2617                 ppnew->pp_tdslock = mtx_pool_find(pmc_mtxpool, ppnew);
 2618                 LIST_INSERT_HEAD(pph, ppnew, pp_next);
 2619                 mtx_unlock_spin(&pmc_processhash_mtx);
 2620                 pp = ppnew;
 2621                 ppnew = NULL;
 2622 
 2623                 /* Add thread descriptors for this process' current threads. */
 2624                 pmc_add_thread_descriptors_from_proc(p, pp);
 2625         } else {
 2626                 mtx_unlock_spin(&pmc_processhash_mtx);
 2627         }
 2628 
 2629         if (ppnew != NULL)
 2630                 free(ppnew, M_PMC);
 2631 
 2632         return pp;
 2633 }
 2634 
 2635 /*
 2636  * remove a process descriptor from the process hash table.
 2637  */
 2638 
 2639 static void
 2640 pmc_remove_process_descriptor(struct pmc_process *pp)
 2641 {
 2642         KASSERT(pp->pp_refcnt == 0,
 2643             ("[pmc,%d] Removing process descriptor %p with count %d",
 2644                 __LINE__, pp, pp->pp_refcnt));
 2645 
 2646         mtx_lock_spin(&pmc_processhash_mtx);
 2647         LIST_REMOVE(pp, pp_next);
 2648         mtx_unlock_spin(&pmc_processhash_mtx);
 2649 }
 2650 
 2651 /*
 2652  * destroy a process descriptor.
 2653  */
 2654 
 2655 static void
 2656 pmc_destroy_process_descriptor(struct pmc_process *pp)
 2657 {
 2658         struct pmc_thread *pmc_td;
 2659 
 2660         while ((pmc_td = LIST_FIRST(&pp->pp_tds)) != NULL) {
 2661                 LIST_REMOVE(pmc_td, pt_next);
 2662                 pmc_thread_descriptor_pool_free(pmc_td);
 2663         }
 2664         free(pp, M_PMC);
 2665 }
 2666 
 2667 
 2668 /*
 2669  * find an owner descriptor corresponding to proc 'p'
 2670  */
 2671 
 2672 static struct pmc_owner *
 2673 pmc_find_owner_descriptor(struct proc *p)
 2674 {
 2675         uint32_t hindex;
 2676         struct pmc_owner *po;
 2677         struct pmc_ownerhash *poh;
 2678 
 2679         hindex = PMC_HASH_PTR(p, pmc_ownerhashmask);
 2680         poh = &pmc_ownerhash[hindex];
 2681 
 2682         po = NULL;
 2683         LIST_FOREACH(po, poh, po_next)
 2684             if (po->po_owner == p)
 2685                     break;
 2686 
 2687         PMCDBG5(OWN,FND,1, "find-owner proc=%p (%d, %s) hindex=0x%x -> "
 2688             "pmc-owner=%p", p, p->p_pid, p->p_comm, hindex, po);
 2689 
 2690         return po;
 2691 }
 2692 
 2693 /*
 2694  * pmc_allocate_pmc_descriptor
 2695  *
 2696  * Allocate a pmc descriptor and initialize its
 2697  * fields.
 2698  */
 2699 
 2700 static struct pmc *
 2701 pmc_allocate_pmc_descriptor(void)
 2702 {
 2703         struct pmc *pmc;
 2704 
 2705         pmc = malloc(sizeof(struct pmc), M_PMC, M_WAITOK|M_ZERO);
 2706         pmc->pm_runcount = counter_u64_alloc(M_WAITOK);
 2707         pmc->pm_pcpu_state = malloc(sizeof(struct pmc_pcpu_state)*mp_ncpus, M_PMC, M_WAITOK|M_ZERO);
 2708         PMCDBG1(PMC,ALL,1, "allocate-pmc -> pmc=%p", pmc);
 2709 
 2710         return pmc;
 2711 }
 2712 
 2713 /*
 2714  * Destroy a pmc descriptor.
 2715  */
 2716 
 2717 static void
 2718 pmc_destroy_pmc_descriptor(struct pmc *pm)
 2719 {
 2720 
 2721         KASSERT(pm->pm_state == PMC_STATE_DELETED ||
 2722             pm->pm_state == PMC_STATE_FREE,
 2723             ("[pmc,%d] destroying non-deleted PMC", __LINE__));
 2724         KASSERT(LIST_EMPTY(&pm->pm_targets),
 2725             ("[pmc,%d] destroying pmc with targets", __LINE__));
 2726         KASSERT(pm->pm_owner == NULL,
 2727             ("[pmc,%d] destroying pmc attached to an owner", __LINE__));
 2728         KASSERT(counter_u64_fetch(pm->pm_runcount) == 0,
 2729             ("[pmc,%d] pmc has non-zero run count %ld", __LINE__,
 2730                  (unsigned long)counter_u64_fetch(pm->pm_runcount)));
 2731 
 2732         counter_u64_free(pm->pm_runcount);
 2733         free(pm->pm_pcpu_state, M_PMC);
 2734         free(pm, M_PMC);
 2735 }
 2736 
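/*
 * Usage sketch (illustrative): teardown of a PMC is a two-step
 * sequence,
 *
 *	pmc_release_pmc_descriptor(pm);	(stop hw, unlink targets/owner)
 *	pmc_destroy_pmc_descriptor(pm);	(then free the memory)
 *
 * in that order; the KASSERTs above enforce it.
 */
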
 2737 static void
 2738 pmc_wait_for_pmc_idle(struct pmc *pm)
 2739 {
 2740 #ifdef INVARIANTS
 2741         volatile int maxloop;
 2742 
 2743         maxloop = 100 * pmc_cpu_max();
 2744 #endif
 2745         /*
 2746          * Loop (with a forced context switch) till the PMC's runcount
 2747          * comes down to zero.
 2748          */
 2749         pmclog_flush(pm->pm_owner, 1);
 2750         while (counter_u64_fetch(pm->pm_runcount) > 0) {
 2751                 pmclog_flush(pm->pm_owner, 1);
 2752 #ifdef INVARIANTS
 2753                 maxloop--;
 2754                 KASSERT(maxloop > 0,
 2755                     ("[pmc,%d] (ri%d, rc%ld) waiting too long for "
 2756                     "pmc to be free", __LINE__, PMC_TO_ROWINDEX(pm),
 2757                     (unsigned long)counter_u64_fetch(pm->pm_runcount)));
 2758 #endif
 2759                 pmc_force_context_switch();
 2760         }
 2761 }
 2762 
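/*
 * Worked example (illustrative): with INVARIANTS compiled in and
 * pmc_cpu_max() == 8, the loop above tolerates at most
 * 100 * 8 == 800 forced context switches; if pm_runcount still has
 * not drained to zero by then, the KASSERT fires, turning a silent
 * hang into a diagnosable panic.
 */
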
 2763 /*
 2764  * This function does the following things:
 2765  *
 2766  *  - detaches the PMC from hardware
 2767  *  - unlinks all target threads that were attached to it
 2768  *  - removes the PMC from its owner's list
 2769  *  - destroys the PMC private mutex
 2770  *
 2771  * Once this function completes, the given pmc pointer can be freed by
 2772  * calling pmc_destroy_pmc_descriptor().
 2773  */
 2774 
 2775 static void
 2776 pmc_release_pmc_descriptor(struct pmc *pm)
 2777 {
 2778         enum pmc_mode mode;
 2779         struct pmc_hw *phw __diagused;
 2780         u_int adjri, ri, cpu;
 2781         struct pmc_owner *po;
 2782         struct pmc_binding pb;
 2783         struct pmc_process *pp;
 2784         struct pmc_classdep *pcd;
 2785         struct pmc_target *ptgt, *tmp;
 2786 
 2787         sx_assert(&pmc_sx, SX_XLOCKED);
 2788 
 2789         KASSERT(pm, ("[pmc,%d] null pmc", __LINE__));
 2790 
 2791         ri   = PMC_TO_ROWINDEX(pm);
 2792         pcd  = pmc_ri_to_classdep(md, ri, &adjri);
 2793         mode = PMC_TO_MODE(pm);
 2794 
 2795         PMCDBG3(PMC,REL,1, "release-pmc pmc=%p ri=%d mode=%d", pm, ri,
 2796             mode);
 2797 
 2798         /*
 2799          * First, we take the PMC off hardware.
 2800          */
 2801         cpu = 0;
 2802         if (PMC_IS_SYSTEM_MODE(mode)) {
 2803 
 2804                 /*
 2805                  * A system mode PMC runs on a specific CPU.  Switch
 2806                  * to this CPU and turn hardware off.
 2807                  */
 2808                 pmc_save_cpu_binding(&pb);
 2809 
 2810                 cpu = PMC_TO_CPU(pm);
 2811 
 2812                 pmc_select_cpu(cpu);
 2813 
 2814                 /* switch off non-stalled CPUs */
 2815                 pm->pm_pcpu_state[cpu].pps_cpustate = 0;
 2816                 if (pm->pm_state == PMC_STATE_RUNNING &&
 2817                         pm->pm_pcpu_state[cpu].pps_stalled == 0) {
 2818 
 2819                         phw = pmc_pcpu[cpu]->pc_hwpmcs[ri];
 2820 
 2821                         KASSERT(phw->phw_pmc == pm,
 2822                             ("[pmc, %d] pmc ptr ri(%d) hw(%p) pm(%p)",
 2823                                 __LINE__, ri, phw->phw_pmc, pm));
 2824                         PMCDBG2(PMC,REL,2, "stopping cpu=%d ri=%d", cpu, ri);
 2825 
 2826                         critical_enter();
 2827                         pcd->pcd_stop_pmc(cpu, adjri);
 2828                         critical_exit();
 2829                 }
 2830 
 2831                 PMCDBG2(PMC,REL,2, "decfg cpu=%d ri=%d", cpu, ri);
 2832 
 2833                 critical_enter();
 2834                 pcd->pcd_config_pmc(cpu, adjri, NULL);
 2835                 critical_exit();
 2836 
 2837                 /* adjust the global and process count of SS mode PMCs */
 2838                 if (mode == PMC_MODE_SS && pm->pm_state == PMC_STATE_RUNNING) {
 2839                         po = pm->pm_owner;
 2840                         po->po_sscount--;
 2841                         if (po->po_sscount == 0) {
 2842                                 atomic_subtract_rel_int(&pmc_ss_count, 1);
 2843                                 CK_LIST_REMOVE(po, po_ssnext);
 2844                                 epoch_wait_preempt(global_epoch_preempt);
 2845                         }
 2846                 }
 2847 
 2848                 pm->pm_state = PMC_STATE_DELETED;
 2849 
 2850                 pmc_restore_cpu_binding(&pb);
 2851 
 2852                 /*
 2853                  * We could have references to this PMC structure in
 2854                  * the per-cpu sample queues.  Wait for those queues
 2855                  * to drain.
 2856                  */
 2857                 pmc_wait_for_pmc_idle(pm);
 2858 
 2859         } else if (PMC_IS_VIRTUAL_MODE(mode)) {
 2860 
 2861                 /*
 2862                  * A virtual PMC could be running on multiple CPUs at
 2863                  * a given instant.
 2864                  *
 2865                  * By marking its state as DELETED, we ensure that
 2866                  * this PMC is never further scheduled on hardware.
 2867                  *
 2868                  * Then we wait till all CPUs are done with this PMC.
 2869                  */
 2870                 pm->pm_state = PMC_STATE_DELETED;
 2871 
 2872 
 2873                 /* Wait for the PMC's runcount to drop to zero. */
 2874                 pmc_wait_for_pmc_idle(pm);
 2875 
 2876                 /*
 2877                  * At this point the PMC is off all CPUs and cannot be
 2878                  * freshly scheduled onto a CPU.  It is now safe to
 2879                  * unlink all targets from this PMC.  If a
 2880                  * process-record's refcount falls to zero, we remove
 2881                  * it from the hash table.  The module-wide SX lock
 2882                  * protects us from races.
 2883                  */
 2884                 LIST_FOREACH_SAFE(ptgt, &pm->pm_targets, pt_next, tmp) {
 2885                         pp = ptgt->pt_process;
 2886                         pmc_unlink_target_process(pm, pp); /* frees 'ptgt' */
 2887 
 2888                         PMCDBG1(PMC,REL,3, "pp->refcnt=%d", pp->pp_refcnt);
 2889 
 2890                         /*
 2891                          * If the target process record shows that no
 2892                          * PMCs are attached to it, reclaim its space.
 2893                          */
 2894 
 2895                         if (pp->pp_refcnt == 0) {
 2896                                 pmc_remove_process_descriptor(pp);
 2897                                 pmc_destroy_process_descriptor(pp);
 2898                         }
 2899                 }
 2900 
 2901                 cpu = curthread->td_oncpu; /* setup cpu for pmd_release() */
 2902 
 2903         }
 2904 
 2905         /*
 2906          * Release any MD resources
 2907          */
 2908         (void) pcd->pcd_release_pmc(cpu, adjri, pm);
 2909 
 2910         /*
 2911          * Update row disposition
 2912          */
 2913 
 2914         if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm)))
 2915                 PMC_UNMARK_ROW_STANDALONE(ri);
 2916         else
 2917                 PMC_UNMARK_ROW_THREAD(ri);
 2918 
 2919         /* unlink from the owner's list */
 2920         if (pm->pm_owner) {
 2921                 LIST_REMOVE(pm, pm_next);
 2922                 pm->pm_owner = NULL;
 2923         }
 2924 }
 2925 
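/*
 * Illustrative sketch (not compiled): the complete teardown sequence a
 * caller is expected to follow, mirroring the PMC_OP_PMCRELEASE handler
 * further below.
 */
#if 0
        struct pmc_owner *po;

        sx_xlock(&pmc_sx);              /* the release path needs the SX lock */
        po = pm->pm_owner;
        pmc_release_pmc_descriptor(pm); /* detach hw, unlink targets/owner */
        pmc_maybe_remove_owner(po);     /* reap the owner record if now empty */
        pmc_destroy_pmc_descriptor(pm); /* free the descriptor itself */
        sx_xunlock(&pmc_sx);
#endif
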
 2926 /*
 2927  * Register an owner and a pmc.
 2928  */
 2929 
 2930 static int
 2931 pmc_register_owner(struct proc *p, struct pmc *pmc)
 2932 {
 2933         struct pmc_owner *po;
 2934 
 2935         sx_assert(&pmc_sx, SX_XLOCKED);
 2936 
 2937         if ((po = pmc_find_owner_descriptor(p)) == NULL)
 2938                 if ((po = pmc_allocate_owner_descriptor(p)) == NULL)
 2939                         return ENOMEM;
 2940 
 2941         KASSERT(pmc->pm_owner == NULL,
 2942             ("[pmc,%d] attempting to own an initialized PMC", __LINE__));
 2943         pmc->pm_owner  = po;
 2944 
 2945         LIST_INSERT_HEAD(&po->po_pmcs, pmc, pm_next);
 2946 
 2947         PROC_LOCK(p);
 2948         p->p_flag |= P_HWPMC;
 2949         PROC_UNLOCK(p);
 2950 
 2951         if (po->po_flags & PMC_PO_OWNS_LOGFILE)
 2952                 pmclog_process_pmcallocate(pmc);
 2953 
 2954         PMCDBG2(PMC,REG,1, "register-owner pmc-owner=%p pmc=%p",
 2955             po, pmc);
 2956 
 2957         return 0;
 2958 }
 2959 
 2960 /*
 2961  * Return the current row disposition:
 2962  * == 0 => FREE
 2963  *  > 0 => PROCESS MODE
 2964  *  < 0 => SYSTEM MODE
 2965  */
 2966 
 2967 int
 2968 pmc_getrowdisp(int ri)
 2969 {
 2970         return pmc_pmcdisp[ri];
 2971 }
 2972 
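/*
 * Illustrative sketch of interpreting the disposition value, assuming the
 * usual reference-count encoding: thread-mode allocations increment
 * pmc_pmcdisp[ri] and standalone allocations decrement it, so only the
 * sign is significant.
 */
#if 0
        int disp;

        disp = pmc_getrowdisp(ri);
        if (disp == 0)
                ;       /* row 'ri' is free */
        else if (disp > 0)
                ;       /* row 'ri' is used by process-mode PMCs */
        else
                ;       /* row 'ri' is used by a system-wide PMC */
#endif
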
 2973 /*
 2974  * Check if a PMC at row index 'ri' can be allocated to the current
 2975  * process.
 2976  *
 2977  * Allocation can fail if:
 2978  *   - the current process is already being profiled by a PMC at index 'ri',
 2979  *     attached to it via OP_PMCATTACH.
 2980  *   - the current process has already allocated a PMC at index 'ri'
 2981  *     via OP_ALLOCATE.
 2982  */
 2983 
 2984 static int
 2985 pmc_can_allocate_rowindex(struct proc *p, unsigned int ri, int cpu)
 2986 {
 2987         enum pmc_mode mode;
 2988         struct pmc *pm;
 2989         struct pmc_owner *po;
 2990         struct pmc_process *pp;
 2991 
 2992         PMCDBG5(PMC,ALR,1, "can-allocate-rowindex proc=%p (%d, %s) ri=%d "
 2993             "cpu=%d", p, p->p_pid, p->p_comm, ri, cpu);
 2994 
 2995         /*
 2996          * We shouldn't have already allocated a process-mode PMC at
 2997          * row index 'ri'.
 2998          *
 2999          * We shouldn't have allocated a system-wide PMC on the same
 3000          * CPU and same RI.
 3001          */
 3002         if ((po = pmc_find_owner_descriptor(p)) != NULL)
 3003                 LIST_FOREACH(pm, &po->po_pmcs, pm_next) {
 3004                     if (PMC_TO_ROWINDEX(pm) == ri) {
 3005                             mode = PMC_TO_MODE(pm);
 3006                             if (PMC_IS_VIRTUAL_MODE(mode))
 3007                                     return EEXIST;
 3008                             if (PMC_IS_SYSTEM_MODE(mode) &&
 3009                                 (int) PMC_TO_CPU(pm) == cpu)
 3010                                     return EEXIST;
 3011                     }
 3012                 }
 3013 
 3014         /*
 3015          * We also shouldn't be the target of any PMC at this index,
 3016          * since otherwise a PMC_ATTACH to ourselves would fail.
 3017          */
 3018         if ((pp = pmc_find_process_descriptor(p, 0)) != NULL)
 3019                 if (pp->pp_pmcs[ri].pp_pmc)
 3020                         return EEXIST;
 3021 
 3022         PMCDBG4(PMC,ALR,2, "can-allocate-rowindex proc=%p (%d, %s) ri=%d ok",
 3023             p, p->p_pid, p->p_comm, ri);
 3024 
 3025         return 0;
 3026 }
 3027 
 3028 /*
 3029  * Check if a given PMC at row index 'ri' can be currently used in
 3030  * mode 'mode'.
 3031  */
 3032 
 3033 static int
 3034 pmc_can_allocate_row(int ri, enum pmc_mode mode)
 3035 {
 3036         enum pmc_disp   disp;
 3037 
 3038         sx_assert(&pmc_sx, SX_XLOCKED);
 3039 
 3040         PMCDBG2(PMC,ALR,1, "can-allocate-row ri=%d mode=%d", ri, mode);
 3041 
 3042         if (PMC_IS_SYSTEM_MODE(mode))
 3043                 disp = PMC_DISP_STANDALONE;
 3044         else
 3045                 disp = PMC_DISP_THREAD;
 3046 
 3047         /*
 3048          * check disposition for PMC row 'ri':
 3049          *
 3050          * Expected disposition         Row-disposition         Result
 3051          *
 3052          * STANDALONE                   STANDALONE or FREE      proceed
 3053          * STANDALONE                   THREAD                  fail
 3054          * THREAD                       THREAD or FREE          proceed
 3055          * THREAD                       STANDALONE              fail
 3056          */
 3057 
 3058         if (!PMC_ROW_DISP_IS_FREE(ri) &&
 3059             !(disp == PMC_DISP_THREAD && PMC_ROW_DISP_IS_THREAD(ri)) &&
 3060             !(disp == PMC_DISP_STANDALONE && PMC_ROW_DISP_IS_STANDALONE(ri)))
 3061                 return EBUSY;
 3062 
 3063         /*
 3064          * All OK
 3065          */
 3066 
 3067         PMCDBG2(PMC,ALR,2, "can-allocate-row ri=%d mode=%d ok", ri, mode);
 3068 
 3069         return 0;
 3070 
 3071 }
 3072 
 3073 /*
 3074  * Find a PMC descriptor with user handle 'pmcid' for thread 'td'.
 3075  */
 3076 
 3077 static struct pmc *
 3078 pmc_find_pmc_descriptor_in_process(struct pmc_owner *po, pmc_id_t pmcid)
 3079 {
 3080         struct pmc *pm;
 3081 
 3082         KASSERT(PMC_ID_TO_ROWINDEX(pmcid) < md->pmd_npmc,
 3083             ("[pmc,%d] Illegal pmc index %d (max %d)", __LINE__,
 3084                 PMC_ID_TO_ROWINDEX(pmcid), md->pmd_npmc));
 3085 
 3086         LIST_FOREACH(pm, &po->po_pmcs, pm_next)
 3087             if (pm->pm_id == pmcid)
 3088                     return pm;
 3089 
 3090         return NULL;
 3091 }
 3092 
 3093 static int
 3094 pmc_find_pmc(pmc_id_t pmcid, struct pmc **pmc)
 3095 {
 3096 
 3097         struct pmc *pm, *opm;
 3098         struct pmc_owner *po;
 3099         struct pmc_process *pp;
 3100 
 3101         PMCDBG1(PMC,FND,1, "find-pmc id=%d", pmcid);
 3102         if (PMC_ID_TO_ROWINDEX(pmcid) >= md->pmd_npmc)
 3103                 return (EINVAL);
 3104 
 3105         if ((po = pmc_find_owner_descriptor(curthread->td_proc)) == NULL) {
 3106                 /*
 3107                  * With PMC_F_DESCENDANTS, a child process will not appear
 3108                  * in the owner hash list.  Find the process descriptor
 3109                  * first and look up the owner ('po') through its PMC.
 3110                  */
 3111                 if ((pp = pmc_find_process_descriptor(curthread->td_proc,
 3112                     PMC_FLAG_NONE)) == NULL) {
 3113                         return ESRCH;
 3114                 } else {
 3115                         opm = pp->pp_pmcs[PMC_ID_TO_ROWINDEX(pmcid)].pp_pmc;
 3116                         if (opm == NULL)
 3117                                 return ESRCH;
 3118                         if ((opm->pm_flags & (PMC_F_ATTACHED_TO_OWNER|
 3119                             PMC_F_DESCENDANTS)) != (PMC_F_ATTACHED_TO_OWNER|
 3120                             PMC_F_DESCENDANTS))
 3121                                 return ESRCH;
 3122                         po = opm->pm_owner;
 3123                 }
 3124         }
 3125 
 3126         if ((pm = pmc_find_pmc_descriptor_in_process(po, pmcid)) == NULL)
 3127                 return EINVAL;
 3128 
 3129         PMCDBG2(PMC,FND,2, "find-pmc id=%d -> pmc=%p", pmcid, pm);
 3130 
 3131         *pmc = pm;
 3132         return 0;
 3133 }
 3134 
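/*
 * Illustrative sketch: a pmc_id_t packs the CPU, mode, class and row index
 * into a single handle (see PMC_ID_MAKE_ID in the allocation path below),
 * which is why pmc_find_pmc() can bounds-check the row index before
 * walking any owner lists.
 */
#if 0
        pmc_id_t id;

        id = PMC_ID_MAKE_ID(cpu, mode, class, ri);
        KASSERT(PMC_ID_TO_ROWINDEX(id) == ri,
            ("row index must round-trip through the id"));
#endif
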
 3135 /*
 3136  * Start a PMC.
 3137  */
 3138 
 3139 static int
 3140 pmc_start(struct pmc *pm)
 3141 {
 3142         enum pmc_mode mode;
 3143         struct pmc_owner *po;
 3144         struct pmc_binding pb;
 3145         struct pmc_classdep *pcd;
 3146         int adjri, error, cpu, ri;
 3147 
 3148         KASSERT(pm != NULL,
 3149             ("[pmc,%d] null pm", __LINE__));
 3150 
 3151         mode = PMC_TO_MODE(pm);
 3152         ri   = PMC_TO_ROWINDEX(pm);
 3153         pcd  = pmc_ri_to_classdep(md, ri, &adjri);
 3154 
 3155         error = 0;
 3156 
 3157         PMCDBG3(PMC,OPS,1, "start pmc=%p mode=%d ri=%d", pm, mode, ri);
 3158 
 3159         po = pm->pm_owner;
 3160 
 3161         /*
 3162          * Disallow PMCSTART if a logfile is required but has not been
 3163          * configured yet.
 3164          */
 3165         if ((pm->pm_flags & PMC_F_NEEDS_LOGFILE) &&
 3166             (po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)
 3167                 return (EDOOFUS);       /* programming error */
 3168 
 3169         /*
 3170          * If this is a sampling mode PMC, log mapping information for
 3171          * the kernel modules that are currently loaded.
 3172          */
 3173         if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
 3174             pmc_log_kernel_mappings(pm);
 3175 
 3176         if (PMC_IS_VIRTUAL_MODE(mode)) {
 3177 
 3178                 /*
 3179                  * If a PMCATTACH has never been done on this PMC,
 3180                  * attach it to its owner process.
 3181                  */
 3182 
 3183                 if (LIST_EMPTY(&pm->pm_targets))
 3184                         error = (pm->pm_flags & PMC_F_ATTACH_DONE) ? ESRCH :
 3185                             pmc_attach_process(po->po_owner, pm);
 3186 
 3187                 /*
 3188                  * If the PMC is attached to its owner, then force a context
 3189                  * switch to ensure that the MD state gets set correctly.
 3190                  */
 3191 
 3192                 if (error == 0) {
 3193                         pm->pm_state = PMC_STATE_RUNNING;
 3194                         if (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER)
 3195                                 pmc_force_context_switch();
 3196                 }
 3197 
 3198                 return (error);
 3199         }
 3200 
 3201 
 3202         /*
 3203          * A system-wide PMC.
 3204          *
 3205          * Add the owner to the global list if this is a system-wide
 3206          * sampling PMC.
 3207          */
 3208 
 3209         if (mode == PMC_MODE_SS) {
 3210                 /*
 3211                  * Log mapping information for all existing processes in the
 3212                  * system.  Subsequent mappings are logged as they happen;
 3213                  * see pmc_process_mmap().
 3214                  */
 3215                 if (po->po_logprocmaps == 0) {
 3216                         pmc_log_all_process_mappings(po);
 3217                         po->po_logprocmaps = 1;
 3218                 }
 3219                 po->po_sscount++;
 3220                 if (po->po_sscount == 1) {
 3221                         atomic_add_rel_int(&pmc_ss_count, 1);
 3222                         CK_LIST_INSERT_HEAD(&pmc_ss_owners, po, po_ssnext);
 3223                         PMCDBG1(PMC,OPS,1, "po=%p in global list", po);
 3224                 }
 3225         }
 3226 
 3227         /*
 3228          * Move to the CPU associated with this
 3229          * PMC, and start the hardware.
 3230          */
 3231 
 3232         pmc_save_cpu_binding(&pb);
 3233 
 3234         cpu = PMC_TO_CPU(pm);
 3235 
 3236         if (!pmc_cpu_is_active(cpu))
 3237                 return (ENXIO);
 3238 
 3239         pmc_select_cpu(cpu);
 3240 
 3241         /*
 3242          * Global PMCs are configured at allocation time, so write
 3243          * out the initial value and start the PMC.
 3244          */
 3245 
 3246         pm->pm_state = PMC_STATE_RUNNING;
 3247 
 3248         critical_enter();
 3249         if ((error = pcd->pcd_write_pmc(cpu, adjri,
 3250                  PMC_IS_SAMPLING_MODE(mode) ?
 3251                  pm->pm_sc.pm_reloadcount :
 3252                  pm->pm_sc.pm_initial)) == 0) {
 3253                 /* If a sampling mode PMC, reset stalled state. */
 3254                 if (PMC_IS_SAMPLING_MODE(mode))
 3255                         pm->pm_pcpu_state[cpu].pps_stalled = 0;
 3256 
 3257                 /* Indicate that we desire this to run. Start it. */
 3258                 pm->pm_pcpu_state[cpu].pps_cpustate = 1;
 3259                 error = pcd->pcd_start_pmc(cpu, adjri);
 3260         }
 3261         critical_exit();
 3262 
 3263         pmc_restore_cpu_binding(&pb);
 3264 
 3265         return (error);
 3266 }
 3267 
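/*
 * Illustrative sketch of the matching userland flow through the pmc(3)
 * wrappers (compiled with -lpmc; error handling via err(3)).  The event
 * name is a placeholder and the exact pmc_allocate() signature varies
 * across FreeBSD releases, so consult pmc(3) for the authoritative
 * prototypes.
 */
#if 0
        pmc_id_t pmcid;
        pmc_value_t v;

        if (pmc_init() < 0)
                err(EX_OSERR, "pmc_init");
        if (pmc_allocate("instructions", PMC_MODE_TC, 0, PMC_CPU_ANY,
            &pmcid) < 0)
                err(EX_OSERR, "pmc_allocate");
        if (pmc_start(pmcid) < 0)       /* reaches pmc_start() above */
                err(EX_OSERR, "pmc_start");
        /* ... run the workload being measured ... */
        if (pmc_stop(pmcid) < 0 || pmc_read(pmcid, &v) < 0)
                err(EX_OSERR, "pmc_stop/pmc_read");
        pmc_release(pmcid);
#endif
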
 3268 /*
 3269  * Stop a PMC.
 3270  */
 3271 
 3272 static int
 3273 pmc_stop(struct pmc *pm)
 3274 {
 3275         struct pmc_owner *po;
 3276         struct pmc_binding pb;
 3277         struct pmc_classdep *pcd;
 3278         int adjri, cpu, error, ri;
 3279 
 3280         KASSERT(pm != NULL, ("[pmc,%d] null pmc", __LINE__));
 3281 
 3282         PMCDBG3(PMC,OPS,1, "stop pmc=%p mode=%d ri=%d", pm,
 3283             PMC_TO_MODE(pm), PMC_TO_ROWINDEX(pm));
 3284 
 3285         pm->pm_state = PMC_STATE_STOPPED;
 3286 
 3287         /*
 3288          * If the PMC is a virtual mode one, changing the state to
 3289          * non-RUNNING is enough to ensure that the PMC never gets
 3290          * scheduled.
 3291          *
 3292          * If this PMC is currently running on a CPU, then it will
 3293          * be handled correctly at the time its target process is
 3294          * context switched out.
 3295          */
 3296 
 3297         if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)))
 3298                 return 0;
 3299 
 3300         /*
 3301          * A system-mode PMC.  Move to the CPU associated with
 3302          * this PMC, and stop the hardware.  We update the
 3303          * 'initial count' so that a subsequent PMCSTART will
 3304          * resume counting from the current hardware count.
 3305          */
 3306 
 3307         pmc_save_cpu_binding(&pb);
 3308 
 3309         cpu = PMC_TO_CPU(pm);
 3310 
 3311         KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
 3312             ("[pmc,%d] illegal cpu=%d", __LINE__, cpu));
 3313 
 3314         if (!pmc_cpu_is_active(cpu))
 3315                 return ENXIO;
 3316 
 3317         pmc_select_cpu(cpu);
 3318 
 3319         ri = PMC_TO_ROWINDEX(pm);
 3320         pcd = pmc_ri_to_classdep(md, ri, &adjri);
 3321 
 3322         pm->pm_pcpu_state[cpu].pps_cpustate = 0;
 3323         critical_enter();
 3324         if ((error = pcd->pcd_stop_pmc(cpu, adjri)) == 0)
 3325                 error = pcd->pcd_read_pmc(cpu, adjri, &pm->pm_sc.pm_initial);
 3326         critical_exit();
 3327 
 3328         pmc_restore_cpu_binding(&pb);
 3329 
 3330         po = pm->pm_owner;
 3331 
 3332         /* remove this owner from the global list of SS PMC owners */
 3333         if (PMC_TO_MODE(pm) == PMC_MODE_SS) {
 3334                 po->po_sscount--;
 3335                 if (po->po_sscount == 0) {
 3336                         atomic_subtract_rel_int(&pmc_ss_count, 1);
 3337                         CK_LIST_REMOVE(po, po_ssnext);
 3338                         epoch_wait_preempt(global_epoch_preempt);
 3339                         PMCDBG1(PMC,OPS,2,"po=%p removed from global list", po);
 3340                 }
 3341         }
 3342 
 3343         return (error);
 3344 }
 3345 
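/*
 * Illustrative sketch: because pcd_read_pmc() refreshes pm_sc.pm_initial
 * above, a stop/start pair on a counting PMC resumes from the value at
 * which it stopped rather than from the original initial count.
 */
#if 0
        pmc_stop(pmcid);        /* hardware count saved into pm_initial */
        pmc_start(pmcid);       /* counting resumes from the saved value */
#endif
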
 3346 static struct pmc_classdep *
 3347 pmc_class_to_classdep(enum pmc_class class)
 3348 {
 3349         int n;
 3350 
 3351         for (n = 0; n < md->pmd_nclass; n++)
 3352                 if (md->pmd_classdep[n].pcd_class == class)
 3353                         return (&md->pmd_classdep[n]);
 3354         return (NULL);
 3355 }
 3356 
 3357 #if defined(HWPMC_DEBUG) && defined(KTR)
 3358 static const char *pmc_op_to_name[] = {
 3359 #undef  __PMC_OP
 3360 #define __PMC_OP(N, D)  #N ,
 3361         __PMC_OPS()
 3362         NULL
 3363 };
 3364 #endif
 3365 
 3366 /*
 3367  * The syscall interface
 3368  */
 3369 
 3370 #define PMC_GET_SX_XLOCK(...) do {              \
 3371         sx_xlock(&pmc_sx);                      \
 3372         if (pmc_hook == NULL) {                 \
 3373                 sx_xunlock(&pmc_sx);            \
 3374                 return __VA_ARGS__;             \
 3375         }                                       \
 3376 } while (0)
 3377 
 3378 #define PMC_DOWNGRADE_SX() do {                 \
 3379         sx_downgrade(&pmc_sx);                  \
 3380         is_sx_downgraded = 1;                   \
 3381 } while (0)
 3382 
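/*
 * Illustrative sketch of the locking protocol these macros implement for
 * the handler below: every operation enters with the module SX lock held
 * exclusively, read-mostly operations downgrade to a shared lock, and the
 * handler's epilogue drops whichever lock is still held.
 */
#if 0
        PMC_GET_SX_XLOCK(ENOSYS);       /* fail if the module is unhooked */
        is_sx_downgraded = 0;
        /* ... */
        PMC_DOWNGRADE_SX();             /* read-only op: shared mode suffices */
        /* ... */
        if (is_sx_downgraded)
                sx_sunlock(&pmc_sx);
        else
                sx_xunlock(&pmc_sx);
#endif
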
 3383 static int
 3384 pmc_syscall_handler(struct thread *td, void *syscall_args)
 3385 {
 3386         int error, is_sx_downgraded, op;
 3387         struct pmc_syscall_args *c;
 3388         void *pmclog_proc_handle;
 3389         void *arg;
 3390 
 3391         c = (struct pmc_syscall_args *)syscall_args;
 3392         op = c->pmop_code;
 3393         arg = c->pmop_data;
 3394         /* PMC isn't set up yet */
 3395         if (pmc_hook == NULL)
 3396                 return (EINVAL);
 3397         if (op == PMC_OP_CONFIGURELOG) {
 3398                 /*
 3399                  * We cannot create the logging process inside
 3400                  * pmclog_configure_log() because there is a LOR
 3401                  * between pmc_sx and the process structure locks.
 3402                  * Instead, pre-create the process and ignite its
 3403                  * service loop if everything goes well; otherwise
 3404                  * direct the pre-created process to exit.
 3405                  */
 3406                 error = pmclog_proc_create(td, &pmclog_proc_handle);
 3407                 if (error != 0)
 3408                         goto done_syscall;
 3409         }
 3410 
 3411         PMC_GET_SX_XLOCK(ENOSYS);
 3412         is_sx_downgraded = 0;
 3413         PMCDBG3(MOD,PMS,1, "syscall op=%d \"%s\" arg=%p", op,
 3414             pmc_op_to_name[op], arg);
 3415 
 3416         error = 0;
 3417         counter_u64_add(pmc_stats.pm_syscalls, 1);
 3418 
 3419         switch (op) {
 3420 
 3421 
 3422         /*
 3423          * Configure a log file.
 3424          *
 3425          * XXX This OP will be reworked.
 3426          */
 3427 
 3428         case PMC_OP_CONFIGURELOG:
 3429         {
 3430                 struct proc *p;
 3431                 struct pmc *pm;
 3432                 struct pmc_owner *po;
 3433                 struct pmc_op_configurelog cl;
 3434 
 3435                 if ((error = copyin(arg, &cl, sizeof(cl))) != 0) {
 3436                         pmclog_proc_ignite(pmclog_proc_handle, NULL);
 3437                         break;
 3438                 }
 3439 
 3440                 /* mark this process as owning a log file */
 3441                 p = td->td_proc;
 3442                 if ((po = pmc_find_owner_descriptor(p)) == NULL)
 3443                         if ((po = pmc_allocate_owner_descriptor(p)) == NULL) {
 3444                                 pmclog_proc_ignite(pmclog_proc_handle, NULL);
 3445                                 error = ENOMEM;
 3446                                 break;
 3447                         }
 3448 
 3449                 /*
 3450                  * If a valid file descriptor was passed in, try to
 3451                  * configure it; otherwise, if 'fd' was negative and a
 3452                  * log file is currently configured, flush its buffers
 3453                  * and de-configure it.
 3454                  */
 3455                 if (cl.pm_logfd >= 0) {
 3456                         error = pmclog_configure_log(md, po, cl.pm_logfd);
 3457                         pmclog_proc_ignite(pmclog_proc_handle, error == 0 ?
 3458                             po : NULL);
 3459                 } else if (po->po_flags & PMC_PO_OWNS_LOGFILE) {
 3460                         pmclog_proc_ignite(pmclog_proc_handle, NULL);
 3461                         error = pmclog_close(po);
 3462                         if (error == 0) {
 3463                                 LIST_FOREACH(pm, &po->po_pmcs, pm_next)
 3464                                     if (pm->pm_flags & PMC_F_NEEDS_LOGFILE &&
 3465                                         pm->pm_state == PMC_STATE_RUNNING)
 3466                                             pmc_stop(pm);
 3467                                 error = pmclog_deconfigure_log(po);
 3468                         }
 3469                 } else {
 3470                         pmclog_proc_ignite(pmclog_proc_handle, NULL);
 3471                         error = EINVAL;
 3472                 }
 3473         }
 3474         break;
 3475 
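/*
 * Illustrative sketch of the userland side, assuming the pmc(3) wrapper
 * pmc_configure_logfile(); a negative descriptor takes the de-configure
 * path handled above.
 */
#if 0
        int fd;

        fd = open("samples.pmclog", O_WRONLY | O_CREAT | O_TRUNC, 0600);
        if (fd < 0 || pmc_configure_logfile(fd) < 0)
                err(EX_OSERR, "cannot configure log file");
        /* ... sample with log-backed PMCs ... */
        pmc_configure_logfile(-1);      /* flush buffers and de-configure */
#endif
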
 3476         /*
 3477          * Flush a log file.
 3478          */
 3479 
 3480         case PMC_OP_FLUSHLOG:
 3481         {
 3482                 struct pmc_owner *po;
 3483 
 3484                 sx_assert(&pmc_sx, SX_XLOCKED);
 3485 
 3486                 if ((po = pmc_find_owner_descriptor(td->td_proc)) == NULL) {
 3487                         error = EINVAL;
 3488                         break;
 3489                 }
 3490 
 3491                 error = pmclog_flush(po, 0);
 3492         }
 3493         break;
 3494 
 3495         /*
 3496          * Close a log file.
 3497          */
 3498 
 3499         case PMC_OP_CLOSELOG:
 3500         {
 3501                 struct pmc_owner *po;
 3502 
 3503                 sx_assert(&pmc_sx, SX_XLOCKED);
 3504 
 3505                 if ((po = pmc_find_owner_descriptor(td->td_proc)) == NULL) {
 3506                         error = EINVAL;
 3507                         break;
 3508                 }
 3509 
 3510                 error = pmclog_close(po);
 3511         }
 3512         break;
 3513 
 3514         /*
 3515          * Retrieve hardware configuration.
 3516          */
 3517 
 3518         case PMC_OP_GETCPUINFO: /* CPU information */
 3519         {
 3520                 struct pmc_op_getcpuinfo gci;
 3521                 struct pmc_classinfo *pci;
 3522                 struct pmc_classdep *pcd;
 3523                 int cl;
 3524 
 3525                 memset(&gci, 0, sizeof(gci));
 3526                 gci.pm_cputype = md->pmd_cputype;
 3527                 gci.pm_ncpu    = pmc_cpu_max();
 3528                 gci.pm_npmc    = md->pmd_npmc;
 3529                 gci.pm_nclass  = md->pmd_nclass;
 3530                 pci = gci.pm_classes;
 3531                 pcd = md->pmd_classdep;
 3532                 for (cl = 0; cl < md->pmd_nclass; cl++, pci++, pcd++) {
 3533                         pci->pm_caps  = pcd->pcd_caps;
 3534                         pci->pm_class = pcd->pcd_class;
 3535                         pci->pm_width = pcd->pcd_width;
 3536                         pci->pm_num   = pcd->pcd_num;
 3537                 }
 3538                 error = copyout(&gci, arg, sizeof(gci));
 3539         }
 3540         break;
 3541 
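/*
 * Illustrative sketch: userland normally reaches this operation through
 * the pmc(3) wrapper pmc_cpuinfo(); the field names below mirror struct
 * pmc_op_getcpuinfo above, but treat this as a sketch and check pmc(3).
 */
#if 0
        const struct pmc_cpuinfo *ci;

        if (pmc_cpuinfo(&ci) < 0)
                err(EX_OSERR, "pmc_cpuinfo");
        printf("%u CPUs, %u PMCs, %u classes\n", ci->pm_ncpu, ci->pm_npmc,
            ci->pm_nclass);
#endif
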
 3542         /*
 3543          * Retrieve soft events list.
 3544          */
 3545         case PMC_OP_GETDYNEVENTINFO:
 3546         {
 3547                 enum pmc_class                  cl;
 3548                 enum pmc_event                  ev;
 3549                 struct pmc_op_getdyneventinfo   *gei;
 3550                 struct pmc_dyn_event_descr      dev;
 3551                 struct pmc_soft                 *ps;
 3552                 uint32_t                        nevent;
 3553 
 3554                 sx_assert(&pmc_sx, SX_LOCKED);
 3555 
 3556                 gei = (struct pmc_op_getdyneventinfo *) arg;
 3557 
 3558                 if ((error = copyin(&gei->pm_class, &cl, sizeof(cl))) != 0)
 3559                         break;
 3560 
 3561                 /* Only SOFT class is dynamic. */
 3562                 if (cl != PMC_CLASS_SOFT) {
 3563                         error = EINVAL;
 3564                         break;
 3565                 }
 3566 
 3567                 nevent = 0;
 3568                 for (ev = PMC_EV_SOFT_FIRST; (int)ev <= PMC_EV_SOFT_LAST; ev++) {
 3569                         ps = pmc_soft_ev_acquire(ev);
 3570                         if (ps == NULL)
 3571                                 continue;
 3572                         bcopy(&ps->ps_ev, &dev, sizeof(dev));
 3573                         pmc_soft_ev_release(ps);
 3574 
 3575                         error = copyout(&dev,
 3576                             &gei->pm_events[nevent],
 3577                             sizeof(struct pmc_dyn_event_descr));
 3578                         if (error != 0)
 3579                                 break;
 3580                         nevent++;
 3581                 }
 3582                 if (error != 0)
 3583                         break;
 3584 
 3585                 error = copyout(&nevent, &gei->pm_nevent,
 3586                     sizeof(nevent));
 3587         }
 3588         break;
 3589 
 3590         /*
 3591          * Get module statistics
 3592          */
 3593 
 3594         case PMC_OP_GETDRIVERSTATS:
 3595         {
 3596                 struct pmc_op_getdriverstats gms;
 3597 #define CFETCH(a, b, field) a.field = counter_u64_fetch(b.field)
 3598                 CFETCH(gms, pmc_stats, pm_intr_ignored);
 3599                 CFETCH(gms, pmc_stats, pm_intr_processed);
 3600                 CFETCH(gms, pmc_stats, pm_intr_bufferfull);
 3601                 CFETCH(gms, pmc_stats, pm_syscalls);
 3602                 CFETCH(gms, pmc_stats, pm_syscall_errors);
 3603                 CFETCH(gms, pmc_stats, pm_buffer_requests);
 3604                 CFETCH(gms, pmc_stats, pm_buffer_requests_failed);
 3605                 CFETCH(gms, pmc_stats, pm_log_sweeps);
 3606 #undef CFETCH
 3607                 error = copyout(&gms, arg, sizeof(gms));
 3608         }
 3609         break;
 3610 
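/*
 * For reference, one CFETCH() expansion from the block above:
 *
 *      CFETCH(gms, pmc_stats, pm_syscalls);
 * becomes
 *      gms.pm_syscalls = counter_u64_fetch(pmc_stats.pm_syscalls);
 *
 * i.e., each per-CPU counter(9) is summed into the plain 64-bit field
 * that is then copied out to userland.
 */
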
 3611 
 3612         /*
 3613          * Retrieve module version number
 3614          */
 3615 
 3616         case PMC_OP_GETMODULEVERSION:
 3617         {
 3618                 uint32_t cv, modv;
 3619 
 3620                 /* retrieve the client's idea of the ABI version */
 3621                 if ((error = copyin(arg, &cv, sizeof(uint32_t))) != 0)
 3622                         break;
 3623                 /* don't service clients newer than our driver */
 3624                 modv = PMC_VERSION;
 3625                 if ((cv & 0xFFFF0000) > (modv & 0xFFFF0000)) {
 3626                         error = EPROGMISMATCH;
 3627                         break;
 3628                 }
 3629                 error = copyout(&modv, arg, sizeof(modv));
 3630         }
 3631         break;
 3632 
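/*
 * Worked example of the check above: the 0xFFFF0000 mask leaves only the
 * upper two bytes of the version word in play, so a client differing from
 * the driver purely in the low (patch) bits is still serviced, while a
 * client whose masked version exceeds the driver's gets EPROGMISMATCH.
 */
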
 3633 
 3634         /*
 3635          * Retrieve the state of all the PMCs on a given
 3636          * CPU.
 3637          */
 3638 
 3639         case PMC_OP_GETPMCINFO:
 3640         {
 3641                 int ari;
 3642                 struct pmc *pm;
 3643                 size_t pmcinfo_size;
 3644                 uint32_t cpu, n, npmc;
 3645                 struct pmc_owner *po;
 3646                 struct pmc_binding pb;
 3647                 struct pmc_classdep *pcd;
 3648                 struct pmc_info *p, *pmcinfo;
 3649                 struct pmc_op_getpmcinfo *gpi;
 3650 
 3651                 PMC_DOWNGRADE_SX();
 3652 
 3653                 gpi = (struct pmc_op_getpmcinfo *) arg;
 3654 
 3655                 if ((error = copyin(&gpi->pm_cpu, &cpu, sizeof(cpu))) != 0)
 3656                         break;
 3657 
 3658                 if (cpu >= pmc_cpu_max()) {
 3659                         error = EINVAL;
 3660                         break;
 3661                 }
 3662 
 3663                 if (!pmc_cpu_is_active(cpu)) {
 3664                         error = ENXIO;
 3665                         break;
 3666                 }
 3667 
 3668                 /* switch to CPU 'cpu' */
 3669                 pmc_save_cpu_binding(&pb);
 3670                 pmc_select_cpu(cpu);
 3671 
 3672                 npmc = md->pmd_npmc;
 3673 
 3674                 pmcinfo_size = npmc * sizeof(struct pmc_info);
 3675                 pmcinfo = malloc(pmcinfo_size, M_PMC, M_WAITOK | M_ZERO);
 3676 
 3677                 p = pmcinfo;
 3678 
 3679                 for (n = 0; n < md->pmd_npmc; n++, p++) {
 3680 
 3681                         pcd = pmc_ri_to_classdep(md, n, &ari);
 3682 
 3683                         KASSERT(pcd != NULL,
 3684                             ("[pmc,%d] null pcd ri=%d", __LINE__, n));
 3685 
 3686                         if ((error = pcd->pcd_describe(cpu, ari, p, &pm)) != 0)
 3687                                 break;
 3688 
 3689                         if (PMC_ROW_DISP_IS_STANDALONE(n))
 3690                                 p->pm_rowdisp = PMC_DISP_STANDALONE;
 3691                         else if (PMC_ROW_DISP_IS_THREAD(n))
 3692                                 p->pm_rowdisp = PMC_DISP_THREAD;
 3693                         else
 3694                                 p->pm_rowdisp = PMC_DISP_FREE;
 3695 
 3696                         p->pm_ownerpid = -1;
 3697 
 3698                         if (pm == NULL) /* no PMC associated */
 3699                                 continue;
 3700 
 3701                         po = pm->pm_owner;
 3702 
 3703                         KASSERT(po->po_owner != NULL,
 3704                             ("[pmc,%d] pmc_owner had a null proc pointer",
 3705                                 __LINE__));
 3706 
 3707                         p->pm_ownerpid = po->po_owner->p_pid;
 3708                         p->pm_mode     = PMC_TO_MODE(pm);
 3709                         p->pm_event    = pm->pm_event;
 3710                         p->pm_flags    = pm->pm_flags;
 3711 
 3712                         if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
 3713                                 p->pm_reloadcount =
 3714                                     pm->pm_sc.pm_reloadcount;
 3715                 }
 3716 
 3717                 pmc_restore_cpu_binding(&pb);
 3718 
 3719                 /* now copy out the PMC info collected */
 3720                 if (error == 0)
 3721                         error = copyout(pmcinfo, &gpi->pm_pmcs, pmcinfo_size);
 3722 
 3723                 free(pmcinfo, M_PMC);
 3724         }
 3725         break;
 3726 
 3727 
 3728         /*
 3729          * Set the administrative state of a PMC, i.e., whether
 3730          * the PMC is to be used or not.
 3731          */
 3732 
 3733         case PMC_OP_PMCADMIN:
 3734         {
 3735                 int cpu, ri;
 3736                 enum pmc_state request;
 3737                 struct pmc_cpu *pc;
 3738                 struct pmc_hw *phw;
 3739                 struct pmc_op_pmcadmin pma;
 3740                 struct pmc_binding pb;
 3741 
 3742                 sx_assert(&pmc_sx, SX_XLOCKED);
 3743 
 3744                 KASSERT(td == curthread,
 3745                     ("[pmc,%d] td != curthread", __LINE__));
 3746 
 3747                 error = priv_check(td, PRIV_PMC_MANAGE);
 3748                 if (error)
 3749                         break;
 3750 
 3751                 if ((error = copyin(arg, &pma, sizeof(pma))) != 0)
 3752                         break;
 3753 
 3754                 cpu = pma.pm_cpu;
 3755 
 3756                 if (cpu < 0 || cpu >= (int) pmc_cpu_max()) {
 3757                         error = EINVAL;
 3758                         break;
 3759                 }
 3760 
 3761                 if (!pmc_cpu_is_active(cpu)) {
 3762                         error = ENXIO;
 3763                         break;
 3764                 }
 3765 
 3766                 request = pma.pm_state;
 3767 
 3768                 if (request != PMC_STATE_DISABLED &&
 3769                     request != PMC_STATE_FREE) {
 3770                         error = EINVAL;
 3771                         break;
 3772                 }
 3773 
 3774                 ri = pma.pm_pmc; /* pmc id == row index */
 3775                 if (ri < 0 || ri >= (int) md->pmd_npmc) {
 3776                         error = EINVAL;
 3777                         break;
 3778                 }
 3779 
 3780                 /*
 3781                  * We can't disable a PMC with a row-index allocated
 3782                  * for process virtual PMCs.
 3783                  */
 3784 
 3785                 if (PMC_ROW_DISP_IS_THREAD(ri) &&
 3786                     request == PMC_STATE_DISABLED) {
 3787                         error = EBUSY;
 3788                         break;
 3789                 }
 3790 
 3791                 /*
 3792                  * Otherwise, this PMC on this CPU is either free or
 3793                  * in system-wide mode.
 3794                  */
 3795 
 3796                 pmc_save_cpu_binding(&pb);
 3797                 pmc_select_cpu(cpu);
 3798 
 3799                 pc  = pmc_pcpu[cpu];
 3800                 phw = pc->pc_hwpmcs[ri];
 3801 
 3802                 /*
 3803                  * XXX do we need some kind of 'forced' disable?
 3804                  */
 3805 
 3806                 if (phw->phw_pmc == NULL) {
 3807                         if (request == PMC_STATE_DISABLED &&
 3808                             (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED)) {
 3809                                 phw->phw_state &= ~PMC_PHW_FLAG_IS_ENABLED;
 3810                                 PMC_MARK_ROW_STANDALONE(ri);
 3811                         } else if (request == PMC_STATE_FREE &&
 3812                             (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) == 0) {
 3813                                 phw->phw_state |=  PMC_PHW_FLAG_IS_ENABLED;
 3814                                 PMC_UNMARK_ROW_STANDALONE(ri);
 3815                         }
 3816                         /* other cases are a no-op */
 3817                 } else
 3818                         error = EBUSY;
 3819 
 3820                 pmc_restore_cpu_binding(&pb);
 3821         }
 3822         break;
 3823 
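/*
 * Illustrative sketch: userland reaches this operation through the pmc(3)
 * wrappers pmc_disable() and pmc_enable(), e.g. to take row 1 on CPU 0
 * out of service and later return it to the free pool.
 */
#if 0
        if (pmc_disable(0, 1) < 0)      /* request PMC_STATE_DISABLED */
                err(EX_OSERR, "pmc_disable");
        /* ... */
        if (pmc_enable(0, 1) < 0)       /* request PMC_STATE_FREE */
                err(EX_OSERR, "pmc_enable");
#endif
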
 3824 
 3825         /*
 3826          * Allocate a PMC.
 3827          */
 3828 
 3829         case PMC_OP_PMCALLOCATE:
 3830         {
 3831                 int adjri, n;
 3832                 u_int cpu;
 3833                 uint32_t caps;
 3834                 struct pmc *pmc;
 3835                 enum pmc_mode mode;
 3836                 struct pmc_hw *phw;
 3837                 struct pmc_binding pb;
 3838                 struct pmc_classdep *pcd;
 3839                 struct pmc_op_pmcallocate pa;
 3840 
 3841                 if ((error = copyin(arg, &pa, sizeof(pa))) != 0)
 3842                         break;
 3843 
 3844                 caps = pa.pm_caps;
 3845                 mode = pa.pm_mode;
 3846                 cpu  = pa.pm_cpu;
 3847 
 3848                 if ((mode != PMC_MODE_SS  &&  mode != PMC_MODE_SC  &&
 3849                      mode != PMC_MODE_TS  &&  mode != PMC_MODE_TC) ||
 3850                     (cpu != (u_int) PMC_CPU_ANY && cpu >= pmc_cpu_max())) {
 3851                         error = EINVAL;
 3852                         break;
 3853                 }
 3854 
 3855                 /*
 3856                  * Virtual PMCs should only ask for a default CPU.
 3857                  * System mode PMCs need to specify a non-default CPU.
 3858                  */
 3859 
 3860                 if ((PMC_IS_VIRTUAL_MODE(mode) && cpu != (u_int) PMC_CPU_ANY) ||
 3861                     (PMC_IS_SYSTEM_MODE(mode) && cpu == (u_int) PMC_CPU_ANY)) {
 3862                         error = EINVAL;
 3863                         break;
 3864                 }
 3865 
 3866                 /*
 3867                  * Check that an inactive CPU is not being asked for.
 3868                  */
 3869 
 3870                 if (PMC_IS_SYSTEM_MODE(mode) && !pmc_cpu_is_active(cpu)) {
 3871                         error = ENXIO;
 3872                         break;
 3873                 }
 3874 
 3875                 /*
 3876                  * Refuse an allocation for a system-wide PMC if this
 3877                  * process has been jailed, or if this process lacks
 3878                  * super-user credentials and the sysctl tunable
 3879                  * 'security.bsd.unprivileged_syspmcs' is zero.
 3880                  */
 3881 
 3882                 if (PMC_IS_SYSTEM_MODE(mode)) {
 3883                         if (jailed(curthread->td_ucred)) {
 3884                                 error = EPERM;
 3885                                 break;
 3886                         }
 3887                         if (!pmc_unprivileged_syspmcs) {
 3888                                 error = priv_check(curthread,
 3889                                     PRIV_PMC_SYSTEM);
 3890                                 if (error)
 3891                                         break;
 3892                         }
 3893                 }
 3894 
 3895                 /*
 3896                  * Look for valid values for 'pm_flags'
 3897                  */
 3898 
 3899                 if ((pa.pm_flags & ~(PMC_F_DESCENDANTS | PMC_F_LOG_PROCCSW |
 3900                     PMC_F_LOG_PROCEXIT | PMC_F_CALLCHAIN |
 3901                     PMC_F_USERCALLCHAIN)) != 0) {
 3902                         error = EINVAL;
 3903                         break;
 3904                 }
 3905 
 3906                 /* PMC_F_USERCALLCHAIN is only valid with PMC_F_CALLCHAIN */
 3907                 if ((pa.pm_flags & (PMC_F_CALLCHAIN | PMC_F_USERCALLCHAIN)) ==
 3908                     PMC_F_USERCALLCHAIN) {
 3909                         error = EINVAL;
 3910                         break;
 3911                 }
 3912 
 3913                 /* PMC_F_USERCALLCHAIN is only valid for sampling mode */
 3914                 if (pa.pm_flags & PMC_F_USERCALLCHAIN &&
 3915                         mode != PMC_MODE_TS && mode != PMC_MODE_SS) {
 3916                         error = EINVAL;
 3917                         break;
 3918                 }
 3919 
 3920                 /* process logging options are not allowed for system PMCs */
 3921                 if (PMC_IS_SYSTEM_MODE(mode) && (pa.pm_flags &
 3922                     (PMC_F_LOG_PROCCSW | PMC_F_LOG_PROCEXIT))) {
 3923                         error = EINVAL;
 3924                         break;
 3925                 }
 3926 
 3927                 /*
 3928                  * All sampling mode PMCs need to be able to interrupt the
 3929                  * CPU.
 3930                  */
 3931                 if (PMC_IS_SAMPLING_MODE(mode))
 3932                         caps |= PMC_CAP_INTERRUPT;
 3933 
 3934                 /* A valid class specifier should have been passed in. */
 3935                 pcd = pmc_class_to_classdep(pa.pm_class);
 3936                 if (pcd == NULL) {
 3937                         error = EINVAL;
 3938                         break;
 3939                 }
 3940 
 3941                 /* The requested PMC capabilities should be feasible. */
 3942                 if ((pcd->pcd_caps & caps) != caps) {
 3943                         error = EOPNOTSUPP;
 3944                         break;
 3945                 }
 3946 
 3947                 PMCDBG4(PMC,ALL,2, "event=%d caps=0x%x mode=%d cpu=%d",
 3948                     pa.pm_ev, caps, mode, cpu);
 3949 
 3950                 pmc = pmc_allocate_pmc_descriptor();
 3951                 pmc->pm_id    = PMC_ID_MAKE_ID(cpu,pa.pm_mode,pa.pm_class,
 3952                     PMC_ID_INVALID);
 3953                 pmc->pm_event = pa.pm_ev;
 3954                 pmc->pm_state = PMC_STATE_FREE;
 3955                 pmc->pm_caps  = caps;
 3956                 pmc->pm_flags = pa.pm_flags;
 3957 
 3958                 /* XXX set lower bound on sampling for process counters */
 3959                 if (PMC_IS_SAMPLING_MODE(mode)) {
 3960                         /*
 3961                          * Don't permit requested sample rate to be
 3962                          * less than pmc_mincount.
 3963                          */
 3964                         if (pa.pm_count < MAX(1, pmc_mincount))
 3965                                 log(LOG_WARNING, "pmcallocate: passed sample "
 3966                                     "rate %ju - setting to %u\n",
 3967                                     (uintmax_t)pa.pm_count,
 3968                                     MAX(1, pmc_mincount));
 3969                         pmc->pm_sc.pm_reloadcount = MAX(MAX(1, pmc_mincount),
 3970                             pa.pm_count);
 3971                 } else
 3972                         pmc->pm_sc.pm_initial = pa.pm_count;
 3973 
 3974                 /* switch thread to CPU 'cpu' */
 3975                 pmc_save_cpu_binding(&pb);
 3976 
 3977 #define PMC_IS_SHAREABLE_PMC(cpu, n)                            \
 3978         (pmc_pcpu[(cpu)]->pc_hwpmcs[(n)]->phw_state &           \
 3979          PMC_PHW_FLAG_IS_SHAREABLE)
 3980 #define PMC_IS_UNALLOCATED(cpu, n)                              \
 3981         (pmc_pcpu[(cpu)]->pc_hwpmcs[(n)]->phw_pmc == NULL)
 3982 
 3983                 if (PMC_IS_SYSTEM_MODE(mode)) {
 3984                         pmc_select_cpu(cpu);
 3985                         for (n = pcd->pcd_ri; n < (int) md->pmd_npmc; n++) {
 3986                                 pcd = pmc_ri_to_classdep(md, n, &adjri);
 3987                                 if (pmc_can_allocate_row(n, mode) == 0 &&
 3988                                     pmc_can_allocate_rowindex(
 3989                                             curthread->td_proc, n, cpu) == 0 &&
 3990                                     (PMC_IS_UNALLOCATED(cpu, n) ||
 3991                                      PMC_IS_SHAREABLE_PMC(cpu, n)) &&
 3992                                     pcd->pcd_allocate_pmc(cpu, adjri, pmc,
 3993                                         &pa) == 0)
 3994                                         break;
 3995                         }
 3996                 } else {
 3997                         /* Process virtual mode */
 3998                         for (n = pcd->pcd_ri; n < (int) md->pmd_npmc; n++) {
 3999                                 pcd = pmc_ri_to_classdep(md, n, &adjri);
 4000                                 if (pmc_can_allocate_row(n, mode) == 0 &&
 4001                                     pmc_can_allocate_rowindex(
 4002                                             curthread->td_proc, n,
 4003                                             PMC_CPU_ANY) == 0 &&
 4004                                     pcd->pcd_allocate_pmc(curthread->td_oncpu,
 4005                                         adjri, pmc, &pa) == 0)
 4006                                         break;
 4007                         }
 4008                 }
 4009 
 4010 #undef  PMC_IS_UNALLOCATED
 4011 #undef  PMC_IS_SHAREABLE_PMC
 4012 
 4013                 pmc_restore_cpu_binding(&pb);
 4014 
 4015                 if (n == (int) md->pmd_npmc) {
 4016                         pmc_destroy_pmc_descriptor(pmc);
 4017                         pmc = NULL;
 4018                         error = EINVAL;
 4019                         break;
 4020                 }
 4021 
 4022                 /* Fill in the correct value in the ID field */
 4023                 pmc->pm_id = PMC_ID_MAKE_ID(cpu,mode,pa.pm_class,n);
 4024 
 4025                 PMCDBG5(PMC,ALL,2, "ev=%d class=%d mode=%d n=%d -> pmcid=%x",
 4026                     pmc->pm_event, pa.pm_class, mode, n, pmc->pm_id);
 4027 
 4028                 /* Process mode PMCs with logging enabled need log files */
 4029                 if (pmc->pm_flags & (PMC_F_LOG_PROCEXIT | PMC_F_LOG_PROCCSW))
 4030                         pmc->pm_flags |= PMC_F_NEEDS_LOGFILE;
 4031 
 4032                 /* All system mode sampling PMCs require a log file */
 4033                 if (PMC_IS_SAMPLING_MODE(mode) && PMC_IS_SYSTEM_MODE(mode))
 4034                         pmc->pm_flags |= PMC_F_NEEDS_LOGFILE;
 4035 
 4036                 /*
 4037                  * Configure global PMCs immediately.
 4038                  */
 4039 
 4040                 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pmc))) {
 4041 
 4042                         pmc_save_cpu_binding(&pb);
 4043                         pmc_select_cpu(cpu);
 4044 
 4045                         phw = pmc_pcpu[cpu]->pc_hwpmcs[n];
 4046                         pcd = pmc_ri_to_classdep(md, n, &adjri);
 4047 
 4048                         if ((phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) == 0 ||
 4049                             (error = pcd->pcd_config_pmc(cpu, adjri, pmc)) != 0) {
 4050                                 (void) pcd->pcd_release_pmc(cpu, adjri, pmc);
 4051                                 pmc_destroy_pmc_descriptor(pmc);
 4052                                 pmc = NULL;
 4053                                 pmc_restore_cpu_binding(&pb);
 4054                                 error = EPERM;
 4055                                 break;
 4056                         }
 4057 
 4058                         pmc_restore_cpu_binding(&pb);
 4059                 }
 4060 
 4061                 pmc->pm_state = PMC_STATE_ALLOCATED;
 4062                 pmc->pm_class = pa.pm_class;
 4063 
 4064                 /*
 4065                  * mark row disposition
 4066                  */
 4067 
 4068                 if (PMC_IS_SYSTEM_MODE(mode))
 4069                         PMC_MARK_ROW_STANDALONE(n);
 4070                 else
 4071                         PMC_MARK_ROW_THREAD(n);
 4072 
 4073                 /*
 4074                  * Register this PMC with the current thread as its owner.
 4075                  */
 4076 
 4077                 if ((error =
 4078                     pmc_register_owner(curthread->td_proc, pmc)) != 0) {
 4079                         pmc_release_pmc_descriptor(pmc);
 4080                         pmc_destroy_pmc_descriptor(pmc);
 4081                         pmc = NULL;
 4082                         break;
 4083                 }
 4084 
 4085 
 4086                 /*
 4087                  * Return the allocated index.
 4088                  */
 4089 
 4090                 pa.pm_pmcid = pmc->pm_id;
 4091 
 4092                 error = copyout(&pa, arg, sizeof(pa));
 4093         }
 4094         break;
 4095 
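/*
 * Illustrative sketch of the mode/CPU pairing rules enforced above,
 * expressed through the pmc(3) wrapper (signature abbreviated; see
 * pmc(3)): virtual PMCs must pass PMC_CPU_ANY while system-wide PMCs
 * must name a concrete CPU.
 */
#if 0
        pmc_allocate(spec, PMC_MODE_TC, 0, PMC_CPU_ANY, &id);  /* ok */
        pmc_allocate(spec, PMC_MODE_SC, 0, 2, &id);            /* ok: CPU 2 */
        pmc_allocate(spec, PMC_MODE_TC, 0, 2, &id);            /* EINVAL */
        pmc_allocate(spec, PMC_MODE_SS, 0, PMC_CPU_ANY, &id);  /* EINVAL */
#endif
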
 4096 
 4097         /*
 4098          * Attach a PMC to a process.
 4099          */
 4100 
 4101         case PMC_OP_PMCATTACH:
 4102         {
 4103                 struct pmc *pm;
 4104                 struct proc *p;
 4105                 struct pmc_op_pmcattach a;
 4106 
 4107                 sx_assert(&pmc_sx, SX_XLOCKED);
 4108 
 4109                 if ((error = copyin(arg, &a, sizeof(a))) != 0)
 4110                         break;
 4111 
 4112                 if (a.pm_pid < 0) {
 4113                         error = EINVAL;
 4114                         break;
 4115                 } else if (a.pm_pid == 0)
 4116                         a.pm_pid = td->td_proc->p_pid;
 4117 
 4118                 if ((error = pmc_find_pmc(a.pm_pmc, &pm)) != 0)
 4119                         break;
 4120 
 4121                 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
 4122                         error = EINVAL;
 4123                         break;
 4124                 }
 4125 
 4126                 /* PMCs may be (re)attached only when allocated or stopped */
 4127                 if (pm->pm_state == PMC_STATE_RUNNING) {
 4128                         error = EBUSY;
 4129                         break;
 4130                 } else if (pm->pm_state != PMC_STATE_ALLOCATED &&
 4131                     pm->pm_state != PMC_STATE_STOPPED) {
 4132                         error = EINVAL;
 4133                         break;
 4134                 }
 4135 
 4136                 /* lookup pid */
 4137                 if ((p = pfind(a.pm_pid)) == NULL) {
 4138                         error = ESRCH;
 4139                         break;
 4140                 }
 4141 
 4142                 /*
 4143                  * Ignore processes that are working on exiting.
 4144                  */
 4145                 if (p->p_flag & P_WEXIT) {
 4146                         error = ESRCH;
 4147                         PROC_UNLOCK(p); /* pfind() returns a locked process */
 4148                         break;
 4149                 }
 4150 
 4151                 /*
 4152                  * we are allowed to attach a PMC to a process if
 4153                  * we can debug it.
 4154                  */
 4155                 error = p_candebug(curthread, p);
 4156 
 4157                 PROC_UNLOCK(p);
 4158 
 4159                 if (error == 0)
 4160                         error = pmc_attach_process(p, pm);
 4161         }
 4162         break;
 4163 
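/*
 * Illustrative sketch via pmc(3): attaching a stopped process-mode PMC
 * to another process.  A pm_pid of zero selects the caller itself, and
 * the attach is subject to the p_candebug() check above.
 */
#if 0
        if (pmc_attach(pmcid, target_pid) < 0)
                err(EX_OSERR, "pmc_attach");
        /* ... */
        if (pmc_detach(pmcid, target_pid) < 0)
                err(EX_OSERR, "pmc_detach");
#endif
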
 4164 
 4165         /*
 4166          * Detach an attached PMC from a process.
 4167          */
 4168 
 4169         case PMC_OP_PMCDETACH:
 4170         {
 4171                 struct pmc *pm;
 4172                 struct proc *p;
 4173                 struct pmc_op_pmcattach a;
 4174 
 4175                 if ((error = copyin(arg, &a, sizeof(a))) != 0)
 4176                         break;
 4177 
 4178                 if (a.pm_pid < 0) {
 4179                         error = EINVAL;
 4180                         break;
 4181                 } else if (a.pm_pid == 0)
 4182                         a.pm_pid = td->td_proc->p_pid;
 4183 
 4184                 if ((error = pmc_find_pmc(a.pm_pmc, &pm)) != 0)
 4185                         break;
 4186 
 4187                 if ((p = pfind(a.pm_pid)) == NULL) {
 4188                         error = ESRCH;
 4189                         break;
 4190                 }
 4191 
 4192                 /*
 4193                  * Treat processes that are in the process of exiting
 4194                  * as if they were not present.
 4195                  */
 4196 
 4197                 if (p->p_flag & P_WEXIT)
 4198                         error = ESRCH;
 4199 
 4200                 PROC_UNLOCK(p); /* pfind() returns a locked process */
 4201 
 4202                 if (error == 0)
 4203                         error = pmc_detach_process(p, pm);
 4204         }
 4205         break;
 4206 
 4207 
 4208         /*
 4209          * Retrieve the MSR number associated with the counter
 4210          * 'pmc_id'.  This allows processes to directly use RDPMC
 4211          * instructions to read their PMCs, without the overhead of a
 4212          * system call.
 4213          */
 4214 
 4215         case PMC_OP_PMCGETMSR:
 4216         {
 4217                 int adjri, ri;
 4218                 struct pmc *pm;
 4219                 struct pmc_target *pt;
 4220                 struct pmc_op_getmsr gm;
 4221                 struct pmc_classdep *pcd;
 4222 
 4223                 PMC_DOWNGRADE_SX();
 4224 
 4225                 if ((error = copyin(arg, &gm, sizeof(gm))) != 0)
 4226                         break;
 4227 
 4228                 if ((error = pmc_find_pmc(gm.pm_pmcid, &pm)) != 0)
 4229                         break;
 4230 
 4231                 /*
 4232                  * The allocated PMC has to be a process virtual PMC,
 4233                  * i.e., of type MODE_T[CS].  Global PMCs can only be
 4234                  * read using the PMCREAD operation since they may be
 4235                  * allocated on a different CPU than the one we could
 4236                  * be running on at the time of the RDPMC instruction.
 4237                  *
 4238                  * The GETMSR operation is not allowed for PMCs that
 4239                  * are inherited across processes.
 4240                  */
 4241 
 4242                 if (!PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) ||
 4243                     (pm->pm_flags & PMC_F_DESCENDANTS)) {
 4244                         error = EINVAL;
 4245                         break;
 4246                 }
 4247 
 4248                 /*
 4249                  * It only makes sense to use a RDPMC (or its
 4250                  * equivalent instruction on non-x86 architectures) on
 4251                  * a process that has allocated and attached a PMC to
 4252                  * itself.  Conversely the PMC is only allowed to have
 4253                  * one process attached to it -- its owner.
 4254                  */
 4255 
 4256                 if ((pt = LIST_FIRST(&pm->pm_targets)) == NULL ||
 4257                     LIST_NEXT(pt, pt_next) != NULL ||
 4258                     pt->pt_process->pp_proc != pm->pm_owner->po_owner) {
 4259                         error = EINVAL;
 4260                         break;
 4261                 }
 4262 
 4263                 ri = PMC_TO_ROWINDEX(pm);
 4264                 pcd = pmc_ri_to_classdep(md, ri, &adjri);
 4265 
 4266                 /* PMC class has no 'GETMSR' support */
 4267                 if (pcd->pcd_get_msr == NULL) {
 4268                         error = ENOSYS;
 4269                         break;
 4270                 }
 4271 
 4272                 if ((error = (*pcd->pcd_get_msr)(adjri, &gm.pm_msr)) < 0)
 4273                         break;
 4274 
 4275                 if ((error = copyout(&gm, arg, sizeof(gm))) != 0)
 4276                         break;
 4277 
 4278                 /*
 4279                  * Mark our process as using MSRs.  Update machine
 4280                  * state using a forced context switch.
 4281                  */
 4282 
 4283                 pt->pt_process->pp_flags |= PMC_PP_ENABLE_MSR_ACCESS;
 4284                 pmc_force_context_switch();
 4285 
 4286         }
 4287         break;
 4288 
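/*
 * Illustrative sketch (x86): once the counter's MSR number is known, a
 * self-attached process can read its PMC directly with RDPMC, avoiding
 * the system call path; pmc_get_msr() is the pmc(3) wrapper assumed here.
 */
#if 0
        uint32_t msr, lo, hi;

        if (pmc_get_msr(pmcid, &msr) < 0)
                err(EX_OSERR, "pmc_get_msr");
        __asm __volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (msr));
#endif
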
 4289         /*
 4290          * Release an allocated PMC
 4291          */
 4292 
 4293         case PMC_OP_PMCRELEASE:
 4294         {
 4295                 pmc_id_t pmcid;
 4296                 struct pmc *pm;
 4297                 struct pmc_owner *po;
 4298                 struct pmc_op_simple sp;
 4299 
 4300                 /*
 4301                  * Find PMC pointer for the named PMC.
 4302                  *
 4303                  * Use pmc_release_pmc_descriptor() to switch off the
 4304                  * PMC, remove all its target threads, and remove the
 4305                  * PMC from its owner's list.
 4306                  *
 4307                  * Remove the owner record if this is the last PMC
 4308                  * owned.
 4309                  *
 4310                  * Free up space.
 4311                  */
 4312 
 4313                 if ((error = copyin(arg, &sp, sizeof(sp))) != 0)
 4314                         break;
 4315 
 4316                 pmcid = sp.pm_pmcid;
 4317 
 4318                 if ((error = pmc_find_pmc(pmcid, &pm)) != 0)
 4319                         break;
 4320 
 4321                 po = pm->pm_owner;
 4322                 pmc_release_pmc_descriptor(pm);
 4323                 pmc_maybe_remove_owner(po);
 4324                 pmc_destroy_pmc_descriptor(pm);
 4325         }
 4326         break;
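
              /*
               * Note the ordering above: the owner pointer is saved
               * before pmc_release_pmc_descriptor() unlinks the PMC from
               * its owner, and the descriptor itself is freed only after
               * pmc_maybe_remove_owner() has finished with the owner
               * record.
               */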
 4327 
 4328 
 4329         /*
 4330          * Read and/or write a PMC.
 4331          */
 4332 
 4333         case PMC_OP_PMCRW:
 4334         {
 4335                 int adjri;
 4336                 struct pmc *pm;
 4337                 uint32_t cpu, ri;
 4338                 pmc_value_t oldvalue;
 4339                 struct pmc_binding pb;
 4340                 struct pmc_op_pmcrw prw;
 4341                 struct pmc_classdep *pcd;
 4342                 struct pmc_op_pmcrw *pprw;
 4343 
 4344                 PMC_DOWNGRADE_SX();
 4345 
 4346                 if ((error = copyin(arg, &prw, sizeof(prw))) != 0)
 4347                         break;
 4348 
 4349                 PMCDBG2(PMC,OPS,1, "rw id=%d flags=0x%x", prw.pm_pmcid,
 4350                     prw.pm_flags);
 4351 
 4352                 /* must have at least one flag set */
 4353                 if ((prw.pm_flags & (PMC_F_OLDVALUE|PMC_F_NEWVALUE)) == 0) {
 4354                         error = EINVAL;
 4355                         break;
 4356                 }
 4357 
 4358                 /* locate pmc descriptor */
 4359                 if ((error = pmc_find_pmc(prw.pm_pmcid, &pm)) != 0)
 4360                         break;
 4361 
 4362                 /* The PMC must be allocated, stopped, or running. */
 4363                 if (pm->pm_state != PMC_STATE_ALLOCATED &&
 4364                     pm->pm_state != PMC_STATE_STOPPED &&
 4365                     pm->pm_state != PMC_STATE_RUNNING) {
 4366                         error = EINVAL;
 4367                         break;
 4368                 }
 4369 
 4370                 /* writing a new value is allowed only for 'STOPPED' pmcs */
 4371                 if (pm->pm_state == PMC_STATE_RUNNING &&
 4372                     (prw.pm_flags & PMC_F_NEWVALUE)) {
 4373                         error = EBUSY;
 4374                         break;
 4375                 }
 4376 
 4377                 if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) {
 4378 
 4379                         /*
 4380                          * If this PMC is attached to its owner (i.e.,
 4381                          * the process requesting this operation) and
 4382                          * is running, then attempt to get an
 4383                          * upto-date reading from hardware for a READ.
 4384                          * Writes are only allowed when the PMC is
 4385                          * stopped, so only update the saved value
 4386                          * field.
 4387                          *
 4388                          * If the PMC is not running, or is not
 4389                          * attached to its owner, read/write to the
 4390                          * savedvalue field.
 4391                          */
 4392 
 4393                         ri = PMC_TO_ROWINDEX(pm);
 4394                         pcd = pmc_ri_to_classdep(md, ri, &adjri);
 4395 
 4396                         mtx_pool_lock_spin(pmc_mtxpool, pm);
 4397                         cpu = curthread->td_oncpu;
 4398 
 4399                         if (prw.pm_flags & PMC_F_OLDVALUE) {
 4400                                 if ((pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) &&
 4401                                     (pm->pm_state == PMC_STATE_RUNNING))
 4402                                         error = (*pcd->pcd_read_pmc)(cpu, adjri,
 4403                                             &oldvalue);
 4404                                 else
 4405                                         oldvalue = pm->pm_gv.pm_savedvalue;
 4406                         }
 4407                         if (prw.pm_flags & PMC_F_NEWVALUE)
 4408                                 pm->pm_gv.pm_savedvalue = prw.pm_value;
 4409 
 4410                         mtx_pool_unlock_spin(pmc_mtxpool, pm);
 4411 
 4412                 } else { /* System mode PMCs */
 4413                         cpu = PMC_TO_CPU(pm);
 4414                         ri  = PMC_TO_ROWINDEX(pm);
 4415                         pcd = pmc_ri_to_classdep(md, ri, &adjri);
 4416 
 4417                         if (!pmc_cpu_is_active(cpu)) {
 4418                                 error = ENXIO;
 4419                                 break;
 4420                         }
 4421 
 4422                         /* move this thread to CPU 'cpu' */
 4423                         pmc_save_cpu_binding(&pb);
 4424                         pmc_select_cpu(cpu);
 4425 
 4426                         critical_enter();
 4427                         /* save old value */
 4428                         if (prw.pm_flags & PMC_F_OLDVALUE)
 4429                                 if ((error = (*pcd->pcd_read_pmc)(cpu, adjri,
 4430                                          &oldvalue)))
 4431                                         goto error;
 4432                         /* write out new value */
 4433                         if (prw.pm_flags & PMC_F_NEWVALUE)
 4434                                 error = (*pcd->pcd_write_pmc)(cpu, adjri,
 4435                                     prw.pm_value);
 4436                 error:
 4437                         critical_exit();
 4438                         pmc_restore_cpu_binding(&pb);
 4439                         if (error)
 4440                                 break;
 4441                 }
 4442 
 4443                 pprw = (struct pmc_op_pmcrw *) arg;
 4444 
 4445 #ifdef  HWPMC_DEBUG
 4446                 if (prw.pm_flags & PMC_F_NEWVALUE)
 4447                         PMCDBG3(PMC,OPS,2, "rw id=%d new %jx -> old %jx",
 4448                             ri, prw.pm_value, oldvalue);
 4449                 else if (prw.pm_flags & PMC_F_OLDVALUE)
 4450                         PMCDBG2(PMC,OPS,2, "rw id=%d -> old %jx", ri, oldvalue);
 4451 #endif
 4452 
 4453                 /* return old value if requested */
 4454                 if (prw.pm_flags & PMC_F_OLDVALUE)
 4455                         if ((error = copyout(&oldvalue, &pprw->pm_value,
 4456                                  sizeof(prw.pm_value))))
 4457                                 break;
 4458 
 4459         }
 4460         break;
 4461 
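              /*
               * An informal sketch of how the flags are used by the
               * pmc(3) wrappers (an assumption about the library, not
               * something this file dictates): pmc_read() passes
               * PMC_F_OLDVALUE, pmc_write() passes PMC_F_NEWVALUE, and
               * pmc_rw() passes both, fetching the old count while
               * installing a new one:
               *
               *      pmc_value_t old;
               *
               *      pmc_stop(id);           // writes need a stopped PMC
               *      pmc_rw(id, 0, &old);    // read old value, reset to 0
               *      pmc_start(id);
               */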
 4462 
 4463         /*
 4464          * Set the sampling rate for a sampling mode PMC and the
 4465          * initial count for a counting mode PMC.
 4466          */
 4467 
 4468         case PMC_OP_PMCSETCOUNT:
 4469         {
 4470                 struct pmc *pm;
 4471                 struct pmc_op_pmcsetcount sc;
 4472 
 4473                 PMC_DOWNGRADE_SX();
 4474 
 4475                 if ((error = copyin(arg, &sc, sizeof(sc))) != 0)
 4476                         break;
 4477 
 4478                 if ((error = pmc_find_pmc(sc.pm_pmcid, &pm)) != 0)
 4479                         break;
 4480 
 4481                 if (pm->pm_state == PMC_STATE_RUNNING) {
 4482                         error = EBUSY;
 4483                         break;
 4484                 }
 4485 
 4486                 if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) {
 4487                         /*
 4488                          * Don't permit requested sample rate to be
 4489                          * less than pmc_mincount.
 4490                          */
 4491                         if (sc.pm_count < MAX(1, pmc_mincount))
 4492                                 log(LOG_WARNING, "pmcsetcount: requested sample "
 4493                                     "rate %ju below minimum - setting to %u\n",
 4494                                     (uintmax_t)sc.pm_count,
 4495                                     MAX(1, pmc_mincount));
 4496                         pm->pm_sc.pm_reloadcount = MAX(MAX(1, pmc_mincount),
 4497                             sc.pm_count);
 4498                 } else
 4499                         pm->pm_sc.pm_initial = sc.pm_count;
 4500         }
 4501         break;
 4502 
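              /*
               * A small worked example (hypothetical numbers): for a
               * sampling PMC, pmc_set(id, 65536) requests an interrupt
               * every 65536 events, subject to the pmc_mincount floor
               * enforced above; for a counting PMC the same call merely
               * preloads the counter with 65536 before it is started.
               */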
 4503 
 4504         /*
 4505          * Start a PMC.
 4506          */
 4507 
 4508         case PMC_OP_PMCSTART:
 4509         {
 4510                 pmc_id_t pmcid;
 4511                 struct pmc *pm;
 4512                 struct pmc_op_simple sp;
 4513 
 4514                 sx_assert(&pmc_sx, SX_XLOCKED);
 4515 
 4516                 if ((error = copyin(arg, &sp, sizeof(sp))) != 0)
 4517                         break;
 4518 
 4519                 pmcid = sp.pm_pmcid;
 4520 
 4521                 if ((error = pmc_find_pmc(pmcid, &pm)) != 0)
 4522                         break;
 4523 
 4524                 KASSERT(pmcid == pm->pm_id,
 4525                     ("[pmc,%d] pmc id %x != pmcid %x", __LINE__,
 4526                         pm->pm_id, pmcid));
 4527 
 4528                 if (pm->pm_state == PMC_STATE_RUNNING) /* already running */
 4529                         break;
 4530                 else if (pm->pm_state != PMC_STATE_STOPPED &&
 4531                     pm->pm_state != PMC_STATE_ALLOCATED) {
 4532                         error = EINVAL;
 4533                         break;
 4534                 }
 4535 
 4536                 error = pmc_start(pm);
 4537         }
 4538         break;
 4539 
 4540 
 4541         /*
 4542          * Stop a PMC.
 4543          */
 4544 
 4545         case PMC_OP_PMCSTOP:
 4546         {
 4547                 pmc_id_t pmcid;
 4548                 struct pmc *pm;
 4549                 struct pmc_op_simple sp;
 4550 
 4551                 PMC_DOWNGRADE_SX();
 4552 
 4553                 if ((error = copyin(arg, &sp, sizeof(sp))) != 0)
 4554                         break;
 4555 
 4556                 pmcid = sp.pm_pmcid;
 4557 
 4558                 /*
 4559                  * Mark the PMC as inactive and invoke the MD stop
 4560                  * routines if needed.
 4561                  */
 4562 
 4563                 if ((error = pmc_find_pmc(pmcid, &pm)) != 0)
 4564                         break;
 4565 
 4566                 KASSERT(pmcid == pm->pm_id,
 4567                     ("[pmc,%d] pmc id %x != pmcid %x", __LINE__,
 4568                         pm->pm_id, pmcid));
 4569 
 4570                 if (pm->pm_state == PMC_STATE_STOPPED) /* already stopped */
 4571                         break;
 4572                 else if (pm->pm_state != PMC_STATE_RUNNING) {
 4573                         error = EINVAL;
 4574                         break;
 4575                 }
 4576 
 4577                 error = pmc_stop(pm);
 4578         }
 4579         break;
 4580 
 4581 
 4582         /*
 4583          * Write a user supplied value to the log file.
 4584          */
 4585 
 4586         case PMC_OP_WRITELOG:
 4587         {
 4588                 struct pmc_op_writelog wl;
 4589                 struct pmc_owner *po;
 4590 
 4591                 PMC_DOWNGRADE_SX();
 4592 
 4593                 if ((error = copyin(arg, &wl, sizeof(wl))) != 0)
 4594                         break;
 4595 
 4596                 if ((po = pmc_find_owner_descriptor(td->td_proc)) == NULL) {
 4597                         error = EINVAL;
 4598                         break;
 4599                 }
 4600 
 4601                 if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) {
 4602                         error = EINVAL;
 4603                         break;
 4604                 }
 4605 
 4606                 error = pmclog_process_userlog(po, &wl);
 4607         }
 4608         break;
 4609 
 4610 
 4611         default:
 4612                 error = EINVAL;
 4613                 break;
 4614         }
 4615 
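              /*
               * Release whichever flavor of the pmc_sx lock is still
               * held: operations that called PMC_DOWNGRADE_SX() above
               * traded the exclusive lock for a shared one and recorded
               * that fact in is_sx_downgraded.
               */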
 4616         if (is_sx_downgraded)
 4617                 sx_sunlock(&pmc_sx);
 4618         else
 4619                 sx_xunlock(&pmc_sx);
 4620 done_syscall:
 4621         if (error)
 4622                 counter_u64_add(pmc_stats.pm_syscall_errors, 1);
 4623 
 4624         return (error);
 4625 }
 4626 
 4627 /*
 4628  * Helper functions
 4629  */
 4630 
 4631 
 4632 /*
 4633  * Mark the thread as needing callchain capture and post an AST.  The
 4634  * actual callchain capture will be done in a context where it is safe
 4635  * to take page faults.
 4636  */
 4637 
 4638 static void
 4639 pmc_post_callchain_callback(void)
 4640 {
 4641         struct thread *td;
 4642 
 4643         td = curthread;
 4644 
 4645         /*
 4646          * If there are multiple PMCs for the same interrupt, ignore the new post.
 4647          */
 4648         if (td->td_pflags & TDP_CALLCHAIN)
 4649                 return;
 4650 
 4651         /*
 4652          * Mark this thread as needing callchain capture.
 4653          * `td->td_pflags' will be safe to touch because this thread
 4654          * was in user space when it was interrupted.
 4655          */
 4656         td->td_pflags |= TDP_CALLCHAIN;
 4657 
 4658         /*
 4659          * Don't let this thread migrate between CPUs until callchain
 4660          * capture completes.
 4661          */
 4662         sched_pin();
 4663 
 4664         return;
 4665 }
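
      /*
       * The deferred capture runs when the thread next returns to user
       * mode: ast() notices TDP_CALLCHAIN and the hook ends up in
       * pmc_capture_user_callchain() below; the sched_pin() taken here
       * is undone once that capture is complete.
       */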
 4666 
 4667 /*
 4668  * Find a free slot in the per-cpu array of samples and capture the
 4669  * current callchain there.  If a sample was successfully added, a bit
 4670  * is set in mask 'pmc_cpumask' denoting that the DO_SAMPLES hook
 4671  * needs to be invoked from the clock handler.
 4672  *
 4673  * This function is meant to be called from an NMI handler.  It cannot
 4674  * use any of the locking primitives supplied by the OS.
 4675  */
 4676 
 4677 static int
 4678 pmc_add_sample(ring_type_t ring, struct pmc *pm, struct trapframe *tf)
 4679 {
 4680         int error, cpu, callchaindepth, inuserspace;
 4681         struct thread *td;
 4682         struct pmc_sample *ps;
 4683         struct pmc_samplebuffer *psb;
 4684 
 4685         error = 0;
 4686 
 4687         /*
 4688          * Allocate space for a sample buffer.
 4689          */
 4690         cpu = curcpu;
 4691         psb = pmc_pcpu[cpu]->pc_sb[ring];
 4692         inuserspace = TRAPF_USERMODE(tf);
 4693         ps = PMC_PROD_SAMPLE(psb);
 4694         if (psb->ps_considx != psb->ps_prodidx &&
 4695             ps->ps_nsamples) {          /* in use, reader hasn't caught up */
 4696                 pm->pm_pcpu_state[cpu].pps_stalled = 1;
 4697                 counter_u64_add(pmc_stats.pm_intr_bufferfull, 1);
 4698                 PMCDBG6(SAM,INT,1,"(spc) cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d",
 4699                     cpu, pm, (void *) tf, inuserspace,
 4700                     (int) (psb->ps_prodidx & pmc_sample_mask),
 4701                     (int) (psb->ps_considx & pmc_sample_mask));
 4702                 callchaindepth = 1;
 4703                 error = ENOMEM;
 4704                 goto done;
 4705         }
 4706 
 4707         /* Fill in entry. */
 4708         PMCDBG6(SAM,INT,1,"cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d", cpu, pm,
 4709             (void *) tf, inuserspace,
 4710             (int) (psb->ps_prodidx & pmc_sample_mask),
 4711             (int) (psb->ps_considx & pmc_sample_mask));
 4712 
 4713         td = curthread;
 4714         ps->ps_pmc = pm;
 4715         ps->ps_td = td;
 4716         ps->ps_pid = td->td_proc->p_pid;
 4717         ps->ps_tid = td->td_tid;
 4718         ps->ps_tsc = pmc_rdtsc();
 4719         ps->ps_ticks = ticks;
 4720         ps->ps_cpu = cpu;
 4721         ps->ps_flags = inuserspace ? PMC_CC_F_USERSPACE : 0;
 4722 
 4723         callchaindepth = (pm->pm_flags & PMC_F_CALLCHAIN) ?
 4724             pmc_callchaindepth : 1;
 4725 
 4726         MPASS(ps->ps_pc != NULL);
 4727         if (callchaindepth == 1)
 4728                 ps->ps_pc[0] = PMC_TRAPFRAME_TO_PC(tf);
 4729         else {
 4730                 /*
 4731                  * Kernel stack traversals can be done immediately,
 4732                  * while we defer to an AST for user space traversals.
 4733                  */
 4734                 if (!inuserspace) {
 4735                         callchaindepth =
 4736                             pmc_save_kernel_callchain(ps->ps_pc,
 4737                                 callchaindepth, tf);
 4738                 } else {
 4739                         pmc_post_callchain_callback();
 4740                         callchaindepth = PMC_USER_CALLCHAIN_PENDING;
 4741                 }
 4742         }
 4743 
 4744                 /* Mark the entry as in use. */
 4745                 if (ring == PMC_UR) {
 4746                         ps->ps_nsamples_actual = callchaindepth;
 4747                         ps->ps_nsamples = PMC_USER_CALLCHAIN_PENDING;
 4748                 } else
 4749                         ps->ps_nsamples = callchaindepth;
 4750 
 4751         KASSERT((int64_t) counter_u64_fetch(pm->pm_runcount) >= 0,
 4752             ("[pmc,%d] pm=%p runcount %ld", __LINE__, (void *) pm,
 4753                  (unsigned long)counter_u64_fetch(pm->pm_runcount)));
 4754 
 4755         counter_u64_add(pm->pm_runcount, 1);    /* hold onto PMC */
 4756         /* increment write pointer */
 4757         psb->ps_prodidx++;
 4758  done:
 4759         /* mark CPU as needing processing */
 4760         if (callchaindepth != PMC_USER_CALLCHAIN_PENDING)
 4761                 DPCPU_SET(pmc_sampled, 1);
 4762 
 4763         return (error);
 4764 }
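
      /*
       * A note on the ring discipline (informal): ps_prodidx is advanced
       * only by the interrupt-context producer above and ps_considx only
       * by the sweep in pmc_process_samples(), so each per-CPU buffer
       * behaves as a single-producer, single-consumer ring; the free-
       * running indices are reduced modulo the buffer size with
       * pmc_sample_mask.
       */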
 4765 
 4766 /*
 4767  * Interrupt processing.
 4768  *
 4769  * This function is meant to be called from an NMI handler.  It cannot
 4770  * use any of the locking primitives supplied by the OS.
 4771  */
 4772 
 4773 int
 4774 pmc_process_interrupt(int ring, struct pmc *pm, struct trapframe *tf)
 4775 {
 4776         struct thread *td;
 4777 
 4778         td = curthread;
 4779         if ((pm->pm_flags & PMC_F_USERCALLCHAIN) &&
 4780             (td->td_proc->p_flag & P_KPROC) == 0 &&
 4781             !TRAPF_USERMODE(tf)) {
 4782                 atomic_add_int(&td->td_pmcpend, 1);
 4783                 return (pmc_add_sample(PMC_UR, pm, tf));
 4784         }
 4785         return (pmc_add_sample(ring, pm, tf));
 4786 }
 4787 
 4788 /*
 4789  * Capture a user call chain.  This function will be called from ast()
 4790  * before control returns to userland and before the process gets
 4791  * rescheduled.
 4792  */
 4793 
 4794 static void
 4795 pmc_capture_user_callchain(int cpu, int ring, struct trapframe *tf)
 4796 {
 4797         struct pmc *pm;
 4798         struct thread *td;
 4799         struct pmc_sample *ps;
 4800         struct pmc_samplebuffer *psb;
 4801         uint64_t considx, prodidx;
 4802         int nsamples, nrecords, pass, iter;
 4803 #ifdef  INVARIANTS
 4804         int start_ticks = ticks;
 4805 #endif
 4806         psb = pmc_pcpu[cpu]->pc_sb[ring];
 4807         td = curthread;
 4808 
 4809         KASSERT(td->td_pflags & TDP_CALLCHAIN,
 4810             ("[pmc,%d] Retrieving callchain for thread that doesn't want it",
 4811                 __LINE__));
 4812 
 4813         nrecords = INT_MAX;
 4814         pass = 0;
 4815  restart:
 4816         if (ring == PMC_UR)
 4817                 nrecords = atomic_readandclear_32(&td->td_pmcpend);
 4818 
 4819         /*
 4820          * Iterate through all deferred callchain requests: walk from
 4821          * the current read pointer to the current write pointer.
 4822          */
 4823         for (iter = 0, considx = psb->ps_considx, prodidx = psb->ps_prodidx;
 4824             considx < prodidx && iter < pmc_nsamples; considx++, iter++) {
 4825                 ps = PMC_CONS_SAMPLE_OFF(psb, considx);
 4826 
 4829 #ifdef  INVARIANTS
 4830                 if (ps->ps_nsamples == PMC_SAMPLE_FREE) {
 4831                         continue;
 4832                 }
 4833 #endif
 4834                 if (ps->ps_td != td ||
 4835                    ps->ps_nsamples != PMC_USER_CALLCHAIN_PENDING ||
 4836                    ps->ps_pmc->pm_state != PMC_STATE_RUNNING)
 4837                         continue;
 4838 
 4839                 KASSERT(ps->ps_cpu == cpu,
 4840                     ("[pmc,%d] cpu mismatch ps_cpu=%d pcpu=%d", __LINE__,
 4841                         ps->ps_cpu, PCPU_GET(cpuid)));
 4842 
 4843                 pm = ps->ps_pmc;
 4844 
 4845                 KASSERT(pm->pm_flags & PMC_F_CALLCHAIN,
 4846                     ("[pmc,%d] Retrieving callchain for PMC that doesn't "
 4847                         "want it", __LINE__));
 4848 
 4849                 KASSERT(counter_u64_fetch(pm->pm_runcount) > 0,
 4850                     ("[pmc,%d] runcount %ld", __LINE__, (unsigned long)counter_u64_fetch(pm->pm_runcount)));
 4851 
 4852                 if (ring == PMC_UR) {
 4853                         nsamples = ps->ps_nsamples_actual;
 4854                         counter_u64_add(pmc_stats.pm_merges, 1);
 4855                 } else
 4856                         nsamples = 0;
 4857 
 4858                 /*
 4859                  * Retrieve the callchain and mark the sample buffer
 4860                  * as 'processable' by the timer tick sweep code.
 4861                  */
 4862 
 4863                 if (__predict_true(nsamples < pmc_callchaindepth - 1))
 4864                         nsamples += pmc_save_user_callchain(ps->ps_pc + nsamples,
 4865                             pmc_callchaindepth - nsamples - 1, tf);
 4866 
 4867                 /*
 4868                  * We have to prevent hardclock from potentially overwriting
 4869                  * this sample between when we read the value and when we
 4870                  * set it.
 4871                  */
 4872                 spinlock_enter();
 4873                 /*
 4874                  * Verify that the sample hasn't been dropped in the meantime.
 4875                  */
 4876                 if (ps->ps_nsamples == PMC_USER_CALLCHAIN_PENDING) {
 4877                         ps->ps_nsamples = nsamples;
 4878                         /*
 4879                          * If we couldn't get a sample, simply drop the reference.
 4880                          */
 4881                         if (nsamples == 0)
 4882                                 counter_u64_add(pm->pm_runcount, -1);
 4883                 }
 4884                 spinlock_exit();
 4885                 if (nrecords-- == 1)
 4886                         break;
 4887         }
 4888         if (__predict_false(ring == PMC_UR && td->td_pmcpend)) {
 4889                 if (pass == 0) {
 4890                         pass = 1;
 4891                         goto restart;
 4892                 }
 4893                 /* only collect samples for this part once */
 4894                 td->td_pmcpend = 0;
 4895         }
 4896 
 4897 #ifdef INVARIANTS
 4898         if ((ticks - start_ticks) > hz)
 4899                 log(LOG_ERR, "%s took %d ticks\n", __func__, (ticks - start_ticks));
 4900 #endif
 4901 
 4902         /* mark CPU as needing processing */
 4903         DPCPU_SET(pmc_sampled, 1);
 4904 }
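
      /*
       * An informal note on the retry above: for the user ring,
       * td_pmcpend counts interrupts that asked for a user callchain.
       * If more arrived while the first pass ran, one extra pass is
       * made; anything still pending after that is dropped rather than
       * looping indefinitely in ast().
       */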
 4905 
 4906 /*
 4907  * Process saved PC samples.
 4908  */
 4909 
 4910 static void
 4911 pmc_process_samples(int cpu, ring_type_t ring)
 4912 {
 4913         struct pmc *pm;
 4914         int adjri, n;
 4915         struct thread *td;
 4916         struct pmc_owner *po;
 4917         struct pmc_sample *ps;
 4918         struct pmc_classdep *pcd;
 4919         struct pmc_samplebuffer *psb;
 4920         uint64_t delta __diagused;
 4921 
 4922         KASSERT(PCPU_GET(cpuid) == cpu,
 4923             ("[pmc,%d] not on the correct CPU pcpu=%d cpu=%d", __LINE__,
 4924                 PCPU_GET(cpuid), cpu));
 4925 
 4926         psb = pmc_pcpu[cpu]->pc_sb[ring];
 4927         delta = psb->ps_prodidx - psb->ps_considx;
 4928         MPASS(delta <= pmc_nsamples);
 4929         MPASS(psb->ps_considx <= psb->ps_prodidx);
 4930         for (n = 0; psb->ps_considx < psb->ps_prodidx; psb->ps_considx++, n++) {
 4931                 ps = PMC_CONS_SAMPLE(psb);
 4932 
 4933                 if (__predict_false(ps->ps_nsamples == PMC_SAMPLE_FREE))
 4934                         continue;
 4935                 pm = ps->ps_pmc;
 4936                 /* skip non-running samples */
 4937                 if (pm->pm_state != PMC_STATE_RUNNING)
 4938                         goto entrydone;
 4939 
 4940                 KASSERT(counter_u64_fetch(pm->pm_runcount) > 0,
 4941                     ("[pmc,%d] pm=%p runcount %ld", __LINE__, (void *) pm,
 4942                          (unsigned long)counter_u64_fetch(pm->pm_runcount)));
 4943 
 4944                 po = pm->pm_owner;
 4945 
 4946                 KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)),
 4947                     ("[pmc,%d] pmc=%p non-sampling mode=%d", __LINE__,
 4948                         pm, PMC_TO_MODE(pm)));
 4949 
 4950 
 4951                 /* If there is a pending AST, wait for completion. */
 4952                 if (ps->ps_nsamples == PMC_USER_CALLCHAIN_PENDING) {
 4953                         /*
 4954                          * If we've been waiting more than 1 tick to collect
 4955                          * a callchain for this record then drop it and move on.
 4956                          */
 4957                         if (ticks - ps->ps_ticks > 1) {
 4958                                 /*
 4959                                  * Track how often we hit this, as it will
 4960                                  * preferentially lose user samples
 4961                                  * for long-running system calls.
 4962                                  */
 4963                                 counter_u64_add(pmc_stats.pm_overwrites, 1);
 4964                                 goto entrydone;
 4965                         }
 4966                         /* Need a rescan at a later time. */
 4967                         DPCPU_SET(pmc_sampled, 1);
 4968                         break;
 4969                 }
 4970 
 4971                 PMCDBG6(SAM,OPS,1,"cpu=%d pm=%p n=%d fl=%x wr=%d rd=%d", cpu,
 4972                     pm, ps->ps_nsamples, ps->ps_flags,
 4973                     (int) (psb->ps_prodidx & pmc_sample_mask),
 4974                     (int) (psb->ps_considx & pmc_sample_mask));
 4975 
 4976                 /*
 4977                  * If this is a process-mode PMC that is attached to
 4978                  * its owner, and if the PC is in user mode, update
 4979                  * profiling statistics like timer-based profiling
 4980                  * would have done.
 4981                  *
 4982                  * Otherwise, this is either a sampling-mode PMC that
 4983                  * is attached to a different process than its owner,
 4984                  * or a system-wide sampling PMC. Dispatch a log
 4985                  * entry to the PMC's owner process.
 4986                  */
 4987                 if (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) {
 4988                         if (ps->ps_flags & PMC_CC_F_USERSPACE) {
 4989                                 td = FIRST_THREAD_IN_PROC(po->po_owner);
 4990                                 addupc_intr(td, ps->ps_pc[0], 1);
 4991                         }
 4992                 } else
 4993                         pmclog_process_callchain(pm, ps);
 4994 
 4995         entrydone:
 4996                 ps->ps_nsamples = 0; /* mark entry as free */
 4997                 KASSERT(counter_u64_fetch(pm->pm_runcount) > 0,
 4998                                 ("[pmc,%d] pm=%p runcount %ld", __LINE__, (void *) pm,
 4999                                  (unsigned long)counter_u64_fetch(pm->pm_runcount)));
 5000 
 5001                 counter_u64_add(pm->pm_runcount, -1);
 5002         }
 5003 
 5004         counter_u64_add(pmc_stats.pm_log_sweeps, 1);
 5005 
 5006         /* Do not re-enable stalled PMCs if we failed to process any samples */
 5007         if (n == 0)
 5008                 return;
 5009 
 5010         /*
 5011          * Restart any stalled sampling PMCs on this CPU.
 5012          *
 5013          * If the NMI handler sets the pm_stalled field of a PMC after
 5014          * the check below, we'll end up processing the stalled PMC at
 5015          * the next hardclock tick.
 5016          */
 5017         for (n = 0; n < md->pmd_npmc; n++) {
 5018                 pcd = pmc_ri_to_classdep(md, n, &adjri);
 5019                 KASSERT(pcd != NULL,
 5020                     ("[pmc,%d] null pcd ri=%d", __LINE__, n));
 5021                 (void) (*pcd->pcd_get_config)(cpu, adjri, &pm);
 5022 
 5023                 if (pm == NULL ||                        /* !cfg'ed */
 5024                     pm->pm_state != PMC_STATE_RUNNING || /* !active */
 5025                     !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) || /* !sampling */
 5026                     !pm->pm_pcpu_state[cpu].pps_cpustate || /* !desired */
 5027                     !pm->pm_pcpu_state[cpu].pps_stalled) /* !stalled */
 5028                         continue;
 5029 
 5030                 pm->pm_pcpu_state[cpu].pps_stalled = 0;
 5031                 (*pcd->pcd_start_pmc)(cpu, adjri);
 5032         }
 5033 }
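
      /*
       * A note on the restart handshake (informal): the interrupt
       * handler sets pps_stalled when it finds the ring full, and the
       * sweep above only restarts a PMC whose pps_cpustate still says it
       * should be running on this CPU, so a PMC that is being torn down
       * is never re-enabled here.
       */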
 5034 
 5035 /*
 5036  * Event handlers.
 5037  */
 5038 
 5039 /*
 5040  * Handle a process exit.
 5041  *
 5042  * Remove this process from all hash tables.  If this process
 5043  * owned any PMCs, turn off those PMCs and deallocate them,
 5044  * removing any associations with target processes.
 5045  *
 5046  * This function will be called by the last 'thread' of a
 5047  * process.
 5048  *
 5049  * XXX This eventhandler gets called early in the exit process.
 5050  * Consider using a 'hook' invocation from thread_exit() or equivalent
 5051  * spot.  Another negative is that kse_exit doesn't seem to call
 5052  * exit1() [??].
 5053  *
 5054  */
 5055 
 5056 static void
 5057 pmc_process_exit(void *arg __unused, struct proc *p)
 5058 {
 5059         struct pmc *pm;
 5060         int adjri, cpu;
 5061         unsigned int ri;
 5062         int is_using_hwpmcs;
 5063         struct pmc_owner *po;
 5064         struct pmc_process *pp;
 5065         struct pmc_classdep *pcd;
 5066         pmc_value_t newvalue, tmp;
 5067 
 5068         PROC_LOCK(p);
 5069         is_using_hwpmcs = p->p_flag & P_HWPMC;
 5070         PROC_UNLOCK(p);
 5071 
 5072         /*
 5073          * Log a sysexit event to all SS PMC owners.
 5074          */
 5075         PMC_EPOCH_ENTER();
 5076         CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
 5077             if (po->po_flags & PMC_PO_OWNS_LOGFILE)
 5078                     pmclog_process_sysexit(po, p->p_pid);
 5079         PMC_EPOCH_EXIT();
 5080 
 5081         if (!is_using_hwpmcs)
 5082                 return;
 5083 
 5084         PMC_GET_SX_XLOCK();
 5085         PMCDBG3(PRC,EXT,1,"process-exit proc=%p (%d, %s)", p, p->p_pid,
 5086             p->p_comm);
 5087 
 5088         /*
 5089          * Since this code is invoked by the last thread in an exiting
 5090          * process, we would have context switched IN at some prior
 5091          * point.  However, with PREEMPTION, kernel mode context
 5092          * switches may happen any time, so we want to disable a
 5093          * context switch OUT till we get any PMCs targeting this
 5094          * process off the hardware.
 5095          *
 5096          * We also need to atomically remove this process'
 5097          * entry from our target process hash table, using
 5098          * PMC_FLAG_REMOVE.
 5099          */
 5102 
 5103         critical_enter(); /* no preemption */
 5104 
 5105         cpu = curthread->td_oncpu;
 5106 
 5107         if ((pp = pmc_find_process_descriptor(p,
 5108                  PMC_FLAG_REMOVE)) != NULL) {
 5109 
 5110                 PMCDBG2(PRC,EXT,2,
 5111                     "process-exit proc=%p pmc-process=%p", p, pp);
 5112 
 5113                 /*
 5114                  * The exiting process could be the target of
 5115                  * some PMCs which will be running on the
 5116                  * currently executing CPU.
 5117                  *
 5118                  * We need to turn these PMCs off like we
 5119                  * would do at context switch OUT time.
 5120                  */
 5121                 for (ri = 0; ri < md->pmd_npmc; ri++) {
 5122 
 5123                         /*
 5124                          * Pick up the pmc pointer from hardware
 5125                          * state similar to the CSW_OUT code.
 5126                          */
 5127                         pm = NULL;
 5128 
 5129                         pcd = pmc_ri_to_classdep(md, ri, &adjri);
 5130 
 5131                         (void) (*pcd->pcd_get_config)(cpu, adjri, &pm);
 5132 
 5133                         PMCDBG2(PRC,EXT,2, "ri=%d pm=%p", ri, pm);
 5134 
 5135                         if (pm == NULL ||
 5136                             !PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)))
 5137                                 continue;
 5138 
 5139                         PMCDBG4(PRC,EXT,2, "ppmcs[%d]=%p pm=%p "
 5140                             "state=%d", ri, pp->pp_pmcs[ri].pp_pmc,
 5141                             pm, pm->pm_state);
 5142 
 5143                         KASSERT(PMC_TO_ROWINDEX(pm) == ri,
 5144                             ("[pmc,%d] ri mismatch pmc(%d) ri(%d)",
 5145                                 __LINE__, PMC_TO_ROWINDEX(pm), ri));
 5146 
 5147                         KASSERT(pm == pp->pp_pmcs[ri].pp_pmc,
 5148                             ("[pmc,%d] pm %p != pp_pmcs[%d] %p",
 5149                                 __LINE__, pm, ri, pp->pp_pmcs[ri].pp_pmc));
 5150 
 5151                         KASSERT(counter_u64_fetch(pm->pm_runcount) > 0,
 5152                             ("[pmc,%d] bad runcount ri %d rc %ld",
 5153                                  __LINE__, ri, (unsigned long)counter_u64_fetch(pm->pm_runcount)));
 5154 
 5155                         /*
 5156                          * Change desired state, and then stop if not
 5157                          * stalled. This two-step dance should avoid
 5158                          * race conditions where an interrupt re-enables
 5159                          * the PMC after this code has already checked
 5160                          * the pm_stalled flag.
 5161                          */
 5162                         if (pm->pm_pcpu_state[cpu].pps_cpustate) {
 5163                                 pm->pm_pcpu_state[cpu].pps_cpustate = 0;
 5164                                 if (!pm->pm_pcpu_state[cpu].pps_stalled) {
 5165                                         (void) pcd->pcd_stop_pmc(cpu, adjri);
 5166 
 5167                                         if (PMC_TO_MODE(pm) == PMC_MODE_TC) {
 5168                                                 pcd->pcd_read_pmc(cpu, adjri,
 5169                                                     &newvalue);
 5170                                                 tmp = newvalue -
 5171                                                     PMC_PCPU_SAVED(cpu,ri);
 5172 
 5173                                                 mtx_pool_lock_spin(pmc_mtxpool,
 5174                                                     pm);
 5175                                                 pm->pm_gv.pm_savedvalue += tmp;
 5176                                                 pp->pp_pmcs[ri].pp_pmcval +=
 5177                                                     tmp;
 5178                                                 mtx_pool_unlock_spin(
 5179                                                     pmc_mtxpool, pm);
 5180                                         }
 5181                                 }
 5182                         }
 5183 
 5184                         KASSERT((int64_t) counter_u64_fetch(pm->pm_runcount) > 0,
 5185                             ("[pmc,%d] ri %d: non-positive runcount", __LINE__, ri));
 5186 
 5187                         counter_u64_add(pm->pm_runcount, -1);
 5188 
 5189                         (void) pcd->pcd_config_pmc(cpu, adjri, NULL);
 5190                 }
 5191 
 5192                 /*
 5193                  * Inform the MD layer of this pseudo "context switch
 5194                  * out".
 5195                  */
 5196                 (void) md->pmd_switch_out(pmc_pcpu[cpu], pp);
 5197 
 5198                 critical_exit(); /* ok to be pre-empted now */
 5199 
 5200                 /*
 5201                  * Unlink this process from the PMCs that are
 5202                  * targeting it.  This will send a signal to
 5203                  * all PMC owners whose PMCs are orphaned.
 5204                  *
 5205                  * Log PMC value at exit time if requested.
 5206                  */
 5207                 for (ri = 0; ri < md->pmd_npmc; ri++)
 5208                         if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL) {
 5209                                 if (pm->pm_flags & PMC_F_NEEDS_LOGFILE &&
 5210                                     PMC_IS_COUNTING_MODE(PMC_TO_MODE(pm)))
 5211                                         pmclog_process_procexit(pm, pp);
 5212                                 pmc_unlink_target_process(pm, pp);
 5213                         }
 5214                 free(pp, M_PMC);
 5215 
 5216         } else
 5217                 critical_exit(); /* pp == NULL */
 5218 
 5219 
 5220         /*
 5221          * If the process owned PMCs, free them up and free up
 5222          * memory.
 5223          */
 5224         if ((po = pmc_find_owner_descriptor(p)) != NULL) {
 5225                 pmc_remove_owner(po);
 5226                 pmc_destroy_owner_descriptor(po);
 5227         }
 5228 
 5229         sx_xunlock(&pmc_sx);
 5230 }
 5231 
 5232 /*
 5233  * Handle a process fork.
 5234  *
 5235  * If the parent process 'p1' is under HWPMC monitoring, then copy
 5236  * over any attached PMCs that have 'do_descendants' semantics.
 5237  */
 5238 
 5239 static void
 5240 pmc_process_fork(void *arg __unused, struct proc *p1, struct proc *newproc,
 5241     int flags)
 5242 {
 5243         int is_using_hwpmcs;
 5244         unsigned int ri;
 5245         uint32_t do_descendants;
 5246         struct pmc *pm;
 5247         struct pmc_owner *po;
 5248         struct pmc_process *ppnew, *ppold;
 5249 
 5250         (void) flags;           /* unused parameter */
 5251 
 5252         PROC_LOCK(p1);
 5253         is_using_hwpmcs = p1->p_flag & P_HWPMC;
 5254         PROC_UNLOCK(p1);
 5255 
 5256         /*
 5257          * If there are system-wide sampling PMCs active, we need to
 5258          * log all fork events to their owner's logs.
 5259          */
 5260         PMC_EPOCH_ENTER();
 5261         CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
 5262             if (po->po_flags & PMC_PO_OWNS_LOGFILE) {
 5263                     pmclog_process_procfork(po, p1->p_pid, newproc->p_pid);
 5264                     pmclog_process_proccreate(po, newproc, 1);
 5265             }
 5266         PMC_EPOCH_EXIT();
 5267 
 5268         if (!is_using_hwpmcs)
 5269                 return;
 5270 
 5271         PMC_GET_SX_XLOCK();
 5272         PMCDBG4(PMC,FRK,1, "process-fork proc=%p (%d, %s) -> %p", p1,
 5273             p1->p_pid, p1->p_comm, newproc);
 5274 
 5275         /*
 5276          * If the parent process (curthread->td_proc) is a
 5277          * target of any PMCs, look for PMCs that are to be
 5278          * inherited, and link these into the new process
 5279          * descriptor.
 5280          */
 5281         if ((ppold = pmc_find_process_descriptor(curthread->td_proc,
 5282                  PMC_FLAG_NONE)) == NULL)
 5283                 goto done;              /* nothing to do */
 5284 
 5285         do_descendants = 0;
 5286         for (ri = 0; ri < md->pmd_npmc; ri++)
 5287                 if ((pm = ppold->pp_pmcs[ri].pp_pmc) != NULL)
 5288                         do_descendants |= pm->pm_flags & PMC_F_DESCENDANTS;
 5289         if (do_descendants == 0) /* nothing to do */
 5290                 goto done;
 5291 
 5292         /*
 5293          * Now mark the new process as being tracked by this driver.
 5294          */
 5295         PROC_LOCK(newproc);
 5296         newproc->p_flag |= P_HWPMC;
 5297         PROC_UNLOCK(newproc);
 5298 
 5299         /* allocate a descriptor for the new process  */
 5300         if ((ppnew = pmc_find_process_descriptor(newproc,
 5301                  PMC_FLAG_ALLOCATE)) == NULL)
 5302                 goto done;
 5303 
 5304         /*
 5305          * Run through all PMCs that were targeting the old process
 5306          * and which specified F_DESCENDANTS and attach them to the
 5307          * new process.
 5308          *
 5309          * Log the fork event to all owners of PMCs attached to this
 5310          * process, if not already logged.
 5311          */
 5312         for (ri = 0; ri < md->pmd_npmc; ri++)
 5313                 if ((pm = ppold->pp_pmcs[ri].pp_pmc) != NULL &&
 5314                     (pm->pm_flags & PMC_F_DESCENDANTS)) {
 5315                         pmc_link_target_process(pm, ppnew);
 5316                         po = pm->pm_owner;
 5317                         if (po->po_sscount == 0 &&
 5318                             po->po_flags & PMC_PO_OWNS_LOGFILE)
 5319                                 pmclog_process_procfork(po, p1->p_pid,
 5320                                     newproc->p_pid);
 5321                 }
 5322 
 5323  done:
 5324         sx_xunlock(&pmc_sx);
 5325 }
 5326 
 5327 static void
 5328 pmc_process_threadcreate(struct thread *td)
 5329 {
 5330         struct pmc_owner *po;
 5331 
 5332         PMC_EPOCH_ENTER();
 5333         CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
 5334             if (po->po_flags & PMC_PO_OWNS_LOGFILE)
 5335                         pmclog_process_threadcreate(po, td, 1);
 5336         PMC_EPOCH_EXIT();
 5337 }
 5338 
 5339 static void
 5340 pmc_process_threadexit(struct thread *td)
 5341 {
 5342         struct pmc_owner *po;
 5343 
 5344         PMC_EPOCH_ENTER();
 5345         CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
 5346             if (po->po_flags & PMC_PO_OWNS_LOGFILE)
 5347                         pmclog_process_threadexit(po, td);
 5348         PMC_EPOCH_EXIT();
 5349 }
 5350 
 5351 static void
 5352 pmc_process_proccreate(struct proc *p)
 5353 {
 5354         struct pmc_owner *po;
 5355 
 5356         PMC_EPOCH_ENTER();
 5357         CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
 5358             if (po->po_flags & PMC_PO_OWNS_LOGFILE)
 5359                         pmclog_process_proccreate(po, p, 1 /* sync */);
 5360         PMC_EPOCH_EXIT();
 5361 }
 5362 
 5363 static void
 5364 pmc_process_allproc(struct pmc *pm)
 5365 {
 5366         struct pmc_owner *po;
 5367         struct thread *td;
 5368         struct proc *p;
 5369 
 5370         po = pm->pm_owner;
 5371         if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)
 5372                 return;
 5373         sx_slock(&allproc_lock);
 5374         FOREACH_PROC_IN_SYSTEM(p) {
 5375                 pmclog_process_proccreate(po, p, 0 /* sync */);
 5376                 PROC_LOCK(p);
 5377                 FOREACH_THREAD_IN_PROC(p, td)
 5378                         pmclog_process_threadcreate(po, td, 0 /* sync */);
 5379                 PROC_UNLOCK(p);
 5380         }
 5381         sx_sunlock(&allproc_lock);
 5382         pmclog_flush(po, 0);
 5383 }
 5384 
 5385 static void
 5386 pmc_kld_load(void *arg __unused, linker_file_t lf)
 5387 {
 5388         struct pmc_owner *po;
 5389 
 5390         /*
 5391          * Notify owners of system sampling PMCs about KLD operations.
 5392          */
 5393         PMC_EPOCH_ENTER();
 5394         CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
 5395                 if (po->po_flags & PMC_PO_OWNS_LOGFILE)
 5396                         pmclog_process_map_in(po, (pid_t) -1,
 5397                             (uintfptr_t) lf->address, lf->filename);
 5398         PMC_EPOCH_EXIT();
 5399 
 5400         /*
 5401          * TODO: Notify owners of (all) process-sampling PMCs too.
 5402          */
 5403 }
 5404 
 5405 static void
 5406 pmc_kld_unload(void *arg __unused, const char *filename __unused,
 5407     caddr_t address, size_t size)
 5408 {
 5409         struct pmc_owner *po;
 5410 
 5411         PMC_EPOCH_ENTER();
 5412         CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
 5413                 if (po->po_flags & PMC_PO_OWNS_LOGFILE)
 5414                         pmclog_process_map_out(po, (pid_t) -1,
 5415                             (uintfptr_t) address, (uintfptr_t) address + size);
 5416         PMC_EPOCH_EXIT();
 5417 
 5418         /*
 5419          * TODO: Notify owners of process-sampling PMCs.
 5420          */
 5421 }
 5422 
 5423 /*
 5424  * initialization
 5425  */
 5426 static const char *
 5427 pmc_name_of_pmcclass(enum pmc_class class)
 5428 {
 5429 
 5430         switch (class) {
 5431 #undef  __PMC_CLASS
 5432 #define __PMC_CLASS(S,V,D)                                              \
 5433         case PMC_CLASS_##S:                                             \
 5434                 return #S;
 5435         __PMC_CLASSES();
 5436         default:
 5437                 return ("<unknown>");
 5438         }
 5439 }
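
      /*
       * For reference, a sketch of the X-macro expansion (the entry
       * shown is hypothetical): given __PMC_CLASS(TSC, 0x00, "time
       * stamp counter") inside __PMC_CLASSES(), the switch above gains
       * "case PMC_CLASS_TSC: return "TSC";".
       */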
 5440 
 5441 /*
 5442  * Base class initializer: allocate structure and set default classes.
 5443  */
 5444 struct pmc_mdep *
 5445 pmc_mdep_alloc(int nclasses)
 5446 {
 5447         struct pmc_mdep *md;
 5448         int     n;
 5449 
 5450         /* SOFT + md classes */
 5451         n = 1 + nclasses;
 5452         md = malloc(sizeof(struct pmc_mdep) + n *
 5453             sizeof(struct pmc_classdep), M_PMC, M_WAITOK|M_ZERO);
 5454         md->pmd_nclass = n;
 5455 
 5456         /* Add base class. */
 5457         pmc_soft_initialize(md);
 5458         return (md);
 5459 }
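
      /*
       * Layout note: the class descriptors live in a flexible array
       * directly after struct pmc_mdep in the same allocation; slot 0 is
       * the software (SOFT) class set up by pmc_soft_initialize() and MD
       * code fills in the rest.
       */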
 5460 
 5461 void
 5462 pmc_mdep_free(struct pmc_mdep *md)
 5463 {
 5464         pmc_soft_finalize(md);
 5465         free(md, M_PMC);
 5466 }
 5467 
 5468 static int
 5469 generic_switch_in(struct pmc_cpu *pc, struct pmc_process *pp)
 5470 {
 5471         (void) pc; (void) pp;
 5472 
 5473         return (0);
 5474 }
 5475 
 5476 static int
 5477 generic_switch_out(struct pmc_cpu *pc, struct pmc_process *pp)
 5478 {
 5479         (void) pc; (void) pp;
 5480 
 5481         return (0);
 5482 }
 5483 
 5484 static struct pmc_mdep *
 5485 pmc_generic_cpu_initialize(void)
 5486 {
 5487         struct pmc_mdep *md;
 5488 
 5489         md = pmc_mdep_alloc(0);
 5490 
 5491         md->pmd_cputype    = PMC_CPU_GENERIC;
 5492 
 5493         md->pmd_pcpu_init  = NULL;
 5494         md->pmd_pcpu_fini  = NULL;
 5495         md->pmd_switch_in  = generic_switch_in;
 5496         md->pmd_switch_out = generic_switch_out;
 5497 
 5498         return (md);
 5499 }
 5500 
 5501 static void
 5502 pmc_generic_cpu_finalize(struct pmc_mdep *md)
 5503 {
 5504         (void) md;
 5505 }
 5506 
 5507 
 5508 static int
 5509 pmc_initialize(void)
 5510 {
 5511         int c, cpu, error, n, ri;
 5512         unsigned int maxcpu, domain;
 5513         struct pcpu *pc;
 5514         struct pmc_binding pb;
 5515         struct pmc_sample *ps;
 5516         struct pmc_classdep *pcd;
 5517         struct pmc_samplebuffer *sb;
 5518 
 5519         md = NULL;
 5520         error = 0;
 5521 
 5522         pmc_stats.pm_intr_ignored = counter_u64_alloc(M_WAITOK);
 5523         pmc_stats.pm_intr_processed = counter_u64_alloc(M_WAITOK);
 5524         pmc_stats.pm_intr_bufferfull = counter_u64_alloc(M_WAITOK);
 5525         pmc_stats.pm_syscalls = counter_u64_alloc(M_WAITOK);
 5526         pmc_stats.pm_syscall_errors = counter_u64_alloc(M_WAITOK);
 5527         pmc_stats.pm_buffer_requests = counter_u64_alloc(M_WAITOK);
 5528         pmc_stats.pm_buffer_requests_failed = counter_u64_alloc(M_WAITOK);
 5529         pmc_stats.pm_log_sweeps = counter_u64_alloc(M_WAITOK);
 5530         pmc_stats.pm_merges = counter_u64_alloc(M_WAITOK);
 5531         pmc_stats.pm_overwrites = counter_u64_alloc(M_WAITOK);
 5532 
 5533 #ifdef  HWPMC_DEBUG
 5534         /* parse debug flags first */
 5535         if (TUNABLE_STR_FETCH(PMC_SYSCTL_NAME_PREFIX "debugflags",
 5536                 pmc_debugstr, sizeof(pmc_debugstr)))
 5537                 pmc_debugflags_parse(pmc_debugstr,
 5538                     pmc_debugstr+strlen(pmc_debugstr));
 5539 #endif
 5540 
 5541         PMCDBG1(MOD,INI,0, "PMC Initialize (version %x)", PMC_VERSION);
 5542 
 5543         /* check kernel version */
 5544         if (pmc_kernel_version != PMC_VERSION) {
 5545                 if (pmc_kernel_version == 0)
 5546                         printf("hwpmc: this kernel has not been compiled with "
 5547                             "'options HWPMC_HOOKS'.\n");
 5548                 else
 5549                         printf("hwpmc: kernel version (0x%x) does not match "
 5550                             "module version (0x%x).\n", pmc_kernel_version,
 5551                             PMC_VERSION);
 5552                 return (EPROGMISMATCH);
 5553         }
 5554 
 5555         /*
 5556          * check sysctl parameters
 5557          */
 5558 
 5559         if (pmc_hashsize <= 0) {
 5560                 (void) printf("hwpmc: tunable \"hashsize\"=%d must be "
 5561                     "greater than zero.\n", pmc_hashsize);
 5562                 pmc_hashsize = PMC_HASH_SIZE;
 5563         }
 5564 
 5565         if (pmc_nsamples <= 0 || pmc_nsamples > 65535) {
 5566                 (void) printf("hwpmc: tunable \"nsamples\"=%d out of "
 5567                     "range.\n", pmc_nsamples);
 5568                 pmc_nsamples = PMC_NSAMPLES;
 5569         }
 5570         pmc_sample_mask = pmc_nsamples - 1;     /* assumes a power of 2 */
 5571 
 5572         if (pmc_callchaindepth <= 0 ||
 5573             pmc_callchaindepth > PMC_CALLCHAIN_DEPTH_MAX) {
 5574                 (void) printf("hwpmc: tunable \"callchaindepth\"=%d out of "
 5575                     "range - using %d.\n", pmc_callchaindepth,
 5576                     PMC_CALLCHAIN_DEPTH_MAX);
 5577                 pmc_callchaindepth = PMC_CALLCHAIN_DEPTH_MAX;
 5578         }
 5579 
 5580         md = pmc_md_initialize();
 5581         if (md == NULL) {
 5582                 /* Default to generic CPU. */
 5583                 md = pmc_generic_cpu_initialize();
 5584                 if (md == NULL)
 5585                         return (ENOSYS);
 5586         }
 5587 
 5588         /*
 5589          * Refresh classes base ri. Optional classes may come in different
 5590          * order.
 5591          */
 5592         for (ri = c = 0; c < md->pmd_nclass; c++) {
 5593                 pcd = &md->pmd_classdep[c];
 5594                 pcd->pcd_ri = ri;
 5595                 ri += pcd->pcd_num;
 5596         }
 5597 
 5598         KASSERT(md->pmd_nclass >= 1 && md->pmd_npmc >= 1,
 5599             ("[pmc,%d] no classes or pmcs", __LINE__));
 5600 
 5601         /* Compute the map from row-indices to classdep pointers. */
 5602         pmc_rowindex_to_classdep = malloc(sizeof(struct pmc_classdep *) *
 5603             md->pmd_npmc, M_PMC, M_WAITOK|M_ZERO);
 5604 
 5605         for (n = 0; n < md->pmd_npmc; n++)
 5606                 pmc_rowindex_to_classdep[n] = NULL;
 5607         for (ri = c = 0; c < md->pmd_nclass; c++) {
 5608                 pcd = &md->pmd_classdep[c];
 5609                 for (n = 0; n < pcd->pcd_num; n++, ri++)
 5610                         pmc_rowindex_to_classdep[ri] = pcd;
 5611         }
 5612 
 5613         KASSERT(ri == md->pmd_npmc,
 5614             ("[pmc,%d] npmc miscomputed: ri=%d, md->npmc=%d", __LINE__,
 5615             ri, md->pmd_npmc));
 5616 
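              /*
               * An informal example: with two classes of 4 and 8 PMCs,
               * row indices 0..3 map to the first classdep and 4..11 to
               * the second; pmc_ri_to_classdep() later turns a global
               * row index back into a (classdep, adjusted index) pair.
               */
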
 5617         maxcpu = pmc_cpu_max();
 5618 
 5619         /* allocate space for the per-cpu array */
 5620         pmc_pcpu = malloc(maxcpu * sizeof(struct pmc_cpu *), M_PMC,
 5621             M_WAITOK|M_ZERO);
 5622 
 5623         /* per-cpu 'saved values' for managing process-mode PMCs */
 5624         pmc_pcpu_saved = malloc(sizeof(pmc_value_t) * maxcpu * md->pmd_npmc,
 5625             M_PMC, M_WAITOK);
 5626 
 5627         /* Perform CPU-dependent initialization. */
 5628         pmc_save_cpu_binding(&pb);
 5629         error = 0;
 5630         for (cpu = 0; error == 0 && cpu < maxcpu; cpu++) {
 5631                 if (!pmc_cpu_is_active(cpu))
 5632                         continue;
 5633                 pmc_select_cpu(cpu);
 5634                 pmc_pcpu[cpu] = malloc(sizeof(struct pmc_cpu) +
 5635                     md->pmd_npmc * sizeof(struct pmc_hw *), M_PMC,
 5636                     M_WAITOK|M_ZERO);
 5637                 if (md->pmd_pcpu_init)
 5638                         error = md->pmd_pcpu_init(md, cpu);
 5639                 for (n = 0; error == 0 && n < md->pmd_nclass; n++)
 5640                         if (md->pmd_classdep[n].pcd_num > 0)
 5641                                 error = md->pmd_classdep[n].pcd_pcpu_init(md,
 5642                                     cpu);
 5643         }
 5644         pmc_restore_cpu_binding(&pb);
 5645 
 5646         if (error)
 5647                 return (error);
 5648 
 5649         /* allocate space for the sample array */
 5650         for (cpu = 0; cpu < maxcpu; cpu++) {
 5651                 if (!pmc_cpu_is_active(cpu))
 5652                         continue;
 5653                 pc = pcpu_find(cpu);
 5654                 domain = pc->pc_domain;
 5655                 sb = malloc_domainset(sizeof(struct pmc_samplebuffer) +
 5656                     pmc_nsamples * sizeof(struct pmc_sample), M_PMC,
 5657                     DOMAINSET_PREF(domain), M_WAITOK | M_ZERO);
 5658 
 5659                 KASSERT(pmc_pcpu[cpu] != NULL,
 5660                     ("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu));
 5661 
 5662                 sb->ps_callchains = malloc_domainset(pmc_callchaindepth *
 5663                     pmc_nsamples * sizeof(uintptr_t), M_PMC,
 5664                     DOMAINSET_PREF(domain), M_WAITOK | M_ZERO);
 5665 
 5666                 for (n = 0, ps = sb->ps_samples; n < pmc_nsamples; n++, ps++)
 5667                         ps->ps_pc = sb->ps_callchains +
 5668                             (n * pmc_callchaindepth);
 5669 
 5670                 pmc_pcpu[cpu]->pc_sb[PMC_HR] = sb;
 5671 
 5672                 sb = malloc_domainset(sizeof(struct pmc_samplebuffer) +
 5673                     pmc_nsamples * sizeof(struct pmc_sample), M_PMC,
 5674                     DOMAINSET_PREF(domain), M_WAITOK | M_ZERO);
 5675 
 5676                 sb->ps_callchains = malloc_domainset(pmc_callchaindepth *
 5677                     pmc_nsamples * sizeof(uintptr_t), M_PMC,
 5678                     DOMAINSET_PREF(domain), M_WAITOK | M_ZERO);
 5679                 for (n = 0, ps = sb->ps_samples; n < pmc_nsamples; n++, ps++)
 5680                         ps->ps_pc = sb->ps_callchains +
 5681                             (n * pmc_callchaindepth);
 5682 
 5683                 pmc_pcpu[cpu]->pc_sb[PMC_SR] = sb;
 5684 
 5685                 sb = malloc_domainset(sizeof(struct pmc_samplebuffer) +
 5686                     pmc_nsamples * sizeof(struct pmc_sample), M_PMC,
 5687                     DOMAINSET_PREF(domain), M_WAITOK | M_ZERO);
 5688                 sb->ps_callchains = malloc_domainset(pmc_callchaindepth *
 5689                     pmc_nsamples * sizeof(uintptr_t), M_PMC,
 5690                     DOMAINSET_PREF(domain), M_WAITOK | M_ZERO);
 5691                 for (n = 0, ps = sb->ps_samples; n < pmc_nsamples; n++, ps++)
 5692                         ps->ps_pc = sb->ps_callchains + n * pmc_callchaindepth;
 5693 
 5694                 pmc_pcpu[cpu]->pc_sb[PMC_UR] = sb;
 5695         }
 5696 
 5697         /* allocate space for the row disposition array */
 5698         pmc_pmcdisp = malloc(sizeof(enum pmc_mode) * md->pmd_npmc,
 5699             M_PMC, M_WAITOK|M_ZERO);
 5700 
 5701         /* mark all PMCs as available */
 5702         for (n = 0; n < (int) md->pmd_npmc; n++)
 5703                 PMC_MARK_ROW_FREE(n);
 5704 
 5705         /* allocate thread hash tables */
 5706         pmc_ownerhash = hashinit(pmc_hashsize, M_PMC,
 5707             &pmc_ownerhashmask);
 5708 
 5709         pmc_processhash = hashinit(pmc_hashsize, M_PMC,
 5710             &pmc_processhashmask);
 5711         mtx_init(&pmc_processhash_mtx, "pmc-process-hash", "pmc-leaf",
 5712             MTX_SPIN);
 5713 
 5714         CK_LIST_INIT(&pmc_ss_owners);
 5715         pmc_ss_count = 0;
 5716 
 5717         /* allocate a pool of spin mutexes */
 5718         pmc_mtxpool = mtx_pool_create("pmc-leaf", pmc_mtxpool_size,
 5719             MTX_SPIN);
 5720 
 5721         PMCDBG4(MOD,INI,1, "pmc_ownerhash=%p, mask=0x%lx "
 5722             "targethash=%p mask=0x%lx", pmc_ownerhash, pmc_ownerhashmask,
 5723             pmc_processhash, pmc_processhashmask);
 5724 
 5725         /* Initialize a spin mutex for the thread free list. */
 5726         mtx_init(&pmc_threadfreelist_mtx, "pmc-threadfreelist", "pmc-leaf",
 5727             MTX_SPIN);
 5728 
 5729         /* Initialize the task to prune the thread free list. */
 5730         TASK_INIT(&free_task, 0, pmc_thread_descriptor_pool_free_task, NULL);
 5731 
 5732         /* register process {exit,fork,exec} handlers */
 5733         pmc_exit_tag = EVENTHANDLER_REGISTER(process_exit,
 5734             pmc_process_exit, NULL, EVENTHANDLER_PRI_ANY);
 5735         pmc_fork_tag = EVENTHANDLER_REGISTER(process_fork,
 5736             pmc_process_fork, NULL, EVENTHANDLER_PRI_ANY);
 5737 
 5738         /* register kld event handlers */
 5739         pmc_kld_load_tag = EVENTHANDLER_REGISTER(kld_load, pmc_kld_load,
 5740             NULL, EVENTHANDLER_PRI_ANY);
 5741         pmc_kld_unload_tag = EVENTHANDLER_REGISTER(kld_unload, pmc_kld_unload,
 5742             NULL, EVENTHANDLER_PRI_ANY);
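        /*
         * The tags returned above are kept so that pmc_cleanup() can
         * deregister the same process and kld handlers on unload.
         */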
 5743 
 5744         /* initialize logging */
 5745         pmclog_initialize();
 5746 
 5747         /* set hook functions */
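        /*
         * Publication order matters here: pmc_intr must be globally
         * visible before pmc_hook is set, since a non-NULL pmc_hook is
         * what tells the rest of the kernel that the module is live.
         * The write barrier enforces this ordering.
         */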
 5748         pmc_intr = md->pmd_intr;
 5749         wmb();
 5750         pmc_hook = pmc_hook_handler;
 5751 
 5752         if (error == 0) {
 5753                 printf(PMC_MODULE_NAME ":");
 5754                 for (n = 0; n < (int) md->pmd_nclass; n++) {
 5755                         if (md->pmd_classdep[n].pcd_num == 0)
 5756                                 continue;
 5757                         pcd = &md->pmd_classdep[n];
 5758                         printf(" %s/%d/%d/0x%b",
 5759                             pmc_name_of_pmcclass(pcd->pcd_class),
 5760                             pcd->pcd_num,
 5761                             pcd->pcd_width,
 5762                             pcd->pcd_caps,
 5763                             "\20"
 5764                             "\1INT\2USR\3SYS\4EDG\5THR"
 5765                             "\6REA\7WRI\10INV\11QUA\12PRC"
 5766                             "\13TAG\14CSC");
 5767                 }
 5768                 printf("\n");
 5769         }
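        /*
         * The "0x%b" conversion above prints pcd_caps in hex followed
         * by the decoded capability names, so a boot-time line might
         * read (illustrative values only, not from this listing):
         *
         *   hwpmc: SOFT/16/64/0x67<INT,USR,SYS,REA,WRI>
         */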
 5770 
 5771         return (error);
 5772 }
 5773 
 5774 /* prepare to be unloaded */
 5775 static void
 5776 pmc_cleanup(void)
 5777 {
 5778         int c, cpu;
 5779         unsigned int maxcpu;
 5780         struct pmc_ownerhash *ph;
 5781         struct pmc_owner *po, *tmp;
 5782         struct pmc_binding pb;
 5783 #ifdef  HWPMC_DEBUG
 5784         struct pmc_processhash *prh;
 5785 #endif
 5786 
 5787         PMCDBG0(MOD,INI,0, "cleanup");
 5788 
 5789         /* switch off sampling */
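        /*
         * This must happen before any state is reclaimed: clearing the
         * per-CPU "sampled" flags and the interrupt hook ensures that
         * a late sampling interrupt finds nothing to do.
         */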
 5790         CPU_FOREACH(cpu)
 5791                 DPCPU_ID_SET(cpu, pmc_sampled, 0);
 5792         pmc_intr = NULL;
 5793 
 5794         sx_xlock(&pmc_sx);
 5795         if (pmc_hook == NULL) { /* being unloaded already */
 5796                 sx_xunlock(&pmc_sx);
 5797                 return;
 5798         }
 5799 
 5800         pmc_hook = NULL; /* prevent new threads from entering module */
 5801 
 5802         /* deregister event handlers */
 5803         EVENTHANDLER_DEREGISTER(process_fork, pmc_fork_tag);
 5804         EVENTHANDLER_DEREGISTER(process_exit, pmc_exit_tag);
 5805         EVENTHANDLER_DEREGISTER(kld_load, pmc_kld_load_tag);
 5806         EVENTHANDLER_DEREGISTER(kld_unload, pmc_kld_unload_tag);
 5807 
 5808         /* send SIGBUS to all owner processes, free up allocations */
 5809         if (pmc_ownerhash)
 5810                 for (ph = pmc_ownerhash;
 5811                      ph <= &pmc_ownerhash[pmc_ownerhashmask];
 5812                      ph++) {
 5813                         LIST_FOREACH_SAFE(po, ph, po_next, tmp) {
 5814                                 pmc_remove_owner(po);
 5815 
 5816                                 /* send SIGBUS to owner processes */
 5817                                 PMCDBG3(MOD,INI,2, "cleanup signal proc=%p "
 5818                                     "(%d, %s)", po->po_owner,
 5819                                     po->po_owner->p_pid,
 5820                                     po->po_owner->p_comm);
 5821 
 5822                                 PROC_LOCK(po->po_owner);
 5823                                 kern_psignal(po->po_owner, SIGBUS);
 5824                                 PROC_UNLOCK(po->po_owner);
 5825 
 5826                                 pmc_destroy_owner_descriptor(po);
 5827                         }
 5828                 }
 5829 
 5830         /* reclaim allocated data structures */
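        /*
         * Stop the thread free-list prune task first, then tear down
         * its lock; pmc_thread_descriptor_pool_drain() releases any
         * descriptors still cached on the free list.
         */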
 5831         taskqueue_drain(taskqueue_fast, &free_task);
 5832         mtx_destroy(&pmc_threadfreelist_mtx);
 5833         pmc_thread_descriptor_pool_drain();
 5834 
 5835         if (pmc_mtxpool)
 5836                 mtx_pool_destroy(&pmc_mtxpool);
 5837 
 5838         mtx_destroy(&pmc_processhash_mtx);
 5839         if (pmc_processhash) {
 5840 #ifdef  HWPMC_DEBUG
 5841                 struct pmc_process *pp;
 5842 
 5843                 PMCDBG0(MOD,INI,3, "destroy process hash");
 5844                 for (prh = pmc_processhash;
 5845                      prh <= &pmc_processhash[pmc_processhashmask];
 5846                      prh++)
 5847                         LIST_FOREACH(pp, prh, pp_next)
 5848                             PMCDBG1(MOD,INI,3, "pid=%d", pp->pp_proc->p_pid);
 5849 #endif
 5850 
 5851                 hashdestroy(pmc_processhash, M_PMC, pmc_processhashmask);
 5852                 pmc_processhash = NULL;
 5853         }
 5854 
 5855         if (pmc_ownerhash) {
 5856                 PMCDBG0(MOD,INI,3, "destroy owner hash");
 5857                 hashdestroy(pmc_ownerhash, M_PMC, pmc_ownerhashmask);
 5858                 pmc_ownerhash = NULL;
 5859         }
 5860 
 5861         KASSERT(CK_LIST_EMPTY(&pmc_ss_owners),
 5862             ("[pmc,%d] Global SS owner list not empty", __LINE__));
 5863         KASSERT(pmc_ss_count == 0,
 5864             ("[pmc,%d] Global SS count not zero", __LINE__));
 5865 
 5866         /* do processor and pmc-class dependent cleanup */
 5867         maxcpu = pmc_cpu_max();
 5868 
 5869         PMCDBG0(MOD,INI,3, "md cleanup");
 5870         if (md) {
 5871                 pmc_save_cpu_binding(&pb);
 5872                 for (cpu = 0; cpu < maxcpu; cpu++) {
 5873                         PMCDBG2(MOD,INI,1,"pmc-cleanup cpu=%d pcs=%p",
 5874                             cpu, pmc_pcpu[cpu]);
 5875                         if (!pmc_cpu_is_active(cpu) || pmc_pcpu[cpu] == NULL)
 5876                                 continue;
 5877                         pmc_select_cpu(cpu);
 5878                         for (c = 0; c < md->pmd_nclass; c++)
 5879                                 if (md->pmd_classdep[c].pcd_num > 0)
 5880                                         md->pmd_classdep[c].pcd_pcpu_fini(md,
 5881                                             cpu);
 5882                         if (md->pmd_pcpu_fini)
 5883                                 md->pmd_pcpu_fini(md, cpu);
 5884                 }
 5885 
 5886                 if (md->pmd_cputype == PMC_CPU_GENERIC)
 5887                         pmc_generic_cpu_finalize(md);
 5888                 else
 5889                         pmc_md_finalize(md);
 5890 
 5891                 pmc_mdep_free(md);
 5892                 md = NULL;
 5893                 pmc_restore_cpu_binding(&pb);
 5894         }
 5895 
 5896         /* Free per-cpu descriptors. */
 5897         for (cpu = 0; cpu < maxcpu; cpu++) {
 5898                 if (!pmc_cpu_is_active(cpu))
 5899                         continue;
 5900                 KASSERT(pmc_pcpu[cpu]->pc_sb[PMC_HR] != NULL,
 5901                     ("[pmc,%d] Null hw cpu sample buffer cpu=%d", __LINE__,
 5902                         cpu));
 5903                 KASSERT(pmc_pcpu[cpu]->pc_sb[PMC_SR] != NULL,
 5904                     ("[pmc,%d] Null sw cpu sample buffer cpu=%d", __LINE__,
 5905                         cpu));
 5906                 KASSERT(pmc_pcpu[cpu]->pc_sb[PMC_UR] != NULL,
 5907                     ("[pmc,%d] Null userret cpu sample buffer cpu=%d", __LINE__,
 5908                         cpu));
 5909                 free(pmc_pcpu[cpu]->pc_sb[PMC_HR]->ps_callchains, M_PMC);
 5910                 free(pmc_pcpu[cpu]->pc_sb[PMC_HR], M_PMC);
 5911                 free(pmc_pcpu[cpu]->pc_sb[PMC_SR]->ps_callchains, M_PMC);
 5912                 free(pmc_pcpu[cpu]->pc_sb[PMC_SR], M_PMC);
 5913                 free(pmc_pcpu[cpu]->pc_sb[PMC_UR]->ps_callchains, M_PMC);
 5914                 free(pmc_pcpu[cpu]->pc_sb[PMC_UR], M_PMC);
 5915                 free(pmc_pcpu[cpu], M_PMC);
 5916         }
 5917 
 5918         free(pmc_pcpu, M_PMC);
 5919         pmc_pcpu = NULL;
 5920 
 5921         free(pmc_pcpu_saved, M_PMC);
 5922         pmc_pcpu_saved = NULL;
 5923 
 5924         if (pmc_pmcdisp) {
 5925                 free(pmc_pmcdisp, M_PMC);
 5926                 pmc_pmcdisp = NULL;
 5927         }
 5928 
 5929         if (pmc_rowindex_to_classdep) {
 5930                 free(pmc_rowindex_to_classdep, M_PMC);
 5931                 pmc_rowindex_to_classdep = NULL;
 5932         }
 5933 
 5934         pmclog_shutdown();
 5935         counter_u64_free(pmc_stats.pm_intr_ignored);
 5936         counter_u64_free(pmc_stats.pm_intr_processed);
 5937         counter_u64_free(pmc_stats.pm_intr_bufferfull);
 5938         counter_u64_free(pmc_stats.pm_syscalls);
 5939         counter_u64_free(pmc_stats.pm_syscall_errors);
 5940         counter_u64_free(pmc_stats.pm_buffer_requests);
 5941         counter_u64_free(pmc_stats.pm_buffer_requests_failed);
 5942         counter_u64_free(pmc_stats.pm_log_sweeps);
 5943         counter_u64_free(pmc_stats.pm_merges);
 5944         counter_u64_free(pmc_stats.pm_overwrites);
 5945         sx_xunlock(&pmc_sx);    /* we are done */
 5946 }
 5947 
 5948 /*
 5949  * The function called at load/unload.
 5950  */
 5951 
 5952 static int
 5953 load(struct module *module __unused, int cmd, void *arg __unused)
 5954 {
 5955         int error;
 5956 
 5957         error = 0;
 5958 
 5959         switch (cmd) {
 5960         case MOD_LOAD:
 5961                 /* initialize the subsystem */
 5962                 error = pmc_initialize();
 5963                 if (error != 0)
 5964                         break;
 5965                 PMCDBG2(MOD,INI,1, "syscall=%d maxcpu=%d",
 5966                     pmc_syscall_num, pmc_cpu_max());
 5967                 break;
 5968 
 5969 
 5970         case MOD_UNLOAD:
 5971         case MOD_SHUTDOWN:
 5972                 pmc_cleanup();
 5973                 PMCDBG0(MOD,INI,1, "unloaded");
 5974                 break;
 5975 
 5976         default:
 5977                 error = EINVAL; /* XXX should panic(9) */
 5978                 break;
 5979         }
 5980 
 5981         return (error);
 5982 }
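
/*
 * For context, a sketch of how a modevent handler such as load() is
 * conventionally registered via DECLARE_MODULE(9).  The declaration
 * below follows the standard pattern but is assumed rather than copied
 * from this file.
 */
static moduledata_t pmc_mod = {
        PMC_MODULE_NAME,        /* module name */
        load,                   /* event handler defined above */
        NULL                    /* no extra argument */
};

DECLARE_MODULE(pmc, pmc_mod, SI_SUB_SMP, SI_ORDER_ANY);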
