The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/cddl/dev/dtrace/amd64/dtrace_subr.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * CDDL HEADER START
    3  *
    4  * The contents of this file are subject to the terms of the
    5  * Common Development and Distribution License, Version 1.0 only
    6  * (the "License").  You may not use this file except in compliance
    7  * with the License.
    8  *
    9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   10  * or http://www.opensolaris.org/os/licensing.
   11  * See the License for the specific language governing permissions
   12  * and limitations under the License.
   13  *
   14  * When distributing Covered Code, include this CDDL HEADER in each
   15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
   16  * If applicable, add the following below this CDDL HEADER, with the
   17  * fields enclosed by brackets "[]" replaced with your own identifying
   18  * information: Portions Copyright [yyyy] [name of copyright owner]
   19  *
   20  * CDDL HEADER END
   21  *
   22  * $FreeBSD$
   23  *
   24  */
   25 /*
   26  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
   27  * Use is subject to license terms.
   28  */
   29 
   30 /*
   31  * Copyright (c) 2011, Joyent, Inc. All rights reserved.
   32  */
   33 
   34 #include <sys/param.h>
   35 #include <sys/systm.h>
   36 #include <sys/kernel.h>
   37 #include <sys/malloc.h>
   38 #include <sys/proc.h>
   39 #include <sys/smp.h>
   40 #include <sys/dtrace_impl.h>
   41 #include <sys/dtrace_bsd.h>
   42 #include <cddl/dev/dtrace/dtrace_cddl.h>
   43 #include <machine/clock.h>
   44 #include <machine/cpufunc.h>
   45 #include <machine/frame.h>
   46 #include <machine/md_var.h>
   47 #include <machine/psl.h>
   48 #include <machine/trap.h>
   49 #include <vm/pmap.h>
   50 
   /*
    * Symbols defined outside this file: dtrace_getnanotime() backs
    * dtrace_gethrestime(), and dtrace_invop_jump_addr is the hook that
    * dtrace_invop_init() and dtrace_invop_uninit() set and clear.
    */
   51 extern void dtrace_getnanotime(struct timespec *tsp);
   52 extern int (*dtrace_invop_jump_addr)(struct trapframe *);
   53 
   /*
    * Invalid-opcode (invop) entry points.  dtrace_invop_start() is only
    * declared here; this file stores its address in dtrace_invop_jump_addr.
    */
   54 int     dtrace_invop(uintptr_t, struct trapframe *, void **);
   55 int     dtrace_invop_start(struct trapframe *frame);
   56 void    dtrace_invop_init(void);
   57 void    dtrace_invop_uninit(void);
   58 
   /*
    * Node of the singly linked list of invop handlers.
    *   dtih_func - callback invoked by dtrace_invop() with the faulting
    *               address, the trap frame and the scratch pointer cast to
    *               uintptr_t; a non-zero return stops the walk.
    *   dtih_next - next node, or NULL at the end of the list.
    */
   59 typedef struct dtrace_invop_hdlr {
   60         int (*dtih_func)(uintptr_t, struct trapframe *, uintptr_t);
   61         struct dtrace_invop_hdlr *dtih_next;
   62 } dtrace_invop_hdlr_t;
   63 
   /* Head of the handler list; NULL while no handler is registered. */
   64 dtrace_invop_hdlr_t *dtrace_invop_hdlr;
   65 
   /*
    * Offer an invop trap to the registered handlers.
    *
    * The handlers are called in list order (most recently added first, see
    * dtrace_invop_add()) until one returns non-zero.
    *
    * addr    - address passed through to every handler.
    * frame   - trap frame; also published in curthread for the duration of
    *           the walk.
    * scratch - passed to the handlers as a uintptr_t.
    *
    * Returns the first non-zero handler result, or 0 if no handler claimed
    * the trap (including when the list is empty).
    */
   66 int
   67 dtrace_invop(uintptr_t addr, struct trapframe *frame, void **scratch)
   68 {
   69         struct thread *td;
   70         dtrace_invop_hdlr_t *hdlr;
   71         int rval;
   72 
   73         td = curthread;
              /* Make the frame reachable through the thread while handlers run. */
   74         td->t_dtrace_trapframe = frame;
   75         rval = 0;
   76         for (hdlr = dtrace_invop_hdlr; hdlr != NULL; hdlr = hdlr->dtih_next) {
   77                 rval = hdlr->dtih_func(addr, frame, (uintptr_t)scratch);
   78                 if (rval != 0)
   79                         break;
   80         }
              /* The frame pointer is only valid during the walk above. */
   81         td->t_dtrace_trapframe = NULL;
   82         return (rval);
   83 }
   84 
   /*
    * Register an invop handler.
    *
    * A new list node is allocated with KM_SLEEP and pushed onto the head of
    * dtrace_invop_hdlr, so the newest handler is consulted first by
    * dtrace_invop().  The same function may be registered more than once;
    * no duplicate check is made.
    *
    * NOTE(review): no locking is visible here; callers presumably serialize
    * registration against dtrace_invop() -- confirm.
    */
   85 void
   86 dtrace_invop_add(int (*func)(uintptr_t, struct trapframe *, uintptr_t))
   87 {
   88         dtrace_invop_hdlr_t *hdlr;
   89 
   90         hdlr = kmem_alloc(sizeof (dtrace_invop_hdlr_t), KM_SLEEP);
   91         hdlr->dtih_func = func;
   92         hdlr->dtih_next = dtrace_invop_hdlr;
   93         dtrace_invop_hdlr = hdlr;
   94 }
   95 
   /*
    * Unregister an invop handler previously added with dtrace_invop_add().
    *
    * The list is searched from the head and the first node whose callback
    * equals func is unlinked and freed.  Asking to remove a handler that is
    * not on the list is a fatal error (panic).
    */
   96 void
   97 dtrace_invop_remove(int (*func)(uintptr_t, struct trapframe *, uintptr_t))
   98 {
   99         dtrace_invop_hdlr_t *hdlr = dtrace_invop_hdlr, *prev = NULL;
  100 
              /* Find the node for func, remembering its predecessor. */
  101         for (;;) {
  102                 if (hdlr == NULL)
  103                         panic("attempt to remove non-existent invop handler");
  104 
  105                 if (hdlr->dtih_func == func)
  106                         break;
  107 
  108                 prev = hdlr;
  109                 hdlr = hdlr->dtih_next;
  110         }
  111 
              /* Unlink: either advance the list head or bypass the node. */
  112         if (prev == NULL) {
  113                 ASSERT(dtrace_invop_hdlr == hdlr);
  114                 dtrace_invop_hdlr = hdlr->dtih_next;
  115         } else {
  116                 ASSERT(dtrace_invop_hdlr != hdlr);
  117                 prev->dtih_next = hdlr->dtih_next;
  118         }
  119 
              /* Free with the same size that dtrace_invop_add() allocated. */
  120         kmem_free(hdlr, sizeof (dtrace_invop_hdlr_t));
  121 }
  122 
  /*
   * Install the invop hook: point dtrace_invop_jump_addr at
   * dtrace_invop_start().
   */
  123 void
  124 dtrace_invop_init(void)
  125 {
  126 
  127         dtrace_invop_jump_addr = dtrace_invop_start;
  128 }
  129 
  /*
   * Remove the invop hook installed by dtrace_invop_init() by resetting
   * dtrace_invop_jump_addr to NULL.
   */
  130 void
  131 dtrace_invop_uninit(void)
  132 {
  133 
  134         dtrace_invop_jump_addr = NULL;
  135 }
  136 
  137 /*ARGSUSED*/
  /*
   * Report the address range that must not be touched from probe context.
   *
   * func is called exactly once with base 0 and a limit of addr_P5Tmap when
   * la57 is non-zero, or addr_P4Tmap otherwise.
   */
  138 void
  139 dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit))
  140 {
  141         (*func)(0, la57 ? (uintptr_t)addr_P5Tmap : (uintptr_t)addr_P4Tmap);
  142 }
  143 
  /*
   * Run func(arg) on one CPU or on all CPUs.
   *
   * cpu  - a CPU id, or DTRACE_CPUALL to target every CPU in all_cpus.
   * func - function to execute on the selected CPU(s).
   * arg  - opaque argument handed to func.
   *
   * The call is made through smp_rendezvous_cpus() with func as the action
   * and smp_no_rendezvous_barrier in both of the other callback slots.
   */
  144 void
  145 dtrace_xcall(processorid_t cpu, dtrace_xcall_t func, void *arg)
  146 {
  147         cpuset_t cpus;
  148 
  149         if (cpu == DTRACE_CPUALL)
  150                 cpus = all_cpus;
  151         else
  152                 CPU_SETOF(cpu, &cpus);
  153 
  154         smp_rendezvous_cpus(cpus, smp_no_rendezvous_barrier, func,
  155             smp_no_rendezvous_barrier, arg);
  156 }
  157 
  /*
   * Empty cross-call target used by dtrace_sync().
   *
   * It takes the same (void *) argument as every other function that is
   * handed to dtrace_xcall(), so no function-pointer cast is needed and the
   * call is made through a compatible type.  The argument is ignored.
   */
  158 static void
  159 dtrace_sync_func(void *arg)
  160 {
  161 }
  162 
  /*
   * Issue a do-nothing cross call to every CPU via dtrace_xcall().
   *
   * NOTE(review): this presumably returns only after each CPU has taken the
   * call -- confirm against smp_rendezvous_cpus().
   */
  163 void
  164 dtrace_sync(void)
  165 {
  166         dtrace_xcall(DTRACE_CPUALL, dtrace_sync_func, NULL);
  167 }
  168 
  169 #ifdef notyet
  /*
   * Fix up the traced-instruction state of the current thread when a
   * synchronous signal arrives.  Compiled only when "notyet" is defined.
   *
   * isz is the length of the traced instruction (t_dtrace_npc minus
   * t_dtrace_pc).  Outside the scratch range the tracing flags are cleared;
   * exactly at the start of either scratch copy the pc is also rewound to
   * the original instruction.
   */
  170 void
  171 dtrace_safe_synchronous_signal(void)
  172 {
  173         kthread_t *t = curthread;
  174         struct regs *rp = lwptoregs(ttolwp(t));
  175         size_t isz = t->t_dtrace_npc - t->t_dtrace_pc;
  176 
  177         ASSERT(t->t_dtrace_on);
  178 
  179         /*
  180          * If we're not in the range of scratch addresses, we're not actually
  181          * tracing user instructions so turn off the flags. If the instruction
  182          * we copied out caused a synchronous trap, reset the pc back to its
  183          * original value and turn off the flags.
  184          */
  185         if (rp->r_pc < t->t_dtrace_scrpc ||
  186             rp->r_pc > t->t_dtrace_astpc + isz) {
  187                 t->t_dtrace_ft = 0;
  188         } else if (rp->r_pc == t->t_dtrace_scrpc ||
  189             rp->r_pc == t->t_dtrace_astpc) {
  190                 rp->r_pc = t->t_dtrace_pc;
  191                 t->t_dtrace_ft = 0;
  192         }
  193 }
  194 
  /*
   * Decide whether a signal for the current thread must be deferred because
   * the thread is executing a copied-out traced instruction.  Compiled only
   * when "notyet" is defined.
   *
   * Returns 0 when the signal can be taken now; in that case the tracing
   * flags (t_dtrace_ft) have been cleared and, if the original instruction
   * had already executed, the pc has been moved to t_dtrace_npc.
   * Returns 1 when the signal is deferred; t_dtrace_ast is set and, on the
   * first deferral, the pc is shifted by (astpc - scrpc) and t_dtrace_step
   * is set.
   */
  195 int
  196 dtrace_safe_defer_signal(void)
  197 {
  198         kthread_t *t = curthread;
  199         struct regs *rp = lwptoregs(ttolwp(t));
              /* Length of the traced instruction. */
  200         size_t isz = t->t_dtrace_npc - t->t_dtrace_pc;
  201 
  202         ASSERT(t->t_dtrace_on);
  203 
  204         /*
  205          * If we're not in the range of scratch addresses, we're not actually
  206          * tracing user instructions so turn off the flags.
  207          */
  208         if (rp->r_pc < t->t_dtrace_scrpc ||
  209             rp->r_pc > t->t_dtrace_astpc + isz) {
  210                 t->t_dtrace_ft = 0;
  211                 return (0);
  212         }
  213 
  214         /*
  215          * If we have executed the original instruction, but we have performed
  216          * neither the jmp back to t->t_dtrace_npc nor the clean up of any
  217          * registers used to emulate %rip-relative instructions in 64-bit mode,
  218          * we'll save ourselves some effort by doing that here and taking the
  219          * signal right away.  We detect this condition by seeing if the program
  220          * counter is the range [scrpc + isz, astpc).
  221          */
  222         if (rp->r_pc >= t->t_dtrace_scrpc + isz &&
  223             rp->r_pc < t->t_dtrace_astpc) {
  224 #ifdef __amd64
  225                 /*
  226                  * If there is a scratch register and we're on the
  227                  * instruction immediately after the modified instruction,
  228                  * restore the value of that scratch register.
  229                  */
  230                 if (t->t_dtrace_reg != 0 &&
  231                     rp->r_pc == t->t_dtrace_scrpc + isz) {
                              /* Registers not listed here are left untouched. */
  232                         switch (t->t_dtrace_reg) {
  233                         case REG_RAX:
  234                                 rp->r_rax = t->t_dtrace_regv;
  235                                 break;
  236                         case REG_RCX:
  237                                 rp->r_rcx = t->t_dtrace_regv;
  238                                 break;
  239                         case REG_R8:
  240                                 rp->r_r8 = t->t_dtrace_regv;
  241                                 break;
  242                         case REG_R9:
  243                                 rp->r_r9 = t->t_dtrace_regv;
  244                                 break;
  245                         }
  246                 }
  247 #endif
  248                 rp->r_pc = t->t_dtrace_npc;
  249                 t->t_dtrace_ft = 0;
  250                 return (0);
  251         }
  252 
  253         /*
  254          * Otherwise, make sure we'll return to the kernel after executing
  255          * the copied out instruction and defer the signal.
  256          */
  257         if (!t->t_dtrace_step) {
  258                 ASSERT(rp->r_pc < t->t_dtrace_astpc);
                      /* Move the pc by the distance from scrpc to astpc. */
  259                 rp->r_pc += t->t_dtrace_astpc - t->t_dtrace_scrpc;
  260                 t->t_dtrace_step = 1;
  261         }
  262 
  263         t->t_dtrace_ast = 1;
  264 
  265         return (1);
  266 }
  267 #endif
  268 
  /*
   * State used to convert TSC readings to nanoseconds.
   *   tgt_cpu_tsc / hst_cpu_tsc - TSC samples stored by
   *       dtrace_gethrtime_init_cpu() on the CPU named by its argument and
   *       on the other participating CPU, respectively.
   *   tsc_skew[]  - per-CPU value subtracted from the raw TSC in
   *       dtrace_gethrtime(); 0 for the CPU used as reference.
   *   nsec_scale  - (NANOSEC << SCALE_SHIFT) / TSC frequency.
   */
  269 static int64_t  tgt_cpu_tsc;
  270 static int64_t  hst_cpu_tsc;
  271 static int64_t  tsc_skew[MAXCPU];
  272 static uint64_t nsec_scale;
  273 
  274 /* See below for the explanation of this macro. */
  275 #define SCALE_SHIFT     28
  276 
  /*
   * Rendezvous action used while measuring TSC skew.
   *
   * arg carries a CPU id.  The CPU whose id matches stores its TSC reading
   * in tgt_cpu_tsc; any other CPU running this function stores its reading
   * in hst_cpu_tsc.
   */
  277 static void
  278 dtrace_gethrtime_init_cpu(void *arg)
  279 {
  280         uintptr_t cpu = (uintptr_t) arg;
  281 
  282         if (cpu == curcpu)
  283                 tgt_cpu_tsc = rdtsc();
  284         else
  285                 hst_cpu_tsc = rdtsc();
  286 }
  287 
  /*
   * Initialization of the TSC-to-nanosecond conversion.
   *
   * Two steps are performed: computing nsec_scale from the TSC frequency,
   * and measuring the TSC skew of every other CPU against the current one.
   * How they are packaged depends on EARLY_AP_STARTUP:
   *   - defined:     one function, dtrace_gethrtime_init(), does both and is
   *                  registered at SI_SUB_DTRACE;
   *   - not defined: dtrace_gethrtime_init_early() computes the scale at
   *                  SI_SUB_CPU and dtrace_gethrtime_init() measures the
   *                  skew at SI_SUB_SMP.
   * The preprocessor conditionals below splice the shared statements into
   * whichever function is being built.
   */
  288 #ifdef EARLY_AP_STARTUP
  289 static void
  290 dtrace_gethrtime_init(void *arg)
  291 {
  292         struct pcpu *pc;
  293         uint64_t tsc_f;
  294         cpuset_t map;
  295         int i;
  296 #else
  297 /*
  298  * Get the frequency and scale factor as early as possible so that they can be
  299  * used for boot-time tracing.
  300  */
  301 static void
  302 dtrace_gethrtime_init_early(void *arg)
  303 {
  304         uint64_t tsc_f;
  305 #endif
  306 
  307         /*
  308          * Get TSC frequency known at this moment.
  309          * This should be constant if TSC is invariant.
  310          * Otherwise tick->time conversion will be inaccurate, but
  311          * will preserve monotonic property of TSC.
  312          */
  313         tsc_f = atomic_load_acq_64(&tsc_freq);
  314 
  315         /*
  316          * The following line checks that nsec_scale calculated below
  317          * doesn't overflow 32-bit unsigned integer, so that it can multiply
  318          * another 32-bit integer without overflowing 64-bit.
  319          * Thus minimum supported TSC frequency is 62.5MHz.
  320          */
  321         KASSERT(tsc_f > (NANOSEC >> (32 - SCALE_SHIFT)),
  322             ("TSC frequency is too low"));
  323 
  324         /*
  325          * We scale up NANOSEC/tsc_f ratio to preserve as much precision
  326          * as possible.
  327          * 2^28 factor was chosen quite arbitrarily from practical
  328          * considerations:
  329          * - it supports TSC frequencies as low as 62.5MHz (see above);
  330          * - it provides quite good precision (e < 0.01%) up to THz
  331          *   (terahertz) values;
  332          */
  333         nsec_scale = ((uint64_t)NANOSEC << SCALE_SHIFT) / tsc_f;
  334 #ifndef EARLY_AP_STARTUP
  335 }
  336 SYSINIT(dtrace_gethrtime_init_early, SI_SUB_CPU, SI_ORDER_ANY,
  337     dtrace_gethrtime_init_early, NULL);
  338 
  339 static void
  340 dtrace_gethrtime_init(void *arg)
  341 {
  342         struct pcpu *pc;
  343         cpuset_t map;
  344         int i;
  345 #endif
  346 
              /*
               * Skew is not measured when vm_guest is anything other than
               * VM_GUEST_NO; tsc_skew[] then keeps its zero initial values.
               */
  347         if (vm_guest != VM_GUEST_NO)
  348                 return;
  349 
  350         /* The current CPU is the reference one. */
  351         sched_pin();
  352         tsc_skew[curcpu] = 0;
  353         CPU_FOREACH(i) {
  354                 if (i == curcpu)
  355                         continue;
  356 
                      /* Rendezvous only this CPU and CPU i. */
  357                 pc = pcpu_find(i);
  358                 CPU_SETOF(PCPU_GET(cpuid), &map);
  359                 CPU_SET(pc->pc_cpuid, &map);
  360 
  361                 smp_rendezvous_cpus(map, NULL,
  362                     dtrace_gethrtime_init_cpu,
  363                     smp_no_rendezvous_barrier, (void *)(uintptr_t) i);
  364 
                      /* CPU i's sample minus the reference CPU's sample. */
  365                 tsc_skew[i] = tgt_cpu_tsc - hst_cpu_tsc;
  366         }
  367         sched_unpin();
  368 }
  369 #ifdef EARLY_AP_STARTUP
  370 SYSINIT(dtrace_gethrtime_init, SI_SUB_DTRACE, SI_ORDER_ANY,
  371     dtrace_gethrtime_init, NULL);
  372 #else
  373 SYSINIT(dtrace_gethrtime_init, SI_SUB_SMP, SI_ORDER_ANY, dtrace_gethrtime_init,
  374     NULL);
  375 #endif
  376 
  377 /*
  378  * DTrace needs a high resolution time function which can
  379  * be called from a probe context and is guaranteed not to be
  380  * instrumented with probes itself.
  381  *
  382  * Returns nanoseconds since boot.
  383  */
  384 uint64_t
  385 dtrace_gethrtime(void)
  386 {
  387         uint64_t tsc;
  388         uint32_t lo, hi;
  389         register_t rflags;
  390 
  391         /*
  392          * We split TSC value into lower and higher 32-bit halves and separately
  393          * scale them with nsec_scale, then we scale them down by 2^28
  394          * (see nsec_scale calculations) taking into account 32-bit shift of
  395          * the higher half and finally add.
  396          */
              /*
               * Interrupts are off while the TSC and this CPU's skew are read;
               * presumably so that both come from the same CPU -- confirm.
               */
  397         rflags = intr_disable();
  398         tsc = rdtsc() - tsc_skew[curcpu];
  399         intr_restore(rflags);
  400 
  401         lo = tsc;
  402         hi = tsc >> 32;
  403         return (((lo * nsec_scale) >> SCALE_SHIFT) +
  404             ((hi * nsec_scale) << (32 - SCALE_SHIFT)));
  405 }
  406 
  /*
   * Return the time reported by dtrace_getnanotime() as a single count of
   * nanoseconds (tv_sec * 10^9 + tv_nsec).
   */
  407 uint64_t
  408 dtrace_gethrestime(void)
  409 {
  410         struct timespec current_time;
  411 
  412         dtrace_getnanotime(&current_time);
  413 
  414         return (current_time.tv_sec * 1000000000ULL + current_time.tv_nsec);
  415 }
  416 
  417 /* Function to handle DTrace traps during probes. See amd64/amd64/trap.c. */
  /*
   * frame - trap frame of the faulting context; tf_rip is advanced past the
   *         faulting instruction when the trap is absorbed.
   * type  - trap type (T_PROTFLT and T_PAGEFLT are the ones handled here).
   *
   * Returns 1 when the current CPU is in DTrace no-fault mode and the trap
   * was one of the handled types: the fault is recorded in the per-CPU
   * DTrace flags and execution resumes at the next instruction.  Returns 0
   * in every other case so that the trap is handled in the usual way.
   */
  418 int
  419 dtrace_trap(struct trapframe *frame, u_int type)
  420 {
  421         uint16_t nofault;
  422 
  423         /*
  424          * A trap can occur while DTrace executes a probe. Before
  425          * executing the probe, DTrace blocks re-scheduling and sets
  426          * a flag in its per-cpu flags to indicate that it doesn't
  427          * want to fault. On returning from the probe, the no-fault
  428          * flag is cleared and finally re-scheduling is enabled.
  429          *
  430          * Check if DTrace has enabled 'no-fault' mode:
  431          */
  432         sched_pin();
  433         nofault = cpu_core[curcpu].cpuc_dtrace_flags & CPU_DTRACE_NOFAULT;
  434         sched_unpin();
  435         if (nofault) {
  436                 KASSERT((read_rflags() & PSL_I) == 0, ("interrupts enabled"));
  437 
  438                 /*
  439                  * There are only a couple of trap types that are expected.
  440                  * All the rest will be handled in the usual way.
  441                  */
  442                 switch (type) {
  443                 /* General protection fault. */
  444                 case T_PROTFLT:
  445                         /* Flag an illegal operation. */
  446                         cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
  447 
  448                         /*
  449                          * Offset the instruction pointer to the instruction
  450                          * following the one causing the fault.
  451                          */
  452                         frame->tf_rip += dtrace_instr_size((u_char *) frame->tf_rip);
  453                         return (1);
  454                 /* Page fault. */
  455                 case T_PAGEFLT:
  456                         /* Flag a bad address. */
  457                         cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_BADADDR;
                              /* Record the faulting address taken from the frame. */
  458                         cpu_core[curcpu].cpuc_dtrace_illval = frame->tf_addr;
  459 
  460                         /*
  461                          * Offset the instruction pointer to the instruction
  462                          * following the one causing the fault.
  463                          */
  464                         frame->tf_rip += dtrace_instr_size((u_char *) frame->tf_rip);
  465                         return (1);
  466                 default:
  467                         /* Handle all other traps in the usual way. */
  468                         break;
  469                 }
  470         }
  471 
  472         /* Handle the trap in the usual way. */
  473         return (0);
  474 }

Cache object: 3a3bf3f4196b23d1dcaf15b22bae370a


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.