FreeBSD/Linux Kernel Cross Reference
sys/kern/subr_prof.c
1 /* $NetBSD: subr_prof.c,v 1.50 2021/08/14 17:51:20 ryo Exp $ */
2
3 /*-
4 * Copyright (c) 1982, 1986, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 * @(#)subr_prof.c 8.4 (Berkeley) 2/14/95
32 */
33
34 #include <sys/cdefs.h>
35 __KERNEL_RCSID(0, "$NetBSD: subr_prof.c,v 1.50 2021/08/14 17:51:20 ryo Exp $");
36
37 #ifdef _KERNEL_OPT
38 #include "opt_gprof.h"
39 #include "opt_multiprocessor.h"
40 #endif
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/proc.h>
46 #include <sys/mount.h>
47 #include <sys/syscallargs.h>
48 #include <sys/sysctl.h>
49
50 #include <sys/cpu.h>
51
52 #ifdef GPROF
53 #include <sys/malloc.h>
54 #include <sys/gmon.h>
55 #include <sys/xcall.h>
56
57 MALLOC_DEFINE(M_GPROF, "gprof", "kernel profiling buffer");
58
59 static int sysctl_kern_profiling(SYSCTLFN_ARGS);
60 #ifdef MULTIPROCESSOR
61 void _gmonparam_merge(struct gmonparam *, struct gmonparam *);
62 #endif
63
64 /*
65 * Froms is actually a bunch of unsigned shorts indexing tos
66 */
67 struct gmonparam _gmonparam = { .state = GMON_PROF_OFF };
68
69 /* Actual start of the kernel text segment. */
70 extern char kernel_text[];
71
72 extern char etext[];
73
74
/*
 * kmstartup --
 *	Compute the kernel profiling parameters from the kernel text
 *	bounds and allocate the profiling buffers.  On MULTIPROCESSOR
 *	kernels each CPU gets a private gmonparam + buffer allocation,
 *	published under the kern.profiling.percpu.<cpu> sysctl subtree;
 *	otherwise a single buffer backs the global _gmonparam.
 */
void
kmstartup(void)
{
	char *cp;
	struct gmonparam *p = &_gmonparam;
	unsigned long size;
	/*
	 * Round lowpc and highpc to multiples of the density we're using
	 * so the rest of the scaling (here and in gprof) stays in ints.
	 */
	p->lowpc = rounddown(((u_long)kernel_text),
	    HISTFRACTION * sizeof(HISTCOUNTER));
	p->highpc = roundup((u_long)etext,
	    HISTFRACTION * sizeof(HISTCOUNTER));
	p->textsize = p->highpc - p->lowpc;
	printf("Profiling kernel, textsize=%ld [%lx..%lx]\n",
	    p->textsize, p->lowpc, p->highpc);
	p->kcountsize = p->textsize / HISTFRACTION;
	p->hashfraction = HASHFRACTION;
	p->fromssize = p->textsize / HASHFRACTION;
	/* Size the call-arc (tos) table, clamped to [MINARCS, MAXARCS]. */
	p->tolimit = p->textsize * ARCDENSITY / 100;
	if (p->tolimit < MINARCS)
		p->tolimit = MINARCS;
	else if (p->tolimit > MAXARCS)
		p->tolimit = MAXARCS;
	p->tossize = p->tolimit * sizeof(struct tostruct);

	/* Combined size of the three arrays: tos, kcount and froms. */
	size = p->kcountsize + p->fromssize + p->tossize;
#ifdef MULTIPROCESSOR
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	for (CPU_INFO_FOREACH(cii, ci)) {
		/*
		 * One allocation per CPU: a gmonparam header immediately
		 * followed by that CPU's tos/kcount/froms arrays.
		 */
		p = malloc(sizeof(struct gmonparam) + size, M_GPROF,
		    M_NOWAIT | M_ZERO);
		if (p == NULL) {
			printf("No memory for profiling on %s\n",
			    cpu_name(ci));
			/* cannot profile on this cpu */
			continue;
		}
		memcpy(p, &_gmonparam, sizeof(_gmonparam));
		ci->ci_gmon = p;

		/*
		 * To allow profiling to be controlled only by the global
		 * _gmonparam.state, set the default value for each CPU to
		 * GMON_PROF_ON. If _gmonparam.state is not ON, mcount will
		 * not be executed.
		 * This is for compatibility of the kgmon(8) kmem interface.
		 */
		p->state = GMON_PROF_ON;

		/* Carve the trailing buffer into the three arrays. */
		cp = (char *)(p + 1);
		p->tos = (struct tostruct *)cp;
		p->kcount = (u_short *)(cp + p->tossize);
		p->froms = (u_short *)(cp + p->tossize + p->kcountsize);
	}

	/* Parent node for the per-CPU profiling sysctl subtree. */
	sysctl_createv(NULL, 0, NULL, NULL,
	    0, CTLTYPE_NODE, "percpu",
	    SYSCTL_DESCR("per cpu profiling information"),
	    NULL, 0, NULL, 0,
	    CTL_KERN, KERN_PROF, GPROF_PERCPU, CTL_EOL);

	for (CPU_INFO_FOREACH(cii, ci)) {
		if (ci->ci_gmon == NULL)
			continue;

		/*
		 * kern.profiling.percpu.<cpuname>.{state,count,froms,
		 * tos,gmonparam}; each leaf passes its cpu_info to
		 * sysctl_kern_profiling() via sysctl_data.
		 */
		sysctl_createv(NULL, 0, NULL, NULL,
		    0, CTLTYPE_NODE, cpu_name(ci),
		    NULL,
		    NULL, 0, NULL, 0,
		    CTL_KERN, KERN_PROF, GPROF_PERCPU, cpu_index(ci), CTL_EOL);

		sysctl_createv(NULL, 0, NULL, NULL,
		    CTLFLAG_READWRITE, CTLTYPE_INT, "state",
		    SYSCTL_DESCR("Profiling state"),
		    sysctl_kern_profiling, 0, (void *)ci, 0,
		    CTL_KERN, KERN_PROF, GPROF_PERCPU, cpu_index(ci),
		    GPROF_STATE, CTL_EOL);
		sysctl_createv(NULL, 0, NULL, NULL,
		    CTLFLAG_READWRITE, CTLTYPE_STRUCT, "count",
		    SYSCTL_DESCR("Array of statistical program counters"),
		    sysctl_kern_profiling, 0, (void *)ci, 0,
		    CTL_KERN, KERN_PROF, GPROF_PERCPU, cpu_index(ci),
		    GPROF_COUNT, CTL_EOL);
		sysctl_createv(NULL, 0, NULL, NULL,
		    CTLFLAG_READWRITE, CTLTYPE_STRUCT, "froms",
		    SYSCTL_DESCR("Array indexed by program counter of "
		    "call-from points"),
		    sysctl_kern_profiling, 0, (void *)ci, 0,
		    CTL_KERN, KERN_PROF, GPROF_PERCPU, cpu_index(ci),
		    GPROF_FROMS, CTL_EOL);
		sysctl_createv(NULL, 0, NULL, NULL,
		    CTLFLAG_READWRITE, CTLTYPE_STRUCT, "tos",
		    SYSCTL_DESCR("Array of structures describing "
		    "destination of calls and their counts"),
		    sysctl_kern_profiling, 0, (void *)ci, 0,
		    CTL_KERN, KERN_PROF, GPROF_PERCPU, cpu_index(ci),
		    GPROF_TOS, CTL_EOL);
		sysctl_createv(NULL, 0, NULL, NULL,
		    CTLFLAG_READWRITE, CTLTYPE_STRUCT, "gmonparam",
		    SYSCTL_DESCR("Structure giving the sizes of the above "
		    "arrays"),
		    sysctl_kern_profiling, 0, (void *)ci, 0,
		    CTL_KERN, KERN_PROF, GPROF_PERCPU, cpu_index(ci),
		    GPROF_GMONPARAM, CTL_EOL);
	}

	/*
	 * For minimal compatibility of the kgmon(8) kmem interface,
	 * the _gmonparam and cpu0:ci_gmon share buffers.
	 */
	p = curcpu()->ci_gmon;
	if (p != NULL) {
		_gmonparam.tos = p->tos;
		_gmonparam.kcount = p->kcount;
		_gmonparam.froms = p->froms;
	}
#else /* MULTIPROCESSOR */
	/* Uniprocessor: one buffer backing the global _gmonparam. */
	cp = malloc(size, M_GPROF, M_NOWAIT | M_ZERO);
	if (cp == 0) {
		printf("No memory for profiling.\n");
		return;
	}
	/* Same layout as the MP case: tos, then kcount, then froms. */
	p->tos = (struct tostruct *)cp;
	cp += p->tossize;
	p->kcount = (u_short *)cp;
	cp += p->kcountsize;
	p->froms = (u_short *)cp;
#endif /* MULTIPROCESSOR */
}
207
208 #ifdef MULTIPROCESSOR
209 static void
210 prof_set_state_xc(void *arg1, void *arg2 __unused)
211 {
212 int state = PTRTOUINT64(arg1);
213 struct gmonparam *gp = curcpu()->ci_gmon;
214
215 if (gp != NULL)
216 gp->state = state;
217 }
218 #endif /* MULTIPROCESSOR */
219
/*
 * sysctl helper routine for the kern.profiling subtree: enables/disables
 * kernel profiling and gives out copies of the profiling data.
 *
 * Serves both the global kern.profiling.* nodes (sysctl_data == NULL)
 * and, on MULTIPROCESSOR kernels, the per-CPU
 * kern.profiling.percpu.<cpu>.* nodes (sysctl_data == that CPU's
 * cpu_info, installed by kmstartup()).
 */
static int
sysctl_kern_profiling(SYSCTLFN_ARGS)
{
	struct sysctlnode node = *rnode;
	struct gmonparam *gp;
	int error;
#ifdef MULTIPROCESSOR
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci, *target_ci;
	uint64_t where;
	int state;
	bool prof_on, do_merge;

	/* target_ci == NULL means a global (non-percpu) node was accessed. */
	target_ci = (struct cpu_info *)rnode->sysctl_data;
	do_merge = (oldp != NULL) && (target_ci == NULL) &&
	    ((node.sysctl_num == GPROF_COUNT) ||
	    (node.sysctl_num == GPROF_FROMS) ||
	    (node.sysctl_num == GPROF_TOS));

	if (do_merge) {
		/* kern.profiling.{count,froms,tos} */
		unsigned long size;
		char *cp;

		/* allocate temporary gmonparam, and merge results of all CPU */
		size = _gmonparam.kcountsize + _gmonparam.fromssize +
		    _gmonparam.tossize;
		gp = malloc(sizeof(struct gmonparam) + size, M_GPROF,
		    M_NOWAIT | M_ZERO);
		if (gp == NULL)
			return ENOMEM;
		memcpy(gp, &_gmonparam, sizeof(_gmonparam));
		/* Same layout as the per-CPU buffers in kmstartup(). */
		cp = (char *)(gp + 1);
		gp->tos = (struct tostruct *)cp;
		gp->kcount = (u_short *)(cp + gp->tossize);
		gp->froms = (u_short *)(cp + gp->tossize + gp->kcountsize);

		for (CPU_INFO_FOREACH(cii, ci)) {
			if (ci->ci_gmon == NULL)
				continue;
			_gmonparam_merge(gp, ci->ci_gmon);
		}
	} else if (target_ci != NULL) {
		/* kern.profiling.percpu.* */
		gp = target_ci->ci_gmon;
	} else {
		/* kern.profiling.{state,gmonparam} */
		gp = &_gmonparam;
	}
#else /* MULTIPROCESSOR */
	gp = &_gmonparam;
#endif

	/* Point the node at the data selected above. */
	switch (node.sysctl_num) {
	case GPROF_STATE:
#ifdef MULTIPROCESSOR
		/*
		 * if _gmonparam.state is OFF, the state of each CPU is
		 * considered to be OFF, even if it is actually ON.
		 */
		if (_gmonparam.state == GMON_PROF_OFF ||
		    gp->state == GMON_PROF_OFF)
			state = GMON_PROF_OFF;
		else
			state = GMON_PROF_ON;
		node.sysctl_data = &state;
#else
		node.sysctl_data = &gp->state;
#endif
		break;
	case GPROF_COUNT:
		node.sysctl_data = gp->kcount;
		node.sysctl_size = gp->kcountsize;
		break;
	case GPROF_FROMS:
		node.sysctl_data = gp->froms;
		node.sysctl_size = gp->fromssize;
		break;
	case GPROF_TOS:
		node.sysctl_data = gp->tos;
		node.sysctl_size = gp->tossize;
		break;
	case GPROF_GMONPARAM:
		node.sysctl_data = gp;
		node.sysctl_size = sizeof(*gp);
		break;
	default:
		return (EOPNOTSUPP);
	}

	/* Copy out the old value and/or copy in a new one. */
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		goto done;

	/* A new value was written; propagate its effects. */
#ifdef MULTIPROCESSOR
	switch (node.sysctl_num) {
	case GPROF_STATE:
		if (target_ci != NULL) {
			/* Flip one CPU's state via a cross-call. */
			where = xc_unicast(0, prof_set_state_xc,
			    UINT64TOPTR(state), NULL, target_ci);
			xc_wait(where);

			/* if even one CPU being profiled, enable perfclock. */
			prof_on = false;
			for (CPU_INFO_FOREACH(cii, ci)) {
				if (ci->ci_gmon == NULL)
					continue;
				if (ci->ci_gmon->state != GMON_PROF_OFF) {
					prof_on = true;
					break;
				}
			}
			mutex_spin_enter(&proc0.p_stmutex);
			if (prof_on)
				startprofclock(&proc0);
			else
				stopprofclock(&proc0);
			mutex_spin_exit(&proc0.p_stmutex);

			if (prof_on) {
				_gmonparam.state = GMON_PROF_ON;
			} else {
				_gmonparam.state = GMON_PROF_OFF;
				/*
				 * when _gmonparam.state and all CPU gmon state
				 * are OFF, all CPU states should be ON so that
				 * the entire CPUs profiling can be controlled
				 * by _gmonparam.state only.
				 */
				for (CPU_INFO_FOREACH(cii, ci)) {
					if (ci->ci_gmon == NULL)
						continue;
					ci->ci_gmon->state = GMON_PROF_ON;
				}
			}
		} else {
			/* Global write: broadcast the state to all CPUs. */
			_gmonparam.state = state;
			where = xc_broadcast(0, prof_set_state_xc,
			    UINT64TOPTR(state), NULL);
			xc_wait(where);

			mutex_spin_enter(&proc0.p_stmutex);
			if (state == GMON_PROF_OFF)
				stopprofclock(&proc0);
			else
				startprofclock(&proc0);
			mutex_spin_exit(&proc0.p_stmutex);
		}
		break;
	case GPROF_COUNT:
		/*
		 * if 'kern.profiling.{count,froms,tos}' is written, the same
		 * data will be written to 'kern.profiling.percpu.cpuN.xxx'
		 */
		if (target_ci == NULL) {
			for (CPU_INFO_FOREACH(cii, ci)) {
				if (ci->ci_gmon == NULL)
					continue;
				memmove(ci->ci_gmon->kcount, gp->kcount,
				    newlen);
			}
		}
		break;
	case GPROF_FROMS:
		if (target_ci == NULL) {
			for (CPU_INFO_FOREACH(cii, ci)) {
				if (ci->ci_gmon == NULL)
					continue;
				memmove(ci->ci_gmon->froms, gp->froms, newlen);
			}
		}
		break;
	case GPROF_TOS:
		if (target_ci == NULL) {
			for (CPU_INFO_FOREACH(cii, ci)) {
				if (ci->ci_gmon == NULL)
					continue;
				memmove(ci->ci_gmon->tos, gp->tos, newlen);
			}
		}
		break;
	}
#else
	if (node.sysctl_num == GPROF_STATE) {
		mutex_spin_enter(&proc0.p_stmutex);
		if (gp->state == GMON_PROF_OFF)
			stopprofclock(&proc0);
		else
			startprofclock(&proc0);
		mutex_spin_exit(&proc0.p_stmutex);
	}
#endif

done:
#ifdef MULTIPROCESSOR
	/* Release the temporary merge buffer, if one was allocated. */
	if (do_merge)
		free(gp, M_GPROF);
#endif
	return error;
}
427
/*
 * Create the static (non-percpu) kern.profiling sysctl nodes:
 * state, count, froms, tos and gmonparam.  All leaves are handled by
 * sysctl_kern_profiling() with a NULL sysctl_data, i.e. they refer to
 * the global _gmonparam.
 */
SYSCTL_SETUP(sysctl_kern_gprof_setup, "sysctl kern.profiling subtree setup")
{

	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "profiling",
		       SYSCTL_DESCR("Profiling information (available)"),
		       NULL, 0, NULL, 0,
		       CTL_KERN, KERN_PROF, CTL_EOL);

	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "state",
		       SYSCTL_DESCR("Profiling state"),
		       sysctl_kern_profiling, 0, NULL, 0,
		       CTL_KERN, KERN_PROF, GPROF_STATE, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_STRUCT, "count",
		       SYSCTL_DESCR("Array of statistical program counters"),
		       sysctl_kern_profiling, 0, NULL, 0,
		       CTL_KERN, KERN_PROF, GPROF_COUNT, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_STRUCT, "froms",
		       SYSCTL_DESCR("Array indexed by program counter of "
				    "call-from points"),
		       sysctl_kern_profiling, 0, NULL, 0,
		       CTL_KERN, KERN_PROF, GPROF_FROMS, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_STRUCT, "tos",
		       SYSCTL_DESCR("Array of structures describing "
				    "destination of calls and their counts"),
		       sysctl_kern_profiling, 0, NULL, 0,
		       CTL_KERN, KERN_PROF, GPROF_TOS, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_STRUCT, "gmonparam",
		       SYSCTL_DESCR("Structure giving the sizes of the above "
				    "arrays"),
		       sysctl_kern_profiling, 0, NULL, 0,
		       CTL_KERN, KERN_PROF, GPROF_GMONPARAM, CTL_EOL);
}
472 #endif /* GPROF */
473
474 /*
475 * Profiling system call.
476 *
477 * The scale factor is a fixed point number with 16 bits of fraction, so that
478 * 1.0 is represented as 0x10000. A scale factor of 0 turns off profiling.
479 */
480 /* ARGSUSED */
481 int
482 sys_profil(struct lwp *l, const struct sys_profil_args *uap, register_t *retval)
483 {
484 /* {
485 syscallarg(char *) samples;
486 syscallarg(size_t) size;
487 syscallarg(u_long) offset;
488 syscallarg(u_int) scale;
489 } */
490 struct proc *p = l->l_proc;
491 struct uprof *upp;
492
493 if (SCARG(uap, scale) > (1 << 16))
494 return (EINVAL);
495 if (SCARG(uap, scale) == 0) {
496 mutex_spin_enter(&p->p_stmutex);
497 stopprofclock(p);
498 mutex_spin_exit(&p->p_stmutex);
499 return (0);
500 }
501 upp = &p->p_stats->p_prof;
502
503 /* Block profile interrupts while changing state. */
504 mutex_spin_enter(&p->p_stmutex);
505 upp->pr_off = SCARG(uap, offset);
506 upp->pr_scale = SCARG(uap, scale);
507 upp->pr_base = SCARG(uap, samples);
508 upp->pr_size = SCARG(uap, size);
509 startprofclock(p);
510 mutex_spin_exit(&p->p_stmutex);
511
512 return (0);
513 }
514
515 /*
516 * Scale is a fixed-point number with the binary point 16 bits
517 * into the value, and is <= 1.0. pc is at most 32 bits, so the
518 * intermediate result is at most 48 bits.
519 */
520 #define PC_TO_INDEX(pc, prof) \
521 ((int)(((u_quad_t)((pc) - (prof)->pr_off) * \
522 (u_quad_t)((prof)->pr_scale)) >> 16) & ~1)
523
524 /*
525 * Collect user-level profiling statistics; called on a profiling tick,
526 * when a process is running in user-mode. This routine may be called
527 * from an interrupt context. We schedule an AST that will vector us
528 * to trap() with a context in which copyin and copyout will work.
529 * Trap will then call addupc_task().
530 *
531 * XXX We could use ufetch/ustore here if the profile buffers were
532 * wired.
533 *
534 * Note that we may (rarely) not get around to the AST soon enough, and
535 * lose profile ticks when the next tick overwrites this one, but in this
536 * case the system is overloaded and the profile is probably already
537 * inaccurate.
538 */
/*
 * Record one profiling tick at pc for the current process.
 * Called with p_stmutex held; the pc is first range-checked against
 * the process's profile window.
 */
void
addupc_intr(struct lwp *l, u_long pc)
{
	struct uprof *prof;
	struct proc *p;
	u_int i;

	p = l->l_proc;

	KASSERT(mutex_owned(&p->p_stmutex));

	prof = &p->p_stats->p_prof;
	if (pc < prof->pr_off ||
	    (i = PC_TO_INDEX(pc, prof)) >= prof->pr_size)
		return;			/* out of range; ignore */

	/*
	 * pr_addr/pr_ticks are updated with the stat mutex dropped
	 * (see XXXSMP below) — unlocked by design; a concurrent tick
	 * may overwrite this one, as the leading comment explains.
	 */
	mutex_spin_exit(&p->p_stmutex);

	/* XXXSMP */
	prof->pr_addr = pc;
	prof->pr_ticks++;
	/* Schedule the AST that will deliver the tick via addupc_task(). */
	cpu_need_proftick(l);

	mutex_spin_enter(&p->p_stmutex);
}
564
565 /*
566 * Much like before, but we can afford to take faults here. If the
567 * update fails, we simply turn off profiling.
568 */
569 void
570 addupc_task(struct lwp *l, u_long pc, u_int ticks)
571 {
572 struct uprof *prof;
573 struct proc *p;
574 void *addr;
575 int error;
576 u_int i;
577 u_short v;
578
579 p = l->l_proc;
580
581 if (ticks == 0)
582 return;
583
584 mutex_spin_enter(&p->p_stmutex);
585 prof = &p->p_stats->p_prof;
586
587 /* Testing P_PROFIL may be unnecessary, but is certainly safe. */
588 if ((p->p_stflag & PST_PROFIL) == 0 || pc < prof->pr_off ||
589 (i = PC_TO_INDEX(pc, prof)) >= prof->pr_size) {
590 mutex_spin_exit(&p->p_stmutex);
591 return;
592 }
593
594 addr = prof->pr_base + i;
595 mutex_spin_exit(&p->p_stmutex);
596 if ((error = copyin(addr, (void *)&v, sizeof(v))) == 0) {
597 v += ticks;
598 error = copyout((void *)&v, addr, sizeof(v));
599 }
600 if (error != 0) {
601 mutex_spin_enter(&p->p_stmutex);
602 stopprofclock(p);
603 mutex_spin_exit(&p->p_stmutex);
604 }
605 }
Cache object: c63681517233e282981ad8926e99509a
|