1 /* $OpenBSD: kern_ktrace.c,v 1.109 2022/12/05 23:18:37 deraadt Exp $ */
2 /* $NetBSD: kern_ktrace.c,v 1.23 1996/02/09 18:59:36 christos Exp $ */
3
4 /*
5 * Copyright (c) 1989, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)kern_ktrace.c 8.2 (Berkeley) 9/23/93
33 */
34
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/proc.h>
38 #include <sys/sched.h>
39 #include <sys/fcntl.h>
40 #include <sys/namei.h>
41 #include <sys/vnode.h>
42 #include <sys/lock.h>
43 #include <sys/ktrace.h>
44 #include <sys/malloc.h>
45 #include <sys/syslog.h>
46 #include <sys/sysctl.h>
47 #include <sys/pledge.h>
48
49 #include <sys/mount.h>
50 #include <sys/syscall.h>
51 #include <sys/syscallargs.h>
52
/*
 * Forward declarations for the local helper routines below.
 */
void	ktrinitheaderraw(struct ktr_header *, uint, pid_t, pid_t);
void	ktrinitheader(struct ktr_header *, struct proc *, int);
int	ktrstart(struct proc *, struct vnode *, struct ucred *);
int	ktrops(struct proc *, struct process *, int, int, struct vnode *,
	    struct ucred *);
int	ktrsetchildren(struct proc *, struct process *, int, int,
	    struct vnode *, struct ucred *);
int	ktrwrite(struct proc *, struct ktr_header *, const void *, size_t);
int	ktrwrite2(struct proc *, struct ktr_header *, const void *, size_t,
	    const void *, size_t);
int	ktrwriteraw(struct proc *, struct vnode *, struct ucred *,
	    struct ktr_header *, struct iovec *);
int	ktrcanset(struct proc *, struct process *);
66
/*
 * Clear the trace settings in a correct way (to avoid races).
 *
 * The process' trace state (flag, vnode, credentials) is detached
 * before the vnode write count is dropped and the references are
 * released, so nothing can observe a non-NULL ps_tracevp whose
 * references are already gone.
 */
void
ktrcleartrace(struct process *pr)
{
	struct vnode *vp;
	struct ucred *cred;

	if (pr->ps_tracevp != NULL) {
		vp = pr->ps_tracevp;
		cred = pr->ps_tracecred;

		/* detach the trace state from the process first */
		pr->ps_traceflag = 0;
		pr->ps_tracevp = NULL;
		pr->ps_tracecred = NULL;

		/* then release the write count and the references */
		vp->v_writecount--;
		vrele(vp);
		crfree(cred);
	}
}
89
/*
 * Change the trace setting in a correct way (to avoid races).
 *
 * References on the new vnode and credentials are taken (and the
 * vnode's write count bumped) before the old ones are dropped, so
 * the process always points at fully-referenced trace state.
 */
void
ktrsettrace(struct process *pr, int facs, struct vnode *newvp,
    struct ucred *newcred)
{
	struct vnode *oldvp;
	struct ucred *oldcred;

	KASSERT(newvp != NULL);
	KASSERT(newcred != NULL);

	pr->ps_traceflag |= facs;

	/* nothing to change about where the trace goes? */
	if (pr->ps_tracevp == newvp && pr->ps_tracecred == newcred)
		return;

	/* take the new references before dropping the old ones */
	vref(newvp);
	crhold(newcred);
	newvp->v_writecount++;

	oldvp = pr->ps_tracevp;
	oldcred = pr->ps_tracecred;

	pr->ps_tracevp = newvp;
	pr->ps_tracecred = newcred;

	if (oldvp != NULL) {
		oldvp->v_writecount--;
		vrele(oldvp);
		crfree(oldcred);
	}
}
125
126 void
127 ktrinitheaderraw(struct ktr_header *kth, uint type, pid_t pid, pid_t tid)
128 {
129 memset(kth, 0, sizeof(struct ktr_header));
130 kth->ktr_type = type;
131 nanotime(&kth->ktr_time);
132 kth->ktr_pid = pid;
133 kth->ktr_tid = tid;
134 }
135
136 void
137 ktrinitheader(struct ktr_header *kth, struct proc *p, int type)
138 {
139 struct process *pr = p->p_p;
140
141 ktrinitheaderraw(kth, type, pr->ps_pid, p->p_tid + THREAD_PID_OFFSET);
142 memcpy(kth->ktr_comm, pr->ps_comm, sizeof(kth->ktr_comm));
143 }
144
145 int
146 ktrstart(struct proc *p, struct vnode *vp, struct ucred *cred)
147 {
148 struct ktr_header kth;
149
150 ktrinitheaderraw(&kth, htobe32(KTR_START), -1, -1);
151 return (ktrwriteraw(p, vp, cred, &kth, NULL));
152 }
153
/*
 * Record a system call entry: the syscall code, its register
 * arguments and, for sysctl(2), a copy of the user's mib[] array.
 */
void
ktrsyscall(struct proc *p, register_t code, size_t argsize, register_t args[])
{
	struct ktr_header kth;
	struct ktr_syscall *ktp;
	size_t len = sizeof(struct ktr_syscall) + argsize;
	register_t *argp;
	u_int nargs = 0;
	int i;

	if (code == SYS_sysctl) {
		/*
		 * The sysctl encoding stores the mib[]
		 * array because it is interesting.
		 */
		if (args[1] > 0)
			nargs = lmin(args[1], CTL_MAXNAME);
		len += nargs * sizeof(int);
	}
	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_SYSCALL);
	ktp = malloc(len, M_TEMP, M_WAITOK);
	ktp->ktr_code = code;
	ktp->ktr_argsize = argsize;
	/* the register arguments follow the fixed ktr_syscall header */
	argp = (register_t *)((char *)ktp + sizeof(struct ktr_syscall));
	for (i = 0; i < (argsize / sizeof *argp); i++)
		*argp++ = args[i];
	/* args[0] is the user pointer to the mib; zero the copy on fault */
	if (nargs && copyin((void *)args[0], argp, nargs * sizeof(int)))
		memset(argp, 0, nargs * sizeof(int));
	ktrwrite(p, &kth, ktp, len);
	free(ktp, M_TEMP, len);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}
187
188 void
189 ktrsysret(struct proc *p, register_t code, int error,
190 const register_t retval[2])
191 {
192 struct ktr_header kth;
193 struct ktr_sysret ktp;
194 int len;
195
196 atomic_setbits_int(&p->p_flag, P_INKTR);
197 ktrinitheader(&kth, p, KTR_SYSRET);
198 ktp.ktr_code = code;
199 ktp.ktr_error = error;
200 if (error)
201 len = 0;
202 else if (code == SYS_lseek)
203 /* the one exception: lseek on ILP32 needs more */
204 len = sizeof(long long);
205 else
206 len = sizeof(register_t);
207 ktrwrite2(p, &kth, &ktp, sizeof(ktp), retval, len);
208 atomic_clearbits_int(&p->p_flag, P_INKTR);
209 }
210
211 void
212 ktrnamei(struct proc *p, char *path)
213 {
214 struct ktr_header kth;
215
216 atomic_setbits_int(&p->p_flag, P_INKTR);
217 ktrinitheader(&kth, p, KTR_NAMEI);
218 ktrwrite(p, &kth, path, strlen(path));
219 atomic_clearbits_int(&p->p_flag, P_INKTR);
220 }
221
/*
 * Record the data moved by a generic I/O operation on a descriptor.
 * The payload is copied from the user iovecs in bounded chunks, each
 * written as its own KTR_GENIO record.
 */
void
ktrgenio(struct proc *p, int fd, enum uio_rw rw, struct iovec *iov,
    ssize_t len)
{
	struct ktr_header kth;
	struct ktr_genio ktp;
	caddr_t cp;
	int count, error;
	int buflen;

	atomic_setbits_int(&p->p_flag, P_INKTR);

	/* beware overflow */
	if (len > PAGE_SIZE)
		buflen = PAGE_SIZE;
	else
		buflen = len + sizeof(struct ktr_genio);

	ktrinitheader(&kth, p, KTR_GENIO);
	ktp.ktr_fd = fd;
	ktp.ktr_rw = rw;

	cp = malloc(buflen, M_TEMP, M_WAITOK);
	while (len > 0) {
		/*
		 * Don't allow this process to hog the cpu when doing
		 * huge I/O.
		 */
		sched_pause(preempt);

		/* take at most one iovec's worth, bounded by the buffer */
		count = lmin(iov->iov_len, buflen);
		if (count > len)
			count = len;
		/* give up on the rest if the user memory faults */
		if (copyin(iov->iov_base, cp, count))
			break;

		KERNEL_LOCK();
		error = ktrwrite2(p, &kth, &ktp, sizeof(ktp), cp, count);
		KERNEL_UNLOCK();
		if (error != 0)
			break;

		/* advance within the current iovec ... */
		iov->iov_len -= count;
		iov->iov_base = (caddr_t)iov->iov_base + count;

		/* ... and on to the next one once it is consumed */
		if (iov->iov_len == 0)
			iov++;

		len -= count;
	}

	free(cp, M_TEMP, buflen);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}
276
277 void
278 ktrpsig(struct proc *p, int sig, sig_t action, int mask, int code,
279 siginfo_t *si)
280 {
281 struct ktr_header kth;
282 struct ktr_psig kp;
283
284 atomic_setbits_int(&p->p_flag, P_INKTR);
285 ktrinitheader(&kth, p, KTR_PSIG);
286 kp.signo = (char)sig;
287 kp.action = action;
288 kp.mask = mask;
289 kp.code = code;
290 kp.si = *si;
291
292 KERNEL_LOCK();
293 ktrwrite(p, &kth, &kp, sizeof(kp));
294 KERNEL_UNLOCK();
295 atomic_clearbits_int(&p->p_flag, P_INKTR);
296 }
297
298 void
299 ktrstruct(struct proc *p, const char *name, const void *data, size_t datalen)
300 {
301 struct ktr_header kth;
302
303 atomic_setbits_int(&p->p_flag, P_INKTR);
304 ktrinitheader(&kth, p, KTR_STRUCT);
305
306 if (data == NULL)
307 datalen = 0;
308 KERNEL_LOCK();
309 ktrwrite2(p, &kth, name, strlen(name) + 1, data, datalen);
310 KERNEL_UNLOCK();
311 atomic_clearbits_int(&p->p_flag, P_INKTR);
312 }
313
/*
 * Record a user-supplied trace point: an identifier string copied in
 * from user space plus up to KTR_USER_MAXLEN bytes of user data.
 *
 * Returns 0 on success (or if KTR_USER tracing is off), EINVAL if the
 * data is too long, or the error from copying the id/data in.
 */
int
ktruser(struct proc *p, const char *id, const void *addr, size_t len)
{
	struct ktr_header kth;
	struct ktr_user ktp;
	int error;
	void *memp;
#define STK_PARAMS	128
	long long stkbuf[STK_PARAMS / sizeof(long long)];

	if (!KTRPOINT(p, KTR_USER))
		return (0);
	if (len > KTR_USER_MAXLEN)
		return (EINVAL);

	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_USER);
	/* zero-fill so short ids leave no stack garbage in the record */
	memset(ktp.ktr_id, 0, KTR_USER_MAXIDLEN);
	error = copyinstr(id, ktp.ktr_id, KTR_USER_MAXIDLEN, NULL);
	if (error == 0) {
		/* small payloads use the stack buffer, large ones malloc */
		if (len > sizeof(stkbuf))
			memp = malloc(len, M_TEMP, M_WAITOK);
		else
			memp = stkbuf;
		error = copyin(addr, memp, len);
		if (error == 0)
			ktrwrite2(p, &kth, &ktp, sizeof(ktp), memp, len);
		if (memp != stkbuf)
			free(memp, M_TEMP, len);
	}
	atomic_clearbits_int(&p->p_flag, P_INKTR);
	return (error);
}
347
/*
 * Record exec arguments or environment (KTR_EXECARGS / KTR_EXECENV),
 * splitting the data into PAGE_SIZE-bounded records.
 */
void
ktrexec(struct proc *p, int type, const char *data, ssize_t len)
{
	struct ktr_header kth;
	int count;
	int buflen;

	assert(type == KTR_EXECARGS || type == KTR_EXECENV);
	atomic_setbits_int(&p->p_flag, P_INKTR);

	/* beware overflow */
	if (len > PAGE_SIZE)
		buflen = PAGE_SIZE;
	else
		buflen = len;

	ktrinitheader(&kth, p, type);

	while (len > 0) {
		/*
		 * Don't allow this process to hog the cpu when doing
		 * huge I/O.
		 */
		sched_pause(preempt);

		count = lmin(len, buflen);
		/* give up on the remainder if a write fails */
		if (ktrwrite(p, &kth, data, count) != 0)
			break;

		len -= count;
		data += count;
	}

	atomic_clearbits_int(&p->p_flag, P_INKTR);
}
383
384 void
385 ktrpledge(struct proc *p, int error, uint64_t code, int syscall)
386 {
387 struct ktr_header kth;
388 struct ktr_pledge kp;
389
390 atomic_setbits_int(&p->p_flag, P_INKTR);
391 ktrinitheader(&kth, p, KTR_PLEDGE);
392 kp.error = error;
393 kp.code = code;
394 kp.syscall = syscall;
395
396 KERNEL_LOCK();
397 ktrwrite(p, &kth, &kp, sizeof(kp));
398 KERNEL_UNLOCK();
399 atomic_clearbits_int(&p->p_flag, P_INKTR);
400 }
401
/* Interface and common routines */

/*
 * Common implementation of the ktrace operations: clear every use of
 * a trace file, or set/clear trace facilities on a process, a process
 * group (pid < 0), or — with KTRFLAG_DESCEND — a whole subtree.
 *
 * Returns 0 on success or an errno value.
 */
int
doktrace(struct vnode *vp, int ops, int facs, pid_t pid, struct proc *p)
{
	struct process *pr = NULL;
	struct ucred *cred = NULL;
	struct pgrp *pg;
	int descend = ops & KTRFLAG_DESCEND;
	int ret = 0;
	int error = 0;

	/* callers may not request KTRFAC_ROOT; it is set internally below */
	facs = facs & ~((unsigned)KTRFAC_ROOT);
	ops = KTROP(ops);

	if (ops != KTROP_CLEAR) {
		/*
		 * an operation which requires a file argument.
		 */
		cred = p->p_ucred;
		if (!vp) {
			error = EINVAL;
			goto done;
		}
		if (vp->v_type != VREG) {
			error = EACCES;
			goto done;
		}
	}
	/*
	 * Clear all uses of the tracefile
	 */
	if (ops == KTROP_CLEARFILE) {
		LIST_FOREACH(pr, &allprocess, ps_list) {
			if (pr->ps_tracevp == vp) {
				if (ktrcanset(p, pr))
					ktrcleartrace(pr);
				else
					error = EPERM;
			}
		}
		goto done;
	}
	/*
	 * need something to (un)trace (XXX - why is this here?)
	 */
	if (!facs) {
		error = EINVAL;
		goto done;
	}
	if (ops == KTROP_SET) {
		/* a privileged caller marks the target as root-traced */
		if (suser(p) == 0)
			facs |= KTRFAC_ROOT;
		/* write the trace-start record before any others */
		error = ktrstart(p, vp, cred);
		if (error != 0)
			goto done;
	}
	/*
	 * do it
	 */
	if (pid < 0) {
		/*
		 * by process group
		 */
		pg = pgfind(-pid);
		if (pg == NULL) {
			error = ESRCH;
			goto done;
		}
		LIST_FOREACH(pr, &pg->pg_members, ps_pglist) {
			if (descend)
				ret |= ktrsetchildren(p, pr, ops, facs, vp,
				    cred);
			else
				ret |= ktrops(p, pr, ops, facs, vp, cred);
		}
	} else {
		/*
		 * by pid
		 */
		pr = prfind(pid);
		if (pr == NULL) {
			error = ESRCH;
			goto done;
		}
		if (descend)
			ret |= ktrsetchildren(p, pr, ops, facs, vp, cred);
		else
			ret |= ktrops(p, pr, ops, facs, vp, cred);
	}
	/* ret stays zero only if no target process could be modified */
	if (!ret)
		error = EPERM;
done:
	return (error);
}
497
/*
 * ktrace system call
 *
 * Opens the named trace file (when a name is given), hands the vnode
 * to doktrace() for the actual work, then closes the file again.
 */
int
sys_ktrace(struct proc *p, void *v, register_t *retval)
{
	struct sys_ktrace_args /* {
		syscallarg(const char *) fname;
		syscallarg(int) ops;
		syscallarg(int) facs;
		syscallarg(pid_t) pid;
	} */ *uap = v;
	struct vnode *vp = NULL;
	const char *fname = SCARG(uap, fname);
	struct ucred *cred = NULL;
	int error;

	if (fname) {
		struct nameidata nd;

		cred = p->p_ucred;
		NDINIT(&nd, 0, 0, UIO_USERSPACE, fname, p);
		nd.ni_pledge = PLEDGE_CPATH | PLEDGE_WPATH;
		nd.ni_unveil = UNVEIL_CREATE | UNVEIL_WRITE;
		/* open for writing; O_NOFOLLOW refuses symlink targets */
		if ((error = vn_open(&nd, FWRITE|O_NOFOLLOW, 0)) != 0)
			return error;
		vp = nd.ni_vp;

		/* keep the reference but drop the lock across doktrace() */
		VOP_UNLOCK(vp);
	}

	error = doktrace(vp, SCARG(uap, ops), SCARG(uap, facs),
	    SCARG(uap, pid), p);
	if (vp != NULL)
		(void)vn_close(vp, FWRITE, cred, p);

	return error;
}
536
537 int
538 ktrops(struct proc *curp, struct process *pr, int ops, int facs,
539 struct vnode *vp, struct ucred *cred)
540 {
541 if (!ktrcanset(curp, pr))
542 return (0);
543 if (ops == KTROP_SET)
544 ktrsettrace(pr, facs, vp, cred);
545 else {
546 /* KTROP_CLEAR */
547 pr->ps_traceflag &= ~facs;
548 if ((pr->ps_traceflag & KTRFAC_MASK) == 0) {
549 /* cleared all the facility bits, so stop completely */
550 ktrcleartrace(pr);
551 }
552 }
553
554 return (1);
555 }
556
/*
 * Apply ktrops() to `top' and every process descended from it.  The
 * tree is walked iteratively: children first, then siblings, backing
 * up toward `top' when a level is exhausted.
 *
 * Returns nonzero if at least one process was modified.
 */
int
ktrsetchildren(struct proc *curp, struct process *top, int ops, int facs,
    struct vnode *vp, struct ucred *cred)
{
	struct process *pr;
	int ret = 0;

	pr = top;
	for (;;) {
		ret |= ktrops(curp, pr, ops, facs, vp, cred);
		/*
		 * If this process has children, descend to them next,
		 * otherwise do any siblings, and if done with this level,
		 * follow back up the tree (but not past top).
		 */
		if (!LIST_EMPTY(&pr->ps_children))
			pr = LIST_FIRST(&pr->ps_children);
		else for (;;) {
			if (pr == top)
				return (ret);
			if (LIST_NEXT(pr, ps_sibling) != NULL) {
				pr = LIST_NEXT(pr, ps_sibling);
				break;
			}
			pr = pr->ps_pptr;
		}
	}
	/*NOTREACHED*/
}
586
587 int
588 ktrwrite(struct proc *p, struct ktr_header *kth, const void *aux, size_t len)
589 {
590 struct vnode *vp = p->p_p->ps_tracevp;
591 struct ucred *cred = p->p_p->ps_tracecred;
592 struct iovec data[2];
593 int error;
594
595 if (vp == NULL)
596 return 0;
597 crhold(cred);
598 data[0].iov_base = (void *)aux;
599 data[0].iov_len = len;
600 data[1].iov_len = 0;
601 kth->ktr_len = len;
602 error = ktrwriteraw(p, vp, cred, kth, data);
603 crfree(cred);
604 return (error);
605 }
606
607 int
608 ktrwrite2(struct proc *p, struct ktr_header *kth, const void *aux1,
609 size_t len1, const void *aux2, size_t len2)
610 {
611 struct vnode *vp = p->p_p->ps_tracevp;
612 struct ucred *cred = p->p_p->ps_tracecred;
613 struct iovec data[2];
614 int error;
615
616 if (vp == NULL)
617 return 0;
618 crhold(cred);
619 data[0].iov_base = (void *)aux1;
620 data[0].iov_len = len1;
621 data[1].iov_base = (void *)aux2;
622 data[1].iov_len = len2;
623 kth->ktr_len = len1 + len2;
624 error = ktrwriteraw(p, vp, cred, kth, data);
625 crfree(cred);
626 return (error);
627 }
628
/*
 * Write a trace record (header plus up to two payload iovecs) to the
 * trace vnode.  On any error, tracing to this vnode is abandoned for
 * every process using it with the same credentials.
 *
 * Returns 0 on success or the vget/VOP_WRITE error.
 */
int
ktrwriteraw(struct proc *curp, struct vnode *vp, struct ucred *cred,
    struct ktr_header *kth, struct iovec *data)
{
	struct uio auio;
	struct iovec aiov[3];
	struct process *pr;
	int error;

	KERNEL_ASSERT_LOCKED();

	/* iovec 0 is always the record header */
	auio.uio_iov = &aiov[0];
	auio.uio_offset = 0;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_WRITE;
	aiov[0].iov_base = (caddr_t)kth;
	aiov[0].iov_len = sizeof(struct ktr_header);
	auio.uio_resid = sizeof(struct ktr_header);
	auio.uio_iovcnt = 1;
	auio.uio_procp = curp;
	if (kth->ktr_len > 0) {
		/* append the payload iovec(s) the caller prepared */
		aiov[1] = data[0];
		aiov[2] = data[1];
		auio.uio_iovcnt++;
		if (aiov[2].iov_len > 0)
			auio.uio_iovcnt++;
		auio.uio_resid += kth->ktr_len;
	}
	error = vget(vp, LK_EXCLUSIVE | LK_RETRY);
	if (error)
		goto bad;
	/* IO_APPEND: every record goes to the end of the trace file */
	error = VOP_WRITE(vp, &auio, IO_UNIT|IO_APPEND, cred);
	vput(vp);
	if (error)
		goto bad;

	return (0);

bad:
	/*
	 * If error encountered, give up tracing on this vnode.
	 */
	log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n",
	    error);
	LIST_FOREACH(pr, &allprocess, ps_list) {
		if (pr == curp->p_p)
			continue;
		if (pr->ps_tracevp == vp && pr->ps_tracecred == cred)
			ktrcleartrace(pr);
	}
	/* clear our own process last */
	ktrcleartrace(curp->p_p);
	return (error);
}
682
683 /*
684 * Return true if caller has permission to set the ktracing state
685 * of target. Essentially, the target can't possess any
686 * more permissions than the caller. KTRFAC_ROOT signifies that
687 * root previously set the tracing status on the target process, and
688 * so, only root may further change it.
689 *
690 * TODO: check groups. use caller effective gid.
691 */
692 int
693 ktrcanset(struct proc *callp, struct process *targetpr)
694 {
695 struct ucred *caller = callp->p_ucred;
696 struct ucred *target = targetpr->ps_ucred;
697
698 if ((caller->cr_uid == target->cr_ruid &&
699 target->cr_ruid == target->cr_svuid &&
700 caller->cr_rgid == target->cr_rgid && /* XXX */
701 target->cr_rgid == target->cr_svgid &&
702 (targetpr->ps_traceflag & KTRFAC_ROOT) == 0 &&
703 !ISSET(targetpr->ps_flags, PS_SUGID)) ||
704 caller->cr_uid == 0)
705 return (1);
706
707 return (0);
708 }
Cache object: f56e59c8b8b66a2dc2d5d581505b4543
|