/*-
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.
 * Copyright (c) 2005 Robert N. M. Watson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_ktrace.c	8.2 (Berkeley) 9/23/93
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: releng/7.4/sys/kern/kern_ktrace.c 202765 2010-01-21 19:17:42Z jhb $");

#include "opt_ktrace.h"
#include "opt_mac.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/ktrace.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/sysproto.h>

#include <security/mac/mac_framework.h>

/*
 * The ktrace facility allows the tracing of certain key events in user space
 * processes, such as system calls, signal delivery, context switches, and
 * user-generated events using utrace(2).  It works by streaming event
 * records and data to a vnode associated with the process using the
 * ktrace(2) system call.  In general, records can be written directly from
 * the context that generates the event.  One important exception to this is
 * during a context switch, where sleeping is not permitted.  To handle this
 * case, trace events are generated using in-kernel ktr_request records, and
 * then delivered to disk at a convenient moment -- either immediately, at
 * the next traceable event, at system call return, or at process exit.
 *
 * When dealing with multiple threads or processes writing to the same event
 * log, ordering guarantees are weak: specifically, if an event has multiple
 * records (i.e., system call enter and return), they may be interlaced with
 * records from another event.  Process and thread ID information is provided
 * in the record, and user applications can de-interlace events if required.
 */
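
/*
 * For orientation, a minimal userspace consumer of this facility might look
 * like the sketch below (file name and trace points are illustrative only;
 * see ktrace(2) and kdump(1) for the authoritative interface):
 *
 *	#include <sys/param.h>
 *	#include <sys/ktrace.h>
 *	#include <unistd.h>
 *
 *	Stream syscall enter/return records for this process and its future
 *	children to "ktrace.out", then decode the file with kdump(1):
 *
 *	ktrace("ktrace.out", KTROP_SET | KTRFLAG_DESCEND,
 *	    KTRFAC_SYSCALL | KTRFAC_SYSRET, getpid());
 *	...
 *	ktrace("ktrace.out", KTROP_CLEAR,
 *	    KTRFAC_SYSCALL | KTRFAC_SYSRET, getpid());
 */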

static MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE");

#ifdef KTRACE

#ifndef KTRACE_REQUEST_POOL
#define	KTRACE_REQUEST_POOL	100
#endif

struct ktr_request {
	struct	ktr_header ktr_header;
	void	*ktr_buffer;
	union {
		struct	ktr_syscall ktr_syscall;
		struct	ktr_sysret ktr_sysret;
		struct	ktr_genio ktr_genio;
		struct	ktr_psig ktr_psig;
		struct	ktr_csw ktr_csw;
	} ktr_data;
	STAILQ_ENTRY(ktr_request) ktr_list;
};

static int data_lengths[] = {
	0,					/* none */
	offsetof(struct ktr_syscall, ktr_args),	/* KTR_SYSCALL */
	sizeof(struct ktr_sysret),		/* KTR_SYSRET */
	0,					/* KTR_NAMEI */
	sizeof(struct ktr_genio),		/* KTR_GENIO */
	sizeof(struct ktr_psig),		/* KTR_PSIG */
	sizeof(struct ktr_csw),			/* KTR_CSW */
	0,					/* KTR_USER */
	0,					/* KTR_STRUCT */
	0,					/* KTR_SYSCTL */
};
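
/*
 * As a sketch (not a formal format definition), ktr_writerequest() below
 * lays a record out in the trace file as:
 *
 *	+------------+--------------------------+------------------------+
 *	| ktr_header | data_lengths[type] bytes | ktr_len bytes of       |
 *	|            | of ktr_data, if any      | ktr_buffer, if any     |
 *	+------------+--------------------------+------------------------+
 *
 * For example, a KTR_SYSCALL record carries the fixed-size prefix of
 * struct ktr_syscall followed by the variable-length argument array held
 * in ktr_buffer.
 */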

static STAILQ_HEAD(, ktr_request) ktr_free;

static SYSCTL_NODE(_kern, OID_AUTO, ktrace, CTLFLAG_RD, 0, "KTRACE options");

static u_int ktr_requestpool = KTRACE_REQUEST_POOL;
TUNABLE_INT("kern.ktrace.request_pool", &ktr_requestpool);

static u_int ktr_geniosize = PAGE_SIZE;
TUNABLE_INT("kern.ktrace.genio_size", &ktr_geniosize);
SYSCTL_UINT(_kern_ktrace, OID_AUTO, genio_size, CTLFLAG_RW, &ktr_geniosize,
    0, "Maximum size of genio event payload");

static int print_message = 1;
struct mtx ktrace_mtx;
static struct sx ktrace_sx;

static void ktrace_init(void *dummy);
static int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS);
static u_int ktrace_resize_pool(u_int newsize);
static struct ktr_request *ktr_getrequest(int type);
static void ktr_submitrequest(struct thread *td, struct ktr_request *req);
static void ktr_freerequest(struct ktr_request *req);
static void ktr_writerequest(struct thread *td, struct ktr_request *req);
static int ktrcanset(struct thread *, struct proc *);
static int ktrsetchildren(struct thread *, struct proc *, int, int,
    struct vnode *);
static int ktrops(struct thread *, struct proc *, int, int, struct vnode *);

/*
 * ktrace itself generates events, such as context switches, which we do not
 * wish to trace.  Maintain a flag, TDP_INKTRACE, on each thread to determine
 * whether or not it is in a region where tracing of events should be
 * suppressed.
 */
static void
ktrace_enter(struct thread *td)
{

	KASSERT(!(td->td_pflags & TDP_INKTRACE), ("ktrace_enter: flag set"));
	td->td_pflags |= TDP_INKTRACE;
}

static void
ktrace_exit(struct thread *td)
{

	KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_exit: flag not set"));
	td->td_pflags &= ~TDP_INKTRACE;
}

static void
ktrace_assert(struct thread *td)
{

	KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_assert: flag not set"));
}
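
/*
 * The convention used throughout this file (a sketch of existing usage,
 * not additional API) is to bracket any code that might itself generate
 * trace events:
 *
 *	ktrace_enter(td);
 *	... allocate, commit, or tear down trace state ...
 *	ktrace_exit(td);
 */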

static void
ktrace_init(void *dummy)
{
	struct ktr_request *req;
	int i;

	mtx_init(&ktrace_mtx, "ktrace", NULL, MTX_DEF | MTX_QUIET);
	sx_init(&ktrace_sx, "ktrace_sx");
	STAILQ_INIT(&ktr_free);
	for (i = 0; i < ktr_requestpool; i++) {
		req = malloc(sizeof(struct ktr_request), M_KTRACE, M_WAITOK);
		STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
	}
}
SYSINIT(ktrace_init, SI_SUB_KTRACE, SI_ORDER_ANY, ktrace_init, NULL);

static int
sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS)
{
	struct thread *td;
	u_int newsize, oldsize, wantsize;
	int error;

	/* Handle easy read-only case first to avoid warnings from GCC. */
	if (!req->newptr) {
		mtx_lock(&ktrace_mtx);
		oldsize = ktr_requestpool;
		mtx_unlock(&ktrace_mtx);
		return (SYSCTL_OUT(req, &oldsize, sizeof(u_int)));
	}

	error = SYSCTL_IN(req, &wantsize, sizeof(u_int));
	if (error)
		return (error);
	td = curthread;
	ktrace_enter(td);
	mtx_lock(&ktrace_mtx);
	oldsize = ktr_requestpool;
	newsize = ktrace_resize_pool(wantsize);
	mtx_unlock(&ktrace_mtx);
	ktrace_exit(td);
	error = SYSCTL_OUT(req, &oldsize, sizeof(u_int));
	if (error)
		return (error);
	if (wantsize > oldsize && newsize < wantsize)
		return (ENOSPC);
	return (0);
}
SYSCTL_PROC(_kern_ktrace, OID_AUTO, request_pool, CTLTYPE_UINT|CTLFLAG_RW,
    &ktr_requestpool, 0, sysctl_kern_ktrace_request_pool, "IU", "");
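
/*
 * The pool size can thus be seeded from the loader tunable or adjusted at
 * run time through the sysctl above; illustrative values:
 *
 *	kern.ktrace.request_pool="200"		(in /boot/loader.conf)
 *	sysctl kern.ktrace.request_pool=200	(at run time)
 *
 * If the pool cannot be grown all the way to the requested size, the
 * handler above reports ENOSPC.
 */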

static u_int
ktrace_resize_pool(u_int newsize)
{
	struct ktr_request *req;
	int bound;

	mtx_assert(&ktrace_mtx, MA_OWNED);
	print_message = 1;
	bound = newsize - ktr_requestpool;
	if (bound == 0)
		return (ktr_requestpool);
	if (bound < 0)
		/* Shrink pool down to newsize if possible. */
		while (bound++ < 0) {
			req = STAILQ_FIRST(&ktr_free);
			if (req == NULL)
				return (ktr_requestpool);
			STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
			ktr_requestpool--;
			mtx_unlock(&ktrace_mtx);
			free(req, M_KTRACE);
			mtx_lock(&ktrace_mtx);
		}
	else
		/* Grow pool up to newsize. */
		while (bound-- > 0) {
			/*
			 * Drop the mutex across the M_WAITOK allocation,
			 * which may sleep.
			 */
			mtx_unlock(&ktrace_mtx);
			req = malloc(sizeof(struct ktr_request), M_KTRACE,
			    M_WAITOK);
			mtx_lock(&ktrace_mtx);
			STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
			ktr_requestpool++;
		}
	return (ktr_requestpool);
}

/* ktr_getrequest() assumes that ktr_comm[] is the same size as p_comm[]. */
CTASSERT(sizeof(((struct ktr_header *)NULL)->ktr_comm) ==
    sizeof(((struct proc *)NULL)->p_comm));

static struct ktr_request *
ktr_getrequest(int type)
{
	struct ktr_request *req;
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	int pm;

	ktrace_enter(td);	/* XXX: In caller instead? */
	mtx_lock(&ktrace_mtx);
	if (!KTRCHECK(td, type)) {
		mtx_unlock(&ktrace_mtx);
		ktrace_exit(td);
		return (NULL);
	}
	req = STAILQ_FIRST(&ktr_free);
	if (req != NULL) {
		STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
		req->ktr_header.ktr_type = type;
		if (p->p_traceflag & KTRFAC_DROP) {
			req->ktr_header.ktr_type |= KTR_DROP;
			p->p_traceflag &= ~KTRFAC_DROP;
		}
		mtx_unlock(&ktrace_mtx);
		microtime(&req->ktr_header.ktr_time);
		req->ktr_header.ktr_pid = p->p_pid;
		req->ktr_header.ktr_tid = td->td_tid;
		bcopy(p->p_comm, req->ktr_header.ktr_comm,
		    sizeof(req->ktr_header.ktr_comm));
		req->ktr_buffer = NULL;
		req->ktr_header.ktr_len = 0;
	} else {
		p->p_traceflag |= KTRFAC_DROP;
		pm = print_message;
		print_message = 0;
		mtx_unlock(&ktrace_mtx);
		if (pm)
			printf("Out of ktrace request objects.\n");
		ktrace_exit(td);
	}
	return (req);
}

/*
 * Some trace generation environments don't permit direct access to VFS,
 * such as during a context switch, where sleeping is not allowed.  Under
 * these circumstances, queue a request to the thread to be written
 * asynchronously later.
 */
static void
ktr_enqueuerequest(struct thread *td, struct ktr_request *req)
{

	mtx_lock(&ktrace_mtx);
	STAILQ_INSERT_TAIL(&td->td_proc->p_ktr, req, ktr_list);
	mtx_unlock(&ktrace_mtx);
	ktrace_exit(td);
}

/*
 * Drain any pending ktrace records from the per-thread queue to disk.  This
 * is used both internally before committing other records, and also on
 * system call return.  We drain all the ones we can find at the time when
 * drain is requested, but don't keep draining after that as those events
 * may be approximately "after" the current event.
 */
static void
ktr_drain(struct thread *td)
{
	struct ktr_request *queued_req;
	STAILQ_HEAD(, ktr_request) local_queue;

	ktrace_assert(td);
	sx_assert(&ktrace_sx, SX_XLOCKED);

	STAILQ_INIT(&local_queue);	/* XXXRW: needed? */

	if (!STAILQ_EMPTY(&td->td_proc->p_ktr)) {
		mtx_lock(&ktrace_mtx);
		STAILQ_CONCAT(&local_queue, &td->td_proc->p_ktr);
		mtx_unlock(&ktrace_mtx);

		while ((queued_req = STAILQ_FIRST(&local_queue))) {
			STAILQ_REMOVE_HEAD(&local_queue, ktr_list);
			ktr_writerequest(td, queued_req);
			ktr_freerequest(queued_req);
		}
	}
}

/*
 * Submit a trace record for immediate commit to disk -- to be used only
 * where entering VFS is OK.  First drain any pending records that may have
 * been cached in the thread.
 */
static void
ktr_submitrequest(struct thread *td, struct ktr_request *req)
{

	ktrace_assert(td);

	sx_xlock(&ktrace_sx);
	ktr_drain(td);
	ktr_writerequest(td, req);
	ktr_freerequest(req);
	sx_xunlock(&ktrace_sx);

	ktrace_exit(td);
}

static void
ktr_freerequest(struct ktr_request *req)
{

	if (req->ktr_buffer != NULL)
		free(req->ktr_buffer, M_KTRACE);
	mtx_lock(&ktrace_mtx);
	STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
	mtx_unlock(&ktrace_mtx);
}

void
ktrsyscall(code, narg, args)
	int code, narg;
	register_t args[];
{
	struct ktr_request *req;
	struct ktr_syscall *ktp;
	size_t buflen;
	char *buf = NULL;

	buflen = sizeof(register_t) * narg;
	if (buflen > 0) {
		buf = malloc(buflen, M_KTRACE, M_WAITOK);
		bcopy(args, buf, buflen);
	}
	req = ktr_getrequest(KTR_SYSCALL);
	if (req == NULL) {
		if (buf != NULL)
			free(buf, M_KTRACE);
		return;
	}
	ktp = &req->ktr_data.ktr_syscall;
	ktp->ktr_code = code;
	ktp->ktr_narg = narg;
	if (buflen > 0) {
		req->ktr_header.ktr_len = buflen;
		req->ktr_buffer = buf;
	}
	ktr_submitrequest(curthread, req);
}

void
ktrsysret(code, error, retval)
	int code, error;
	register_t retval;
{
	struct ktr_request *req;
	struct ktr_sysret *ktp;

	req = ktr_getrequest(KTR_SYSRET);
	if (req == NULL)
		return;
	ktp = &req->ktr_data.ktr_sysret;
	ktp->ktr_code = code;
	ktp->ktr_error = error;
	ktp->ktr_retval = retval;	/* what about val2 ? */
	ktr_submitrequest(curthread, req);
}

/*
 * When a process exits, drain per-process asynchronous trace records.
 */
void
ktrprocexit(struct thread *td)
{

	ktrace_enter(td);
	sx_xlock(&ktrace_sx);
	ktr_drain(td);
	sx_xunlock(&ktrace_sx);
	ktrace_exit(td);
}

/*
 * When a thread returns, drain any asynchronous records generated by the
 * system call.
 */
void
ktruserret(struct thread *td)
{

	ktrace_enter(td);
	sx_xlock(&ktrace_sx);
	ktr_drain(td);
	sx_xunlock(&ktrace_sx);
	ktrace_exit(td);
}

void
ktrnamei(path)
	char *path;
{
	struct ktr_request *req;
	int namelen;
	char *buf = NULL;

	namelen = strlen(path);
	if (namelen > 0) {
		buf = malloc(namelen, M_KTRACE, M_WAITOK);
		bcopy(path, buf, namelen);
	}
	req = ktr_getrequest(KTR_NAMEI);
	if (req == NULL) {
		if (buf != NULL)
			free(buf, M_KTRACE);
		return;
	}
	if (namelen > 0) {
		req->ktr_header.ktr_len = namelen;
		req->ktr_buffer = buf;
	}
	ktr_submitrequest(curthread, req);
}

void
ktrsysctl(name, namelen)
	int *name;
	u_int namelen;
{
	struct ktr_request *req;
	u_int mib[CTL_MAXNAME + 2];
	char *mibname;
	size_t mibnamelen;
	int error;

	/*
	 * Look up the dotted name of the MIB: the {0, 1, <oid...>} prefix
	 * asks the sysctl tree to translate an OID into its name string.
	 */
	KASSERT(namelen <= CTL_MAXNAME, ("sysctl MIB too long"));
	mib[0] = 0;
	mib[1] = 1;
	bcopy(name, mib + 2, namelen * sizeof(*name));
	mibnamelen = 128;
	mibname = malloc(mibnamelen, M_KTRACE, M_WAITOK);
	error = kernel_sysctl(curthread, mib, namelen + 2, mibname,
	    &mibnamelen, NULL, 0, &mibnamelen, 0);
	if (error) {
		free(mibname, M_KTRACE);
		return;
	}
	req = ktr_getrequest(KTR_SYSCTL);
	if (req == NULL) {
		free(mibname, M_KTRACE);
		return;
	}
	req->ktr_header.ktr_len = mibnamelen;
	req->ktr_buffer = mibname;
	ktr_submitrequest(curthread, req);
}

void
ktrgenio(fd, rw, uio, error)
	int fd;
	enum uio_rw rw;
	struct uio *uio;
	int error;
{
	struct ktr_request *req;
	struct ktr_genio *ktg;
	int datalen;
	char *buf;

	if (error) {
		free(uio, M_IOV);
		return;
	}
	uio->uio_offset = 0;
	uio->uio_rw = UIO_WRITE;
	datalen = imin(uio->uio_resid, ktr_geniosize);
	buf = malloc(datalen, M_KTRACE, M_WAITOK);
	error = uiomove(buf, datalen, uio);
	free(uio, M_IOV);
	if (error) {
		free(buf, M_KTRACE);
		return;
	}
	req = ktr_getrequest(KTR_GENIO);
	if (req == NULL) {
		free(buf, M_KTRACE);
		return;
	}
	ktg = &req->ktr_data.ktr_genio;
	ktg->ktr_fd = fd;
	ktg->ktr_rw = rw;
	req->ktr_header.ktr_len = datalen;
	req->ktr_buffer = buf;
	ktr_submitrequest(curthread, req);
}

void
ktrpsig(sig, action, mask, code)
	int sig;
	sig_t action;
	sigset_t *mask;
	int code;
{
	struct ktr_request *req;
	struct ktr_psig	*kp;

	req = ktr_getrequest(KTR_PSIG);
	if (req == NULL)
		return;
	kp = &req->ktr_data.ktr_psig;
	kp->signo = (char)sig;
	kp->action = action;
	kp->mask = *mask;
	kp->code = code;
	ktr_enqueuerequest(curthread, req);
}

void
ktrcsw(out, user)
	int out, user;
{
	struct ktr_request *req;
	struct ktr_csw *kc;

	req = ktr_getrequest(KTR_CSW);
	if (req == NULL)
		return;
	kc = &req->ktr_data.ktr_csw;
	kc->out = out;
	kc->user = user;
	ktr_enqueuerequest(curthread, req);
}

void
ktrstruct(name, namelen, data, datalen)
	const char *name;
	size_t namelen;
	void *data;
	size_t datalen;
{
	struct ktr_request *req;
	char *buf = NULL;
	size_t buflen;

	if (!data)
		datalen = 0;
	buflen = namelen + 1 + datalen;
	buf = malloc(buflen, M_KTRACE, M_WAITOK);
	bcopy(name, buf, namelen);
	buf[namelen] = '\0';
	bcopy(data, buf + namelen + 1, datalen);
	if ((req = ktr_getrequest(KTR_STRUCT)) == NULL) {
		free(buf, M_KTRACE);
		return;
	}
	req->ktr_buffer = buf;
	req->ktr_header.ktr_len = buflen;
	ktr_submitrequest(curthread, req);
}
#endif /* KTRACE */

/* Interface and common routines */

#ifndef _SYS_SYSPROTO_H_
struct ktrace_args {
	char	*fname;
	int	ops;
	int	facs;
	int	pid;
};
#endif
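
/*
 * The ops word packs an operation and flags, while facs selects trace
 * points.  A hypothetical KTROP_SET request tracing system calls for pid
 * 100 and its descendants would arrive here as:
 *
 *	uap->ops  = KTROP_SET | KTRFLAG_DESCEND;
 *	uap->facs = KTRFAC_SYSCALL | KTRFAC_SYSRET;
 *	uap->pid  = 100;
 *
 * A negative pid selects a process group instead, as handled below.
 */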
/* ARGSUSED */
int
ktrace(td, uap)
	struct thread *td;
	register struct ktrace_args *uap;
{
#ifdef KTRACE
	register struct vnode *vp = NULL;
	register struct proc *p;
	struct pgrp *pg;
	int facs = uap->facs & ~KTRFAC_ROOT;
	int ops = KTROP(uap->ops);
	int descend = uap->ops & KTRFLAG_DESCEND;
	int nfound, ret = 0;
	int flags, error = 0, vfslocked;
	struct nameidata nd;
	struct ucred *cred;

	/*
	 * Need something to (un)trace.
	 */
	if (ops != KTROP_CLEARFILE && facs == 0)
		return (EINVAL);

	ktrace_enter(td);
	if (ops != KTROP_CLEAR) {
		/*
		 * An operation which requires a file argument.
		 */
		NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_USERSPACE,
		    uap->fname, td);
		flags = FREAD | FWRITE | O_NOFOLLOW;
		error = vn_open(&nd, &flags, 0, NULL);
		if (error) {
			ktrace_exit(td);
			return (error);
		}
		vfslocked = NDHASGIANT(&nd);
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vp = nd.ni_vp;
		VOP_UNLOCK(vp, 0, td);
		if (vp->v_type != VREG) {
			(void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
			VFS_UNLOCK_GIANT(vfslocked);
			ktrace_exit(td);
			return (EACCES);
		}
		VFS_UNLOCK_GIANT(vfslocked);
	}
	/*
	 * Clear all uses of the tracefile.
	 */
	if (ops == KTROP_CLEARFILE) {
		int vrele_count;

		vrele_count = 0;
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_tracevp == vp) {
				if (ktrcanset(td, p)) {
					mtx_lock(&ktrace_mtx);
					cred = p->p_tracecred;
					p->p_tracecred = NULL;
					p->p_tracevp = NULL;
					p->p_traceflag = 0;
					mtx_unlock(&ktrace_mtx);
					vrele_count++;
					crfree(cred);
				} else
					error = EPERM;
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		if (vrele_count > 0) {
			vfslocked = VFS_LOCK_GIANT(vp->v_mount);
			while (vrele_count-- > 0)
				vrele(vp);
			VFS_UNLOCK_GIANT(vfslocked);
		}
		goto done;
	}
	/*
	 * Do it.
	 */
	sx_slock(&proctree_lock);
	if (uap->pid < 0) {
		/*
		 * By process group.
		 */
		pg = pgfind(-uap->pid);
		if (pg == NULL) {
			sx_sunlock(&proctree_lock);
			error = ESRCH;
			goto done;
		}
		/*
		 * ktrops() may call vrele().  Lock pg_members
		 * by the proctree_lock rather than pg_mtx.
		 */
		PGRP_UNLOCK(pg);
		nfound = 0;
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			if (p_cansee(td, p) != 0) {
				PROC_UNLOCK(p);
				continue;
			}
			PROC_UNLOCK(p);
			nfound++;
			if (descend)
				ret |= ktrsetchildren(td, p, ops, facs, vp);
			else
				ret |= ktrops(td, p, ops, facs, vp);
		}
		if (nfound == 0) {
			sx_sunlock(&proctree_lock);
			error = ESRCH;
			goto done;
		}
	} else {
		/*
		 * By pid.
		 */
		p = pfind(uap->pid);
		if (p == NULL) {
			sx_sunlock(&proctree_lock);
			error = ESRCH;
			goto done;
		}
		error = p_cansee(td, p);
		/*
		 * The slock of the proctree lock will keep this process
		 * from going away, so unlocking the proc here is ok.
		 */
		PROC_UNLOCK(p);
		if (error) {
			sx_sunlock(&proctree_lock);
			goto done;
		}
		if (descend)
			ret |= ktrsetchildren(td, p, ops, facs, vp);
		else
			ret |= ktrops(td, p, ops, facs, vp);
	}
	sx_sunlock(&proctree_lock);
	if (!ret)
		error = EPERM;
done:
	if (vp != NULL) {
		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
		(void) vn_close(vp, FWRITE, td->td_ucred, td);
		VFS_UNLOCK_GIANT(vfslocked);
	}
	ktrace_exit(td);
	return (error);
#else /* !KTRACE */
	return (ENOSYS);
#endif /* KTRACE */
}

/* ARGSUSED */
int
utrace(td, uap)
	struct thread *td;
	register struct utrace_args *uap;
{

#ifdef KTRACE
	struct ktr_request *req;
	void *cp;
	int error;

	if (!KTRPOINT(td, KTR_USER))
		return (0);
	if (uap->len > KTR_USER_MAXLEN)
		return (EINVAL);
	cp = malloc(uap->len, M_KTRACE, M_WAITOK);
	error = copyin(uap->addr, cp, uap->len);
	if (error) {
		free(cp, M_KTRACE);
		return (error);
	}
	req = ktr_getrequest(KTR_USER);
	if (req == NULL) {
		free(cp, M_KTRACE);
		return (ENOMEM);
	}
	req->ktr_buffer = cp;
	req->ktr_header.ktr_len = uap->len;
	ktr_submitrequest(td, req);
	return (0);
#else /* !KTRACE */
	return (ENOSYS);
#endif /* KTRACE */
}
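
/*
 * For reference, a minimal (hypothetical) userspace use of utrace(2),
 * which lands in the handler above as a KTR_USER record and is decoded
 * by kdump(1); see utrace(2) for the authoritative headers:
 *
 *	const char msg[] = "checkpoint 1";
 *	utrace(msg, sizeof(msg));
 *
 * The call is effectively a no-op unless KTR_USER tracing is enabled on
 * the calling process.
 */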

#ifdef KTRACE
static int
ktrops(td, p, ops, facs, vp)
	struct thread *td;
	struct proc *p;
	int ops, facs;
	struct vnode *vp;
{
	struct vnode *tracevp = NULL;
	struct ucred *tracecred = NULL;

	PROC_LOCK(p);
	if (!ktrcanset(td, p)) {
		PROC_UNLOCK(p);
		return (0);
	}
	mtx_lock(&ktrace_mtx);
	if (ops == KTROP_SET) {
		if (p->p_tracevp != vp) {
			/*
			 * If the trace file is already in use, relinquish
			 * it below.
			 */
			tracevp = p->p_tracevp;
			VREF(vp);
			p->p_tracevp = vp;
		}
		if (p->p_tracecred != td->td_ucred) {
			tracecred = p->p_tracecred;
			p->p_tracecred = crhold(td->td_ucred);
		}
		p->p_traceflag |= facs;
		if (priv_check(td, PRIV_KTRACE) == 0)
			p->p_traceflag |= KTRFAC_ROOT;
	} else {
		/* KTROP_CLEAR */
		if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) {
			/* no more tracing */
			p->p_traceflag = 0;
			tracevp = p->p_tracevp;
			p->p_tracevp = NULL;
			tracecred = p->p_tracecred;
			p->p_tracecred = NULL;
		}
	}
	mtx_unlock(&ktrace_mtx);
	PROC_UNLOCK(p);
	if (tracevp != NULL) {
		int vfslocked;

		vfslocked = VFS_LOCK_GIANT(tracevp->v_mount);
		vrele(tracevp);
		VFS_UNLOCK_GIANT(vfslocked);
	}
	if (tracecred != NULL)
		crfree(tracecred);

	return (1);
}

static int
ktrsetchildren(td, top, ops, facs, vp)
	struct thread *td;
	struct proc *top;
	int ops, facs;
	struct vnode *vp;
{
	register struct proc *p;
	register int ret = 0;

	p = top;
	sx_assert(&proctree_lock, SX_LOCKED);
	for (;;) {
		ret |= ktrops(td, p, ops, facs, vp);
		/*
		 * If this process has children, descend to them next,
		 * otherwise do any siblings, and if done with this level,
		 * follow back up the tree (but not past top).
		 */
		if (!LIST_EMPTY(&p->p_children))
			p = LIST_FIRST(&p->p_children);
		else for (;;) {
			if (p == top)
				return (ret);
			if (LIST_NEXT(p, p_sibling)) {
				p = LIST_NEXT(p, p_sibling);
				break;
			}
			p = p->p_pptr;
		}
	}
	/* NOTREACHED */
}

static void
ktr_writerequest(struct thread *td, struct ktr_request *req)
{
	struct ktr_header *kth;
	struct vnode *vp;
	struct proc *p;
	struct ucred *cred;
	struct uio auio;
	struct iovec aiov[3];
	struct mount *mp;
	int datalen, buflen, vrele_count;
	int error, vfslocked;

	/*
	 * We hold the vnode and credential for use in I/O in case ktrace is
	 * disabled on the process as we write out the request.
	 *
	 * XXXRW: This is not ideal: we could end up performing a write after
	 * the vnode has been closed.
	 */
	mtx_lock(&ktrace_mtx);
	vp = td->td_proc->p_tracevp;
	cred = td->td_proc->p_tracecred;

	/*
	 * If vp is NULL, the vp has been cleared out from under this
	 * request, so just drop it.  Make sure the credential and vnode are
	 * in sync: we should have both or neither.
	 */
	if (vp == NULL) {
		KASSERT(cred == NULL, ("ktr_writerequest: cred != NULL"));
		mtx_unlock(&ktrace_mtx);
		return;
	}
	VREF(vp);
	KASSERT(cred != NULL, ("ktr_writerequest: cred == NULL"));
	crhold(cred);
	mtx_unlock(&ktrace_mtx);

	kth = &req->ktr_header;
	KASSERT(((u_short)kth->ktr_type & ~KTR_DROP) <
	    sizeof(data_lengths) / sizeof(data_lengths[0]),
	    ("data_lengths array overflow"));
	datalen = data_lengths[(u_short)kth->ktr_type & ~KTR_DROP];
	buflen = kth->ktr_len;
	auio.uio_iov = &aiov[0];
	auio.uio_offset = 0;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_WRITE;
	aiov[0].iov_base = (caddr_t)kth;
	aiov[0].iov_len = sizeof(struct ktr_header);
	auio.uio_resid = sizeof(struct ktr_header);
	auio.uio_iovcnt = 1;
	auio.uio_td = td;
	if (datalen != 0) {
		aiov[1].iov_base = (caddr_t)&req->ktr_data;
		aiov[1].iov_len = datalen;
		auio.uio_resid += datalen;
		auio.uio_iovcnt++;
		kth->ktr_len += datalen;
	}
	if (buflen != 0) {
		KASSERT(req->ktr_buffer != NULL, ("ktrace: nothing to write"));
		aiov[auio.uio_iovcnt].iov_base = req->ktr_buffer;
		aiov[auio.uio_iovcnt].iov_len = buflen;
		auio.uio_resid += buflen;
		auio.uio_iovcnt++;
	}

	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	vn_start_write(vp, &mp, V_WAIT);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	(void)VOP_LEASE(vp, td, cred, LEASE_WRITE);
#ifdef MAC
	error = mac_check_vnode_write(cred, NOCRED, vp);
	if (error == 0)
#endif
		error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, cred);
	VOP_UNLOCK(vp, 0, td);
	vn_finished_write(mp);
	crfree(cred);
	if (!error) {
		vrele(vp);
		VFS_UNLOCK_GIANT(vfslocked);
		return;
	}
	VFS_UNLOCK_GIANT(vfslocked);

	/*
	 * If an error was encountered, give up tracing on this vnode.  We
	 * defer all the vrele()'s on the vnode until after we are finished
	 * walking the various lists to avoid needlessly holding locks.
	 * NB: at this point we still hold the vnode reference that must
	 * not go away as we need the valid vnode to compare with.  Thus let
	 * vrele_count start at 1 and the reference will be freed
	 * by the loop at the end after our last use of vp.
	 */
	log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n",
	    error);
	vrele_count = 1;
	/*
	 * First, clear this vnode from being used by any processes in the
	 * system.
	 * XXX - If one process gets an EPERM writing to the vnode, should
	 * we really do this?  Other processes might have suitable
	 * credentials for the operation.
	 */
	cred = NULL;
	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		PROC_LOCK(p);
		if (p->p_tracevp == vp) {
			mtx_lock(&ktrace_mtx);
			p->p_tracevp = NULL;
			p->p_traceflag = 0;
			cred = p->p_tracecred;
			p->p_tracecred = NULL;
			mtx_unlock(&ktrace_mtx);
			vrele_count++;
		}
		PROC_UNLOCK(p);
		if (cred != NULL) {
			crfree(cred);
			cred = NULL;
		}
	}
	sx_sunlock(&allproc_lock);

	/*
	 * We can't clear any pending requests in threads that have cached
	 * them but not yet committed them, as those are per-thread.  The
	 * thread will have to clear it itself on system call return.
	 */
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	while (vrele_count-- > 0)
		vrele(vp);
	VFS_UNLOCK_GIANT(vfslocked);
}

/*
 * Return true if caller has permission to set the ktracing state
 * of target.  Essentially, the target can't possess any
 * more permissions than the caller.  KTRFAC_ROOT signifies that
 * root previously set the tracing status on the target process, and
 * so, only root may further change it.
 */
static int
ktrcanset(td, targetp)
	struct thread *td;
	struct proc *targetp;
{

	PROC_LOCK_ASSERT(targetp, MA_OWNED);
	if (targetp->p_traceflag & KTRFAC_ROOT &&
	    priv_check(td, PRIV_KTRACE))
		return (0);

	if (p_candebug(td, targetp) != 0)
		return (0);

	return (1);
}

#endif /* KTRACE */