FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_subr.c
1 /* $NetBSD: kern_subr.c,v 1.115.6.1 2005/12/29 20:00:12 riz Exp $ */
2
3 /*-
4 * Copyright (c) 1997, 1998, 1999, 2002 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center, and by Luke Mewburn.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the NetBSD
22 * Foundation, Inc. and its contributors.
23 * 4. Neither the name of The NetBSD Foundation nor the names of its
24 * contributors may be used to endorse or promote products derived
25 * from this software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
38 */
39
40 /*
41 * Copyright (c) 1982, 1986, 1991, 1993
42 * The Regents of the University of California. All rights reserved.
43 * (c) UNIX System Laboratories, Inc.
44 * All or some portions of this file are derived from material licensed
45 * to the University of California by American Telephone and Telegraph
46 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
47 * the permission of UNIX System Laboratories, Inc.
48 *
49 * Copyright (c) 1992, 1993
50 * The Regents of the University of California. All rights reserved.
51 *
52 * This software was developed by the Computer Systems Engineering group
53 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
54 * contributed to Berkeley.
55 *
56 * All advertising materials mentioning features or use of this software
57 * must display the following acknowledgement:
58 * This product includes software developed by the University of
59 * California, Lawrence Berkeley Laboratory.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 * 1. Redistributions of source code must retain the above copyright
65 * notice, this list of conditions and the following disclaimer.
66 * 2. Redistributions in binary form must reproduce the above copyright
67 * notice, this list of conditions and the following disclaimer in the
68 * documentation and/or other materials provided with the distribution.
69 * 3. Neither the name of the University nor the names of its contributors
70 * may be used to endorse or promote products derived from this software
71 * without specific prior written permission.
72 *
73 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
74 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
75 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
76 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
77 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
78 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
79 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
80 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
81 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
82 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
83 * SUCH DAMAGE.
84 *
85 * @(#)kern_subr.c 8.4 (Berkeley) 2/14/95
86 */
87
88 #include <sys/cdefs.h>
89 __KERNEL_RCSID(0, "$NetBSD: kern_subr.c,v 1.115.6.1 2005/12/29 20:00:12 riz Exp $");
90
91 #include "opt_ddb.h"
92 #include "opt_md.h"
93 #include "opt_syscall_debug.h"
94 #include "opt_ktrace.h"
95 #include "opt_systrace.h"
96
97 #include <sys/param.h>
98 #include <sys/systm.h>
99 #include <sys/proc.h>
100 #include <sys/malloc.h>
101 #include <sys/mount.h>
102 #include <sys/device.h>
103 #include <sys/reboot.h>
104 #include <sys/conf.h>
105 #include <sys/disklabel.h>
106 #include <sys/queue.h>
107 #include <sys/systrace.h>
108 #include <sys/ktrace.h>
109
110 #include <uvm/uvm_extern.h>
111
112 #include <dev/cons.h>
113
114 #include <net/if.h>
115
116 /* XXX these should eventually move to subr_autoconf.c */
117 static struct device *finddevice(const char *);
118 static struct device *getdisk(char *, int, int, dev_t *, int);
119 static struct device *parsedisk(char *, int, int, dev_t *);
120
121 /*
122 * A generic linear hook.
123 */
124 struct hook_desc {
125 LIST_ENTRY(hook_desc) hk_list;
126 void (*hk_fn)(void *);
127 void *hk_arg;
128 };
129 typedef LIST_HEAD(, hook_desc) hook_list_t;
130
131 static void *hook_establish(hook_list_t *, void (*)(void *), void *);
132 static void hook_disestablish(hook_list_t *, void *);
133 static void hook_destroy(hook_list_t *);
134 static void hook_proc_run(hook_list_t *, struct proc *);
135
136 MALLOC_DEFINE(M_IOV, "iov", "large iov's");
137
138 int
139 uiomove(buf, n, uio)
140 void *buf;
141 size_t n;
142 struct uio *uio;
143 {
144 struct iovec *iov;
145 u_int cnt;
146 int error = 0;
147 char *cp = buf;
148 struct proc *p = uio->uio_procp;
149 int hold_count;
150
151 hold_count = KERNEL_LOCK_RELEASE_ALL();
152
153 #if defined(LOCKDEBUG) || defined(DIAGNOSTIC)
154 spinlock_switchcheck();
155 #endif
156 #ifdef LOCKDEBUG
157 simple_lock_only_held(NULL, "uiomove");
158 #endif
159
160 #ifdef DIAGNOSTIC
161 if (uio->uio_rw != UIO_READ && uio->uio_rw != UIO_WRITE)
162 panic("uiomove: mode");
163 #endif
164 while (n > 0 && uio->uio_resid) {
165 iov = uio->uio_iov;
166 cnt = iov->iov_len;
167 if (cnt == 0) {
168 KASSERT(uio->uio_iovcnt > 0);
169 uio->uio_iov++;
170 uio->uio_iovcnt--;
171 continue;
172 }
173 if (cnt > n)
174 cnt = n;
175 switch (uio->uio_segflg) {
176
177 case UIO_USERSPACE:
178 if (curcpu()->ci_schedstate.spc_flags &
179 SPCF_SHOULDYIELD)
180 preempt(1);
181 if (__predict_true(p == curproc)) {
182 if (uio->uio_rw == UIO_READ)
183 error = copyout(cp, iov->iov_base, cnt);
184 else
185 error = copyin(iov->iov_base, cp, cnt);
186 } else {
187 if (uio->uio_rw == UIO_READ)
188 error = copyout_proc(p, cp,
189 iov->iov_base, cnt);
190 else
191 error = copyin_proc(p, iov->iov_base,
192 cp, cnt);
193 }
194 if (error)
195 goto out;
196 break;
197
198 case UIO_SYSSPACE:
199 if (uio->uio_rw == UIO_READ)
200 error = kcopy(cp, iov->iov_base, cnt);
201 else
202 error = kcopy(iov->iov_base, cp, cnt);
203 if (error)
204 goto out;
205 break;
206 }
207 iov->iov_base = (caddr_t)iov->iov_base + cnt;
208 iov->iov_len -= cnt;
209 uio->uio_resid -= cnt;
210 uio->uio_offset += cnt;
211 cp += cnt;
212 KDASSERT(cnt <= n);
213 n -= cnt;
214 }
215 out:
216 KERNEL_LOCK_ACQUIRE_COUNT(hold_count);
217 return (error);
218 }
219
220 /*
221 * Wrapper for uiomove() that validates the arguments against a known-good
222 * kernel buffer.
223 */
224 int
225 uiomove_frombuf(void *buf, size_t buflen, struct uio *uio)
226 {
227 size_t offset;
228
229 if (uio->uio_offset < 0 || uio->uio_resid < 0 ||
230 (offset = uio->uio_offset) != uio->uio_offset)
231 return (EINVAL);
232 if (offset >= buflen)
233 return (0);
234 return (uiomove((char *)buf + offset, buflen - offset, uio));
235 }
236
237 /*
238 * Give next character to user as result of read.
239 */
240 int
241 ureadc(c, uio)
242 int c;
243 struct uio *uio;
244 {
245 struct iovec *iov;
246
247 if (uio->uio_resid <= 0)
248 panic("ureadc: non-positive resid");
249 again:
250 if (uio->uio_iovcnt <= 0)
251 panic("ureadc: non-positive iovcnt");
252 iov = uio->uio_iov;
253 if (iov->iov_len <= 0) {
254 uio->uio_iovcnt--;
255 uio->uio_iov++;
256 goto again;
257 }
258 switch (uio->uio_segflg) {
259
260 case UIO_USERSPACE:
261 if (subyte(iov->iov_base, c) < 0)
262 return (EFAULT);
263 break;
264
265 case UIO_SYSSPACE:
266 *(char *)iov->iov_base = c;
267 break;
268 }
269 iov->iov_base = (caddr_t)iov->iov_base + 1;
270 iov->iov_len--;
271 uio->uio_resid--;
272 uio->uio_offset++;
273 return (0);
274 }
275
276 /*
277 * Like copyin(), but operates on an arbitrary process.
278 */
279 int
280 copyin_proc(struct proc *p, const void *uaddr, void *kaddr, size_t len)
281 {
282 struct iovec iov;
283 struct uio uio;
284 int error;
285
286 if (len == 0)
287 return (0);
288
289 iov.iov_base = kaddr;
290 iov.iov_len = len;
291 uio.uio_iov = &iov;
292 uio.uio_iovcnt = 1;
293 uio.uio_offset = (off_t)(intptr_t)uaddr;
294 uio.uio_resid = len;
295 uio.uio_segflg = UIO_SYSSPACE;
296 uio.uio_rw = UIO_READ;
297 uio.uio_procp = NULL;
298
299 /* XXXCDC: how should locking work here? */
300 if ((p->p_flag & P_WEXIT) || (p->p_vmspace->vm_refcnt < 1))
301 return (EFAULT);
302 p->p_vmspace->vm_refcnt++; /* XXX */
303 error = uvm_io(&p->p_vmspace->vm_map, &uio);
304 uvmspace_free(p->p_vmspace);
305
306 return (error);
307 }
308
309 /*
310 * Like copyout(), but operates on an arbitrary process.
311 */
312 int
313 copyout_proc(struct proc *p, const void *kaddr, void *uaddr, size_t len)
314 {
315 struct iovec iov;
316 struct uio uio;
317 int error;
318
319 if (len == 0)
320 return (0);
321
322 iov.iov_base = (void *) kaddr; /* XXX cast away const */
323 iov.iov_len = len;
324 uio.uio_iov = &iov;
325 uio.uio_iovcnt = 1;
326 uio.uio_offset = (off_t)(intptr_t)uaddr;
327 uio.uio_resid = len;
328 uio.uio_segflg = UIO_SYSSPACE;
329 uio.uio_rw = UIO_WRITE;
330 uio.uio_procp = NULL;
331
332 /* XXXCDC: how should locking work here? */
333 if ((p->p_flag & P_WEXIT) || (p->p_vmspace->vm_refcnt < 1))
334 return (EFAULT);
335 p->p_vmspace->vm_refcnt++; /* XXX */
336 error = uvm_io(&p->p_vmspace->vm_map, &uio);
337 uvmspace_free(p->p_vmspace);
338
339 return (error);
340 }
341
342 /*
343 * General routine to allocate a hash table.
344 * Allocate enough memory to hold at least `elements' list-head pointers.
345 * Return a pointer to the allocated space and set *hashmask to a pattern
346 * suitable for masking a value to use as an index into the returned array.
347 */
348 void *
349 hashinit(elements, htype, mtype, mflags, hashmask)
350 u_int elements;
351 enum hashtype htype;
352 struct malloc_type *mtype;
353 int mflags;
354 u_long *hashmask;
355 {
356 u_long hashsize, i;
357 LIST_HEAD(, generic) *hashtbl_list;
358 TAILQ_HEAD(, generic) *hashtbl_tailq;
359 size_t esize;
360 void *p;
361
362 if (elements == 0)
363 panic("hashinit: bad cnt");
364 for (hashsize = 1; hashsize < elements; hashsize <<= 1)
365 continue;
366
367 switch (htype) {
368 case HASH_LIST:
369 esize = sizeof(*hashtbl_list);
370 break;
371 case HASH_TAILQ:
372 esize = sizeof(*hashtbl_tailq);
373 break;
374 default:
375 #ifdef DIAGNOSTIC
376 panic("hashinit: invalid table type");
377 #else
378 return NULL;
379 #endif
380 }
381
382 if ((p = malloc(hashsize * esize, mtype, mflags)) == NULL)
383 return (NULL);
384
385 switch (htype) {
386 case HASH_LIST:
387 hashtbl_list = p;
388 for (i = 0; i < hashsize; i++)
389 LIST_INIT(&hashtbl_list[i]);
390 break;
391 case HASH_TAILQ:
392 hashtbl_tailq = p;
393 for (i = 0; i < hashsize; i++)
394 TAILQ_INIT(&hashtbl_tailq[i]);
395 break;
396 }
397 *hashmask = hashsize - 1;
398 return (p);
399 }
400
401 /*
402 * Free memory from hash table previosly allocated via hashinit().
403 */
404 void
405 hashdone(hashtbl, mtype)
406 void *hashtbl;
407 struct malloc_type *mtype;
408 {
409
410 free(hashtbl, mtype);
411 }
412
413
414 static void *
415 hook_establish(list, fn, arg)
416 hook_list_t *list;
417 void (*fn)(void *);
418 void *arg;
419 {
420 struct hook_desc *hd;
421
422 hd = malloc(sizeof(*hd), M_DEVBUF, M_NOWAIT);
423 if (hd == NULL)
424 return (NULL);
425
426 hd->hk_fn = fn;
427 hd->hk_arg = arg;
428 LIST_INSERT_HEAD(list, hd, hk_list);
429
430 return (hd);
431 }
432
433 static void
434 hook_disestablish(list, vhook)
435 hook_list_t *list;
436 void *vhook;
437 {
438 #ifdef DIAGNOSTIC
439 struct hook_desc *hd;
440
441 LIST_FOREACH(hd, list, hk_list) {
442 if (hd == vhook)
443 break;
444 }
445
446 if (hd == NULL)
447 panic("hook_disestablish: hook %p not established", vhook);
448 #endif
449 LIST_REMOVE((struct hook_desc *)vhook, hk_list);
450 free(vhook, M_DEVBUF);
451 }
452
453 static void
454 hook_destroy(list)
455 hook_list_t *list;
456 {
457 struct hook_desc *hd;
458
459 while ((hd = LIST_FIRST(list)) != NULL) {
460 LIST_REMOVE(hd, hk_list);
461 free(hd, M_DEVBUF);
462 }
463 }
464
465 static void
466 hook_proc_run(list, p)
467 hook_list_t *list;
468 struct proc *p;
469 {
470 struct hook_desc *hd;
471
472 for (hd = LIST_FIRST(list); hd != NULL; hd = LIST_NEXT(hd, hk_list)) {
473 ((void (*)(struct proc *, void *))*hd->hk_fn)(p,
474 hd->hk_arg);
475 }
476 }
477
478 /*
479 * "Shutdown hook" types, functions, and variables.
480 *
481 * Should be invoked immediately before the
482 * system is halted or rebooted, i.e. after file systems unmounted,
483 * after crash dump done, etc.
484 *
485 * Each shutdown hook is removed from the list before it's run, so that
486 * it won't be run again.
487 */
488
489 hook_list_t shutdownhook_list;
490
491 void *
492 shutdownhook_establish(fn, arg)
493 void (*fn)(void *);
494 void *arg;
495 {
496 return hook_establish(&shutdownhook_list, fn, arg);
497 }
498
499 void
500 shutdownhook_disestablish(vhook)
501 void *vhook;
502 {
503 hook_disestablish(&shutdownhook_list, vhook);
504 }
505
506 /*
507 * Run shutdown hooks. Should be invoked immediately before the
508 * system is halted or rebooted, i.e. after file systems unmounted,
509 * after crash dump done, etc.
510 *
511 * Each shutdown hook is removed from the list before it's run, so that
512 * it won't be run again.
513 */
514 void
515 doshutdownhooks()
516 {
517 struct hook_desc *dp;
518
519 while ((dp = LIST_FIRST(&shutdownhook_list)) != NULL) {
520 LIST_REMOVE(dp, hk_list);
521 (*dp->hk_fn)(dp->hk_arg);
522 #if 0
523 /*
524 * Don't bother freeing the hook structure,, since we may
525 * be rebooting because of a memory corruption problem,
526 * and this might only make things worse. It doesn't
527 * matter, anyway, since the system is just about to
528 * reboot.
529 */
530 free(dp, M_DEVBUF);
531 #endif
532 }
533 }
534
535 /*
536 * "Mountroot hook" types, functions, and variables.
537 */
538
539 hook_list_t mountroothook_list;
540
541 void *
542 mountroothook_establish(fn, dev)
543 void (*fn)(struct device *);
544 struct device *dev;
545 {
546 return hook_establish(&mountroothook_list, (void (*)(void *))fn, dev);
547 }
548
549 void
550 mountroothook_disestablish(vhook)
551 void *vhook;
552 {
553 hook_disestablish(&mountroothook_list, vhook);
554 }
555
556 void
557 mountroothook_destroy()
558 {
559 hook_destroy(&mountroothook_list);
560 }
561
562 void
563 domountroothook()
564 {
565 struct hook_desc *hd;
566
567 LIST_FOREACH(hd, &mountroothook_list, hk_list) {
568 if (hd->hk_arg == (void *)root_device) {
569 (*hd->hk_fn)(hd->hk_arg);
570 return;
571 }
572 }
573 }
574
575 hook_list_t exechook_list;
576
577 void *
578 exechook_establish(fn, arg)
579 void (*fn)(struct proc *, void *);
580 void *arg;
581 {
582 return hook_establish(&exechook_list, (void (*)(void *))fn, arg);
583 }
584
585 void
586 exechook_disestablish(vhook)
587 void *vhook;
588 {
589 hook_disestablish(&exechook_list, vhook);
590 }
591
592 /*
593 * Run exec hooks.
594 */
595 void
596 doexechooks(p)
597 struct proc *p;
598 {
599 hook_proc_run(&exechook_list, p);
600 }
601
602 hook_list_t exithook_list;
603
604 void *
605 exithook_establish(fn, arg)
606 void (*fn)(struct proc *, void *);
607 void *arg;
608 {
609 return hook_establish(&exithook_list, (void (*)(void *))fn, arg);
610 }
611
612 void
613 exithook_disestablish(vhook)
614 void *vhook;
615 {
616 hook_disestablish(&exithook_list, vhook);
617 }
618
619 /*
620 * Run exit hooks.
621 */
622 void
623 doexithooks(p)
624 struct proc *p;
625 {
626 hook_proc_run(&exithook_list, p);
627 }
628
629 hook_list_t forkhook_list;
630
631 void *
632 forkhook_establish(fn)
633 void (*fn)(struct proc *, struct proc *);
634 {
635 return hook_establish(&forkhook_list, (void (*)(void *))fn, NULL);
636 }
637
638 void
639 forkhook_disestablish(vhook)
640 void *vhook;
641 {
642 hook_disestablish(&forkhook_list, vhook);
643 }
644
645 /*
646 * Run fork hooks.
647 */
648 void
649 doforkhooks(p2, p1)
650 struct proc *p2, *p1;
651 {
652 struct hook_desc *hd;
653
654 LIST_FOREACH(hd, &forkhook_list, hk_list) {
655 ((void (*)(struct proc *, struct proc *))*hd->hk_fn)
656 (p2, p1);
657 }
658 }
659
660 /*
661 * "Power hook" types, functions, and variables.
662 * The list of power hooks is kept ordered with the last registered hook
663 * first.
664 * When running the hooks on power down the hooks are called in reverse
665 * registration order, when powering up in registration order.
666 */
667 struct powerhook_desc {
668 CIRCLEQ_ENTRY(powerhook_desc) sfd_list;
669 void (*sfd_fn)(int, void *);
670 void *sfd_arg;
671 };
672
673 CIRCLEQ_HEAD(, powerhook_desc) powerhook_list =
674 CIRCLEQ_HEAD_INITIALIZER(powerhook_list);
675
676 void *
677 powerhook_establish(fn, arg)
678 void (*fn)(int, void *);
679 void *arg;
680 {
681 struct powerhook_desc *ndp;
682
683 ndp = (struct powerhook_desc *)
684 malloc(sizeof(*ndp), M_DEVBUF, M_NOWAIT);
685 if (ndp == NULL)
686 return (NULL);
687
688 ndp->sfd_fn = fn;
689 ndp->sfd_arg = arg;
690 CIRCLEQ_INSERT_HEAD(&powerhook_list, ndp, sfd_list);
691
692 return (ndp);
693 }
694
695 void
696 powerhook_disestablish(vhook)
697 void *vhook;
698 {
699 #ifdef DIAGNOSTIC
700 struct powerhook_desc *dp;
701
702 CIRCLEQ_FOREACH(dp, &powerhook_list, sfd_list)
703 if (dp == vhook)
704 goto found;
705 panic("powerhook_disestablish: hook %p not established", vhook);
706 found:
707 #endif
708
709 CIRCLEQ_REMOVE(&powerhook_list, (struct powerhook_desc *)vhook,
710 sfd_list);
711 free(vhook, M_DEVBUF);
712 }
713
714 /*
715 * Run power hooks.
716 */
717 void
718 dopowerhooks(why)
719 int why;
720 {
721 struct powerhook_desc *dp;
722
723 if (why == PWR_RESUME || why == PWR_SOFTRESUME) {
724 CIRCLEQ_FOREACH_REVERSE(dp, &powerhook_list, sfd_list) {
725 (*dp->sfd_fn)(why, dp->sfd_arg);
726 }
727 } else {
728 CIRCLEQ_FOREACH(dp, &powerhook_list, sfd_list) {
729 (*dp->sfd_fn)(why, dp->sfd_arg);
730 }
731 }
732 }
733
734 /*
735 * Determine the root device and, if instructed to, the root file system.
736 */
737
738 #include "md.h"
739 #if NMD == 0
740 #undef MEMORY_DISK_HOOKS
741 #endif
742
743 #ifdef MEMORY_DISK_HOOKS
744 static struct device fakemdrootdev[NMD];
745 #endif
746
747 #ifdef MEMORY_DISK_IS_ROOT
748 #define BOOT_FROM_MEMORY_HOOKS 1
749 #endif
750
751 #include "raid.h"
752 #if NRAID == 1
753 #define BOOT_FROM_RAID_HOOKS 1
754 #endif
755
756 #ifdef BOOT_FROM_RAID_HOOKS
757 extern int numraid;
758 extern struct device *raidrootdev;
759 #endif
760
761 /*
762 * The device and wedge that we booted from. If booted_wedge is NULL,
763 * the we might consult booted_partition.
764 */
765 struct device *booted_device;
766 struct device *booted_wedge;
767 int booted_partition;
768
769 /*
770 * Use partition letters if it's a disk class but not a wedge.
771 * XXX Check for wedge is kinda gross.
772 */
773 #define DEV_USES_PARTITIONS(dv) \
774 ((dv)->dv_class == DV_DISK && \
775 ((dv)->dv_cfdata == NULL || \
776 strcmp((dv)->dv_cfdata->cf_name, "dk") != 0))
777
778 void
779 setroot(bootdv, bootpartition)
780 struct device *bootdv;
781 int bootpartition;
782 {
783 struct device *dv;
784 int len;
785 #ifdef MEMORY_DISK_HOOKS
786 int i;
787 #endif
788 dev_t nrootdev;
789 dev_t ndumpdev = NODEV;
790 char buf[128];
791 const char *rootdevname;
792 const char *dumpdevname;
793 struct device *rootdv = NULL; /* XXX gcc -Wuninitialized */
794 struct device *dumpdv = NULL;
795 struct ifnet *ifp;
796 const char *deffsname;
797 struct vfsops *vops;
798
799 #ifdef MEMORY_DISK_HOOKS
800 for (i = 0; i < NMD; i++) {
801 fakemdrootdev[i].dv_class = DV_DISK;
802 fakemdrootdev[i].dv_cfdata = NULL;
803 fakemdrootdev[i].dv_unit = i;
804 fakemdrootdev[i].dv_parent = NULL;
805 snprintf(fakemdrootdev[i].dv_xname,
806 sizeof(fakemdrootdev[i].dv_xname), "md%d", i);
807 }
808 #endif /* MEMORY_DISK_HOOKS */
809
810 #ifdef MEMORY_DISK_IS_ROOT
811 bootdv = &fakemdrootdev[0];
812 bootpartition = 0;
813 #endif
814
815 /*
816 * If NFS is specified as the file system, and we found
817 * a DV_DISK boot device (or no boot device at all), then
818 * find a reasonable network interface for "rootspec".
819 */
820 vops = vfs_getopsbyname("nfs");
821 if (vops != NULL && vops->vfs_mountroot == mountroot &&
822 rootspec == NULL &&
823 (bootdv == NULL || bootdv->dv_class != DV_IFNET)) {
824 IFNET_FOREACH(ifp) {
825 if ((ifp->if_flags &
826 (IFF_LOOPBACK|IFF_POINTOPOINT)) == 0)
827 break;
828 }
829 if (ifp == NULL) {
830 /*
831 * Can't find a suitable interface; ask the
832 * user.
833 */
834 boothowto |= RB_ASKNAME;
835 } else {
836 /*
837 * Have a suitable interface; behave as if
838 * the user specified this interface.
839 */
840 rootspec = (const char *)ifp->if_xname;
841 }
842 }
843
844 /*
845 * If wildcarded root and we the boot device wasn't determined,
846 * ask the user.
847 */
848 if (rootspec == NULL && bootdv == NULL)
849 boothowto |= RB_ASKNAME;
850
851 top:
852 if (boothowto & RB_ASKNAME) {
853 struct device *defdumpdv;
854
855 for (;;) {
856 printf("root device");
857 if (bootdv != NULL) {
858 printf(" (default %s", bootdv->dv_xname);
859 if (DEV_USES_PARTITIONS(bootdv))
860 printf("%c", bootpartition + 'a');
861 printf(")");
862 }
863 printf(": ");
864 len = cngetsn(buf, sizeof(buf));
865 if (len == 0 && bootdv != NULL) {
866 strlcpy(buf, bootdv->dv_xname, sizeof(buf));
867 len = strlen(buf);
868 }
869 if (len > 0 && buf[len - 1] == '*') {
870 buf[--len] = '\0';
871 dv = getdisk(buf, len, 1, &nrootdev, 0);
872 if (dv != NULL) {
873 rootdv = dv;
874 break;
875 }
876 }
877 dv = getdisk(buf, len, bootpartition, &nrootdev, 0);
878 if (dv != NULL) {
879 rootdv = dv;
880 break;
881 }
882 }
883
884 /*
885 * Set up the default dump device. If root is on
886 * a network device, there is no default dump
887 * device, since we don't support dumps to the
888 * network.
889 */
890 if (DEV_USES_PARTITIONS(rootdv) == 0)
891 defdumpdv = NULL;
892 else
893 defdumpdv = rootdv;
894
895 for (;;) {
896 printf("dump device");
897 if (defdumpdv != NULL) {
898 /*
899 * Note, we know it's a disk if we get here.
900 */
901 printf(" (default %sb)", defdumpdv->dv_xname);
902 }
903 printf(": ");
904 len = cngetsn(buf, sizeof(buf));
905 if (len == 0) {
906 if (defdumpdv != NULL) {
907 ndumpdev = MAKEDISKDEV(major(nrootdev),
908 DISKUNIT(nrootdev), 1);
909 }
910 dumpdv = defdumpdv;
911 break;
912 }
913 if (len == 4 && strcmp(buf, "none") == 0) {
914 dumpdv = NULL;
915 break;
916 }
917 dv = getdisk(buf, len, 1, &ndumpdev, 1);
918 if (dv != NULL) {
919 dumpdv = dv;
920 break;
921 }
922 }
923
924 rootdev = nrootdev;
925 dumpdev = ndumpdev;
926
927 for (vops = LIST_FIRST(&vfs_list); vops != NULL;
928 vops = LIST_NEXT(vops, vfs_list)) {
929 if (vops->vfs_mountroot != NULL &&
930 vops->vfs_mountroot == mountroot)
931 break;
932 }
933
934 if (vops == NULL) {
935 mountroot = NULL;
936 deffsname = "generic";
937 } else
938 deffsname = vops->vfs_name;
939
940 for (;;) {
941 printf("file system (default %s): ", deffsname);
942 len = cngetsn(buf, sizeof(buf));
943 if (len == 0)
944 break;
945 if (len == 4 && strcmp(buf, "halt") == 0)
946 cpu_reboot(RB_HALT, NULL);
947 else if (len == 6 && strcmp(buf, "reboot") == 0)
948 cpu_reboot(0, NULL);
949 #if defined(DDB)
950 else if (len == 3 && strcmp(buf, "ddb") == 0) {
951 console_debugger();
952 }
953 #endif
954 else if (len == 7 && strcmp(buf, "generic") == 0) {
955 mountroot = NULL;
956 break;
957 }
958 vops = vfs_getopsbyname(buf);
959 if (vops == NULL || vops->vfs_mountroot == NULL) {
960 printf("use one of: generic");
961 for (vops = LIST_FIRST(&vfs_list);
962 vops != NULL;
963 vops = LIST_NEXT(vops, vfs_list)) {
964 if (vops->vfs_mountroot != NULL)
965 printf(" %s", vops->vfs_name);
966 }
967 #if defined(DDB)
968 printf(" ddb");
969 #endif
970 printf(" halt reboot\n");
971 } else {
972 mountroot = vops->vfs_mountroot;
973 break;
974 }
975 }
976
977 } else if (rootspec == NULL) {
978 int majdev;
979
980 /*
981 * Wildcarded root; use the boot device.
982 */
983 rootdv = bootdv;
984
985 majdev = devsw_name2blk(bootdv->dv_xname, NULL, 0);
986 if (majdev >= 0) {
987 /*
988 * Root is on a disk. `bootpartition' is root,
989 * unless the device does not use partitions.
990 */
991 if (DEV_USES_PARTITIONS(bootdv))
992 rootdev = MAKEDISKDEV(majdev, bootdv->dv_unit,
993 bootpartition);
994 else
995 rootdev = makedev(majdev, bootdv->dv_unit);
996 }
997 } else {
998
999 /*
1000 * `root on <dev> ...'
1001 */
1002
1003 /*
1004 * If it's a network interface, we can bail out
1005 * early.
1006 */
1007 dv = finddevice(rootspec);
1008 if (dv != NULL && dv->dv_class == DV_IFNET) {
1009 rootdv = dv;
1010 goto haveroot;
1011 }
1012
1013 rootdevname = devsw_blk2name(major(rootdev));
1014 if (rootdevname == NULL) {
1015 printf("unknown device major 0x%x\n", rootdev);
1016 boothowto |= RB_ASKNAME;
1017 goto top;
1018 }
1019 memset(buf, 0, sizeof(buf));
1020 snprintf(buf, sizeof(buf), "%s%d", rootdevname,
1021 DISKUNIT(rootdev));
1022
1023 rootdv = finddevice(buf);
1024 if (rootdv == NULL) {
1025 printf("device %s (0x%x) not configured\n",
1026 buf, rootdev);
1027 boothowto |= RB_ASKNAME;
1028 goto top;
1029 }
1030 }
1031
1032 haveroot:
1033
1034 root_device = rootdv;
1035
1036 switch (rootdv->dv_class) {
1037 case DV_IFNET:
1038 aprint_normal("root on %s", rootdv->dv_xname);
1039 break;
1040
1041 case DV_DISK:
1042 aprint_normal("root on %s%c", rootdv->dv_xname,
1043 DISKPART(rootdev) + 'a');
1044 break;
1045
1046 default:
1047 printf("can't determine root device\n");
1048 boothowto |= RB_ASKNAME;
1049 goto top;
1050 }
1051
1052 /*
1053 * Now configure the dump device.
1054 *
1055 * If we haven't figured out the dump device, do so, with
1056 * the following rules:
1057 *
1058 * (a) We already know dumpdv in the RB_ASKNAME case.
1059 *
1060 * (b) If dumpspec is set, try to use it. If the device
1061 * is not available, punt.
1062 *
1063 * (c) If dumpspec is not set, the dump device is
1064 * wildcarded or unspecified. If the root device
1065 * is DV_IFNET, punt. Otherwise, use partition b
1066 * of the root device.
1067 */
1068
1069 if (boothowto & RB_ASKNAME) { /* (a) */
1070 if (dumpdv == NULL)
1071 goto nodumpdev;
1072 } else if (dumpspec != NULL) { /* (b) */
1073 if (strcmp(dumpspec, "none") == 0 || dumpdev == NODEV) {
1074 /*
1075 * Operator doesn't want a dump device.
1076 * Or looks like they tried to pick a network
1077 * device. Oops.
1078 */
1079 goto nodumpdev;
1080 }
1081
1082 dumpdevname = devsw_blk2name(major(dumpdev));
1083 if (dumpdevname == NULL)
1084 goto nodumpdev;
1085 memset(buf, 0, sizeof(buf));
1086 snprintf(buf, sizeof(buf), "%s%d", dumpdevname,
1087 DISKUNIT(dumpdev));
1088
1089 dumpdv = finddevice(buf);
1090 if (dumpdv == NULL) {
1091 /*
1092 * Device not configured.
1093 */
1094 goto nodumpdev;
1095 }
1096 } else { /* (c) */
1097 if (DEV_USES_PARTITIONS(rootdv) == 0)
1098 goto nodumpdev;
1099 else {
1100 dumpdv = rootdv;
1101 dumpdev = MAKEDISKDEV(major(rootdev),
1102 dumpdv->dv_unit, 1);
1103 }
1104 }
1105
1106 aprint_normal(" dumps on %s%c\n", dumpdv->dv_xname,
1107 DISKPART(dumpdev) + 'a');
1108 return;
1109
1110 nodumpdev:
1111 dumpdev = NODEV;
1112 aprint_normal("\n");
1113 }
1114
1115 static struct device *
1116 finddevice(name)
1117 const char *name;
1118 {
1119 struct device *dv;
1120 #if defined(BOOT_FROM_RAID_HOOKS) || defined(BOOT_FROM_MEMORY_HOOKS)
1121 int j;
1122 #endif /* BOOT_FROM_RAID_HOOKS || BOOT_FROM_MEMORY_HOOKS */
1123
1124 #ifdef BOOT_FROM_RAID_HOOKS
1125 for (j = 0; j < numraid; j++) {
1126 if (strcmp(name, raidrootdev[j].dv_xname) == 0) {
1127 dv = &raidrootdev[j];
1128 return (dv);
1129 }
1130 }
1131 #endif /* BOOT_FROM_RAID_HOOKS */
1132
1133 #ifdef BOOT_FROM_MEMORY_HOOKS
1134 for (j = 0; j < NMD; j++) {
1135 if (strcmp(name, fakemdrootdev[j].dv_xname) == 0) {
1136 dv = &fakemdrootdev[j];
1137 return (dv);
1138 }
1139 }
1140 #endif /* BOOT_FROM_MEMORY_HOOKS */
1141
1142 for (dv = TAILQ_FIRST(&alldevs); dv != NULL;
1143 dv = TAILQ_NEXT(dv, dv_list))
1144 if (strcmp(dv->dv_xname, name) == 0)
1145 break;
1146 return (dv);
1147 }
1148
1149 static struct device *
1150 getdisk(str, len, defpart, devp, isdump)
1151 char *str;
1152 int len, defpart;
1153 dev_t *devp;
1154 int isdump;
1155 {
1156 struct device *dv;
1157 #ifdef MEMORY_DISK_HOOKS
1158 int i;
1159 #endif
1160 #ifdef BOOT_FROM_RAID_HOOKS
1161 int j;
1162 #endif
1163
1164 if ((dv = parsedisk(str, len, defpart, devp)) == NULL) {
1165 printf("use one of:");
1166 #ifdef MEMORY_DISK_HOOKS
1167 if (isdump == 0)
1168 for (i = 0; i < NMD; i++)
1169 printf(" %s[a-%c]", fakemdrootdev[i].dv_xname,
1170 'a' + MAXPARTITIONS - 1);
1171 #endif
1172 #ifdef BOOT_FROM_RAID_HOOKS
1173 if (isdump == 0)
1174 for (j = 0; j < numraid; j++)
1175 printf(" %s[a-%c]", raidrootdev[j].dv_xname,
1176 'a' + MAXPARTITIONS - 1);
1177 #endif
1178 TAILQ_FOREACH(dv, &alldevs, dv_list) {
1179 if (DEV_USES_PARTITIONS(dv))
1180 printf(" %s[a-%c]", dv->dv_xname,
1181 'a' + MAXPARTITIONS - 1);
1182 else if (dv->dv_class == DV_DISK)
1183 printf(" %s", dv->dv_xname);
1184 if (isdump == 0 && dv->dv_class == DV_IFNET)
1185 printf(" %s", dv->dv_xname);
1186 }
1187 if (isdump)
1188 printf(" none");
1189 #if defined(DDB)
1190 printf(" ddb");
1191 #endif
1192 printf(" halt reboot\n");
1193 }
1194 return (dv);
1195 }
1196
1197 static struct device *
1198 parsedisk(str, len, defpart, devp)
1199 char *str;
1200 int len, defpart;
1201 dev_t *devp;
1202 {
1203 struct device *dv;
1204 char *cp, c;
1205 int majdev, part;
1206 #ifdef MEMORY_DISK_HOOKS
1207 int i;
1208 #endif
1209 if (len == 0)
1210 return (NULL);
1211
1212 if (len == 4 && strcmp(str, "halt") == 0)
1213 cpu_reboot(RB_HALT, NULL);
1214 else if (len == 6 && strcmp(str, "reboot") == 0)
1215 cpu_reboot(0, NULL);
1216 #if defined(DDB)
1217 else if (len == 3 && strcmp(str, "ddb") == 0)
1218 console_debugger();
1219 #endif
1220
1221 cp = str + len - 1;
1222 c = *cp;
1223 if (c >= 'a' && c <= ('a' + MAXPARTITIONS - 1)) {
1224 part = c - 'a';
1225 *cp = '\0';
1226 } else
1227 part = defpart;
1228
1229 #ifdef MEMORY_DISK_HOOKS
1230 for (i = 0; i < NMD; i++)
1231 if (strcmp(str, fakemdrootdev[i].dv_xname) == 0) {
1232 dv = &fakemdrootdev[i];
1233 goto gotdisk;
1234 }
1235 #endif
1236
1237 dv = finddevice(str);
1238 if (dv != NULL) {
1239 if (dv->dv_class == DV_DISK) {
1240 #ifdef MEMORY_DISK_HOOKS
1241 gotdisk:
1242 #endif
1243 majdev = devsw_name2blk(dv->dv_xname, NULL, 0);
1244 if (majdev < 0)
1245 panic("parsedisk");
1246 if (DEV_USES_PARTITIONS(dv))
1247 *devp = MAKEDISKDEV(majdev, dv->dv_unit, part);
1248 else
1249 *devp = makedev(majdev, dv->dv_unit);
1250 }
1251
1252 if (dv->dv_class == DV_IFNET)
1253 *devp = NODEV;
1254 }
1255
1256 *cp = c;
1257 return (dv);
1258 }
1259
/*
 * snprintf() `bytes' into `buf' as a decimal number followed by a space,
 * an optional scale prefix and `suffix', dividing the value by `divisor'
 * as often as needed so that the result fits into `len' bytes (including
 * the terminating NUL).
 *
 * A divisor of 1024 selects the prefixes K, M, G, T, P, E; any other
 * divisor selects the SI prefixes k, M, G, T, P, E.
 *
 * Returns the value returned by snprintf() for the formatted string, or
 * -1 if buf or suffix is NULL, or if len is too small to hold
 * `x y' + suffix + NUL (buf is then set to the empty string if len > 0).
 *
 * E.g, given a len of 9, a suffix of `B' and a divisor of 1024:
 *	bytes		result
 *	-----		------
 *	99999		`99999 B'
 *	100000		`97 KB'
 *	66715648	`65152 KB'
 *	252215296	`240 MB'
 */
int
humanize_number(char *buf, size_t len, u_int64_t bytes, const char *suffix,
    int divisor)
{
	/* prefixes are: (none), kilo, Mega, Giga, Tera, Peta, Exa */
	const char *prefixes;
	const u_int64_t u64_max = ~(u_int64_t)0;
	int r, fits_unscaled;
	u_int64_t max;
	size_t i, digits, suffixlen;

	if (buf == NULL || suffix == NULL)
		return (-1);
	if (len > 0)
		buf[0] = '\0';
	suffixlen = strlen(suffix);
	/* check if enough room for `x y' + suffix + `\0' */
	if (len < 4 + suffixlen)
		return (-1);

	if (divisor == 1024) {
		/*
		 * binary multiplies
		 * XXX IEC 60027-2 recommends Ki, Mi, Gi...
		 */
		prefixes = " KMGTPE";
	} else
		prefixes = " kMGTPE"; /* SI for decimal multiplies */

	/*
	 * max = 10^digits is the smallest value that no longer fits.
	 * With 20 or more digits of room that power of ten exceeds the
	 * range of a 64-bit integer; multiplying on regardless would wrap
	 * (down to 0 from 64 digits up) and force needless scaling, so
	 * note that every possible value fits and stop.
	 */
	digits = len - suffixlen - 3;
	max = 1;
	fits_unscaled = 0;
	for (i = 0; i < digits; i++) {
		if (max > u64_max / 10) {
			fits_unscaled = 1;
			break;
		}
		max *= 10;
	}

	/* Scale down until the number fits or the prefixes run out. */
	i = 0;
	if (!fits_unscaled) {
		while (bytes >= max && prefixes[i + 1] != '\0') {
			bytes /= divisor;
			i++;
		}
	}

	r = snprintf(buf, len, "%qu%s%c%s", (unsigned long long)bytes,
	    i == 0 ? "" : " ", prefixes[i], suffix);

	return (r);
}
1316
/*
 * Format `bytes' into `buf' (of size `len') using humanize_number() with
 * a `B' suffix and a divisor of 1024.  When the value was not scaled the
 * text ends in ` B'; that tail is cut off so only the bare number is
 * left.  The return value is the one from humanize_number(), passed on
 * unchanged (-1 on failure).
 */
int
format_bytes(buf, len, bytes)
	char *buf;
	size_t len;
	u_int64_t bytes;
{
	static const char plain_unit[] = " B";
	size_t tail;
	int rv;

	rv = humanize_number(buf, len, bytes, "B", 1024);
	if (rv == -1)
		return (rv);

	/* drop the trailing ` B' if that is how the string ends */
	tail = strlen(buf) - 2;
	if (strcmp(buf + tail, plain_unit) == 0)
		buf[tail] = '\0';

	return (rv);
}
1335
/*
 * Hook run at the start of a system call.  If the process is being
 * traced, the MD syscall dispatch code calls this just before the
 * system call is actually executed.
 * The MD caller guarantees that the passed 'code' lies within the
 * system call number range supported by the emulation the process
 * runs under.
 *
 * Returns the result of systrace_enter() when the process has
 * P_SYSTRACE set (SYSTRACE kernels only), otherwise 0.
 */
int
trace_enter(struct lwp *l, register_t code,
    register_t realcode, const struct sysent *callp, void *args)
{
#if defined(KTRACE) || defined(SYSTRACE)
	struct proc *tracee = l->l_proc;
#endif

#ifdef SYSCALL_DEBUG
	scdebug_call(l, code, args);
#endif

#ifdef KTRACE
	if (KTRPOINT(tracee, KTR_SYSCALL)) {
		ktrsyscall(tracee, code, realcode, callp, args);
	}
#endif

#ifdef SYSTRACE
	if (!ISSET(tracee->p_flag, P_SYSTRACE))
		return 0;
	return systrace_enter(tracee, code, args);
#else
	return 0;
#endif
}
1366
/*
 * Hook run at the end of a system call.  If the process is being
 * traced, the MD syscall dispatch code calls this just after the
 * system call has finished.
 * The MD caller guarantees that the passed 'code' lies within the
 * system call number range supported by the emulation the process
 * runs under.
 *
 * The ktrace and systrace reporting calls are each made with
 * KERNEL_PROC_LOCK held.
 */
void
trace_exit(struct lwp *l, register_t code, void *args, register_t rval[],
    int error)
{
#if defined(KTRACE) || defined(SYSTRACE)
	struct proc *tracee = l->l_proc;
#endif

#ifdef SYSCALL_DEBUG
	scdebug_ret(l, code, error, rval);
#endif

#ifdef KTRACE
	if (KTRPOINT(tracee, KTR_SYSRET)) {
		KERNEL_PROC_LOCK(l);
		ktrsysret(tracee, code, error, rval);
		KERNEL_PROC_UNLOCK(l);
	}
#endif

#ifdef SYSTRACE
	if (!ISSET(tracee->p_flag, P_SYSTRACE))
		return;
	KERNEL_PROC_LOCK(l);
	systrace_exit(tracee, code, args, rval, error);
	KERNEL_PROC_UNLOCK(l);
#endif
}
Cache object: d82409828ef1a33125e951002f50ccc7
|