FreeBSD/Linux Kernel Cross Reference
sys/kern/init_main.c
1 /* $OpenBSD: init_main.c,v 1.320 2023/01/01 07:00:51 jsg Exp $ */
2 /* $NetBSD: init_main.c,v 1.84.4.1 1996/06/02 09:08:06 mrg Exp $ */
3
4 /*
5 * Copyright (c) 1995 Christopher G. Demetriou. All rights reserved.
6 * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
7 * The Regents of the University of California. All rights reserved.
8 * (c) UNIX System Laboratories, Inc.
9 * All or some portions of this file are derived from material licensed
10 * to the University of California by American Telephone and Telegraph
11 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
12 * the permission of UNIX System Laboratories, Inc.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
22 * 3. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * @(#)init_main.c 8.9 (Berkeley) 1/21/94
39 */
40
41 #include <sys/param.h>
42 #include <sys/filedesc.h>
43 #include <sys/errno.h>
44 #include <sys/kthread.h>
45 #include <sys/mount.h>
46 #include <sys/proc.h>
47 #include <sys/resourcevar.h>
48 #include <sys/signalvar.h>
49 #include <sys/systm.h>
50 #include <sys/namei.h>
51 #include <sys/vnode.h>
52 #include <sys/tty.h>
53 #include <sys/buf.h>
54 #include <sys/device.h>
55 #include <sys/socketvar.h>
56 #include <sys/lockf.h>
57 #include <sys/reboot.h>
58 #ifdef SYSVSHM
59 #include <sys/shm.h>
60 #endif
61 #ifdef SYSVSEM
62 #include <sys/sem.h>
63 #endif
64 #ifdef SYSVMSG
65 #include <sys/msg.h>
66 #endif
67 #include <sys/domain.h>
68 #include <sys/event.h>
69 #include <sys/msgbuf.h>
70 #include <sys/mbuf.h>
71 #include <sys/pipe.h>
72 #include <sys/witness.h>
73 #include <sys/smr.h>
74 #include <sys/evcount.h>
75
76 #include <sys/syscallargs.h>
77
78 #include <uvm/uvm_extern.h>
79
80 #include <net/if.h>
81 #include <net/rtable.h>
82
83 #if defined(CRYPTO)
84 #include <crypto/cryptodev.h>
85 #include <crypto/cryptosoft.h>
86 #endif
87
88 #if defined(KUBSAN)
89 extern void kubsan_init(void);
90 #endif
91
92 #if defined(NFSSERVER) || defined(NFSCLIENT)
93 extern void nfs_init(void);
94 #endif
95
96 #include "stoeplitz.h"
97 #if NSTOEPLITZ > 0
98 extern void stoeplitz_init(void);
99 #endif
100
101 #include "mpath.h"
102 #include "vscsi.h"
103 #include "softraid.h"
104
/*
 * Copyright banner.  main() prints this string with printf() right after
 * config_init()/consinit() have been called.
 */
105 const char copyright[] =
106 "Copyright (c) 1982, 1986, 1989, 1991, 1993\n"
107 "\tThe Regents of the University of California. All rights reserved.\n"
108 "Copyright (c) 1995-2023 OpenBSD. All rights reserved. https://www.OpenBSD.org\n";
109
110 /* Components of the first process -- never freed. */
/*
 * These are file-scope objects (not dynamically allocated); main() links
 * them together while it builds process 0 ("swapper").
 */
111 struct session session0;
112 struct pgrp pgrp0;
113 struct proc proc0;
114 struct process process0;
115 struct plimit limit0;
116 struct vmspace vmspace0;
117 struct sigacts sigacts0;
/* Set by main() from the proc that fork1() returns for start_init(). */
118 struct process *initprocess;
/* Filled in through kthread_create() when main() creates "reaper". */
119 struct proc *reaperproc;
120
/* Assigned to proc0's p_addr in main(); not defined in the visible code. */
121 extern struct user *proc0paddr;
122
123 struct vnode *rootvp, *swapdev_vp;
/* Boot flags; this file tests RB_GOODRANDOM (main) and RB_SINGLE (start_init). */
124 int boothowto;
125 int db_active = 0;
126 int ncpus = 1;
127 int ncpusfound = 1; /* number of cpus we find */
128 volatile int start_init_exec; /* semaphore for start_init() */
129
130 #if !defined(NO_PROPOLICE)
/* Placed in the ".openbsd.randomdata" section via the section attribute. */
131 long __guard_local __attribute__((section(".openbsd.randomdata")));
132 #endif
133
/*
 * Prototypes.  main(), check_console() and start_init() are defined later
 * in this file; the remaining routines are called from main() and their
 * definitions are not part of the visible code.
 */
134 /* XXX return int so gcc -Werror won't complain */
135 int main(void *);
136 void check_console(struct proc *);
137 void start_init(void *);
138 void db_ctf_init(void);
139 void prof_init(void);
140 void init_exec(void);
141 void futex_init(void);
142 void taskq_init(void);
143 void timeout_proc_init(void);
144 void pool_gc_pages(void *);
145 void percpu_init(void);
146
147 #ifdef DIAGNOSTIC
/* Set to 1 by main() once the pseudo-device attach loop has finished. */
148 int pdevinit_done = 0;
149 #endif
150
151 /*
152 * System startup; initialize the world, create process 0, mount root
153 * filesystem, and fork to create init and pagedaemon. Most of the
154 * hard work is done in the lower-level initialization routines including
155 * startup(), which does memory initialization and autoconfiguration.
 *
 * framep is not referenced anywhere in the body below.
 *
 * The function never returns: after releasing init (start_init_exec)
 * it sleeps forever in the loop at the bottom.  The call order in the
 * body is significant; several steps carry comments stating what they
 * must precede or follow.
156 */
157 /* XXX return int, so gcc -Werror won't complain */
158 int
159 main(void *framep)
160 {
161 struct proc *p;
162 struct process *pr;
163 struct pdevinit *pdev;
164 extern struct pdevinit pdevinit[];
165 extern void disk_init(void);
166
167 /*
168 * Initialize the current process pointer (curproc) before
169 * any possible traps/probes to simplify trap processing.
170 */
171 curproc = p = &proc0;
172 p->p_cpu = curcpu();
173
174 /*
175 * Initialize timeouts.
176 */
177 timeout_startup();
178
179 /*
180 * Attempt to find console and initialize
181 * in case of early panic or other messages.
182 */
183 config_init(); /* init autoconfiguration data structures */
184 consinit();
185
186 printf("%s\n", copyright);
187
188 #ifdef KUBSAN
189 /* Initialize kubsan. */
190 kubsan_init();
191 #endif
192
193 WITNESS_INITIALIZE();
194
195 KERNEL_LOCK_INIT();
196 SCHED_LOCK_INIT();
197
198 rw_obj_init();
199 uvm_init();
200 disk_init(); /* must come before autoconfiguration */
201 tty_init(); /* initialise tty's */
202 cpu_startup();
203
204 random_start(boothowto & RB_GOODRANDOM); /* Start the flow */
205
206 /*
207 * Initialize mbuf's. Do this now because we might attempt to
208 * allocate mbufs or mbuf clusters during autoconfiguration.
209 */
210 mbinit();
211
212 #if NSTOEPLITZ > 0
213 stoeplitz_init();
214 #endif
215
216 /* Initialize sockets. */
217 soinit();
218
219 /* Initialize SRP subsystem. */
220 srp_startup();
221
222 /* Initialize SMR subsystem. */
223 smr_startup();
224
225 /*
226 * Initialize process and pgrp structures.
227 */
228 procinit();
229
230 /* Initialize file locking. */
231 lf_init();
232
233 /*
234 * Initialize filedescriptors.
235 */
236 filedesc_init();
237
238 /*
239 * Initialize pipes.
240 */
241 pipe_init();
242
243 /*
244 * Initialize kqueues.
245 */
246 kqueue_init();
247
248 /*
249 * Initialize futexes.
250 */
251 futex_init();
252
253 /* Create credentials. */
254 p->p_ucred = crget();
255 p->p_ucred->cr_ngroups = 1; /* group 0 */
256
257 /*
258 * Create process 0 (the swapper).
259 */
260 pr = &process0;
261 process_initialize(pr, p);
262
/* Enter process0 in the global process list and in the PID hash under id 0. */
263 LIST_INSERT_HEAD(&allprocess, pr, ps_list);
264 LIST_INSERT_HEAD(PIDHASH(0), pr, ps_hash);
265 atomic_setbits_int(&pr->ps_flags, PS_SYSTEM);
266
267 /* Set the default routing table/domain. */
268 process0.ps_rtableid = 0;
269
/* Same for the thread (proc0) and for its process group, pgrp0. */
270 LIST_INSERT_HEAD(&allproc, p, p_list);
271 pr->ps_pgrp = &pgrp0;
272 LIST_INSERT_HEAD(TIDHASH(0), p, p_hash);
273 LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
274 LIST_INIT(&pgrp0.pg_members);
275 LIST_INSERT_HEAD(&pgrp0.pg_members, pr, ps_pglist);
276
/* session0 holds pgrp0, with process0 as the session leader. */
277 pgrp0.pg_session = &session0;
278 session0.s_count = 1;
279 session0.s_leader = pr;
280
281 atomic_setbits_int(&p->p_flag, P_SYSTEM);
282 p->p_stat = SONPROC;
283 pr->ps_nice = NZERO;
284 strlcpy(pr->ps_comm, "swapper", sizeof(pr->ps_comm));
285
286 /* Init timeouts. */
287 timeout_set(&p->p_sleep_to, endtsleep, p);
288
289 /* Initialize signal state for process 0. */
290 signal_init();
291 siginit(&sigacts0);
292 pr->ps_sigacts = &sigacts0;
293
294 /* Create the file descriptor table. */
295 p->p_fd = pr->ps_fd = fdinit();
296
297 /* Create the limits structures. */
298 lim_startup(&limit0);
299 pr->ps_limit = &limit0;
300
301 /* Allocate a prototype map so we have something to fork. */
302 uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS),
303 trunc_page(VM_MAX_ADDRESS), TRUE, TRUE);
304 p->p_vmspace = pr->ps_vmspace = &vmspace0;
305
306 p->p_addr = proc0paddr; /* XXX */
307
308 /*
309 * Charge root for one process.
310 */
311 (void)chgproccnt(0, 1);
312
313 /* Initialize run queues */
314 sched_init_runqueues();
315 sleep_queue_init();
316 sched_init_cpu(curcpu());
/* Masking to 31 bits and adding 1 keeps the seed in [1, 2^31], never 0. */
317 p->p_cpu->ci_randseed = (arc4random() & 0x7fffffff) + 1;
318
319 /* Initialize timeouts in process context. */
320 timeout_proc_init();
321
322 /* Initialize task queues */
323 taskq_init();
324
325 /* Initialize the interface/address trees */
326 ifinit();
327
328 /* Lock the kernel on behalf of proc0. */
329 KERNEL_LOCK();
330
331 #if NMPATH > 0
332 /* Attach mpath before hardware */
333 config_rootfound("mpath", NULL);
334 #endif
335
336 /* Configure the devices */
337 cpu_configure();
338
339 /* Configure virtual memory system, set vm rlimits. */
340 uvm_init_limits(&limit0);
341
342 /* Per CPU memory allocation */
343 percpu_init();
344
345 /* Initialize the file systems. */
346 #if defined(NFSSERVER) || defined(NFSCLIENT)
347 nfs_init(); /* initialize server/shared data */
348 #endif
349 vfsinit();
350
351 /* Start real time and statistics clocks. */
352 initclocks();
353
354 #ifdef SYSVSHM
355 /* Initialize System V style shared memory. */
356 shminit();
357 #endif
358
359 #ifdef SYSVSEM
360 /* Initialize System V style semaphores. */
361 seminit();
362 #endif
363
364 #ifdef SYSVMSG
365 /* Initialize System V style message queues. */
366 msginit();
367 #endif
368
369 /* Create default routing table before attaching lo0. */
370 rtable_init();
371
372 /* Attach pseudo-devices. */
/*
 * The pdevinit[] walk stops at the first entry whose pdev_attach is
 * NULL; entries whose pdev_count is not positive are skipped.
 */
373 for (pdev = pdevinit; pdev->pdev_attach != NULL; pdev++)
374 if (pdev->pdev_count > 0)
375 (*pdev->pdev_attach)(pdev->pdev_count);
376 #ifdef DIAGNOSTIC
377 pdevinit_done = 1;
378 #endif
379
380 #ifdef CRYPTO
381 crypto_init();
382 swcr_init();
383 #endif /* CRYPTO */
384
385 /*
386 * Initialize protocols.
387 */
388 domaininit();
389
390 initconsbuf();
391
392 #if defined(GPROF) || defined(DDBPROF)
393 /* Initialize kernel profiling. */
394 prof_init();
395 #endif
396
397 /* Enable per-CPU data. */
398 mbcpuinit();
399 kqueue_init_percpu();
400 uvm_init_percpu();
401 evcount_init_percpu();
402
403 /* init exec */
404 init_exec();
405
406 /* Start the scheduler */
407 scheduler_start();
408
409 /*
410 * Create process 1 (init(8)). We do this now, as Unix has
411 * historically had init be process 1, and changing this would
412 * probably upset a lot of people.
413 *
414 * Note that process 1 won't immediately exec init(8), but will
415 * wait for us to inform it that the root file system has been
416 * mounted.
417 */
418 {
419 struct proc *initproc;
420
/*
 * NOTE(review): start_init() uses its argument as the new proc, yet
 * NULL is passed as the argument here; presumably fork1() hands the
 * child proc to the entry point when the argument is NULL -- confirm
 * against fork1().
 */
421 if (fork1(p, FORK_FORK, start_init, NULL, NULL, &initproc))
422 panic("fork init");
423 initprocess = initproc->p_p;
424 }
425
426 /*
427 * Create any kernel threads whose creation was deferred because
428 * initprocess had not yet been created.
429 */
430 kthread_run_deferred_queue();
431
432 /*
433 * Now that device driver threads have been created, wait for
434 * them to finish any deferred autoconfiguration. Note we don't
435 * need to lock this semaphore, since we haven't booted any
436 * secondary processors, yet.
437 */
438 while (config_pending)
439 tsleep_nsec(&config_pending, PWAIT, "cfpend", INFSLP);
440
441 dostartuphooks();
442
443 #if NVSCSI > 0
444 config_rootfound("vscsi", NULL);
445 #endif
446 #if NSOFTRAID > 0
447 config_rootfound("softraid", NULL);
448 #endif
449
450 /* Configure root/swap devices */
451 diskconf();
452
453 #ifdef DDB
454 /* Make debug symbols available in ddb. */
455 db_ctf_init();
456 #endif
457
/* A missing mountroot hook and a failing one are both fatal. */
458 if (mountroot == NULL || ((*mountroot)() != 0))
459 panic("cannot mount root");
460
461 TAILQ_FIRST(&mountlist)->mnt_flag |= MNT_ROOTFS;
462
463 /* Get the vnode for '/'. Set p->p_fd->fd_cdir to reference it. */
464 if (VFS_ROOT(TAILQ_FIRST(&mountlist), &rootvnode))
465 panic("cannot find root vnode");
466 p->p_fd->fd_cdir = rootvnode;
467 vref(p->p_fd->fd_cdir);
468 VOP_UNLOCK(rootvnode);
469 p->p_fd->fd_rdir = NULL;
470
471 /*
472 * Now that root is mounted, we can fixup initprocess's CWD
473 * info. All other processes are kthreads, which merely
474 * share proc0's CWD info.
475 */
476 initprocess->ps_fd->fd_cdir = rootvnode;
477 vref(initprocess->ps_fd->fd_cdir);
478 initprocess->ps_fd->fd_rdir = NULL;
479
480 /*
481 * Now can look at time, having had a chance to verify the time
482 * from the file system. Reset p->p_rtime as it may have been
483 * munched in mi_switch() after the time got set.
 *
 * The loops below reuse pr and p as iterators, so from here on
 * they no longer name process0/proc0; neither variable is used
 * again in the remainder of this function.
484 */
485 LIST_FOREACH(pr, &allprocess, ps_list) {
486 nanouptime(&pr->ps_start);
487 TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
488 nanouptime(&p->p_cpu->ci_schedstate.spc_runtime);
489 timespecclear(&p->p_rtime);
490 }
491 }
492
493 uvm_swap_init();
494
495 /* Create the pageout daemon kernel thread. */
496 if (kthread_create(uvm_pageout, NULL, NULL, "pagedaemon"))
497 panic("fork pagedaemon");
498
499 /* Create the reaper daemon kernel thread. */
500 if (kthread_create(reaper, NULL, &reaperproc, "reaper"))
501 panic("fork reaper");
502
503 /* Create the cleaner daemon kernel thread. */
504 if (kthread_create(buf_daemon, NULL, &cleanerproc, "cleaner"))
505 panic("fork cleaner");
506
507 /* Create the update daemon kernel thread. */
508 if (kthread_create(syncer_thread, NULL, &syncerproc, "update"))
509 panic("fork update");
510
511 /* Create the aiodone daemon kernel thread. */
512 if (kthread_create(uvm_aiodone_daemon, NULL, NULL, "aiodoned"))
513 panic("fork aiodoned");
514
515 #if !defined(__hppa__)
516 /* Create the page zeroing kernel thread. */
517 if (kthread_create(uvm_pagezero_thread, NULL, NULL, "zerothread"))
518 panic("fork zerothread");
519 #endif
520
521 #if defined(MULTIPROCESSOR)
522 /* Boot the secondary processors. */
523 cpu_boot_secondary_processors();
524 #endif
525
526 /* Now that all CPUs partake in scheduling, start SMR thread. */
527 smr_startup_thread();
528
529 config_process_deferred_mountroot();
530
531 /*
532 * Okay, now we can let init(8) exec! It's off to userland!
 *
 * start_init() sleeps on &start_init_exec until the flag is
 * nonzero, so set the flag first and then wake the sleeper.
533 */
534 start_init_exec = 1;
535 wakeup((void *)&start_init_exec);
536
537 /*
538 * Start the idle pool page garbage collector
539 */
540 #if !(defined(__m88k__) && defined(MULTIPROCESSOR)) /* XXX */
541 pool_gc_pages(NULL);
542 #endif
543
544 start_periodic_resettodr();
545
546 /*
547 * proc0: nothing to do, back to sleep
548 */
549 while (1)
550 tsleep_nsec(&proc0, PVM, "scheduler", INFSLP);
551 /* NOTREACHED */
552 }
553
554 /*
555 * List of paths to try when searching for "init".
 *
 * start_init() walks the entries in order until it reaches the NULL
 * terminator, and stops at the first path for which sys_execve()
 * returns EJUSTRETURN.
556 */
557 static char *initpaths[] = {
558 "/sbin/init",
559 "/sbin/oinit",
560 "/sbin/init.bak",
561 NULL,
562 };
563
564 void
565 check_console(struct proc *p)
566 {
567 struct nameidata nd;
568 int error;
569
570 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/console", p);
571 error = namei(&nd);
572 if (error) {
573 if (error == ENOENT)
574 printf("warning: /dev/console does not exist\n");
575 else
576 printf("warning: /dev/console error %d\n", error);
577 } else
578 vrele(nd.ni_vp);
579 }
580
581 /*
582 * Start the initial user process; try exec'ing each pathname in "initpaths".
583 * The program is invoked with one argument containing the boot flags.
 *
 * arg is used as the struct proc of the calling thread.
 * NOTE(review): main() passes NULL as the fork1() argument, so how arg
 * ends up pointing at the new proc is decided inside fork1() -- confirm.
 *
 * Sequence: sleep until main() sets start_init_exec, call
 * check_console(), map one page of user stack, then for each entry of
 * initpaths[] copy out the argument strings and pointer vector and
 * call sys_execve().  Returns only when sys_execve() yields
 * EJUSTRETURN; if every path fails the function panics ("no init").
 *
 * The boot-flag argument is only passed when at least one option
 * letter was added (options != 0); otherwise argv holds just the path.
584 */
585 void
586 start_init(void *arg)
587 {
588 struct proc *p = arg;
589 vaddr_t addr;
590 struct sys_execve_args /* {
591 syscallarg(const char *) path;
592 syscallarg(char *const *) argp;
593 syscallarg(char *const *) envp;
594 } */ args;
595 int options, error;
596 long i;
597 register_t retval[2];
/* flags[] holds '-', up to two option letters ('s', 'f') and the NUL. */
598 char flags[4], *flagsp;
599 char **pathp, *path, *ucp, **uap, *arg0, *arg1 = NULL;
600
601 /*
602 * Now in process 1.
603 */
604
605 /*
606 * Wait for main() to tell us that it's safe to exec.
607 */
608 while (start_init_exec == 0)
609 tsleep_nsec(&start_init_exec, PWAIT, "initexec", INFSLP);
610
611 check_console(p);
612
613 /* process 0 ignores SIGCHLD, but we can't */
614 p->p_p->ps_sigacts->ps_sigflags = 0;
615
616 /*
617 * Need just enough stack to hold the faked-up "execve()" arguments.
 *
 * Exactly one page is mapped: at USRSTACK when the stack grows up,
 * otherwise the page ending at USRSTACK.
618 */
619 #ifdef MACHINE_STACK_GROWS_UP
620 addr = USRSTACK;
621 #else
622 addr = USRSTACK - PAGE_SIZE;
623 #endif
624 p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
625 p->p_vmspace->vm_minsaddr = (caddr_t)(addr + PAGE_SIZE);
626 if (uvm_map(&p->p_vmspace->vm_map, &addr, PAGE_SIZE,
627 NULL, UVM_UNKNOWN_OFFSET, 0,
628 UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_MASK, MAP_INHERIT_COPY,
629 MADV_NORMAL,
630 UVM_FLAG_FIXED|UVM_FLAG_OVERLAY|UVM_FLAG_COPYONW|UVM_FLAG_STACK|UVM_FLAG_SYSCALL)))
631 panic("init: couldn't allocate argument space");
632
/*
 * Each iteration rebuilds the whole argument area from scratch, so a
 * failed exec of one path leaves nothing behind for the next one.
 */
633 for (pathp = &initpaths[0]; (path = *pathp) != NULL; pathp++) {
/* ucp is the copy-out cursor: low end of the page if the stack grows up, else its top. */
634 #ifdef MACHINE_STACK_GROWS_UP
635 ucp = (char *)addr;
636 #else
637 ucp = (char *)(addr + PAGE_SIZE);
638 #endif
639 /*
640 * Construct the boot flag argument.
641 */
642 flagsp = flags;
643 *flagsp++ = '-';
644 options = 0;
645
646 if (boothowto & RB_SINGLE) {
647 *flagsp++ = 's';
648 options = 1;
649 }
650 #ifdef notyet
651 if (boothowto & RB_FASTBOOT) {
652 *flagsp++ = 'f';
653 options = 1;
654 }
655 #endif
656
657 /*
658 * Move out the flags (arg 1), if necessary.
659 */
660 if (options != 0) {
661 *flagsp++ = '\0';
/* i = length of the flag string including its terminating NUL. */
662 i = flagsp - flags;
663 #ifdef DEBUG
664 printf("init: copying out flags `%s' %ld\n", flags, i);
665 #endif
/* NOTE(review): the copyout() results in this function are deliberately ignored. */
666 #ifdef MACHINE_STACK_GROWS_UP
667 arg1 = ucp;
668 (void)copyout((caddr_t)flags, (caddr_t)ucp, i);
669 ucp += i;
670 #else
671 (void)copyout((caddr_t)flags, (caddr_t)(ucp -= i), i);
672 arg1 = ucp;
673 #endif
674 }
675
676 /*
677 * Move out the file name (also arg 0).
678 */
679 i = strlen(path) + 1;
680 #ifdef DEBUG
681 printf("init: copying out path `%s' %ld\n", path, i);
682 #endif
/*
 * After the path string, uap is set so that the three pre-decrements
 * below store the pointer vector into aligned slots: three slots past
 * the aligned cursor when growing up, or below the aligned-down
 * cursor otherwise.
 */
683 #ifdef MACHINE_STACK_GROWS_UP
684 arg0 = ucp;
685 (void)copyout((caddr_t)path, (caddr_t)ucp, i);
686 ucp += i;
687 ucp = (caddr_t)ALIGN((u_long)ucp);
688 uap = (char **)ucp + 3;
689 #else
690 (void)copyout((caddr_t)path, (caddr_t)(ucp -= i), i);
691 arg0 = ucp;
692 uap = (char **)((u_long)ucp & ~ALIGNBYTES);
693 #endif
694
695 /*
696 * Move out the arg pointers.
 *
 * Written back to front: terminator, then arg1 (only when
 * flags are present), then arg0, leaving uap at argv[0].
697 */
698 i = 0;
/*
 * NOTE(review): sizeof(register_t) bytes are copied from a long and
 * from char * objects, and uap steps in char * units; this assumes
 * those types are all the same size -- confirm per platform.
 */
699 copyout(&i, (caddr_t)--uap, sizeof(register_t)); /* terminator */
700 if (options != 0)
701 copyout(&arg1, (caddr_t)--uap, sizeof(register_t));
702 copyout(&arg0, (caddr_t)--uap, sizeof(register_t));
703
704 /*
705 * Point at the arguments.
706 */
707 SCARG(&args, path) = arg0;
708 SCARG(&args, argp) = uap;
709 SCARG(&args, envp) = NULL;
710
711 /*
712 * Now try to exec the program. If can't for any reason
713 * other than it doesn't exist, complain.
714 */
715 if ((error = sys_execve(p, &args, retval)) == EJUSTRETURN) {
/*
 * NOTE(review): the matching lock acquisition is not visible in this
 * function -- confirm where this thread takes the kernel lock.
 */
716 KERNEL_UNLOCK();
717 return;
718 }
719 if (error != ENOENT)
720 printf("exec %s: error %d\n", path, error);
721 }
722 printf("init: not found\n");
723 panic("no init");
724 }
Cache object: 7278115705c012e1ae77329cd4123750
|