FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_shutdown.c


    1 /*-
    2  * SPDX-License-Identifier: BSD-3-Clause
    3  *
    4  * Copyright (c) 1986, 1988, 1991, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  * (c) UNIX System Laboratories, Inc.
    7  * All or some portions of this file are derived from material licensed
    8  * to the University of California by American Telephone and Telegraph
    9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   10  * the permission of UNIX System Laboratories, Inc.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice, this list of conditions and the following disclaimer.
   17  * 2. Redistributions in binary form must reproduce the above copyright
   18  *    notice, this list of conditions and the following disclaimer in the
   19  *    documentation and/or other materials provided with the distribution.
   20  * 3. Neither the name of the University nor the names of its contributors
   21  *    may be used to endorse or promote products derived from this software
   22  *    without specific prior written permission.
   23  *
   24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   34  * SUCH DAMAGE.
   35  *
   36  *      @(#)kern_shutdown.c     8.3 (Berkeley) 1/21/94
   37  */
   38 
   39 #include <sys/cdefs.h>
   40 __FBSDID("$FreeBSD$");
   41 
   42 #include "opt_ddb.h"
   43 #include "opt_ekcd.h"
   44 #include "opt_kdb.h"
   45 #include "opt_panic.h"
   46 #include "opt_printf.h"
   47 #include "opt_sched.h"
   48 #include "opt_watchdog.h"
   49 
   50 #include <sys/param.h>
   51 #include <sys/systm.h>
   52 #include <sys/bio.h>
   53 #include <sys/buf.h>
   54 #include <sys/conf.h>
   55 #include <sys/compressor.h>
   56 #include <sys/cons.h>
   57 #include <sys/disk.h>
   58 #include <sys/eventhandler.h>
   59 #include <sys/filedesc.h>
   60 #include <sys/jail.h>
   61 #include <sys/kdb.h>
   62 #include <sys/kernel.h>
   63 #include <sys/kerneldump.h>
   64 #include <sys/kthread.h>
   65 #include <sys/ktr.h>
   66 #include <sys/malloc.h>
   67 #include <sys/mbuf.h>
   68 #include <sys/mount.h>
   69 #include <sys/priv.h>
   70 #include <sys/proc.h>
   71 #include <sys/reboot.h>
   72 #include <sys/resourcevar.h>
   73 #include <sys/rwlock.h>
   74 #include <sys/sbuf.h>
   75 #include <sys/sched.h>
   76 #include <sys/smp.h>
   77 #include <sys/sysctl.h>
   78 #include <sys/sysproto.h>
   79 #include <sys/taskqueue.h>
   80 #include <sys/vnode.h>
   81 #include <sys/watchdog.h>
   82 
   83 #include <crypto/chacha20/chacha.h>
   84 #include <crypto/rijndael/rijndael-api-fst.h>
   85 #include <crypto/sha2/sha256.h>
   86 
   87 #include <ddb/ddb.h>
   88 
   89 #include <machine/cpu.h>
   90 #include <machine/dump.h>
   91 #include <machine/pcb.h>
   92 #include <machine/smp.h>
   93 
   94 #include <security/mac/mac_framework.h>
   95 
   96 #include <vm/vm.h>
   97 #include <vm/vm_object.h>
   98 #include <vm/vm_page.h>
   99 #include <vm/vm_pager.h>
  100 #include <vm/swap_pager.h>
  101 
  102 #include <sys/signalvar.h>
  103 
  104 static MALLOC_DEFINE(M_DUMPER, "dumper", "dumper block buffer");
  105 
  106 #ifndef PANIC_REBOOT_WAIT_TIME
  107 #define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */
  108 #endif
  109 static int panic_reboot_wait_time = PANIC_REBOOT_WAIT_TIME;
  110 SYSCTL_INT(_kern, OID_AUTO, panic_reboot_wait_time, CTLFLAG_RWTUN,
  111     &panic_reboot_wait_time, 0,
  112     "Seconds to wait before rebooting after a panic");
  113 
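/*
 * Illustrative usage sketch: because the sysctl above is declared with
 * CTLFLAG_RWTUN, the wait time can be set either as a boot-time tunable or
 * at runtime, e.g.
 *
 *     kern.panic_reboot_wait_time=30          # /boot/loader.conf
 *     sysctl kern.panic_reboot_wait_time=30   # at runtime
 *
 * A value of 0 reboots immediately after a panic dump; -1 waits indefinitely
 * for a console keypress (see shutdown_panic() below).
 */
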
  114 /*
  115  * Note that stdarg.h and the ANSI-style va_start macro are used for both
  116  * ANSI and traditional C compilers.
  117  */
  118 #include <machine/stdarg.h>
  119 
  120 #ifdef KDB
  121 #ifdef KDB_UNATTENDED
  122 int debugger_on_panic = 0;
  123 #else
  124 int debugger_on_panic = 1;
  125 #endif
  126 SYSCTL_INT(_debug, OID_AUTO, debugger_on_panic,
  127     CTLFLAG_RWTUN | CTLFLAG_SECURE,
  128     &debugger_on_panic, 0, "Run debugger on kernel panic");
  129 
  130 static bool debugger_on_recursive_panic = false;
  131 SYSCTL_BOOL(_debug, OID_AUTO, debugger_on_recursive_panic,
  132     CTLFLAG_RWTUN | CTLFLAG_SECURE,
  133     &debugger_on_recursive_panic, 0, "Run debugger on recursive kernel panic");
  134 
  135 int debugger_on_trap = 0;
  136 SYSCTL_INT(_debug, OID_AUTO, debugger_on_trap,
  137     CTLFLAG_RWTUN | CTLFLAG_SECURE,
  138     &debugger_on_trap, 0, "Run debugger on kernel trap before panic");
  139 
  140 #ifdef KDB_TRACE
  141 static int trace_on_panic = 1;
  142 static bool trace_all_panics = true;
  143 #else
  144 static int trace_on_panic = 0;
  145 static bool trace_all_panics = false;
  146 #endif
  147 SYSCTL_INT(_debug, OID_AUTO, trace_on_panic,
  148     CTLFLAG_RWTUN | CTLFLAG_SECURE,
  149     &trace_on_panic, 0, "Print stack trace on kernel panic");
  150 SYSCTL_BOOL(_debug, OID_AUTO, trace_all_panics, CTLFLAG_RWTUN,
  151     &trace_all_panics, 0, "Print stack traces on secondary kernel panics");
  152 #endif /* KDB */
  153 
  154 static int sync_on_panic = 0;
  155 SYSCTL_INT(_kern, OID_AUTO, sync_on_panic, CTLFLAG_RWTUN,
  156         &sync_on_panic, 0, "Do a sync before rebooting from a panic");
  157 
  158 static bool poweroff_on_panic = 0;
  159 SYSCTL_BOOL(_kern, OID_AUTO, poweroff_on_panic, CTLFLAG_RWTUN,
  160         &poweroff_on_panic, 0, "Do a power off instead of a reboot on a panic");
  161 
  162 static bool powercycle_on_panic = 0;
  163 SYSCTL_BOOL(_kern, OID_AUTO, powercycle_on_panic, CTLFLAG_RWTUN,
  164         &powercycle_on_panic, 0, "Do a power cycle instead of a reboot on a panic");
  165 
  166 static SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
  167     "Shutdown environment");
  168 
  169 #ifndef DIAGNOSTIC
  170 static int show_busybufs;
  171 #else
  172 static int show_busybufs = 1;
  173 #endif
  174 SYSCTL_INT(_kern_shutdown, OID_AUTO, show_busybufs, CTLFLAG_RW,
  175     &show_busybufs, 0,
  176     "Show busy buffers during shutdown");
  177 
  178 int suspend_blocked = 0;
  179 SYSCTL_INT(_kern, OID_AUTO, suspend_blocked, CTLFLAG_RW,
  180         &suspend_blocked, 0, "Block suspend due to a pending shutdown");
  181 
  182 #ifdef EKCD
  183 FEATURE(ekcd, "Encrypted kernel crash dumps support");
  184 
  185 MALLOC_DEFINE(M_EKCD, "ekcd", "Encrypted kernel crash dumps data");
  186 
  187 struct kerneldumpcrypto {
  188         uint8_t                 kdc_encryption;
  189         uint8_t                 kdc_iv[KERNELDUMP_IV_MAX_SIZE];
  190         union {
  191                 struct {
  192                         keyInstance     aes_ki;
  193                         cipherInstance  aes_ci;
  194                 } u_aes;
  195                 struct chacha_ctx       u_chacha;
  196         } u;
  197 #define kdc_ki  u.u_aes.aes_ki
  198 #define kdc_ci  u.u_aes.aes_ci
  199 #define kdc_chacha      u.u_chacha
  200         uint32_t                kdc_dumpkeysize;
  201         struct kerneldumpkey    kdc_dumpkey[];
  202 };
  203 #endif
  204 
  205 struct kerneldumpcomp {
  206         uint8_t                 kdc_format;
  207         struct compressor       *kdc_stream;
  208         uint8_t                 *kdc_buf;
  209         size_t                  kdc_resid;
  210 };
  211 
  212 static struct kerneldumpcomp *kerneldumpcomp_create(struct dumperinfo *di,
  213                     uint8_t compression);
  214 static void     kerneldumpcomp_destroy(struct dumperinfo *di);
  215 static int      kerneldumpcomp_write_cb(void *base, size_t len, off_t off, void *arg);
  216 
  217 static int kerneldump_gzlevel = 6;
  218 SYSCTL_INT(_kern, OID_AUTO, kerneldump_gzlevel, CTLFLAG_RWTUN,
  219     &kerneldump_gzlevel, 0,
  220     "Kernel crash dump compression level");
  221 
  222 /*
  223  * The variable panicstr contains the argument to the first call to panic;
  224  * it is used as a flag to indicate that the kernel has already called panic.
  225  */
  226 const char *panicstr;
  227 bool __read_frequently panicked;
  228 
  229 int __read_mostly dumping;              /* system is dumping */
  230 int rebooting;                          /* system is rebooting */
  231 /*
  232  * Used to serialize between sysctl kern.shutdown.dumpdevname and list
  233  * modifications via ioctl.
  234  */
  235 static struct mtx dumpconf_list_lk;
  236 MTX_SYSINIT(dumper_configs, &dumpconf_list_lk, "dumper config list", MTX_DEF);
  237 
  238 /* Our selected dumper(s). */
  239 static TAILQ_HEAD(dumpconflist, dumperinfo) dumper_configs =
  240     TAILQ_HEAD_INITIALIZER(dumper_configs);
  241 
  242 /* Context information for dump-debuggers. */
  243 static struct pcb dumppcb;              /* Registers. */
  244 lwpid_t dumptid;                        /* Thread ID. */
  245 
  246 static struct cdevsw reroot_cdevsw = {
  247      .d_version = D_VERSION,
  248      .d_name    = "reroot",
  249 };
  250 
  251 static void poweroff_wait(void *, int);
  252 static void shutdown_halt(void *junk, int howto);
  253 static void shutdown_panic(void *junk, int howto);
  254 static void shutdown_reset(void *junk, int howto);
  255 static int kern_reroot(void);
  256 
  257 /* register various local shutdown events */
  258 static void
  259 shutdown_conf(void *unused)
  260 {
  261 
  262         EVENTHANDLER_REGISTER(shutdown_final, poweroff_wait, NULL,
  263             SHUTDOWN_PRI_FIRST);
  264         EVENTHANDLER_REGISTER(shutdown_final, shutdown_halt, NULL,
  265             SHUTDOWN_PRI_LAST + 100);
  266         EVENTHANDLER_REGISTER(shutdown_final, shutdown_panic, NULL,
  267             SHUTDOWN_PRI_LAST + 100);
  268         EVENTHANDLER_REGISTER(shutdown_final, shutdown_reset, NULL,
  269             SHUTDOWN_PRI_LAST + 200);
  270 }
  271 
  272 SYSINIT(shutdown_conf, SI_SUB_INTRINSIC, SI_ORDER_ANY, shutdown_conf, NULL);
  273 
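/*
 * The priorities above order the final handlers: poweroff_wait() runs first
 * (SHUTDOWN_PRI_FIRST), shutdown_halt() and shutdown_panic() run late
 * (SHUTDOWN_PRI_LAST + 100), and shutdown_reset() runs last of all
 * (SHUTDOWN_PRI_LAST + 200), so the machine is only reset if no earlier
 * handler halted or powered it off.
 */
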
  274 /*
  275  * The only reason this exists is to create the /dev/reroot/ directory,
  276  * used by reroot code in init(8) as a mountpoint for tmpfs.
  277  */
  278 static void
  279 reroot_conf(void *unused)
  280 {
  281         int error;
  282         struct cdev *cdev;
  283 
  284         error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &cdev,
  285             &reroot_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0600, "reroot/reroot");
  286         if (error != 0) {
  287                 printf("%s: failed to create device node, error %d",
  288                     __func__, error);
  289         }
  290 }
  291 
  292 SYSINIT(reroot_conf, SI_SUB_DEVFS, SI_ORDER_ANY, reroot_conf, NULL);
  293 
  294 /*
  295  * The system call that results in a reboot.
  296  */
  297 /* ARGSUSED */
  298 int
  299 sys_reboot(struct thread *td, struct reboot_args *uap)
  300 {
  301         int error;
  302 
  303         error = 0;
  304 #ifdef MAC
  305         error = mac_system_check_reboot(td->td_ucred, uap->opt);
  306 #endif
  307         if (error == 0)
  308                 error = priv_check(td, PRIV_REBOOT);
  309         if (error == 0) {
  310                 if (uap->opt & RB_REROOT)
  311                         error = kern_reroot();
  312                 else
  313                         kern_reboot(uap->opt);
  314         }
  315         return (error);
  316 }
  317 
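/*
 * A minimal sketch of the userland side of this system call (assuming the
 * standard reboot(2) interface; not taken from this file):
 *
 *     #include <sys/reboot.h>
 *     #include <unistd.h>
 *
 *     int
 *     main(void)
 *     {
 *             return (reboot(RB_POWEROFF));   // returns only on error; needs PRIV_REBOOT
 *     }
 *
 * reboot(8) and shutdown(8) are the usual front ends for this call.
 */
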
  318 static void
  319 shutdown_nice_task_fn(void *arg, int pending __unused)
  320 {
  321         int howto;
  322 
  323         howto = (uintptr_t)arg;
  324         /* Send a signal to init(8) and have it shutdown the world. */
  325         PROC_LOCK(initproc);
  326         if (howto & RB_POWEROFF)
  327                 kern_psignal(initproc, SIGUSR2);
  328         else if (howto & RB_POWERCYCLE)
  329                 kern_psignal(initproc, SIGWINCH);
  330         else if (howto & RB_HALT)
  331                 kern_psignal(initproc, SIGUSR1);
  332         else
  333                 kern_psignal(initproc, SIGINT);
  334         PROC_UNLOCK(initproc);
  335 }
  336 
  337 static struct task shutdown_nice_task = TASK_INITIALIZER(0,
  338     &shutdown_nice_task_fn, NULL);
  339 
  340 /*
  341  * Called by events that want to shut down, e.g. <CTL><ALT><DEL> on a PC.
  342  */
  343 void
  344 shutdown_nice(int howto)
  345 {
  346 
  347         if (initproc != NULL && !SCHEDULER_STOPPED()) {
  348                 shutdown_nice_task.ta_context = (void *)(uintptr_t)howto;
  349                 taskqueue_enqueue(taskqueue_fast, &shutdown_nice_task);
  350         } else {
  351                 /*
  352                  * No init(8) running, or scheduler would not allow it
  353                  * to run, so simply reboot.
  354                  */
  355                 kern_reboot(howto | RB_NOSYNC);
  356         }
  357 }
  358 
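/*
 * A hypothetical in-kernel caller (e.g. a power-button driver) would request
 * a clean power-off with:
 *
 *     shutdown_nice(RB_POWEROFF);
 *
 * which, per shutdown_nice_task_fn() above, delivers SIGUSR2 to init(8) so
 * that the normal rc shutdown sequence runs.
 */
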
  359 static void
  360 print_uptime(void)
  361 {
  362         int f;
  363         struct timespec ts;
  364 
  365         getnanouptime(&ts);
  366         printf("Uptime: ");
  367         f = 0;
  368         if (ts.tv_sec >= 86400) {
  369                 printf("%ldd", (long)ts.tv_sec / 86400);
  370                 ts.tv_sec %= 86400;
  371                 f = 1;
  372         }
  373         if (f || ts.tv_sec >= 3600) {
  374                 printf("%ldh", (long)ts.tv_sec / 3600);
  375                 ts.tv_sec %= 3600;
  376                 f = 1;
  377         }
  378         if (f || ts.tv_sec >= 60) {
  379                 printf("%ldm", (long)ts.tv_sec / 60);
  380                 ts.tv_sec %= 60;
  381                 f = 1;
  382         }
  383         printf("%lds\n", (long)ts.tv_sec);
  384 }
  385 
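/*
 * Example of the output format above, for an uptime of 93784 seconds
 * (1 day, 2 hours, 3 minutes, 4 seconds):
 *
 *     Uptime: 1d2h3m4s
 */
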
  386 int
  387 doadump(boolean_t textdump)
  388 {
  389         boolean_t coredump;
  390         int error;
  391 
  392         error = 0;
  393         if (dumping)
  394                 return (EBUSY);
  395         if (TAILQ_EMPTY(&dumper_configs))
  396                 return (ENXIO);
  397 
  398         savectx(&dumppcb);
  399         dumptid = curthread->td_tid;
  400         dumping++;
  401 
  402         coredump = TRUE;
  403 #ifdef DDB
  404         if (textdump && textdump_pending) {
  405                 coredump = FALSE;
  406                 textdump_dumpsys(TAILQ_FIRST(&dumper_configs));
  407         }
  408 #endif
  409         if (coredump) {
  410                 struct dumperinfo *di;
  411 
  412                 TAILQ_FOREACH(di, &dumper_configs, di_next) {
  413                         error = dumpsys(di);
  414                         if (error == 0)
  415                                 break;
  416                 }
  417         }
  418 
  419         dumping--;
  420         return (error);
  421 }
  422 
  423 /*
  424  * kern_reboot(9): Shut down the system cleanly to prepare for reboot, halt, or
  425  * power off.
  426  */
  427 void
  428 kern_reboot(int howto)
  429 {
  430         static int once = 0;
  431 
  432         /*
  433          * Normal paths here don't hold Giant, but we can wind up here
  434          * unexpectedly with it held.  Drop it now so we don't have to
  435          * drop and pick it up elsewhere. The paths it is locking will
  436          * never be returned to, and it is preferable to preclude
  437          * deadlock than to lock against code that won't ever
  438          * continue.
  439          */
  440         while (mtx_owned(&Giant))
  441                 mtx_unlock(&Giant);
  442 
  443 #if defined(SMP)
  444         /*
  445          * Bind us to the first CPU so that all shutdown code runs there.  Some
  446          * systems don't shut down properly (e.g., ACPI power off) if we
  447          * run on another processor.
  448          */
  449         if (!SCHEDULER_STOPPED()) {
  450                 thread_lock(curthread);
  451                 sched_bind(curthread, CPU_FIRST());
  452                 thread_unlock(curthread);
  453                 KASSERT(PCPU_GET(cpuid) == CPU_FIRST(),
  454                     ("%s: not running on cpu 0", __func__));
  455         }
  456 #endif
  457         /* We're in the process of rebooting. */
  458         rebooting = 1;
  459 
  460         /* We are out of the debugger now. */
  461         kdb_active = 0;
  462 
  463         /*
  464          * Do any callouts that should be done BEFORE syncing the filesystems.
  465          */
  466         EVENTHANDLER_INVOKE(shutdown_pre_sync, howto);
  467 
  468         /* 
  469          * Now sync filesystems
  470          */
  471         if (!cold && (howto & RB_NOSYNC) == 0 && once == 0) {
  472                 once = 1;
  473                 bufshutdown(show_busybufs);
  474         }
  475 
  476         print_uptime();
  477 
  478         cngrab();
  479 
  480         /*
  481          * Ok, now do things that assume all filesystem activity has
  482          * been completed.
  483          */
  484         EVENTHANDLER_INVOKE(shutdown_post_sync, howto);
  485 
  486         if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold && !dumping) 
  487                 doadump(TRUE);
  488 
  489         /* Now that we're going to really halt the system... */
  490         EVENTHANDLER_INVOKE(shutdown_final, howto);
  491 
  492         for(;;) ;       /* safety against shutdown_reset not working */
  493         /* NOTREACHED */
  494 }
  495 
  496 /*
  497  * The system call that results in changing the rootfs.
  498  */
  499 static int
  500 kern_reroot(void)
  501 {
  502         struct vnode *oldrootvnode, *vp;
  503         struct mount *mp, *devmp;
  504         int error;
  505 
  506         if (curproc != initproc)
  507                 return (EPERM);
  508 
  509         /*
  510          * Mark the filesystem containing currently-running executable
  511          * (the temporary copy of init(8)) busy.
  512          */
  513         vp = curproc->p_textvp;
  514         error = vn_lock(vp, LK_SHARED);
  515         if (error != 0)
  516                 return (error);
  517         mp = vp->v_mount;
  518         error = vfs_busy(mp, MBF_NOWAIT);
  519         if (error != 0) {
  520                 vfs_ref(mp);
  521                 VOP_UNLOCK(vp);
  522                 error = vfs_busy(mp, 0);
  523                 vn_lock(vp, LK_SHARED | LK_RETRY);
  524                 vfs_rel(mp);
  525                 if (error != 0) {
  526                         VOP_UNLOCK(vp);
  527                         return (ENOENT);
  528                 }
  529                 if (VN_IS_DOOMED(vp)) {
  530                         VOP_UNLOCK(vp);
  531                         vfs_unbusy(mp);
  532                         return (ENOENT);
  533                 }
  534         }
  535         VOP_UNLOCK(vp);
  536 
  537         /*
  538          * Remove the filesystem containing currently-running executable
  539          * from the mount list, to prevent it from being unmounted
  540          * by vfs_unmountall(), and to avoid confusing vfs_mountroot().
  541          *
  542          * Also preserve /dev - forcibly unmounting it could cause driver
  543          * reinitialization.
  544          */
  545 
  546         vfs_ref(rootdevmp);
  547         devmp = rootdevmp;
  548         rootdevmp = NULL;
  549 
  550         mtx_lock(&mountlist_mtx);
  551         TAILQ_REMOVE(&mountlist, mp, mnt_list);
  552         TAILQ_REMOVE(&mountlist, devmp, mnt_list);
  553         mtx_unlock(&mountlist_mtx);
  554 
  555         oldrootvnode = rootvnode;
  556 
  557         /*
  558          * Unmount everything except for the two filesystems preserved above.
  559          */
  560         vfs_unmountall();
  561 
  562         /*
  563          * Add /dev back; vfs_mountroot() will move it into its new place.
  564          */
  565         mtx_lock(&mountlist_mtx);
  566         TAILQ_INSERT_HEAD(&mountlist, devmp, mnt_list);
  567         mtx_unlock(&mountlist_mtx);
  568         rootdevmp = devmp;
  569         vfs_rel(rootdevmp);
  570 
  571         /*
  572          * Mount the new rootfs.
  573          */
  574         vfs_mountroot();
  575 
  576         /*
  577          * Update all references to the old rootvnode.
  578          */
  579         mountcheckdirs(oldrootvnode, rootvnode);
  580 
  581         /*
  582          * Add the temporary filesystem back and unbusy it.
  583          */
  584         mtx_lock(&mountlist_mtx);
  585         TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
  586         mtx_unlock(&mountlist_mtx);
  587         vfs_unbusy(mp);
  588 
  589         return (0);
  590 }
  591 
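/*
 * Note: kern_reroot() is only reachable through reboot(2) with RB_REROOT,
 * and only by init(8) itself (see the curproc check above).  A reroot is
 * typically requested with "reboot -r", which ultimately causes init(8) to
 * re-exec a temporary copy of itself from a tmpfs (mounted on the
 * /dev/reroot directory created earlier in this file) and then issue
 * reboot(2) with RB_REROOT, landing here.
 */
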
  592 /*
  593  * If the shutdown was a clean halt, behave accordingly.
  594  */
  595 static void
  596 shutdown_halt(void *junk, int howto)
  597 {
  598 
  599         if (howto & RB_HALT) {
  600                 printf("\n");
  601                 printf("The operating system has halted.\n");
  602                 printf("Please press any key to reboot.\n\n");
  603 
  604                 wdog_kern_pat(WD_TO_NEVER);
  605 
  606                 switch (cngetc()) {
  607                 case -1:                /* No console, just die */
  608                         cpu_halt();
  609                         /* NOTREACHED */
  610                 default:
  611                         break;
  612                 }
  613         }
  614 }
  615 
  616 /*
  617  * Check to see if the system panicked; if so, pause and then reboot
  618  * according to the specified delay.
  619  */
  620 static void
  621 shutdown_panic(void *junk, int howto)
  622 {
  623         int loop;
  624 
  625         if (howto & RB_DUMP) {
  626                 if (panic_reboot_wait_time != 0) {
  627                         if (panic_reboot_wait_time != -1) {
  628                                 printf("Automatic reboot in %d seconds - "
  629                                        "press a key on the console to abort\n",
  630                                         panic_reboot_wait_time);
  631                                 for (loop = panic_reboot_wait_time * 10;
  632                                      loop > 0; --loop) {
  633                                         DELAY(1000 * 100); /* 1/10th second */
  634                                         /* Did user type a key? */
  635                                         if (cncheckc() != -1)
  636                                                 break;
  637                                 }
  638                                 if (!loop)
  639                                         return;
  640                         }
  641                 } else { /* zero time specified - reboot NOW */
  642                         return;
  643                 }
  644                 printf("--> Press a key on the console to reboot,\n");
  645                 printf("--> or switch off the system now.\n");
  646                 cngetc();
  647         }
  648 }
  649 
  650 /*
  651  * Everything done, now reset
  652  */
  653 static void
  654 shutdown_reset(void *junk, int howto)
  655 {
  656 
  657         printf("Rebooting...\n");
  658         DELAY(1000000); /* wait 1 sec for printf's to complete and be read */
  659 
  660         /*
  661          * Acquiring smp_ipi_mtx here has a double effect:
  662          * - it disables interrupts, avoiding CPU0 preemption
  663          *   by fast handlers (thus deadlocking against other CPUs)
  664          * - it avoids deadlocks against smp_rendezvous() or, more
  665          *   generally, against threads busy-waiting with this spinlock held
  666          *   and waiting for responses from threads on other CPUs
  667          *   (i.e., smp_tlb_shootdown()).
  668          *
  669          * For the !SMP case it just needs to handle the former problem.
  670          */
  671 #ifdef SMP
  672         mtx_lock_spin(&smp_ipi_mtx);
  673 #else
  674         spinlock_enter();
  675 #endif
  676 
  677         cpu_reset();
  678         /* NOTREACHED */ /* assuming reset worked */
  679 }
  680 
  681 #if defined(WITNESS) || defined(INVARIANT_SUPPORT)
  682 static int kassert_warn_only = 0;
  683 #ifdef KDB
  684 static int kassert_do_kdb = 0;
  685 #endif
  686 #ifdef KTR
  687 static int kassert_do_ktr = 0;
  688 #endif
  689 static int kassert_do_log = 1;
  690 static int kassert_log_pps_limit = 4;
  691 static int kassert_log_mute_at = 0;
  692 static int kassert_log_panic_at = 0;
  693 static int kassert_suppress_in_panic = 0;
  694 static int kassert_warnings = 0;
  695 
  696 SYSCTL_NODE(_debug, OID_AUTO, kassert, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
  697     "kassert options");
  698 
  699 #ifdef KASSERT_PANIC_OPTIONAL
  700 #define KASSERT_RWTUN   CTLFLAG_RWTUN
  701 #else
  702 #define KASSERT_RWTUN   CTLFLAG_RDTUN
  703 #endif
  704 
  705 SYSCTL_INT(_debug_kassert, OID_AUTO, warn_only, KASSERT_RWTUN,
  706     &kassert_warn_only, 0,
  707     "KASSERT triggers a panic (0) or just a warning (1)");
  708 
  709 #ifdef KDB
  710 SYSCTL_INT(_debug_kassert, OID_AUTO, do_kdb, KASSERT_RWTUN,
  711     &kassert_do_kdb, 0, "KASSERT will enter the debugger");
  712 #endif
  713 
  714 #ifdef KTR
  715 SYSCTL_UINT(_debug_kassert, OID_AUTO, do_ktr, KASSERT_RWTUN,
  716     &kassert_do_ktr, 0,
  717     "KASSERT does a KTR, set this to the KTRMASK you want");
  718 #endif
  719 
  720 SYSCTL_INT(_debug_kassert, OID_AUTO, do_log, KASSERT_RWTUN,
  721     &kassert_do_log, 0,
  722     "If warn_only is enabled, log (1) or do not log (0) assertion violations");
  723 
  724 SYSCTL_INT(_debug_kassert, OID_AUTO, warnings, CTLFLAG_RD | CTLFLAG_STATS,
  725     &kassert_warnings, 0, "number of KASSERTs that have been triggered");
  726 
  727 SYSCTL_INT(_debug_kassert, OID_AUTO, log_panic_at, KASSERT_RWTUN,
  728     &kassert_log_panic_at, 0, "max number of KASSERTS before we will panic");
  729 
  730 SYSCTL_INT(_debug_kassert, OID_AUTO, log_pps_limit, KASSERT_RWTUN,
  731     &kassert_log_pps_limit, 0, "limit number of log messages per second");
  732 
  733 SYSCTL_INT(_debug_kassert, OID_AUTO, log_mute_at, KASSERT_RWTUN,
  734     &kassert_log_mute_at, 0, "max number of KASSERTS to log");
  735 
  736 SYSCTL_INT(_debug_kassert, OID_AUTO, suppress_in_panic, KASSERT_RWTUN,
  737     &kassert_suppress_in_panic, 0,
  738     "KASSERTs will be suppressed while handling a panic");
  739 #undef KASSERT_RWTUN
  740 
  741 static int kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS);
  742 
  743 SYSCTL_PROC(_debug_kassert, OID_AUTO, kassert,
  744     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE | CTLFLAG_MPSAFE, NULL, 0,
  745     kassert_sysctl_kassert, "I",
  746     "set to trigger a test kassert");
  747 
  748 static int
  749 kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS)
  750 {
  751         int error, i;
  752 
  753         error = sysctl_wire_old_buffer(req, sizeof(int));
  754         if (error == 0) {
  755                 i = 0;
  756                 error = sysctl_handle_int(oidp, &i, 0, req);
  757         }
  758         if (error != 0 || req->newptr == NULL)
  759                 return (error);
  760         KASSERT(0, ("kassert_sysctl_kassert triggered kassert %d", i));
  761         return (0);
  762 }
  763 
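/*
 * Illustrative usage: on a kernel built with WITNESS or INVARIANT_SUPPORT
 * (see the #if above), a test assertion can be fired from userland with
 *
 *     sysctl debug.kassert.kassert=1
 *
 * which trips the KASSERT above and exercises whatever warn/log/panic policy
 * the other debug.kassert.* knobs have configured.
 */
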
  764 #ifdef KASSERT_PANIC_OPTIONAL
  765 /*
  766  * Called by KASSERT, this decides if we will panic
  767  * or if we will log via printf and/or ktr.
  768  */
  769 void
  770 kassert_panic(const char *fmt, ...)
  771 {
  772         static char buf[256];
  773         va_list ap;
  774 
  775         va_start(ap, fmt);
  776         (void)vsnprintf(buf, sizeof(buf), fmt, ap);
  777         va_end(ap);
  778 
  779         /*
  780          * If we are suppressing secondary panics, log the warning but do not
  781          * re-enter panic/kdb.
  782          */
  783         if (panicstr != NULL && kassert_suppress_in_panic) {
  784                 if (kassert_do_log) {
  785                         printf("KASSERT failed: %s\n", buf);
  786 #ifdef KDB
  787                         if (trace_all_panics && trace_on_panic)
  788                                 kdb_backtrace();
  789 #endif
  790                 }
  791                 return;
  792         }
  793 
  794         /*
  795          * panic if we're not just warning, or if we've exceeded
  796          * kassert_log_panic_at warnings.
  797          */
  798         if (!kassert_warn_only ||
  799             (kassert_log_panic_at > 0 &&
  800              kassert_warnings >= kassert_log_panic_at)) {
  801                 va_start(ap, fmt);
  802                 vpanic(fmt, ap);
  803                 /* NORETURN */
  804         }
  805 #ifdef KTR
  806         if (kassert_do_ktr)
  807                 CTR0(ktr_mask, buf);
  808 #endif /* KTR */
  809         /*
  810          * log if we've not yet met the mute limit.
  811          */
  812         if (kassert_do_log &&
  813             (kassert_log_mute_at == 0 ||
  814              kassert_warnings < kassert_log_mute_at)) {
  815                 static  struct timeval lasterr;
  816                 static  int curerr;
  817 
  818                 if (ppsratecheck(&lasterr, &curerr, kassert_log_pps_limit)) {
  819                         printf("KASSERT failed: %s\n", buf);
  820                         kdb_backtrace();
  821                 }
  822         }
  823 #ifdef KDB
  824         if (kassert_do_kdb) {
  825                 kdb_enter(KDB_WHY_KASSERT, buf);
  826         }
  827 #endif
  828         atomic_add_int(&kassert_warnings, 1);
  829 }
  830 #endif /* KASSERT_PANIC_OPTIONAL */
  831 #endif
  832 
  833 /*
  834  * Panic is called on unresolvable fatal errors.  It prints "panic: mesg",
  835  * and then reboots.  If we are called twice, then we avoid trying to sync
  836  * the disks as this often leads to recursive panics.
  837  */
  838 void
  839 panic(const char *fmt, ...)
  840 {
  841         va_list ap;
  842 
  843         va_start(ap, fmt);
  844         vpanic(fmt, ap);
  845 }
  846 
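/*
 * A minimal example of the in-kernel calling convention (a sketch with a
 * made-up message and variable):
 *
 *     if (sc == NULL)
 *             panic("%s: softc not attached", __func__);
 *
 * The formatted message becomes panicstr and is printed as "panic: <mesg>".
 */
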
  847 void
  848 vpanic(const char *fmt, va_list ap)
  849 {
  850 #ifdef SMP
  851         cpuset_t other_cpus;
  852 #endif
  853         struct thread *td = curthread;
  854         int bootopt, newpanic;
  855         static char buf[256];
  856 
  857         spinlock_enter();
  858 
  859 #ifdef SMP
  860         /*
  861          * stop_cpus_hard(other_cpus) should prevent multiple CPUs from
  862          * concurrently entering panic.  Only the winner will proceed
  863          * further.
  864          */
  865         if (panicstr == NULL && !kdb_active) {
  866                 other_cpus = all_cpus;
  867                 CPU_CLR(PCPU_GET(cpuid), &other_cpus);
  868                 stop_cpus_hard(other_cpus);
  869         }
  870 #endif
  871 
  872         /*
  873          * Ensure that the scheduler is stopped while panicking, even if panic
  874          * has been entered from kdb.
  875          */
  876         td->td_stopsched = 1;
  877 
  878         bootopt = RB_AUTOBOOT;
  879         newpanic = 0;
  880         if (panicstr)
  881                 bootopt |= RB_NOSYNC;
  882         else {
  883                 bootopt |= RB_DUMP;
  884                 panicstr = fmt;
  885                 panicked = true;
  886                 newpanic = 1;
  887         }
  888 
  889         if (newpanic) {
  890                 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
  891                 panicstr = buf;
  892                 cngrab();
  893                 printf("panic: %s\n", buf);
  894         } else {
  895                 printf("panic: ");
  896                 vprintf(fmt, ap);
  897                 printf("\n");
  898         }
  899 #ifdef SMP
  900         printf("cpuid = %d\n", PCPU_GET(cpuid));
  901 #endif
  902         printf("time = %jd\n", (intmax_t )time_second);
  903 #ifdef KDB
  904         if ((newpanic || trace_all_panics) && trace_on_panic)
  905                 kdb_backtrace();
  906         if (debugger_on_panic)
  907                 kdb_enter(KDB_WHY_PANIC, "panic");
  908         else if (!newpanic && debugger_on_recursive_panic)
  909                 kdb_enter(KDB_WHY_PANIC, "re-panic");
  910 #endif
  911         /*thread_lock(td); */
  912         td->td_flags |= TDF_INPANIC;
  913         /* thread_unlock(td); */
  914         if (!sync_on_panic)
  915                 bootopt |= RB_NOSYNC;
  916         if (poweroff_on_panic)
  917                 bootopt |= RB_POWEROFF;
  918         if (powercycle_on_panic)
  919                 bootopt |= RB_POWERCYCLE;
  920         kern_reboot(bootopt);
  921 }
  922 
  923 /*
  924  * Support for poweroff delay.
  925  *
  926  * Please note that setting this delay too short might power off your machine
  927  * before the write cache on your hard disk has been flushed, leading to
  928  * soft-updates inconsistencies.
  929  */
  930 #ifndef POWEROFF_DELAY
  931 # define POWEROFF_DELAY 5000
  932 #endif
  933 static int poweroff_delay = POWEROFF_DELAY;
  934 
  935 SYSCTL_INT(_kern_shutdown, OID_AUTO, poweroff_delay, CTLFLAG_RW,
  936     &poweroff_delay, 0, "Delay before poweroff to write disk caches (msec)");
  937 
  938 static void
  939 poweroff_wait(void *junk, int howto)
  940 {
  941 
  942         if ((howto & (RB_POWEROFF | RB_POWERCYCLE)) == 0 || poweroff_delay <= 0)
  943                 return;
  944         DELAY(poweroff_delay * 1000);
  945 }
  946 
  947 /*
  948  * Some system processes (e.g. syncer) need to be stopped at appropriate
  949  * points in their main loops prior to a system shutdown, so that they
  950  * won't interfere with the shutdown process (e.g. by holding a disk buf
  951  * to cause sync to fail).  For each of these system processes, register
  952  * kproc_shutdown() as a handler for one of the shutdown events.
  953  */
  954 static int kproc_shutdown_wait = 60;
  955 SYSCTL_INT(_kern_shutdown, OID_AUTO, kproc_shutdown_wait, CTLFLAG_RW,
  956     &kproc_shutdown_wait, 0, "Max wait time (sec) to stop for each process");
  957 
  958 void
  959 kproc_shutdown(void *arg, int howto)
  960 {
  961         struct proc *p;
  962         int error;
  963 
  964         if (panicstr)
  965                 return;
  966 
  967         p = (struct proc *)arg;
  968         printf("Waiting (max %d seconds) for system process `%s' to stop... ",
  969             kproc_shutdown_wait, p->p_comm);
  970         error = kproc_suspend(p, kproc_shutdown_wait * hz);
  971 
  972         if (error == EWOULDBLOCK)
  973                 printf("timed out\n");
  974         else
  975                 printf("done\n");
  976 }
  977 
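/*
 * A sketch of how a system process might arrange to be stopped before the
 * final filesystem sync ("examplekproc" is a hypothetical struct proc *):
 *
 *     EVENTHANDLER_REGISTER(shutdown_pre_sync, kproc_shutdown, examplekproc,
 *         SHUTDOWN_PRI_FIRST);
 */
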
  978 void
  979 kthread_shutdown(void *arg, int howto)
  980 {
  981         struct thread *td;
  982         int error;
  983 
  984         if (panicstr)
  985                 return;
  986 
  987         td = (struct thread *)arg;
  988         printf("Waiting (max %d seconds) for system thread `%s' to stop... ",
  989             kproc_shutdown_wait, td->td_name);
  990         error = kthread_suspend(td, kproc_shutdown_wait * hz);
  991 
  992         if (error == EWOULDBLOCK)
  993                 printf("timed out\n");
  994         else
  995                 printf("done\n");
  996 }
  997 
  998 static int
  999 dumpdevname_sysctl_handler(SYSCTL_HANDLER_ARGS)
 1000 {
 1001         char buf[256];
 1002         struct dumperinfo *di;
 1003         struct sbuf sb;
 1004         int error;
 1005 
 1006         error = sysctl_wire_old_buffer(req, 0);
 1007         if (error != 0)
 1008                 return (error);
 1009 
 1010         sbuf_new_for_sysctl(&sb, buf, sizeof(buf), req);
 1011 
 1012         mtx_lock(&dumpconf_list_lk);
 1013         TAILQ_FOREACH(di, &dumper_configs, di_next) {
 1014                 if (di != TAILQ_FIRST(&dumper_configs))
 1015                         sbuf_putc(&sb, ',');
 1016                 sbuf_cat(&sb, di->di_devname);
 1017         }
 1018         mtx_unlock(&dumpconf_list_lk);
 1019 
 1020         error = sbuf_finish(&sb);
 1021         sbuf_delete(&sb);
 1022         return (error);
 1023 }
 1024 SYSCTL_PROC(_kern_shutdown, OID_AUTO, dumpdevname,
 1025     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, &dumper_configs, 0,
 1026     dumpdevname_sysctl_handler, "A",
 1027     "Device(s) for kernel dumps");
 1028 
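/*
 * Illustrative usage, assuming the usual dumpon(8)/savecore(8) workflow and a
 * made-up device name:
 *
 *     dumpon /dev/ada0p3                  # configure a dump device
 *     sysctl kern.shutdown.dumpdevname    # list the configured device(s)
 *
 * Multiple configured dumpers are reported as a comma-separated list, in the
 * order in which doadump() will try them.
 */
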
 1029 static int      _dump_append(struct dumperinfo *di, void *virtual,
 1030                     vm_offset_t physical, size_t length);
 1031 
 1032 #ifdef EKCD
 1033 static struct kerneldumpcrypto *
 1034 kerneldumpcrypto_create(size_t blocksize, uint8_t encryption,
 1035     const uint8_t *key, uint32_t encryptedkeysize, const uint8_t *encryptedkey)
 1036 {
 1037         struct kerneldumpcrypto *kdc;
 1038         struct kerneldumpkey *kdk;
 1039         uint32_t dumpkeysize;
 1040 
 1041         dumpkeysize = roundup2(sizeof(*kdk) + encryptedkeysize, blocksize);
 1042         kdc = malloc(sizeof(*kdc) + dumpkeysize, M_EKCD, M_WAITOK | M_ZERO);
 1043 
 1044         arc4rand(kdc->kdc_iv, sizeof(kdc->kdc_iv), 0);
 1045 
 1046         kdc->kdc_encryption = encryption;
 1047         switch (kdc->kdc_encryption) {
 1048         case KERNELDUMP_ENC_AES_256_CBC:
 1049                 if (rijndael_makeKey(&kdc->kdc_ki, DIR_ENCRYPT, 256, key) <= 0)
 1050                         goto failed;
 1051                 break;
 1052         case KERNELDUMP_ENC_CHACHA20:
 1053                 chacha_keysetup(&kdc->kdc_chacha, key, 256);
 1054                 break;
 1055         default:
 1056                 goto failed;
 1057         }
 1058 
 1059         kdc->kdc_dumpkeysize = dumpkeysize;
 1060         kdk = kdc->kdc_dumpkey;
 1061         kdk->kdk_encryption = kdc->kdc_encryption;
 1062         memcpy(kdk->kdk_iv, kdc->kdc_iv, sizeof(kdk->kdk_iv));
 1063         kdk->kdk_encryptedkeysize = htod32(encryptedkeysize);
 1064         memcpy(kdk->kdk_encryptedkey, encryptedkey, encryptedkeysize);
 1065 
 1066         return (kdc);
 1067 failed:
 1068         zfree(kdc, M_EKCD);
 1069         return (NULL);
 1070 }
 1071 
 1072 static int
 1073 kerneldumpcrypto_init(struct kerneldumpcrypto *kdc)
 1074 {
 1075         uint8_t hash[SHA256_DIGEST_LENGTH];
 1076         SHA256_CTX ctx;
 1077         struct kerneldumpkey *kdk;
 1078         int error;
 1079 
 1080         error = 0;
 1081 
 1082         if (kdc == NULL)
 1083                 return (0);
 1084 
 1085         /*
 1086          * When a user enters ddb, a crash dump can be written multiple times.
 1087          * Each time it should be encrypted using a different IV.
 1088          */
 1089         SHA256_Init(&ctx);
 1090         SHA256_Update(&ctx, kdc->kdc_iv, sizeof(kdc->kdc_iv));
 1091         SHA256_Final(hash, &ctx);
 1092         bcopy(hash, kdc->kdc_iv, sizeof(kdc->kdc_iv));
 1093 
 1094         switch (kdc->kdc_encryption) {
 1095         case KERNELDUMP_ENC_AES_256_CBC:
 1096                 if (rijndael_cipherInit(&kdc->kdc_ci, MODE_CBC,
 1097                     kdc->kdc_iv) <= 0) {
 1098                         error = EINVAL;
 1099                         goto out;
 1100                 }
 1101                 break;
 1102         case KERNELDUMP_ENC_CHACHA20:
 1103                 chacha_ivsetup(&kdc->kdc_chacha, kdc->kdc_iv, NULL);
 1104                 break;
 1105         default:
 1106                 error = EINVAL;
 1107                 goto out;
 1108         }
 1109 
 1110         kdk = kdc->kdc_dumpkey;
 1111         memcpy(kdk->kdk_iv, kdc->kdc_iv, sizeof(kdk->kdk_iv));
 1112 out:
 1113         explicit_bzero(hash, sizeof(hash));
 1114         return (error);
 1115 }
 1116 
 1117 static uint32_t
 1118 kerneldumpcrypto_dumpkeysize(const struct kerneldumpcrypto *kdc)
 1119 {
 1120 
 1121         if (kdc == NULL)
 1122                 return (0);
 1123         return (kdc->kdc_dumpkeysize);
 1124 }
 1125 #endif /* EKCD */
 1126 
 1127 static struct kerneldumpcomp *
 1128 kerneldumpcomp_create(struct dumperinfo *di, uint8_t compression)
 1129 {
 1130         struct kerneldumpcomp *kdcomp;
 1131         int format;
 1132 
 1133         switch (compression) {
 1134         case KERNELDUMP_COMP_GZIP:
 1135                 format = COMPRESS_GZIP;
 1136                 break;
 1137         case KERNELDUMP_COMP_ZSTD:
 1138                 format = COMPRESS_ZSTD;
 1139                 break;
 1140         default:
 1141                 return (NULL);
 1142         }
 1143 
 1144         kdcomp = malloc(sizeof(*kdcomp), M_DUMPER, M_WAITOK | M_ZERO);
 1145         kdcomp->kdc_format = compression;
 1146         kdcomp->kdc_stream = compressor_init(kerneldumpcomp_write_cb,
 1147             format, di->maxiosize, kerneldump_gzlevel, di);
 1148         if (kdcomp->kdc_stream == NULL) {
 1149                 free(kdcomp, M_DUMPER);
 1150                 return (NULL);
 1151         }
 1152         kdcomp->kdc_buf = malloc(di->maxiosize, M_DUMPER, M_WAITOK | M_NODUMP);
 1153         return (kdcomp);
 1154 }
 1155 
 1156 static void
 1157 kerneldumpcomp_destroy(struct dumperinfo *di)
 1158 {
 1159         struct kerneldumpcomp *kdcomp;
 1160 
 1161         kdcomp = di->kdcomp;
 1162         if (kdcomp == NULL)
 1163                 return;
 1164         compressor_fini(kdcomp->kdc_stream);
 1165         zfree(kdcomp->kdc_buf, M_DUMPER);
 1166         free(kdcomp, M_DUMPER);
 1167 }
 1168 
 1169 /*
 1170  * Must not be present on global list.
 1171  */
 1172 static void
 1173 free_single_dumper(struct dumperinfo *di)
 1174 {
 1175 
 1176         if (di == NULL)
 1177                 return;
 1178 
 1179         zfree(di->blockbuf, M_DUMPER);
 1180 
 1181         kerneldumpcomp_destroy(di);
 1182 
 1183 #ifdef EKCD
 1184         zfree(di->kdcrypto, M_EKCD);
 1185 #endif
 1186         zfree(di, M_DUMPER);
 1187 }
 1188 
 1189 /* Registration of dumpers */
 1190 int
 1191 dumper_insert(const struct dumperinfo *di_template, const char *devname,
 1192     const struct diocskerneldump_arg *kda)
 1193 {
 1194         struct dumperinfo *newdi, *listdi;
 1195         bool inserted;
 1196         uint8_t index;
 1197         int error;
 1198 
 1199         index = kda->kda_index;
 1200         MPASS(index != KDA_REMOVE && index != KDA_REMOVE_DEV &&
 1201             index != KDA_REMOVE_ALL);
 1202 
 1203         error = priv_check(curthread, PRIV_SETDUMPER);
 1204         if (error != 0)
 1205                 return (error);
 1206 
 1207         newdi = malloc(sizeof(*newdi) + strlen(devname) + 1, M_DUMPER, M_WAITOK
 1208             | M_ZERO);
 1209         memcpy(newdi, di_template, sizeof(*newdi));
 1210         newdi->blockbuf = NULL;
 1211         newdi->kdcrypto = NULL;
 1212         newdi->kdcomp = NULL;
 1213         strcpy(newdi->di_devname, devname);
 1214 
 1215         if (kda->kda_encryption != KERNELDUMP_ENC_NONE) {
 1216 #ifdef EKCD
 1217                 newdi->kdcrypto = kerneldumpcrypto_create(di_template->blocksize,
 1218                     kda->kda_encryption, kda->kda_key,
 1219                     kda->kda_encryptedkeysize, kda->kda_encryptedkey);
 1220                 if (newdi->kdcrypto == NULL) {
 1221                         error = EINVAL;
 1222                         goto cleanup;
 1223                 }
 1224 #else
 1225                 error = EOPNOTSUPP;
 1226                 goto cleanup;
 1227 #endif
 1228         }
 1229         if (kda->kda_compression != KERNELDUMP_COMP_NONE) {
 1230 #ifdef EKCD
 1231                 /*
 1232                  * We can't support simultaneous unpadded block cipher
 1233                  * encryption and compression because there is no guarantee the
 1234                  * length of the compressed result is exactly a multiple of the
 1235                  * cipher block size.
 1236                  */
 1237                 if (kda->kda_encryption == KERNELDUMP_ENC_AES_256_CBC) {
 1238                         error = EOPNOTSUPP;
 1239                         goto cleanup;
 1240                 }
 1241 #endif
 1242                 newdi->kdcomp = kerneldumpcomp_create(newdi,
 1243                     kda->kda_compression);
 1244                 if (newdi->kdcomp == NULL) {
 1245                         error = EINVAL;
 1246                         goto cleanup;
 1247                 }
 1248         }
 1249 
 1250         newdi->blockbuf = malloc(newdi->blocksize, M_DUMPER, M_WAITOK | M_ZERO);
 1251 
 1252         /* Add the new configuration to the queue */
 1253         mtx_lock(&dumpconf_list_lk);
 1254         inserted = false;
 1255         TAILQ_FOREACH(listdi, &dumper_configs, di_next) {
 1256                 if (index == 0) {
 1257                         TAILQ_INSERT_BEFORE(listdi, newdi, di_next);
 1258                         inserted = true;
 1259                         break;
 1260                 }
 1261                 index--;
 1262         }
 1263         if (!inserted)
 1264                 TAILQ_INSERT_TAIL(&dumper_configs, newdi, di_next);
 1265         mtx_unlock(&dumpconf_list_lk);
 1266 
 1267         return (0);
 1268 
 1269 cleanup:
 1270         free_single_dumper(newdi);
 1271         return (error);
 1272 }
 1273 
 1274 #ifdef DDB
 1275 void
 1276 dumper_ddb_insert(struct dumperinfo *newdi)
 1277 {
 1278         TAILQ_INSERT_HEAD(&dumper_configs, newdi, di_next);
 1279 }
 1280 
 1281 void
 1282 dumper_ddb_remove(struct dumperinfo *di)
 1283 {
 1284         TAILQ_REMOVE(&dumper_configs, di, di_next);
 1285 }
 1286 #endif
 1287 
 1288 static bool
 1289 dumper_config_match(const struct dumperinfo *di, const char *devname,
 1290     const struct diocskerneldump_arg *kda)
 1291 {
 1292         if (kda->kda_index == KDA_REMOVE_ALL)
 1293                 return (true);
 1294 
 1295         if (strcmp(di->di_devname, devname) != 0)
 1296                 return (false);
 1297 
 1298         /*
 1299          * Allow wildcard removal of configs matching a device on g_dev_orphan.
 1300          */
 1301         if (kda->kda_index == KDA_REMOVE_DEV)
 1302                 return (true);
 1303 
 1304         if (di->kdcomp != NULL) {
 1305                 if (di->kdcomp->kdc_format != kda->kda_compression)
 1306                         return (false);
 1307         } else if (kda->kda_compression != KERNELDUMP_COMP_NONE)
 1308                 return (false);
 1309 #ifdef EKCD
 1310         if (di->kdcrypto != NULL) {
 1311                 if (di->kdcrypto->kdc_encryption != kda->kda_encryption)
 1312                         return (false);
 1313                 /*
 1314                  * Do we care to verify keys match to delete?  It seems weird
 1315                  * to expect multiple fallback dump configurations on the same
 1316                  * device that only differ in crypto key.
 1317                  */
 1318         } else
 1319 #endif
 1320                 if (kda->kda_encryption != KERNELDUMP_ENC_NONE)
 1321                         return (false);
 1322 
 1323         return (true);
 1324 }
 1325 
 1326 int
 1327 dumper_remove(const char *devname, const struct diocskerneldump_arg *kda)
 1328 {
 1329         struct dumperinfo *di, *sdi;
 1330         bool found;
 1331         int error;
 1332 
 1333         error = priv_check(curthread, PRIV_SETDUMPER);
 1334         if (error != 0)
 1335                 return (error);
 1336 
 1337         /*
 1338          * Try to find a matching configuration, and kill it.
 1339          *
 1340          * NULL 'kda' indicates remove any configuration matching 'devname',
 1341          * which may remove multiple configurations in atypical configurations.
 1342          */
 1343         found = false;
 1344         mtx_lock(&dumpconf_list_lk);
 1345         TAILQ_FOREACH_SAFE(di, &dumper_configs, di_next, sdi) {
 1346                 if (dumper_config_match(di, devname, kda)) {
 1347                         found = true;
 1348                         TAILQ_REMOVE(&dumper_configs, di, di_next);
 1349                         free_single_dumper(di);
 1350                 }
 1351         }
 1352         mtx_unlock(&dumpconf_list_lk);
 1353 
 1354         /* Only produce ENOENT if a more targeted match didn't match. */
 1355         if (!found && kda->kda_index == KDA_REMOVE)
 1356                 return (ENOENT);
 1357         return (0);
 1358 }
 1359 
 1360 static int
 1361 dump_check_bounds(struct dumperinfo *di, off_t offset, size_t length)
 1362 {
 1363 
 1364         if (di->mediasize > 0 && length != 0 && (offset < di->mediaoffset ||
 1365             offset - di->mediaoffset + length > di->mediasize)) {
 1366                 if (di->kdcomp != NULL && offset >= di->mediaoffset) {
 1367                         printf(
 1368                     "Compressed dump failed to fit in device boundaries.\n");
 1369                         return (E2BIG);
 1370                 }
 1371 
 1372                 printf("Attempt to write outside dump device boundaries.\n"
 1373             "offset(%jd), mediaoffset(%jd), length(%ju), mediasize(%jd).\n",
 1374                     (intmax_t)offset, (intmax_t)di->mediaoffset,
 1375                     (uintmax_t)length, (intmax_t)di->mediasize);
 1376                 return (ENOSPC);
 1377         }
 1378         if (length % di->blocksize != 0) {
 1379                 printf("Attempt to write partial block of length %ju.\n",
 1380                     (uintmax_t)length);
 1381                 return (EINVAL);
 1382         }
 1383         if (offset % di->blocksize != 0) {
 1384                 printf("Attempt to write at unaligned offset %jd.\n",
 1385                     (intmax_t)offset);
 1386                 return (EINVAL);
 1387         }
 1388 
 1389         return (0);
 1390 }
 1391 
 1392 #ifdef EKCD
 1393 static int
 1394 dump_encrypt(struct kerneldumpcrypto *kdc, uint8_t *buf, size_t size)
 1395 {
 1396 
 1397         switch (kdc->kdc_encryption) {
 1398         case KERNELDUMP_ENC_AES_256_CBC:
 1399                 if (rijndael_blockEncrypt(&kdc->kdc_ci, &kdc->kdc_ki, buf,
 1400                     8 * size, buf) <= 0) {
 1401                         return (EIO);
 1402                 }
 1403                 if (rijndael_cipherInit(&kdc->kdc_ci, MODE_CBC,
 1404                     buf + size - 16 /* IV size for AES-256-CBC */) <= 0) {
 1405                         return (EIO);
 1406                 }
 1407                 break;
 1408         case KERNELDUMP_ENC_CHACHA20:
 1409                 chacha_encrypt_bytes(&kdc->kdc_chacha, buf, buf, size);
 1410                 break;
 1411         default:
 1412                 return (EINVAL);
 1413         }
 1414 
 1415         return (0);
 1416 }
 1417 
 1418 /* Encrypt data and call dumper. */
 1419 static int
 1420 dump_encrypted_write(struct dumperinfo *di, void *virtual,
 1421     vm_offset_t physical, off_t offset, size_t length)
 1422 {
 1423         static uint8_t buf[KERNELDUMP_BUFFER_SIZE];
 1424         struct kerneldumpcrypto *kdc;
 1425         int error;
 1426         size_t nbytes;
 1427 
 1428         kdc = di->kdcrypto;
 1429 
 1430         while (length > 0) {
 1431                 nbytes = MIN(length, sizeof(buf));
 1432                 bcopy(virtual, buf, nbytes);
 1433 
 1434                 if (dump_encrypt(kdc, buf, nbytes) != 0)
 1435                         return (EIO);
 1436 
 1437                 error = dump_write(di, buf, physical, offset, nbytes);
 1438                 if (error != 0)
 1439                         return (error);
 1440 
 1441                 offset += nbytes;
 1442                 virtual = (void *)((uint8_t *)virtual + nbytes);
 1443                 length -= nbytes;
 1444         }
 1445 
 1446         return (0);
 1447 }
 1448 #endif /* EKCD */
 1449 
 1450 static int
 1451 kerneldumpcomp_write_cb(void *base, size_t length, off_t offset, void *arg)
 1452 {
 1453         struct dumperinfo *di;
 1454         size_t resid, rlength;
 1455         int error;
 1456 
 1457         di = arg;
 1458 
 1459         if (length % di->blocksize != 0) {
 1460                 /*
 1461                  * This must be the final write after flushing the compression
 1462                  * stream. Write as many full blocks as possible and stash the
 1463                  * residual data in the dumper's block buffer. It will be
 1464                  * padded and written in dump_finish().
 1465                  */
 1466                 rlength = rounddown(length, di->blocksize);
 1467                 if (rlength != 0) {
 1468                         error = _dump_append(di, base, 0, rlength);
 1469                         if (error != 0)
 1470                                 return (error);
 1471                 }
 1472                 resid = length - rlength;
 1473                 memmove(di->blockbuf, (uint8_t *)base + rlength, resid);
 1474                 bzero((uint8_t *)di->blockbuf + resid, di->blocksize - resid);
 1475                 di->kdcomp->kdc_resid = resid;
 1476                 return (EAGAIN);
 1477         }
 1478         return (_dump_append(di, base, 0, length));
 1479 }
 1480 
 1481 /*
 1482  * Write kernel dump headers at the beginning and end of the dump extent.
 1483  * Write the kernel dump encryption key after the leading header if we were
 1484  * configured to do so.
 1485  */
 1486 static int
 1487 dump_write_headers(struct dumperinfo *di, struct kerneldumpheader *kdh)
 1488 {
 1489 #ifdef EKCD
 1490         struct kerneldumpcrypto *kdc;
 1491 #endif
 1492         void *buf, *key;
 1493         size_t hdrsz;
 1494         uint64_t extent;
 1495         uint32_t keysize;
 1496         int error;
 1497 
 1498         hdrsz = sizeof(*kdh);
 1499         if (hdrsz > di->blocksize)
 1500                 return (ENOMEM);
 1501 
 1502 #ifdef EKCD
 1503         kdc = di->kdcrypto;
 1504         key = kdc->kdc_dumpkey;
 1505         keysize = kerneldumpcrypto_dumpkeysize(kdc);
 1506 #else
 1507         key = NULL;
 1508         keysize = 0;
 1509 #endif
 1510 
 1511         /*
 1512          * If the dump device has special handling for headers, let it take care
 1513          * of writing them out.
 1514          */
 1515         if (di->dumper_hdr != NULL)
 1516                 return (di->dumper_hdr(di, kdh, key, keysize));
 1517 
 1518         if (hdrsz == di->blocksize)
 1519                 buf = kdh;
 1520         else {
 1521                 buf = di->blockbuf;
 1522                 memset(buf, 0, di->blocksize);
 1523                 memcpy(buf, kdh, hdrsz);
 1524         }
 1525 
 1526         extent = dtoh64(kdh->dumpextent);
 1527 #ifdef EKCD
 1528         if (kdc != NULL) {
 1529                 error = dump_write(di, kdc->kdc_dumpkey, 0,
 1530                     di->mediaoffset + di->mediasize - di->blocksize - extent -
 1531                     keysize, keysize);
 1532                 if (error != 0)
 1533                         return (error);
 1534         }
 1535 #endif
 1536 
 1537         error = dump_write(di, buf, 0,
 1538             di->mediaoffset + di->mediasize - 2 * di->blocksize - extent -
 1539             keysize, di->blocksize);
 1540         if (error == 0)
 1541                 error = dump_write(di, buf, 0, di->mediaoffset + di->mediasize -
 1542                     di->blocksize, di->blocksize);
 1543         return (error);
 1544 }
 1545 
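/*
 * Editor's worked example, not part of the original file, using made-up
 * numbers (mediaoffset 0, mediasize 1 GiB, blocksize 512, extent 4 MiB,
 * keysize 512).  Working back from the end of the device,
 * dump_write_headers() above lays things out as:
 *
 *      trailing header:  1073741824 - 512                    = 1073741312
 *      dump extent:      1073741824 - 512 - 4194304          = 1069547008
 *      encryption key:   1073741824 - 512 - 4194304 - 512    = 1069546496
 *      leading header:   1073741824 - 2*512 - 4194304 - 512  = 1069545984
 *
 * i.e. the key sits directly before the extent and the leading header
 * directly before the key.
 */
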
 1546 /*
 1547  * Don't touch the first SIZEOF_METADATA bytes on the dump device.  This is to
 1548  * protect us from metadata and metadata from us.
 1549  */
 1550 #define SIZEOF_METADATA         (64 * 1024)
 1551 
 1552 /*
 1553  * Do some preliminary setup for a kernel dump: initialize state for encryption,
 1554  * if requested, and make sure that we have enough space on the dump device.
 1555  *
 1556  * We set things up so that the dump ends before the last sector of the dump
 1557  * device, at which the trailing header is written.
 1558  *
 1559  *     +-----------+------+-----+----------------------------+------+
 1560  *     |           | lhdr | key |    ... kernel dump ...     | thdr |
 1561  *     +-----------+------+-----+----------------------------+------+
 1562  *                   1 blk  opt <------- dump extent --------> 1 blk
 1563  *
 1564  * Dumps written using dump_append() start at the beginning of the extent.
 1565  * Uncompressed dumps will use the entire extent, but compressed dumps typically
 1566  * will not. The true length of the dump is recorded in the leading and trailing
 1567  * headers once the dump has been completed.
 1568  *
 1569  * The dump device may provide a callback, in which case it will initialize
 1570  * dumpoff and take care of laying out the headers.
 1571  */
 1572 int
 1573 dump_start(struct dumperinfo *di, struct kerneldumpheader *kdh)
 1574 {
 1575         uint64_t dumpextent, span;
 1576         uint32_t keysize;
 1577         int error;
 1578 
 1579 #ifdef EKCD
 1580         error = kerneldumpcrypto_init(di->kdcrypto);
 1581         if (error != 0)
 1582                 return (error);
 1583         keysize = kerneldumpcrypto_dumpkeysize(di->kdcrypto);
 1584 #else
 1585         error = 0;
 1586         keysize = 0;
 1587 #endif
 1588 
 1589         if (di->dumper_start != NULL) {
 1590                 error = di->dumper_start(di);
 1591         } else {
 1592                 dumpextent = dtoh64(kdh->dumpextent);
 1593                 span = SIZEOF_METADATA + dumpextent + 2 * di->blocksize +
 1594                     keysize;
 1595                 if (di->mediasize < span) {
 1596                         if (di->kdcomp == NULL)
 1597                                 return (E2BIG);
 1598 
 1599                         /*
 1600                          * We don't yet know how much space the compressed dump
 1601                          * will occupy, so try to use the whole swap partition
 1602                          * (minus the first 64KB) in the hope that the
 1603                          * compressed dump will fit. If that doesn't turn out to
 1604                          * be enough, the bounds checking in dump_write()
 1605                          * will catch us and cause the dump to fail.
 1606                          */
 1607                         dumpextent = di->mediasize - span + dumpextent;
 1608                         kdh->dumpextent = htod64(dumpextent);
 1609                 }
 1610 
 1611                 /*
 1612                  * The offset at which to begin writing the dump.
 1613                  */
 1614                 di->dumpoff = di->mediaoffset + di->mediasize - di->blocksize -
 1615                     dumpextent;
 1616         }
 1617         di->origdumpoff = di->dumpoff;
 1618         return (error);
 1619 }
 1620 
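/*
 * Editor's worked example, not part of the original file, using made-up
 * numbers: suppose kdh->dumpextent is 8 GiB but the swap device is only
 * 4 GiB (mediasize 4294967296, blocksize 512, keysize 0).  Then
 *
 *      span = 65536 + 8589934592 + 2*512 + 0 > mediasize,
 *
 * so an uncompressed dump fails with E2BIG.  With compression enabled the
 * extent is instead shrunk to fill the device:
 *
 *      dumpextent = mediasize - 65536 - 2*512 - 0 = 4294900736,
 *
 * and the dump begins at mediaoffset + 66048, just past the 64KB metadata
 * area and the one-block leading header.
 */
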
 1621 static int
 1622 _dump_append(struct dumperinfo *di, void *virtual, vm_offset_t physical,
 1623     size_t length)
 1624 {
 1625         int error;
 1626 
 1627 #ifdef EKCD
 1628         if (di->kdcrypto != NULL)
 1629                 error = dump_encrypted_write(di, virtual, physical, di->dumpoff,
 1630                     length);
 1631         else
 1632 #endif
 1633                 error = dump_write(di, virtual, physical, di->dumpoff, length);
 1634         if (error == 0)
 1635                 di->dumpoff += length;
 1636         return (error);
 1637 }
 1638 
 1639 /*
 1640  * Write to the dump device starting at dumpoff. When compression is enabled,
 1641  * writes to the device will be performed using a callback that gets invoked
 1642  * when the compression stream's output buffer is full.
 1643  */
 1644 int
 1645 dump_append(struct dumperinfo *di, void *virtual, vm_offset_t physical,
 1646     size_t length)
 1647 {
 1648         void *buf;
 1649 
 1650         if (di->kdcomp != NULL) {
 1651                 /* Bounce through a buffer to avoid CRC errors. */
 1652                 if (length > di->maxiosize)
 1653                         return (EINVAL);
 1654                 buf = di->kdcomp->kdc_buf;
 1655                 memmove(buf, virtual, length);
 1656                 return (compressor_write(di->kdcomp->kdc_stream, buf, length));
 1657         }
 1658         return (_dump_append(di, virtual, physical, length));
 1659 }
 1660 
 1661 /*
 1662  * Write to the dump device at the specified offset.
 1663  */
 1664 int
 1665 dump_write(struct dumperinfo *di, void *virtual, vm_offset_t physical,
 1666     off_t offset, size_t length)
 1667 {
 1668         int error;
 1669 
 1670         error = dump_check_bounds(di, offset, length);
 1671         if (error != 0)
 1672                 return (error);
 1673         return (di->dumper(di->priv, virtual, physical, offset, length));
 1674 }
 1675 
 1676 /*
 1677  * Perform kernel dump finalization: flush the compression stream, if necessary,
 1678  * write the leading and trailing kernel dump headers now that we know the true
 1679  * length of the dump, and optionally write the encryption key following the
 1680  * leading header.
 1681  */
 1682 int
 1683 dump_finish(struct dumperinfo *di, struct kerneldumpheader *kdh)
 1684 {
 1685         int error;
 1686 
 1687         if (di->kdcomp != NULL) {
 1688                 error = compressor_flush(di->kdcomp->kdc_stream);
 1689                 if (error == EAGAIN) {
 1690                         /* We have residual data in di->blockbuf. */
 1691                         error = _dump_append(di, di->blockbuf, 0, di->blocksize);
 1692                         if (error == 0)
 1693                                 /* Compensate for _dump_append()'s adjustment. */
 1694                                 di->dumpoff -= di->blocksize - di->kdcomp->kdc_resid;
 1695                         di->kdcomp->kdc_resid = 0;
 1696                 }
 1697                 if (error != 0)
 1698                         return (error);
 1699 
 1700                 /*
 1701                  * We now know the size of the compressed dump, so update the
 1702                  * header accordingly and recompute parity.
 1703                  */
 1704                 kdh->dumplength = htod64(di->dumpoff - di->origdumpoff);
 1705                 kdh->parity = 0;
 1706                 kdh->parity = kerneldump_parity(kdh);
 1707 
 1708                 compressor_reset(di->kdcomp->kdc_stream);
 1709         }
 1710 
 1711         error = dump_write_headers(di, kdh);
 1712         if (error != 0)
 1713                 return (error);
 1714 
 1715         (void)dump_write(di, NULL, 0, 0, 0);
 1716         return (0);
 1717 }
 1718 
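/*
 * Editor's worked example, not part of the original file: with a 512-byte
 * block size and 276 residual bytes left behind by the compression
 * callback, the _dump_append() call in dump_finish() above advances
 * di->dumpoff by a full 512 bytes; the subsequent
 * "di->dumpoff -= 512 - 276" pulls it back by 236, so dumpoff - origdumpoff
 * (and hence kdh->dumplength) counts only the 276 bytes of real compressed
 * data rather than the zero padding.
 */
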
 1719 void
 1720 dump_init_header(const struct dumperinfo *di, struct kerneldumpheader *kdh,
 1721     const char *magic, uint32_t archver, uint64_t dumplen)
 1722 {
 1723         size_t dstsize;
 1724 
 1725         bzero(kdh, sizeof(*kdh));
 1726         strlcpy(kdh->magic, magic, sizeof(kdh->magic));
 1727         strlcpy(kdh->architecture, MACHINE_ARCH, sizeof(kdh->architecture));
 1728         kdh->version = htod32(KERNELDUMPVERSION);
 1729         kdh->architectureversion = htod32(archver);
 1730         kdh->dumplength = htod64(dumplen);
 1731         kdh->dumpextent = kdh->dumplength;
 1732         kdh->dumptime = htod64(time_second);
 1733 #ifdef EKCD
 1734         kdh->dumpkeysize = htod32(kerneldumpcrypto_dumpkeysize(di->kdcrypto));
 1735 #else
 1736         kdh->dumpkeysize = 0;
 1737 #endif
 1738         kdh->blocksize = htod32(di->blocksize);
 1739         strlcpy(kdh->hostname, prison0.pr_hostname, sizeof(kdh->hostname));
 1740         dstsize = sizeof(kdh->versionstring);
 1741         if (strlcpy(kdh->versionstring, version, dstsize) >= dstsize)
 1742                 kdh->versionstring[dstsize - 2] = '\n';
 1743         if (panicstr != NULL)
 1744                 strlcpy(kdh->panicstring, panicstr, sizeof(kdh->panicstring));
 1745         if (di->kdcomp != NULL)
 1746                 kdh->compression = di->kdcomp->kdc_format;
 1747         kdh->parity = kerneldump_parity(kdh);
 1748 }
 1749 
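/*
 * Editor's sketch, not part of the original file: the order in which a
 * dump routine is expected to drive this API, shown with a hypothetical
 * caller.  KERNELDUMPMAGIC and KERNELDUMP_ARCH_VERSION are assumed to come
 * from <sys/kerneldump.h>; real callers append page-sized chunks in a loop
 * rather than one large buffer.
 */
static int __unused
sketch_run_dump(struct dumperinfo *di, void *pages, size_t len)
{
        struct kerneldumpheader kdh;
        int error;

        dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_ARCH_VERSION,
            len);
        error = dump_start(di, &kdh);           /* reserve space, set dumpoff */
        if (error != 0)
                return (error);
        error = dump_append(di, pages, 0, len); /* usually done in a loop */
        if (error != 0)
                return (error);
        return (dump_finish(di, &kdh));         /* flush, write headers */
}
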
 1750 #ifdef DDB
 1751 DB_SHOW_COMMAND(panic, db_show_panic)
 1752 {
 1753 
 1754         if (panicstr == NULL)
 1755                 db_printf("panicstr not set\n");
 1756         else
 1757                 db_printf("panic: %s\n", panicstr);
 1758 }
 1759 #endif
