The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/i386/i386/machdep.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-4-Clause
    3  *
    4  * Copyright (c) 2018 The FreeBSD Foundation
    5  * Copyright (c) 1992 Terrence R. Lambert.
    6  * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
    7  * All rights reserved.
    8  *
    9  * This code is derived from software contributed to Berkeley by
   10  * William Jolitz.
   11  *
   12  * Portions of this software were developed by A. Joseph Koshy under
   13  * sponsorship from the FreeBSD Foundation and Google, Inc.
   14  *
   15  * Redistribution and use in source and binary forms, with or without
   16  * modification, are permitted provided that the following conditions
   17  * are met:
   18  * 1. Redistributions of source code must retain the above copyright
   19  *    notice, this list of conditions and the following disclaimer.
   20  * 2. Redistributions in binary form must reproduce the above copyright
   21  *    notice, this list of conditions and the following disclaimer in the
   22  *    documentation and/or other materials provided with the distribution.
   23  * 3. All advertising materials mentioning features or use of this software
   24  *    must display the following acknowledgement:
   25  *      This product includes software developed by the University of
   26  *      California, Berkeley and its contributors.
   27  * 4. Neither the name of the University nor the names of its contributors
   28  *    may be used to endorse or promote products derived from this software
   29  *    without specific prior written permission.
   30  *
   31  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   32  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   33  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   34  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   35  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   36  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   37  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   38  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   39  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   40  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   41  * SUCH DAMAGE.
   42  *
   43  *      from: @(#)machdep.c     7.4 (Berkeley) 6/3/91
   44  */
   45 
   46 #include <sys/cdefs.h>
   47 __FBSDID("$FreeBSD$");
   48 
   49 #include "opt_apic.h"
   50 #include "opt_atpic.h"
   51 #include "opt_cpu.h"
   52 #include "opt_ddb.h"
   53 #include "opt_inet.h"
   54 #include "opt_isa.h"
   55 #include "opt_kstack_pages.h"
   56 #include "opt_maxmem.h"
   57 #include "opt_mp_watchdog.h"
   58 #include "opt_perfmon.h"
   59 #include "opt_platform.h"
   60 
   61 #include <sys/param.h>
   62 #include <sys/proc.h>
   63 #include <sys/systm.h>
   64 #include <sys/bio.h>
   65 #include <sys/buf.h>
   66 #include <sys/bus.h>
   67 #include <sys/callout.h>
   68 #include <sys/cons.h>
   69 #include <sys/cpu.h>
   70 #include <sys/eventhandler.h>
   71 #include <sys/exec.h>
   72 #include <sys/imgact.h>
   73 #include <sys/kdb.h>
   74 #include <sys/kernel.h>
   75 #include <sys/ktr.h>
   76 #include <sys/linker.h>
   77 #include <sys/lock.h>
   78 #include <sys/malloc.h>
   79 #include <sys/memrange.h>
   80 #include <sys/msgbuf.h>
   81 #include <sys/mutex.h>
   82 #include <sys/pcpu.h>
   83 #include <sys/ptrace.h>
   84 #include <sys/reboot.h>
   85 #include <sys/rwlock.h>
   86 #include <sys/sched.h>
   87 #include <sys/signalvar.h>
   88 #include <sys/smp.h>
   89 #include <sys/syscallsubr.h>
   90 #include <sys/sysctl.h>
   91 #include <sys/sysent.h>
   92 #include <sys/sysproto.h>
   93 #include <sys/ucontext.h>
   94 #include <sys/vmmeter.h>
   95 
   96 #include <vm/vm.h>
   97 #include <vm/vm_param.h>
   98 #include <vm/vm_extern.h>
   99 #include <vm/vm_kern.h>
  100 #include <vm/vm_page.h>
  101 #include <vm/vm_map.h>
  102 #include <vm/vm_object.h>
  103 #include <vm/vm_pager.h>
  104 #include <vm/vm_phys.h>
  105 #include <vm/vm_dumpset.h>
  106 
  107 #ifdef DDB
  108 #ifndef KDB
  109 #error KDB must be enabled in order for DDB to work!
  110 #endif
  111 #include <ddb/ddb.h>
  112 #include <ddb/db_sym.h>
  113 #endif
  114 
  115 #include <isa/rtc.h>
  116 
  117 #include <net/netisr.h>
  118 
  119 #include <machine/bootinfo.h>
  120 #include <machine/clock.h>
  121 #include <machine/cpu.h>
  122 #include <machine/cputypes.h>
  123 #include <machine/intr_machdep.h>
  124 #include <x86/mca.h>
  125 #include <machine/md_var.h>
  126 #include <machine/metadata.h>
  127 #include <machine/mp_watchdog.h>
  128 #include <machine/pc/bios.h>
  129 #include <machine/pcb.h>
  130 #include <machine/pcb_ext.h>
  131 #include <machine/proc.h>
  132 #include <machine/reg.h>
  133 #include <machine/sigframe.h>
  134 #include <machine/specialreg.h>
  135 #include <machine/sysarch.h>
  136 #include <machine/trap.h>
  137 #include <x86/ucode.h>
  138 #include <machine/vm86.h>
  139 #include <x86/init.h>
  140 #ifdef PERFMON
  141 #include <machine/perfmon.h>
  142 #endif
  143 #ifdef SMP
  144 #include <machine/smp.h>
  145 #endif
  146 #ifdef FDT
  147 #include <x86/fdt.h>
  148 #endif
  149 
  150 #ifdef DEV_APIC
  151 #include <x86/apicvar.h>
  152 #endif
  153 
  154 #ifdef DEV_ISA
  155 #include <x86/isa/icu.h>
  156 #endif
  157 
  158 /* Sanity check for __curthread() */
  159 CTASSERT(offsetof(struct pcpu, pc_curthread) == 0);
  160 
  161 register_t init386(int first);
  162 void dblfault_handler(void);
  163 void identify_cpu(void);
  164 
  165 static void cpu_startup(void *);
  166 static void fpstate_drop(struct thread *td);
  167 static void get_fpcontext(struct thread *td, mcontext_t *mcp,
  168     char *xfpusave, size_t xfpusave_len);
  169 static int  set_fpcontext(struct thread *td, mcontext_t *mcp,
  170     char *xfpustate, size_t xfpustate_len);
  171 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
  172 
  173 /* Intel ICH registers */
  174 #define ICH_PMBASE      0x400
  175 #define ICH_SMI_EN      ICH_PMBASE + 0x30
  176 
  177 int     _udatasel, _ucodesel;
  178 u_int   basemem;
  179 static int above4g_allow = 1;
  180 static int above24g_allow = 0;
  181 
  182 int cold = 1;
  183 
  184 #ifdef COMPAT_43
  185 static void osendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask);
  186 #endif
  187 #ifdef COMPAT_FREEBSD4
  188 static void freebsd4_sendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask);
  189 #endif
  190 
  191 long Maxmem = 0;
  192 long realmem = 0;
  193 
  194 #ifdef PAE
  195 FEATURE(pae, "Physical Address Extensions");
  196 #endif
  197 
  198 struct kva_md_info kmi;
  199 
  200 static struct trapframe proc0_tf;
  201 struct pcpu __pcpu[MAXCPU];
  202 
  203 struct mtx icu_lock;
  204 
  205 struct mem_range_softc mem_range_softc;
  206 
  207 extern char start_exceptions[], end_exceptions[];
  208 
  209 extern struct sysentvec elf32_freebsd_sysvec;
  210 
  211 /* Default init_ops implementation. */
  212 struct init_ops init_ops = {
  213         .early_clock_source_init =      i8254_init,
  214         .early_delay =                  i8254_delay,
  215 #ifdef DEV_APIC
  216         .msi_init =                     msi_init,
  217 #endif
  218 };
  219 
  220 static void
  221 cpu_startup(dummy)
  222         void *dummy;
  223 {
  224         uintmax_t memsize;
  225         char *sysenv;
  226 
  227         /*
  228          * On MacBooks, we need to disallow the legacy USB circuit to
  229          * generate an SMI# because this can cause several problems,
  230          * namely: incorrect CPU frequency detection and failure to
  231          * start the APs.
  232          * We do this by disabling a bit in the SMI_EN (SMI Control and
  233          * Enable register) of the Intel ICH LPC Interface Bridge.
  234          */
  235         sysenv = kern_getenv("smbios.system.product");
  236         if (sysenv != NULL) {
  237                 if (strncmp(sysenv, "MacBook1,1", 10) == 0 ||
  238                     strncmp(sysenv, "MacBook3,1", 10) == 0 ||
  239                     strncmp(sysenv, "MacBook4,1", 10) == 0 ||
  240                     strncmp(sysenv, "MacBookPro1,1", 13) == 0 ||
  241                     strncmp(sysenv, "MacBookPro1,2", 13) == 0 ||
  242                     strncmp(sysenv, "MacBookPro3,1", 13) == 0 ||
  243                     strncmp(sysenv, "MacBookPro4,1", 13) == 0 ||
  244                     strncmp(sysenv, "Macmini1,1", 10) == 0) {
  245                         if (bootverbose)
  246                                 printf("Disabling LEGACY_USB_EN bit on "
  247                                     "Intel ICH.\n");
  248                         outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8);
  249                 }
  250                 freeenv(sysenv);
  251         }
  252 
  253         /*
  254          * Good {morning,afternoon,evening,night}.
  255          */
  256         startrtclock();
  257         printcpuinfo();
  258         panicifcpuunsupported();
  259 #ifdef PERFMON
  260         perfmon_init();
  261 #endif
  262 
  263         /*
  264          * Display physical memory if SMBIOS reports reasonable amount.
  265          */
  266         memsize = 0;
  267         sysenv = kern_getenv("smbios.memory.enabled");
  268         if (sysenv != NULL) {
  269                 memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10;
  270                 freeenv(sysenv);
  271         }
  272         if (memsize < ptoa((uintmax_t)vm_free_count()))
  273                 memsize = ptoa((uintmax_t)Maxmem);
  274         printf("real memory  = %ju (%ju MB)\n", memsize, memsize >> 20);
  275         realmem = atop(memsize);
  276 
  277         /*
  278          * Display any holes after the first chunk of extended memory.
  279          */
  280         if (bootverbose) {
  281                 int indx;
  282 
  283                 printf("Physical memory chunk(s):\n");
  284                 for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
  285                         vm_paddr_t size;
  286 
  287                         size = phys_avail[indx + 1] - phys_avail[indx];
  288                         printf(
  289                             "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n",
  290                             (uintmax_t)phys_avail[indx],
  291                             (uintmax_t)phys_avail[indx + 1] - 1,
  292                             (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
  293                 }
  294         }
  295 
  296         vm_ksubmap_init(&kmi);
  297 
  298         printf("avail memory = %ju (%ju MB)\n",
  299             ptoa((uintmax_t)vm_free_count()),
  300             ptoa((uintmax_t)vm_free_count()) / 1048576);
  301 
  302         /*
  303          * Set up buffers, so they can be used to read disk labels.
  304          */
  305         bufinit();
  306         vm_pager_bufferinit();
  307         cpu_setregs();
  308 }
  309 
  310 /*
  311  * Send an interrupt to process.
  312  *
  313  * Stack is set up to allow sigcode stored
  314  * at top to call routine, followed by call
  315  * to sigreturn routine below.  After sigreturn
  316  * resets the signal mask, the stack, and the
  317  * frame pointer, it returns to the user
  318  * specified pc, psl.
  319  */
  320 #ifdef COMPAT_43
       /*
        * COMPAT_43: deliver a signal using the old 4.3BSD osigframe layout.
        * Entered with the proc lock and ps_mtx held; both are dropped around
        * the copyout() and re-acquired before returning (see asserts below).
        */
   321 static void
   322 osendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
   323 {
   324         struct osigframe sf, *fp;
   325         struct proc *p;
   326         struct thread *td;
   327         struct sigacts *psp;
   328         struct trapframe *regs;
   329         int sig;
   330         int oonstack;
   331 
   332         td = curthread;
   333         p = td->td_proc;
   334         PROC_LOCK_ASSERT(p, MA_OWNED);
   335         sig = ksi->ksi_signo;
   336         psp = p->p_sigacts;
   337         mtx_assert(&psp->ps_mtx, MA_OWNED);
   338         regs = td->td_frame;
               /* Nonzero if the interrupted esp already lies on the alt stack. */
   339         oonstack = sigonstack(regs->tf_esp);
   340 
   341         /* Allocate space for the signal handler context. */
   342         if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
   343             SIGISMEMBER(psp->ps_sigonstack, sig)) {
   344                 fp = (struct osigframe *)((uintptr_t)td->td_sigstk.ss_sp +
   345                     td->td_sigstk.ss_size - sizeof(struct osigframe));
   346 #if defined(COMPAT_43)
   347                 td->td_sigstk.ss_flags |= SS_ONSTACK;
   348 #endif
   349         } else
                       /* No alt stack: frame goes just below the user's esp. */
   350                 fp = (struct osigframe *)regs->tf_esp - 1;
   351 
   352         /* Build the argument list for the signal handler. */
   353         sf.sf_signum = sig;
   354         sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc;
   355         bzero(&sf.sf_siginfo, sizeof(sf.sf_siginfo));
   356         if (SIGISMEMBER(psp->ps_siginfo, sig)) {
   357                 /* Signal handler installed with SA_SIGINFO. */
   358                 sf.sf_arg2 = (register_t)&fp->sf_siginfo;
   359                 sf.sf_siginfo.si_signo = sig;
   360                 sf.sf_siginfo.si_code = ksi->ksi_code;
   361                 sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher;
   362                 sf.sf_addr = 0;
   363         } else {
   364                 /* Old FreeBSD-style arguments. */
   365                 sf.sf_arg2 = ksi->ksi_code;
   366                 sf.sf_addr = (register_t)ksi->ksi_addr;
   367                 sf.sf_ahu.sf_handler = catcher;
   368         }
               /* Drop the locks before touching pageable user memory below. */
   369         mtx_unlock(&psp->ps_mtx);
   370         PROC_UNLOCK(p);
   371 
   372         /* Save most if not all of trap frame. */
   373         sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax;
   374         sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx;
   375         sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx;
   376         sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx;
   377         sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi;
   378         sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi;
   379         sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs;
   380         sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds;
   381         sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss;
   382         sf.sf_siginfo.si_sc.sc_es = regs->tf_es;
   383         sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs;
               /* %gs is not saved in the trapframe; read the live register. */
   384         sf.sf_siginfo.si_sc.sc_gs = rgs();
   385         sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp;
   386 
   387         /* Build the signal context to be used by osigreturn(). */
   388         sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0;
   389         SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask);
   390         sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp;
   391         sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp;
   392         sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip;
   393         sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags;
   394         sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno;
   395         sf.sf_siginfo.si_sc.sc_err = regs->tf_err;
   396 
   397         /*
   398          * If we're a vm86 process, we want to save the segment registers.
   399          * We also change eflags to be our emulated eflags, not the actual
   400          * eflags.
   401          */
   402         if (regs->tf_eflags & PSL_VM) {
   403                 /* XXX confusing names: `tf' isn't a trapframe; `regs' is. */
   404                 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
   405                 struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86;
   406 
   407                 sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs;
   408                 sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs;
   409                 sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es;
   410                 sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds;
   411 
   412                 if (vm86->vm86_has_vme == 0)
   413                         sf.sf_siginfo.si_sc.sc_ps =
   414                             (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
   415                             (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
   416 
   417                 /* See sendsig() for comments. */
   418                 tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
   419         }
   420 
   421         /*
   422          * Copy the sigframe out to the user's stack.
   423          */
   424         if (copyout(&sf, fp, sizeof(*fp)) != 0) {
                       /* Cannot write the frame: terminate the process. */
   425                 PROC_LOCK(p);
   426                 sigexit(td, SIGILL);
   427         }
   428 
               /* Rewrite the trapframe so the return to user mode enters the
                * signal trampoline with a sane flat segment setup. */
   429         regs->tf_esp = (int)fp;
   430         if (p->p_sysent->sv_sigcode_base != 0) {
   431                 regs->tf_eip = p->p_sysent->sv_sigcode_base + szsigcode -
   432                     szosigcode;
   433         } else {
   434                 /* a.out sysentvec does not use shared page */
   435                 regs->tf_eip = p->p_sysent->sv_psstrings - szosigcode;
   436         }
   437         regs->tf_eflags &= ~(PSL_T | PSL_D);
   438         regs->tf_cs = _ucodesel;
   439         regs->tf_ds = _udatasel;
   440         regs->tf_es = _udatasel;
   441         regs->tf_fs = _udatasel;
   442         load_gs(_udatasel);
   443         regs->tf_ss = _udatasel;
               /* Re-acquire the locks the caller expects to still hold. */
   444         PROC_LOCK(p);
   445         mtx_lock(&psp->ps_mtx);
   446 }
  447 #endif /* COMPAT_43 */
  448 
  449 #ifdef COMPAT_FREEBSD4
       /*
        * COMPAT_FREEBSD4: deliver a signal using the FreeBSD 4.x sigframe4
        * layout.  Entered with the proc lock and ps_mtx held; both are
        * dropped around the copyout() and re-acquired before returning.
        */
   450 static void
   451 freebsd4_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
   452 {
   453         struct sigframe4 sf, *sfp;
   454         struct proc *p;
   455         struct thread *td;
   456         struct sigacts *psp;
   457         struct trapframe *regs;
   458         int sig;
   459         int oonstack;
   460 
   461         td = curthread;
   462         p = td->td_proc;
   463         PROC_LOCK_ASSERT(p, MA_OWNED);
   464         sig = ksi->ksi_signo;
   465         psp = p->p_sigacts;
   466         mtx_assert(&psp->ps_mtx, MA_OWNED);
   467         regs = td->td_frame;
               /* Nonzero if the interrupted esp already lies on the alt stack. */
   468         oonstack = sigonstack(regs->tf_esp);
   469 
   470         /* Save user context. */
   471         bzero(&sf, sizeof(sf));
   472         sf.sf_uc.uc_sigmask = *mask;
   473         sf.sf_uc.uc_stack = td->td_sigstk;
   474         sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
   475             ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
   476         sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
               /* %gs is not saved in the trapframe; read the live register. */
   477         sf.sf_uc.uc_mcontext.mc_gs = rgs();
               /* Copy the whole trapframe starting at the mc_fs member. */
   478         bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs));
   479         bzero(sf.sf_uc.uc_mcontext.mc_fpregs,
   480             sizeof(sf.sf_uc.uc_mcontext.mc_fpregs));
   481         bzero(sf.sf_uc.uc_mcontext.__spare__,
   482             sizeof(sf.sf_uc.uc_mcontext.__spare__));
   483         bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__));
   484 
   485         /* Allocate space for the signal handler context. */
   486         if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
   487             SIGISMEMBER(psp->ps_sigonstack, sig)) {
   488                 sfp = (struct sigframe4 *)((uintptr_t)td->td_sigstk.ss_sp +
   489                     td->td_sigstk.ss_size - sizeof(struct sigframe4));
   490 #if defined(COMPAT_43)
   491                 td->td_sigstk.ss_flags |= SS_ONSTACK;
   492 #endif
   493         } else
                       /* No alt stack: frame goes just below the user's esp. */
   494                 sfp = (struct sigframe4 *)regs->tf_esp - 1;
   495 
   496         /* Build the argument list for the signal handler. */
   497         sf.sf_signum = sig;
   498         sf.sf_ucontext = (register_t)&sfp->sf_uc;
   499         bzero(&sf.sf_si, sizeof(sf.sf_si));
   500         if (SIGISMEMBER(psp->ps_siginfo, sig)) {
   501                 /* Signal handler installed with SA_SIGINFO. */
   502                 sf.sf_siginfo = (register_t)&sfp->sf_si;
   503                 sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;
   504 
   505                 /* Fill in POSIX parts */
   506                 sf.sf_si.si_signo = sig;
   507                 sf.sf_si.si_code = ksi->ksi_code;
   508                 sf.sf_si.si_addr = ksi->ksi_addr;
   509         } else {
   510                 /* Old FreeBSD-style arguments. */
   511                 sf.sf_siginfo = ksi->ksi_code;
   512                 sf.sf_addr = (register_t)ksi->ksi_addr;
   513                 sf.sf_ahu.sf_handler = catcher;
   514         }
               /* Drop the locks before touching pageable user memory below. */
   515         mtx_unlock(&psp->ps_mtx);
   516         PROC_UNLOCK(p);
   517 
   518         /*
   519          * If we're a vm86 process, we want to save the segment registers.
   520          * We also change eflags to be our emulated eflags, not the actual
   521          * eflags.
   522          */
   523         if (regs->tf_eflags & PSL_VM) {
   524                 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
   525                 struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86;
   526 
   527                 sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs;
   528                 sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs;
   529                 sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es;
   530                 sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds;
   531 
   532                 if (vm86->vm86_has_vme == 0)
   533                         sf.sf_uc.uc_mcontext.mc_eflags =
   534                             (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
   535                             (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
   536 
   537                 /*
   538                  * Clear PSL_NT to inhibit T_TSSFLT faults on return from
   539                  * syscalls made by the signal handler.  This just avoids
   540                  * wasting time for our lazy fixup of such faults.  PSL_NT
   541                  * does nothing in vm86 mode, but vm86 programs can set it
   542                  * almost legitimately in probes for old cpu types.
   543                  */
   544                 tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
   545         }
   546 
   547         /*
   548          * Copy the sigframe out to the user's stack.
   549          */
   550         if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
                       /* Cannot write the frame: terminate the process. */
   551                 PROC_LOCK(p);
   552                 sigexit(td, SIGILL);
   553         }
   554 
               /* Point the return-to-user frame at the FreeBSD 4 trampoline. */
   555         regs->tf_esp = (int)sfp;
   556         regs->tf_eip = p->p_sysent->sv_sigcode_base + szsigcode -
   557             szfreebsd4_sigcode;
   558         regs->tf_eflags &= ~(PSL_T | PSL_D);
   559         regs->tf_cs = _ucodesel;
   560         regs->tf_ds = _udatasel;
   561         regs->tf_es = _udatasel;
   562         regs->tf_fs = _udatasel;
   563         regs->tf_ss = _udatasel;
               /* Re-acquire the locks the caller expects to still hold. */
   564         PROC_LOCK(p);
   565         mtx_lock(&psp->ps_mtx);
   566 }
  567 #endif  /* COMPAT_FREEBSD4 */
  568 
       /*
        * Native signal delivery: build a struct sigframe (and, when XSAVE is
        * in use, the extended FPU state area) on the user's stack and point
        * the trapframe at the signal trampoline.  Dispatches to the
        * COMPAT_FREEBSD4 / COMPAT_43 variants for processes using the old
        * ABIs.  Entered with the proc lock and ps_mtx held; both are dropped
        * around the copyout()s and re-acquired before returning.
        */
   569 void
   570 sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
   571 {
   572         struct sigframe sf, *sfp;
   573         struct proc *p;
   574         struct thread *td;
   575         struct sigacts *psp;
   576         char *sp;
   577         struct trapframe *regs;
   578         struct segment_descriptor *sdp;
   579         char *xfpusave;
   580         size_t xfpusave_len;
   581         int sig;
   582         int oonstack;
   583 
   584         td = curthread;
   585         p = td->td_proc;
   586         PROC_LOCK_ASSERT(p, MA_OWNED);
   587         sig = ksi->ksi_signo;
   588         psp = p->p_sigacts;
   589         mtx_assert(&psp->ps_mtx, MA_OWNED);
   590 #ifdef COMPAT_FREEBSD4
   591         if (SIGISMEMBER(psp->ps_freebsd4, sig)) {
   592                 freebsd4_sendsig(catcher, ksi, mask);
   593                 return;
   594         }
   595 #endif
   596 #ifdef COMPAT_43
   597         if (SIGISMEMBER(psp->ps_osigset, sig)) {
   598                 osendsig(catcher, ksi, mask);
   599                 return;
   600         }
   601 #endif
   602         regs = td->td_frame;
               /* Nonzero if the interrupted esp already lies on the alt stack. */
   603         oonstack = sigonstack(regs->tf_esp);
   604 
               /*
                * When xsave is in use, stage the extended FPU state beyond the
                * legacy savefpu area in a temporary buffer alloca'd on the
                * kernel stack; it is copied out to user space further below.
                */
   605         if (cpu_max_ext_state_size > sizeof(union savefpu) && use_xsave) {
   606                 xfpusave_len = cpu_max_ext_state_size - sizeof(union savefpu);
   607                 xfpusave = __builtin_alloca(xfpusave_len);
   608         } else {
   609                 xfpusave_len = 0;
   610                 xfpusave = NULL;
   611         }
   612 
   613         /* Save user context. */
   614         bzero(&sf, sizeof(sf));
   615         sf.sf_uc.uc_sigmask = *mask;
   616         sf.sf_uc.uc_stack = td->td_sigstk;
   617         sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
   618             ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
   619         sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
               /* %gs is not saved in the trapframe; read the live register. */
   620         sf.sf_uc.uc_mcontext.mc_gs = rgs();
               /* Copy the whole trapframe starting at the mc_fs member. */
   621         bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs));
   622         sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
   623         get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len);
   624         fpstate_drop(td);
   625         /*
   626          * Unconditionally fill the fsbase and gsbase into the mcontext.
   627          */
   628         sdp = &td->td_pcb->pcb_fsd;
   629         sf.sf_uc.uc_mcontext.mc_fsbase = sdp->sd_hibase << 24 |
   630             sdp->sd_lobase;
   631         sdp = &td->td_pcb->pcb_gsd;
   632         sf.sf_uc.uc_mcontext.mc_gsbase = sdp->sd_hibase << 24 |
   633             sdp->sd_lobase;
   634         bzero(sf.sf_uc.uc_mcontext.mc_spare2,
   635             sizeof(sf.sf_uc.uc_mcontext.mc_spare2));
   636 
   637         /* Allocate space for the signal handler context. */
   638         if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
   639             SIGISMEMBER(psp->ps_sigonstack, sig)) {
   640                 sp = (char *)td->td_sigstk.ss_sp + td->td_sigstk.ss_size;
   641 #if defined(COMPAT_43)
   642                 td->td_sigstk.ss_flags |= SS_ONSTACK;
   643 #endif
   644         } else
                       /* Leave a 128-byte gap below the user's current esp. */
   645                 sp = (char *)regs->tf_esp - 128;
   646         if (xfpusave != NULL) {
   647                 sp -= xfpusave_len;
                       /* Align the user-side xsave area to 64 bytes. */
   648                 sp = (char *)((unsigned int)sp & ~0x3F);
   649                 sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp;
   650         }
   651         sp -= sizeof(struct sigframe);
   652 
   653         /* Align to 16 bytes. */
   654         sfp = (struct sigframe *)((unsigned int)sp & ~0xF);
   655 
   656         /* Build the argument list for the signal handler. */
   657         sf.sf_signum = sig;
   658         sf.sf_ucontext = (register_t)&sfp->sf_uc;
   659         bzero(&sf.sf_si, sizeof(sf.sf_si));
   660         if (SIGISMEMBER(psp->ps_siginfo, sig)) {
   661                 /* Signal handler installed with SA_SIGINFO. */
   662                 sf.sf_siginfo = (register_t)&sfp->sf_si;
   663                 sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;
   664 
   665                 /* Fill in POSIX parts */
   666                 sf.sf_si = ksi->ksi_info;
   667                 sf.sf_si.si_signo = sig; /* maybe a translated signal */
   668         } else {
   669                 /* Old FreeBSD-style arguments. */
   670                 sf.sf_siginfo = ksi->ksi_code;
   671                 sf.sf_addr = (register_t)ksi->ksi_addr;
   672                 sf.sf_ahu.sf_handler = catcher;
   673         }
               /* Drop the locks before touching pageable user memory below. */
   674         mtx_unlock(&psp->ps_mtx);
   675         PROC_UNLOCK(p);
   676 
   677         /*
   678          * If we're a vm86 process, we want to save the segment registers.
   679          * We also change eflags to be our emulated eflags, not the actual
   680          * eflags.
   681          */
   682         if (regs->tf_eflags & PSL_VM) {
   683                 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
   684                 struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86;
   685 
   686                 sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs;
   687                 sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs;
   688                 sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es;
   689                 sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds;
   690 
   691                 if (vm86->vm86_has_vme == 0)
   692                         sf.sf_uc.uc_mcontext.mc_eflags =
   693                             (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
   694                             (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
   695 
   696                 /*
   697                  * Clear PSL_NT to inhibit T_TSSFLT faults on return from
   698                  * syscalls made by the signal handler.  This just avoids
   699                  * wasting time for our lazy fixup of such faults.  PSL_NT
   700                  * does nothing in vm86 mode, but vm86 programs can set it
   701                  * almost legitimately in probes for old cpu types.
   702                  */
   703                 tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
   704         }
   705 
   706         /*
   707          * Copy the sigframe out to the user's stack.
   708          */
   709         if (copyout(&sf, sfp, sizeof(*sfp)) != 0 ||
   710             (xfpusave != NULL && copyout(xfpusave,
   711             (void *)sf.sf_uc.uc_mcontext.mc_xfpustate, xfpusave_len)
   712             != 0)) {
                       /* Cannot write the frame: terminate the process. */
   713                 PROC_LOCK(p);
   714                 sigexit(td, SIGILL);
   715         }
   716 
               /* Rewrite the trapframe so the return to user mode enters the
                * signal trampoline with a sane flat segment setup. */
   717         regs->tf_esp = (int)sfp;
   718         regs->tf_eip = p->p_sysent->sv_sigcode_base;
   719         if (regs->tf_eip == 0)
                       /* No shared page: trampoline sits below ps_strings. */
   720                 regs->tf_eip = p->p_sysent->sv_psstrings - szsigcode;
   721         regs->tf_eflags &= ~(PSL_T | PSL_D);
   722         regs->tf_cs = _ucodesel;
   723         regs->tf_ds = _udatasel;
   724         regs->tf_es = _udatasel;
   725         regs->tf_fs = _udatasel;
   726         regs->tf_ss = _udatasel;
               /* Re-acquire the locks the caller expects to still hold. */
   727         PROC_LOCK(p);
   728         mtx_lock(&psp->ps_mtx);
   729 }
  730 
  731 /*
  732  * System call to cleanup state after a signal
  733  * has been taken.  Reset signal mask and
  734  * stack state from context left by sendsig (above).
  735  * Return to previous pc and psl as specified by
  736  * context left by sendsig. Check carefully to
  737  * make sure that the user has not modified the
  738  * state to gain improper privileges.
  739  *
  740  * MPSAFE
  741  */
#ifdef COMPAT_43
/*
 * Old 4.3BSD-style sigreturn: restore the register state and signal
 * mask recorded in the osigcontext that the (old-style) signal
 * trampoline left on the user stack.  Everything in *uap->sigcntxp is
 * user-supplied and must be sanitized before it is loaded into the
 * trapframe, so that the caller cannot gain privilege by editing the
 * saved context.
 *
 * Returns EJUSTRETURN on success (the trapframe itself carries the
 * restored register values back to user mode), or an errno on a
 * malformed context.
 */
int
osigreturn(td, uap)
	struct thread *td;
	struct osigreturn_args /* {
		struct osigcontext *sigcntxp;
	} */ *uap;
{
	struct osigcontext sc;
	struct trapframe *regs;
	struct osigcontext *scp;
	int eflags, error;
	ksiginfo_t ksi;

	regs = td->td_frame;
	/* Snapshot the whole context into kernel memory before use. */
	error = copyin(uap->sigcntxp, &sc, sizeof(sc));
	if (error != 0)
		return (error);
	scp = &sc;
	eflags = scp->sc_ps;
	if (eflags & PSL_VM) {
		/* Returning into vm86 (virtual-8086) mode. */
		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
		struct vm86_kernel *vm86;

		/*
		 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
		 * set up the vm86 area, and we can't enter vm86 mode.
		 */
		if (td->td_pcb->pcb_ext == 0)
			return (EINVAL);
		vm86 = &td->td_pcb->pcb_ext->ext_vm86;
		if (vm86->vm86_inited == 0)
			return (EINVAL);

		/* Go back to user mode if both flags are set. */
		if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) {
			ksiginfo_init_trap(&ksi);
			ksi.ksi_signo = SIGBUS;
			ksi.ksi_code = BUS_OBJERR;
			ksi.ksi_addr = (void *)regs->tf_eip;
			trapsignal(td, &ksi);
		}

		/*
		 * Merge the user-controllable eflags bits with the kernel's
		 * copy; which bits are user-changeable depends on whether
		 * the CPU has VME support.
		 */
		if (vm86->vm86_has_vme) {
			eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
			    (eflags & VME_USERCHANGE) | PSL_VM;
		} else {
			vm86->vm86_eflags = eflags;	/* save VIF, VIP */
			eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
			    (eflags & VM_USERCHANGE) | PSL_VM;
		}
		tf->tf_vm86_ds = scp->sc_ds;
		tf->tf_vm86_es = scp->sc_es;
		tf->tf_vm86_fs = scp->sc_fs;
		tf->tf_vm86_gs = scp->sc_gs;
		/* Kernel-visible selectors must stay the user data selector. */
		tf->tf_ds = _udatasel;
		tf->tf_es = _udatasel;
		tf->tf_fs = _udatasel;
	} else {
		/*
		 * Don't allow users to change privileged or reserved flags.
		 */
		if (!EFL_SECURE(eflags, regs->tf_eflags)) {
			return (EINVAL);
		}

		/*
		 * Don't allow users to load a valid privileged %cs.  Let the
		 * hardware check for invalid selectors, excess privilege in
		 * other selectors, invalid %eip's and invalid %esp's.
		 */
		if (!CS_SECURE(scp->sc_cs)) {
			ksiginfo_init_trap(&ksi);
			ksi.ksi_signo = SIGBUS;
			ksi.ksi_code = BUS_OBJERR;
			ksi.ksi_trapno = T_PROTFLT;
			ksi.ksi_addr = (void *)regs->tf_eip;
			trapsignal(td, &ksi);
			return (EINVAL);
		}
		regs->tf_ds = scp->sc_ds;
		regs->tf_es = scp->sc_es;
		regs->tf_fs = scp->sc_fs;
	}

	/* Restore remaining registers. */
	regs->tf_eax = scp->sc_eax;
	regs->tf_ebx = scp->sc_ebx;
	regs->tf_ecx = scp->sc_ecx;
	regs->tf_edx = scp->sc_edx;
	regs->tf_esi = scp->sc_esi;
	regs->tf_edi = scp->sc_edi;
	regs->tf_cs = scp->sc_cs;
	regs->tf_ss = scp->sc_ss;
	regs->tf_isp = scp->sc_isp;
	regs->tf_ebp = scp->sc_fp;
	regs->tf_esp = scp->sc_sp;
	regs->tf_eip = scp->sc_pc;
	regs->tf_eflags = eflags;

#if defined(COMPAT_43)
	/* Re-establish the saved on-signal-stack state. */
	if (scp->sc_onstack & 1)
		td->td_sigstk.ss_flags |= SS_ONSTACK;
	else
		td->td_sigstk.ss_flags &= ~SS_ONSTACK;
#endif
	/* Restore the pre-signal mask (old osigset_t layout). */
	kern_sigprocmask(td, SIG_SETMASK, (sigset_t *)&scp->sc_mask, NULL,
	    SIGPROCMASK_OLD);
	return (EJUSTRETURN);
}
#endif /* COMPAT_43 */
  853 
#ifdef COMPAT_FREEBSD4
/*
 * FreeBSD 4.x-compatible sigreturn: restore register state and signal
 * mask from the ucontext4 left on the user stack by the 4.x signal
 * trampoline.  The context is user-supplied and is sanitized (eflags,
 * %cs) before it is copied into the trapframe.
 *
 * MPSAFE
 */
int
freebsd4_sigreturn(td, uap)
	struct thread *td;
	struct freebsd4_sigreturn_args /* {
		const ucontext4 *sigcntxp;
	} */ *uap;
{
	struct ucontext4 uc;
	struct trapframe *regs;
	struct ucontext4 *ucp;
	int cs, eflags, error;
	ksiginfo_t ksi;

	/* Snapshot the context into kernel memory before validating it. */
	error = copyin(uap->sigcntxp, &uc, sizeof(uc));
	if (error != 0)
		return (error);
	ucp = &uc;
	regs = td->td_frame;
	eflags = ucp->uc_mcontext.mc_eflags;
	if (eflags & PSL_VM) {
		/* Returning into vm86 (virtual-8086) mode. */
		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
		struct vm86_kernel *vm86;

		/*
		 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
		 * set up the vm86 area, and we can't enter vm86 mode.
		 */
		if (td->td_pcb->pcb_ext == 0)
			return (EINVAL);
		vm86 = &td->td_pcb->pcb_ext->ext_vm86;
		if (vm86->vm86_inited == 0)
			return (EINVAL);

		/* Go back to user mode if both flags are set. */
		if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) {
			ksiginfo_init_trap(&ksi);
			ksi.ksi_signo = SIGBUS;
			ksi.ksi_code = BUS_OBJERR;
			ksi.ksi_addr = (void *)regs->tf_eip;
			trapsignal(td, &ksi);
		}
		/*
		 * Merge user-changeable eflags bits with the kernel's copy;
		 * the changeable set depends on hardware VME support.
		 */
		if (vm86->vm86_has_vme) {
			eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
			    (eflags & VME_USERCHANGE) | PSL_VM;
		} else {
			vm86->vm86_eflags = eflags;	/* save VIF, VIP */
			eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
			    (eflags & VM_USERCHANGE) | PSL_VM;
		}
		/* mc_fs onward mirrors the trapframe layout; bulk-copy it. */
		bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe));
		tf->tf_eflags = eflags;
		tf->tf_vm86_ds = tf->tf_ds;
		tf->tf_vm86_es = tf->tf_es;
		tf->tf_vm86_fs = tf->tf_fs;
		tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs;
		/* Kernel-visible selectors must stay the user data selector. */
		tf->tf_ds = _udatasel;
		tf->tf_es = _udatasel;
		tf->tf_fs = _udatasel;
	} else {
		/*
		 * Don't allow users to change privileged or reserved flags.
		 */
		if (!EFL_SECURE(eflags, regs->tf_eflags)) {
			uprintf("pid %d (%s): freebsd4_sigreturn eflags = 0x%x\n",
			    td->td_proc->p_pid, td->td_name, eflags);
			return (EINVAL);
		}

		/*
		 * Don't allow users to load a valid privileged %cs.  Let the
		 * hardware check for invalid selectors, excess privilege in
		 * other selectors, invalid %eip's and invalid %esp's.
		 */
		cs = ucp->uc_mcontext.mc_cs;
		if (!CS_SECURE(cs)) {
			uprintf("pid %d (%s): freebsd4_sigreturn cs = 0x%x\n",
			    td->td_proc->p_pid, td->td_name, cs);
			ksiginfo_init_trap(&ksi);
			ksi.ksi_signo = SIGBUS;
			ksi.ksi_code = BUS_OBJERR;
			ksi.ksi_trapno = T_PROTFLT;
			ksi.ksi_addr = (void *)regs->tf_eip;
			trapsignal(td, &ksi);
			return (EINVAL);
		}

		bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
	}

#if defined(COMPAT_43)
	/* Re-establish the saved on-signal-stack state. */
	if (ucp->uc_mcontext.mc_onstack & 1)
		td->td_sigstk.ss_flags |= SS_ONSTACK;
	else
		td->td_sigstk.ss_flags &= ~SS_ONSTACK;
#endif
	kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0);
	return (EJUSTRETURN);
}
#endif	/* COMPAT_FREEBSD4 */
  957 
  958 /*
  959  * MPSAFE
  960  */
  961 int
  962 sys_sigreturn(td, uap)
  963         struct thread *td;
  964         struct sigreturn_args /* {
  965                 const struct __ucontext *sigcntxp;
  966         } */ *uap;
  967 {
  968         ucontext_t uc;
  969         struct proc *p;
  970         struct trapframe *regs;
  971         ucontext_t *ucp;
  972         char *xfpustate;
  973         size_t xfpustate_len;
  974         int cs, eflags, error, ret;
  975         ksiginfo_t ksi;
  976 
  977         p = td->td_proc;
  978 
  979         error = copyin(uap->sigcntxp, &uc, sizeof(uc));
  980         if (error != 0)
  981                 return (error);
  982         ucp = &uc;
  983         if ((ucp->uc_mcontext.mc_flags & ~_MC_FLAG_MASK) != 0) {
  984                 uprintf("pid %d (%s): sigreturn mc_flags %x\n", p->p_pid,
  985                     td->td_name, ucp->uc_mcontext.mc_flags);
  986                 return (EINVAL);
  987         }
  988         regs = td->td_frame;
  989         eflags = ucp->uc_mcontext.mc_eflags;
  990         if (eflags & PSL_VM) {
  991                 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
  992                 struct vm86_kernel *vm86;
  993 
  994                 /*
  995                  * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
  996                  * set up the vm86 area, and we can't enter vm86 mode.
  997                  */
  998                 if (td->td_pcb->pcb_ext == 0)
  999                         return (EINVAL);
 1000                 vm86 = &td->td_pcb->pcb_ext->ext_vm86;
 1001                 if (vm86->vm86_inited == 0)
 1002                         return (EINVAL);
 1003 
 1004                 /* Go back to user mode if both flags are set. */
 1005                 if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) {
 1006                         ksiginfo_init_trap(&ksi);
 1007                         ksi.ksi_signo = SIGBUS;
 1008                         ksi.ksi_code = BUS_OBJERR;
 1009                         ksi.ksi_addr = (void *)regs->tf_eip;
 1010                         trapsignal(td, &ksi);
 1011                 }
 1012 
 1013                 if (vm86->vm86_has_vme) {
 1014                         eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
 1015                             (eflags & VME_USERCHANGE) | PSL_VM;
 1016                 } else {
 1017                         vm86->vm86_eflags = eflags;     /* save VIF, VIP */
 1018                         eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
 1019                             (eflags & VM_USERCHANGE) | PSL_VM;
 1020                 }
 1021                 bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe));
 1022                 tf->tf_eflags = eflags;
 1023                 tf->tf_vm86_ds = tf->tf_ds;
 1024                 tf->tf_vm86_es = tf->tf_es;
 1025                 tf->tf_vm86_fs = tf->tf_fs;
 1026                 tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs;
 1027                 tf->tf_ds = _udatasel;
 1028                 tf->tf_es = _udatasel;
 1029                 tf->tf_fs = _udatasel;
 1030         } else {
 1031                 /*
 1032                  * Don't allow users to change privileged or reserved flags.
 1033                  */
 1034                 if (!EFL_SECURE(eflags, regs->tf_eflags)) {
 1035                         uprintf("pid %d (%s): sigreturn eflags = 0x%x\n",
 1036                             td->td_proc->p_pid, td->td_name, eflags);
 1037                         return (EINVAL);
 1038                 }
 1039 
 1040                 /*
 1041                  * Don't allow users to load a valid privileged %cs.  Let the
 1042                  * hardware check for invalid selectors, excess privilege in
 1043                  * other selectors, invalid %eip's and invalid %esp's.
 1044                  */
 1045                 cs = ucp->uc_mcontext.mc_cs;
 1046                 if (!CS_SECURE(cs)) {
 1047                         uprintf("pid %d (%s): sigreturn cs = 0x%x\n",
 1048                             td->td_proc->p_pid, td->td_name, cs);
 1049                         ksiginfo_init_trap(&ksi);
 1050                         ksi.ksi_signo = SIGBUS;
 1051                         ksi.ksi_code = BUS_OBJERR;
 1052                         ksi.ksi_trapno = T_PROTFLT;
 1053                         ksi.ksi_addr = (void *)regs->tf_eip;
 1054                         trapsignal(td, &ksi);
 1055                         return (EINVAL);
 1056                 }
 1057 
 1058                 if ((uc.uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) {
 1059                         xfpustate_len = uc.uc_mcontext.mc_xfpustate_len;
 1060                         if (xfpustate_len > cpu_max_ext_state_size -
 1061                             sizeof(union savefpu)) {
 1062                                 uprintf(
 1063                             "pid %d (%s): sigreturn xfpusave_len = 0x%zx\n",
 1064                                     p->p_pid, td->td_name, xfpustate_len);
 1065                                 return (EINVAL);
 1066                         }
 1067                         xfpustate = __builtin_alloca(xfpustate_len);
 1068                         error = copyin((const void *)uc.uc_mcontext.mc_xfpustate,
 1069                             xfpustate, xfpustate_len);
 1070                         if (error != 0) {
 1071                                 uprintf(
 1072         "pid %d (%s): sigreturn copying xfpustate failed\n",
 1073                                     p->p_pid, td->td_name);
 1074                                 return (error);
 1075                         }
 1076                 } else {
 1077                         xfpustate = NULL;
 1078                         xfpustate_len = 0;
 1079                 }
 1080                 ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate,
 1081                     xfpustate_len);
 1082                 if (ret != 0)
 1083                         return (ret);
 1084                 bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
 1085         }
 1086 
 1087 #if defined(COMPAT_43)
 1088         if (ucp->uc_mcontext.mc_onstack & 1)
 1089                 td->td_sigstk.ss_flags |= SS_ONSTACK;
 1090         else
 1091                 td->td_sigstk.ss_flags &= ~SS_ONSTACK;
 1092 #endif
 1093 
 1094         kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0);
 1095         return (EJUSTRETURN);
 1096 }
 1097 
 1098 #ifdef COMPAT_43
 1099 static void
 1100 setup_priv_lcall_gate(struct proc *p)
 1101 {
 1102         struct i386_ldt_args uap;
 1103         union descriptor desc;
 1104         u_int lcall_addr;
 1105 
 1106         bzero(&uap, sizeof(uap));
 1107         uap.start = 0;
 1108         uap.num = 1;
 1109         lcall_addr = p->p_sysent->sv_psstrings - sz_lcall_tramp;
 1110         bzero(&desc, sizeof(desc));
 1111         desc.sd.sd_type = SDT_MEMERA;
 1112         desc.sd.sd_dpl = SEL_UPL;
 1113         desc.sd.sd_p = 1;
 1114         desc.sd.sd_def32 = 1;
 1115         desc.sd.sd_gran = 1;
 1116         desc.sd.sd_lolimit = 0xffff;
 1117         desc.sd.sd_hilimit = 0xf;
 1118         desc.sd.sd_lobase = lcall_addr;
 1119         desc.sd.sd_hibase = lcall_addr >> 24;
 1120         i386_set_ldt(curthread, &uap, &desc);
 1121 }
 1122 #endif
 1123 
 1124 /*
 1125  * Reset registers to default values on exec.
 1126  */
 1127 void
 1128 exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack)
 1129 {
 1130         struct trapframe *regs;
 1131         struct pcb *pcb;
 1132         register_t saved_eflags;
 1133 
 1134         regs = td->td_frame;
 1135         pcb = td->td_pcb;
 1136 
 1137         /* Reset pc->pcb_gs and %gs before possibly invalidating it. */
 1138         pcb->pcb_gs = _udatasel;
 1139         load_gs(_udatasel);
 1140 
 1141         mtx_lock_spin(&dt_lock);
 1142         if (td->td_proc->p_md.md_ldt != NULL)
 1143                 user_ldt_free(td);
 1144         else
 1145                 mtx_unlock_spin(&dt_lock);
 1146 
 1147 #ifdef COMPAT_43
 1148         if (td->td_proc->p_sysent->sv_psstrings !=
 1149             elf32_freebsd_sysvec.sv_psstrings)
 1150                 setup_priv_lcall_gate(td->td_proc);
 1151 #endif
 1152   
 1153         /*
 1154          * Reset the fs and gs bases.  The values from the old address
 1155          * space do not make sense for the new program.  In particular,
 1156          * gsbase might be the TLS base for the old program but the new
 1157          * program has no TLS now.
 1158          */
 1159         set_fsbase(td, 0);
 1160         set_gsbase(td, 0);
 1161 
 1162         /* Make sure edx is 0x0 on entry. Linux binaries depend on it. */
 1163         saved_eflags = regs->tf_eflags & PSL_T;
 1164         bzero((char *)regs, sizeof(struct trapframe));
 1165         regs->tf_eip = imgp->entry_addr;
 1166         regs->tf_esp = stack;
 1167         regs->tf_eflags = PSL_USER | saved_eflags;
 1168         regs->tf_ss = _udatasel;
 1169         regs->tf_ds = _udatasel;
 1170         regs->tf_es = _udatasel;
 1171         regs->tf_fs = _udatasel;
 1172         regs->tf_cs = _ucodesel;
 1173 
 1174         /* PS_STRINGS value for BSD/OS binaries.  It is 0 for non-BSD/OS. */
 1175         regs->tf_ebx = (register_t)imgp->ps_strings;
 1176 
 1177         /*
 1178          * Reset the hardware debug registers if they were in use.
 1179          * They won't have any meaning for the newly exec'd process.  
 1180          */
 1181         if (pcb->pcb_flags & PCB_DBREGS) {
 1182                 pcb->pcb_dr0 = 0;
 1183                 pcb->pcb_dr1 = 0;
 1184                 pcb->pcb_dr2 = 0;
 1185                 pcb->pcb_dr3 = 0;
 1186                 pcb->pcb_dr6 = 0;
 1187                 pcb->pcb_dr7 = 0;
 1188                 if (pcb == curpcb) {
 1189                         /*
 1190                          * Clear the debug registers on the running
 1191                          * CPU, otherwise they will end up affecting
 1192                          * the next process we switch to.
 1193                          */
 1194                         reset_dbregs();
 1195                 }
 1196                 pcb->pcb_flags &= ~PCB_DBREGS;
 1197         }
 1198 
 1199         pcb->pcb_initial_npxcw = __INITIAL_NPXCW__;
 1200 
 1201         /*
 1202          * Drop the FP state if we hold it, so that the process gets a
 1203          * clean FP state if it uses the FPU again.
 1204          */
 1205         fpstate_drop(td);
 1206 }
 1207 
 1208 void
 1209 cpu_setregs(void)
 1210 {
 1211         unsigned int cr0;
 1212 
 1213         cr0 = rcr0();
 1214 
 1215         /*
 1216          * CR0_MP, CR0_NE and CR0_TS are set for NPX (FPU) support:
 1217          *
 1218          * Prepare to trap all ESC (i.e., NPX) instructions and all WAIT
 1219          * instructions.  We must set the CR0_MP bit and use the CR0_TS
 1220          * bit to control the trap, because setting the CR0_EM bit does
 1221          * not cause WAIT instructions to trap.  It's important to trap
 1222          * WAIT instructions - otherwise the "wait" variants of no-wait
 1223          * control instructions would degenerate to the "no-wait" variants
 1224          * after FP context switches but work correctly otherwise.  It's
 1225          * particularly important to trap WAITs when there is no NPX -
 1226          * otherwise the "wait" variants would always degenerate.
 1227          *
 1228          * Try setting CR0_NE to get correct error reporting on 486DX's.
 1229          * Setting it should fail or do nothing on lesser processors.
 1230          */
 1231         cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM;
 1232         load_cr0(cr0);
 1233         load_gs(_udatasel);
 1234 }
 1235 
u_long bootdev;		/* not a struct cdev *- encoding is different */
SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev,
	CTLFLAG_RD, &bootdev, 0, "Maybe the Boot device (not in struct cdev *format)");

/* Firmware interface used to boot; overwritten elsewhere if not BIOS. */
static char bootmethod[16] = "BIOS";
SYSCTL_STRING(_machdep, OID_AUTO, bootmethod, CTLFLAG_RD, bootmethod, 0,
    "System firmware boot method");

/*
 * Initialize 386 and configure to run kernel
 */

/*
 * Initialize segments & interrupt table
 */

int _default_ldt;	/* selector of the default (shared) LDT */

struct mtx dt_lock;			/* lock for GDT and LDT */

union descriptor gdt0[NGDT];	/* initial global descriptor table */
union descriptor *gdt = gdt0;	/* global descriptor table */

union descriptor *ldt;		/* local descriptor table */

static struct gate_descriptor idt0[NIDT];
struct gate_descriptor *idt = &idt0[0];	/* interrupt descriptor table */

/* TSS and stack for double-fault handling (per the names; initialized
 * later in this file — not visible in this chunk). */
static struct i386tss *dblfault_tss;
static char *dblfault_stack;

static struct i386tss common_tss0;	/* boot CPU's common TSS */

vm_offset_t proc0kstack;	/* kernel stack VA for proc0 */
 1270 
 1271 /*
 1272  * software prototypes -- in more palatable form.
 1273  *
 1274  * GCODE_SEL through GUDATA_SEL must be in this order for syscall/sysret
 1275  * GUFS_SEL and GUGS_SEL must be in this order (swtch.s knows it)
 1276  */
 1277 struct soft_segment_descriptor gdt_segs[] = {
 1278 /* GNULL_SEL    0 Null Descriptor */
 1279 {       .ssd_base = 0x0,
 1280         .ssd_limit = 0x0,
 1281         .ssd_type = 0,
 1282         .ssd_dpl = SEL_KPL,
 1283         .ssd_p = 0,
 1284         .ssd_xx = 0, .ssd_xx1 = 0,
 1285         .ssd_def32 = 0,
 1286         .ssd_gran = 0           },
 1287 /* GPRIV_SEL    1 SMP Per-Processor Private Data Descriptor */
 1288 {       .ssd_base = 0x0,
 1289         .ssd_limit = 0xfffff,
 1290         .ssd_type = SDT_MEMRWA,
 1291         .ssd_dpl = SEL_KPL,
 1292         .ssd_p = 1,
 1293         .ssd_xx = 0, .ssd_xx1 = 0,
 1294         .ssd_def32 = 1,
 1295         .ssd_gran = 1           },
 1296 /* GUFS_SEL     2 %fs Descriptor for user */
 1297 {       .ssd_base = 0x0,
 1298         .ssd_limit = 0xfffff,
 1299         .ssd_type = SDT_MEMRWA,
 1300         .ssd_dpl = SEL_UPL,
 1301         .ssd_p = 1,
 1302         .ssd_xx = 0, .ssd_xx1 = 0,
 1303         .ssd_def32 = 1,
 1304         .ssd_gran = 1           },
 1305 /* GUGS_SEL     3 %gs Descriptor for user */
 1306 {       .ssd_base = 0x0,
 1307         .ssd_limit = 0xfffff,
 1308         .ssd_type = SDT_MEMRWA,
 1309         .ssd_dpl = SEL_UPL,
 1310         .ssd_p = 1,
 1311         .ssd_xx = 0, .ssd_xx1 = 0,
 1312         .ssd_def32 = 1,
 1313         .ssd_gran = 1           },
 1314 /* GCODE_SEL    4 Code Descriptor for kernel */
 1315 {       .ssd_base = 0x0,
 1316         .ssd_limit = 0xfffff,
 1317         .ssd_type = SDT_MEMERA,
 1318         .ssd_dpl = SEL_KPL,
 1319         .ssd_p = 1,
 1320         .ssd_xx = 0, .ssd_xx1 = 0,
 1321         .ssd_def32 = 1,
 1322         .ssd_gran = 1           },
 1323 /* GDATA_SEL    5 Data Descriptor for kernel */
 1324 {       .ssd_base = 0x0,
 1325         .ssd_limit = 0xfffff,
 1326         .ssd_type = SDT_MEMRWA,
 1327         .ssd_dpl = SEL_KPL,
 1328         .ssd_p = 1,
 1329         .ssd_xx = 0, .ssd_xx1 = 0,
 1330         .ssd_def32 = 1,
 1331         .ssd_gran = 1           },
 1332 /* GUCODE_SEL   6 Code Descriptor for user */
 1333 {       .ssd_base = 0x0,
 1334         .ssd_limit = 0xfffff,
 1335         .ssd_type = SDT_MEMERA,
 1336         .ssd_dpl = SEL_UPL,
 1337         .ssd_p = 1,
 1338         .ssd_xx = 0, .ssd_xx1 = 0,
 1339         .ssd_def32 = 1,
 1340         .ssd_gran = 1           },
 1341 /* GUDATA_SEL   7 Data Descriptor for user */
 1342 {       .ssd_base = 0x0,
 1343         .ssd_limit = 0xfffff,
 1344         .ssd_type = SDT_MEMRWA,
 1345         .ssd_dpl = SEL_UPL,
 1346         .ssd_p = 1,
 1347         .ssd_xx = 0, .ssd_xx1 = 0,
 1348         .ssd_def32 = 1,
 1349         .ssd_gran = 1           },
 1350 /* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */
 1351 {       .ssd_base = 0x400,
 1352         .ssd_limit = 0xfffff,
 1353         .ssd_type = SDT_MEMRWA,
 1354         .ssd_dpl = SEL_KPL,
 1355         .ssd_p = 1,
 1356         .ssd_xx = 0, .ssd_xx1 = 0,
 1357         .ssd_def32 = 1,
 1358         .ssd_gran = 1           },
 1359 /* GPROC0_SEL   9 Proc 0 Tss Descriptor */
 1360 {
 1361         .ssd_base = 0x0,
 1362         .ssd_limit = sizeof(struct i386tss)-1,
 1363         .ssd_type = SDT_SYS386TSS,
 1364         .ssd_dpl = 0,
 1365         .ssd_p = 1,
 1366         .ssd_xx = 0, .ssd_xx1 = 0,
 1367         .ssd_def32 = 0,
 1368         .ssd_gran = 0           },
 1369 /* GLDT_SEL     10 LDT Descriptor */
 1370 {       .ssd_base = 0,
 1371         .ssd_limit = sizeof(union descriptor) * NLDT - 1,
 1372         .ssd_type = SDT_SYSLDT,
 1373         .ssd_dpl = SEL_UPL,
 1374         .ssd_p = 1,
 1375         .ssd_xx = 0, .ssd_xx1 = 0,
 1376         .ssd_def32 = 0,
 1377         .ssd_gran = 0           },
 1378 /* GUSERLDT_SEL 11 User LDT Descriptor per process */
 1379 {       .ssd_base = 0,
 1380         .ssd_limit = (512 * sizeof(union descriptor)-1),
 1381         .ssd_type = SDT_SYSLDT,
 1382         .ssd_dpl = 0,
 1383         .ssd_p = 1,
 1384         .ssd_xx = 0, .ssd_xx1 = 0,
 1385         .ssd_def32 = 0,
 1386         .ssd_gran = 0           },
 1387 /* GPANIC_SEL   12 Panic Tss Descriptor */
 1388 {       .ssd_base = 0,
 1389         .ssd_limit = sizeof(struct i386tss)-1,
 1390         .ssd_type = SDT_SYS386TSS,
 1391         .ssd_dpl = 0,
 1392         .ssd_p = 1,
 1393         .ssd_xx = 0, .ssd_xx1 = 0,
 1394         .ssd_def32 = 0,
 1395         .ssd_gran = 0           },
 1396 /* GBIOSCODE32_SEL 13 BIOS 32-bit interface (32bit Code) */
 1397 {       .ssd_base = 0,
 1398         .ssd_limit = 0xfffff,
 1399         .ssd_type = SDT_MEMERA,
 1400         .ssd_dpl = 0,
 1401         .ssd_p = 1,
 1402         .ssd_xx = 0, .ssd_xx1 = 0,
 1403         .ssd_def32 = 0,
 1404         .ssd_gran = 1           },
 1405 /* GBIOSCODE16_SEL 14 BIOS 32-bit interface (16bit Code) */
 1406 {       .ssd_base = 0,
 1407         .ssd_limit = 0xfffff,
 1408         .ssd_type = SDT_MEMERA,
 1409         .ssd_dpl = 0,
 1410         .ssd_p = 1,
 1411         .ssd_xx = 0, .ssd_xx1 = 0,
 1412         .ssd_def32 = 0,
 1413         .ssd_gran = 1           },
 1414 /* GBIOSDATA_SEL 15 BIOS 32-bit interface (Data) */
 1415 {       .ssd_base = 0,
 1416         .ssd_limit = 0xfffff,
 1417         .ssd_type = SDT_MEMRWA,
 1418         .ssd_dpl = 0,
 1419         .ssd_p = 1,
 1420         .ssd_xx = 0, .ssd_xx1 = 0,
 1421         .ssd_def32 = 1,
 1422         .ssd_gran = 1           },
 1423 /* GBIOSUTIL_SEL 16 BIOS 16-bit interface (Utility) */
 1424 {       .ssd_base = 0,
 1425         .ssd_limit = 0xfffff,
 1426         .ssd_type = SDT_MEMRWA,
 1427         .ssd_dpl = 0,
 1428         .ssd_p = 1,
 1429         .ssd_xx = 0, .ssd_xx1 = 0,
 1430         .ssd_def32 = 0,
 1431         .ssd_gran = 1           },
 1432 /* GBIOSARGS_SEL 17 BIOS 16-bit interface (Arguments) */
 1433 {       .ssd_base = 0,
 1434         .ssd_limit = 0xfffff,
 1435         .ssd_type = SDT_MEMRWA,
 1436         .ssd_dpl = 0,
 1437         .ssd_p = 1,
 1438         .ssd_xx = 0, .ssd_xx1 = 0,
 1439         .ssd_def32 = 0,
 1440         .ssd_gran = 1           },
 1441 /* GNDIS_SEL    18 NDIS Descriptor */
 1442 {       .ssd_base = 0x0,
 1443         .ssd_limit = 0x0,
 1444         .ssd_type = 0,
 1445         .ssd_dpl = 0,
 1446         .ssd_p = 0,
 1447         .ssd_xx = 0, .ssd_xx1 = 0,
 1448         .ssd_def32 = 0,
 1449         .ssd_gran = 0           },
 1450 };
 1451 
/*
 * Prototype entries for the default (shared) LDT.  Slots marked "Null"
 * are placeholders later overwritten with call gates; only the user
 * code and user data segments are populated here.
 */
static struct soft_segment_descriptor ldt_segs[] = {
	/* Null Descriptor - overwritten by call gate */
{	.ssd_base = 0x0,
	.ssd_limit = 0x0,
	.ssd_type = 0,
	.ssd_dpl = 0,
	.ssd_p = 0,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 0,
	.ssd_gran = 0		},
	/* Null Descriptor - overwritten by call gate */
{	.ssd_base = 0x0,
	.ssd_limit = 0x0,
	.ssd_type = 0,
	.ssd_dpl = 0,
	.ssd_p = 0,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 0,
	.ssd_gran = 0		},
	/* Null Descriptor - overwritten by call gate */
{	.ssd_base = 0x0,
	.ssd_limit = 0x0,
	.ssd_type = 0,
	.ssd_dpl = 0,
	.ssd_p = 0,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 0,
	.ssd_gran = 0		},
	/* Code Descriptor for user */
{	.ssd_base = 0x0,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMERA,
	.ssd_dpl = SEL_UPL,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 1,
	.ssd_gran = 1		},
	/* Null Descriptor - overwritten by call gate */
{	.ssd_base = 0x0,
	.ssd_limit = 0x0,
	.ssd_type = 0,
	.ssd_dpl = 0,
	.ssd_p = 0,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 0,
	.ssd_gran = 0		},
	/* Data Descriptor for user */
{	.ssd_base = 0x0,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMRWA,
	.ssd_dpl = SEL_UPL,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 1,
	.ssd_gran = 1		},
};
 1508 
/*
 * Displacement added to IDT handler addresses by setidt(); per the
 * usage in db_show_idt(), non-zero values relocate handlers into the
 * trampoline region at/above PMAP_TRM_MIN_ADDRESS.
 */
uintptr_t setidt_disp;
 1510 
 1511 void
 1512 setidt(int idx, inthand_t *func, int typ, int dpl, int selec)
 1513 {
 1514         uintptr_t off;
 1515 
 1516         off = func != NULL ? (uintptr_t)func + setidt_disp : 0;
 1517         setidt_nodisp(idx, off, typ, dpl, selec);
 1518 }
 1519 
 1520 void
 1521 setidt_nodisp(int idx, uintptr_t off, int typ, int dpl, int selec)
 1522 {
 1523         struct gate_descriptor *ip;
 1524 
 1525         ip = idt + idx;
 1526         ip->gd_looffset = off;
 1527         ip->gd_selector = selec;
 1528         ip->gd_stkcpy = 0;
 1529         ip->gd_xx = 0;
 1530         ip->gd_type = typ;
 1531         ip->gd_dpl = dpl;
 1532         ip->gd_p = 1;
 1533         ip->gd_hioffset = ((u_int)off) >> 16 ;
 1534 }
 1535 
 1536 extern inthand_t
 1537         IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
 1538         IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
 1539         IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
 1540         IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
 1541         IDTVEC(xmm),
 1542 #ifdef KDTRACE_HOOKS
 1543         IDTVEC(dtrace_ret),
 1544 #endif
 1545 #ifdef XENHVM
 1546         IDTVEC(xen_intr_upcall),
 1547 #endif
 1548         IDTVEC(int0x80_syscall);
 1549 
 1550 #ifdef DDB
 1551 /*
 1552  * Display the index and function name of any IDT entries that don't use
 1553  * the default 'rsvd' entry point.
 1554  */
 1555 DB_SHOW_COMMAND(idt, db_show_idt)
 1556 {
 1557         struct gate_descriptor *ip;
 1558         int idx;
 1559         uintptr_t func, func_trm;
 1560         bool trm;
 1561 
 1562         ip = idt;
 1563         for (idx = 0; idx < NIDT && !db_pager_quit; idx++) {
 1564                 if (ip->gd_type == SDT_SYSTASKGT) {
 1565                         db_printf("%3d\t<TASK>\n", idx);
 1566                 } else {
 1567                         func = (ip->gd_hioffset << 16 | ip->gd_looffset);
 1568                         if (func >= PMAP_TRM_MIN_ADDRESS) {
 1569                                 func_trm = func;
 1570                                 func -= setidt_disp;
 1571                                 trm = true;
 1572                         } else
 1573                                 trm = false;
 1574                         if (func != (uintptr_t)&IDTVEC(rsvd)) {
 1575                                 db_printf("%3d\t", idx);
 1576                                 db_printsym(func, DB_STGY_PROC);
 1577                                 if (trm)
 1578                                         db_printf(" (trampoline %#x)",
 1579                                             func_trm);
 1580                                 db_printf("\n");
 1581                         }
 1582                 }
 1583                 ip++;
 1584         }
 1585 }
 1586 
/* Show privileged registers. */
DB_SHOW_COMMAND(sysregs, db_show_sysregs)
{
	uint64_t idtr, gdtr;

	/* Descriptor table registers, printed as base/limit. */
	idtr = ridt();
	db_printf("idtr\t0x%08x/%04x\n",
	    (u_int)(idtr >> 16), (u_int)idtr & 0xffff);
	gdtr = rgdt();
	db_printf("gdtr\t0x%08x/%04x\n",
	    (u_int)(gdtr >> 16), (u_int)gdtr & 0xffff);
	db_printf("ldtr\t0x%04x\n", rldt());
	db_printf("tr\t0x%04x\n", rtr());
	db_printf("cr0\t0x%08x\n", rcr0());
	db_printf("cr2\t0x%08x\n", rcr2());
	db_printf("cr3\t0x%08x\n", rcr3());
	db_printf("cr4\t0x%08x\n", rcr4());
	/* The remaining registers exist only on some CPUs; probe first. */
	if (rcr4() & CR4_XSAVE)
		db_printf("xcr0\t0x%016llx\n", rxcr(0));
	if (amd_feature & (AMDID_NX | AMDID_LM))
		db_printf("EFER\t0x%016llx\n", rdmsr(MSR_EFER));
	if (cpu_feature2 & (CPUID2_VMX | CPUID2_SMX))
		db_printf("FEATURES_CTL\t0x%016llx\n",
		    rdmsr(MSR_IA32_FEATURE_CONTROL));
	if (((cpu_vendor_id == CPU_VENDOR_INTEL ||
	    cpu_vendor_id == CPU_VENDOR_AMD) && CPUID_TO_FAMILY(cpu_id) >= 6) ||
	    cpu_vendor_id == CPU_VENDOR_HYGON)
		db_printf("DEBUG_CTL\t0x%016llx\n", rdmsr(MSR_DEBUGCTLMSR));
	if (cpu_feature & CPUID_PAT)
		db_printf("PAT\t0x%016llx\n", rdmsr(MSR_PAT));
}
 1618 
/* Show the hardware debug registers (dr0-dr3, dr6, dr7). */
DB_SHOW_COMMAND(dbregs, db_show_dbregs)
{

	db_printf("dr0\t0x%08x\n", rdr0());
	db_printf("dr1\t0x%08x\n", rdr1());
	db_printf("dr2\t0x%08x\n", rdr2());
	db_printf("dr3\t0x%08x\n", rdr3());
	db_printf("dr6\t0x%08x\n", rdr6());
	db_printf("dr7\t0x%08x\n", rdr7());
}
 1629 
 1630 DB_SHOW_COMMAND(frame, db_show_frame)
 1631 {
 1632         struct trapframe *frame;
 1633 
 1634         frame = have_addr ? (struct trapframe *)addr : curthread->td_frame;
 1635         printf("ss %#x esp %#x efl %#x cs %#x eip %#x\n",
 1636             frame->tf_ss, frame->tf_esp, frame->tf_eflags, frame->tf_cs,
 1637             frame->tf_eip);
 1638         printf("err %#x trapno %d\n", frame->tf_err, frame->tf_trapno);
 1639         printf("ds %#x es %#x fs %#x\n",
 1640             frame->tf_ds, frame->tf_es, frame->tf_fs);
 1641         printf("eax %#x ecx %#x edx %#x ebx %#x\n",
 1642             frame->tf_eax, frame->tf_ecx, frame->tf_edx, frame->tf_ebx);
 1643         printf("ebp %#x esi %#x edi %#x\n",
 1644             frame->tf_ebp, frame->tf_esi, frame->tf_edi);
 1645 
 1646 }
 1647 #endif
 1648 
 1649 void
 1650 sdtossd(sd, ssd)
 1651         struct segment_descriptor *sd;
 1652         struct soft_segment_descriptor *ssd;
 1653 {
 1654         ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
 1655         ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
 1656         ssd->ssd_type  = sd->sd_type;
 1657         ssd->ssd_dpl   = sd->sd_dpl;
 1658         ssd->ssd_p     = sd->sd_p;
 1659         ssd->ssd_def32 = sd->sd_def32;
 1660         ssd->ssd_gran  = sd->sd_gran;
 1661 }
 1662 
/*
 * Record the physical memory range [base, base + length) in the sorted
 * (start, end) pair array 'physmap', coalescing with adjacent entries
 * where possible.  Memory above the addressable limit (4G, or 24G /
 * unlimited when PAE and the corresponding tunables allow) is trimmed
 * or dropped with a diagnostic.  Returns 1 on success (including
 * benignly ignored ranges) and 0 only when the array is full.
 */
static int
add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap,
    int *physmap_idxp)
{
	uint64_t lim, ign;
	int i, insert_idx, physmap_idx;

	physmap_idx = *physmap_idxp;

	if (length == 0)
		return (1);

	lim = 0x100000000;					/*  4G */
	if (pae_mode && above4g_allow)
		lim = above24g_allow ? -1ULL : 0x600000000;	/* 24G */
	if (base >= lim) {
		/* Entirely above the limit: drop the whole range. */
		printf("%uK of memory above %uGB ignored, pae %d "
		    "above4g_allow %d above24g_allow %d\n",
		    (u_int)(length / 1024), (u_int)(lim >> 30), pae_mode,
		    above4g_allow, above24g_allow);
		return (1);
	}
	if (base + length >= lim) {
		/* Straddles the limit: keep the part below it. */
		ign = base + length - lim;
		length -= ign;
		printf("%uK of memory above %uGB ignored, pae %d "
		    "above4g_allow %d above24g_allow %d\n",
		    (u_int)(ign / 1024), (u_int)(lim >> 30), pae_mode,
		    above4g_allow, above24g_allow);
	}

	/*
	 * Find insertion point while checking for overlap.  Start off by
	 * assuming the new entry will be added to the end.
	 */
	insert_idx = physmap_idx + 2;
	for (i = 0; i <= physmap_idx; i += 2) {
		if (base < physmap[i + 1]) {
			if (base + length <= physmap[i]) {
				insert_idx = i;
				break;
			}
			if (boothowto & RB_VERBOSE)
				printf(
		    "Overlapping memory regions, ignoring second region\n");
			return (1);
		}
	}

	/* See if we can prepend to the next entry. */
	if (insert_idx <= physmap_idx && base + length == physmap[insert_idx]) {
		physmap[insert_idx] = base;
		return (1);
	}

	/* See if we can append to the previous entry. */
	if (insert_idx > 0 && base == physmap[insert_idx - 1]) {
		physmap[insert_idx - 1] += length;
		return (1);
	}

	physmap_idx += 2;
	*physmap_idxp = physmap_idx;
	if (physmap_idx == PHYS_AVAIL_ENTRIES) {
		printf(
		"Too many segments in the physical address map, giving up\n");
		return (0);
	}

	/*
	 * Move the last 'N' entries down to make room for the new
	 * entry if needed.
	 */
	for (i = physmap_idx; i > insert_idx; i -= 2) {
		physmap[i] = physmap[i - 2];
		physmap[i + 1] = physmap[i - 1];
	}

	/* Insert the new entry. */
	physmap[insert_idx] = base;
	physmap[insert_idx + 1] = base + length;
	return (1);
}
 1746 
 1747 static int
 1748 add_smap_entry(struct bios_smap *smap, vm_paddr_t *physmap, int *physmap_idxp)
 1749 {
 1750         if (boothowto & RB_VERBOSE)
 1751                 printf("SMAP type=%02x base=%016llx len=%016llx\n",
 1752                     smap->type, smap->base, smap->length);
 1753 
 1754         if (smap->type != SMAP_TYPE_MEMORY)
 1755                 return (1);
 1756 
 1757         return (add_physmap_entry(smap->base, smap->length, physmap,
 1758             physmap_idxp));
 1759 }
 1760 
 1761 static void
 1762 add_smap_entries(struct bios_smap *smapbase, vm_paddr_t *physmap,
 1763     int *physmap_idxp)
 1764 {
 1765         struct bios_smap *smap, *smapend;
 1766         u_int32_t smapsize;
 1767         /*
 1768          * Memory map from INT 15:E820.
 1769          *
 1770          * subr_module.c says:
 1771          * "Consumer may safely assume that size value precedes data."
 1772          * ie: an int32_t immediately precedes SMAP.
 1773          */
 1774         smapsize = *((u_int32_t *)smapbase - 1);
 1775         smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize);
 1776 
 1777         for (smap = smapbase; smap < smapend; smap++)
 1778                 if (!add_smap_entry(smap, physmap, physmap_idxp))
 1779                         break;
 1780 }
 1781 
 1782 static void
 1783 basemem_setup(void)
 1784 {
 1785 
 1786         if (basemem > 640) {
 1787                 printf("Preposterous BIOS basemem of %uK, truncating to 640K\n",
 1788                         basemem);
 1789                 basemem = 640;
 1790         }
 1791 
 1792         pmap_basemem_setup(basemem);
 1793 }
 1794 
 1795 /*
 1796  * Populate the (physmap) array with base/bound pairs describing the
 1797  * available physical memory in the system, then test this memory and
 1798  * build the phys_avail array describing the actually-available memory.
 1799  *
 1800  * If we cannot accurately determine the physical memory map, then use
 1801  * value from the 0xE801 call, and failing that, the RTC.
 1802  *
 1803  * Total memory size may be set by the kernel environment variable
 1804  * hw.physmem or the compile-time define MAXMEM.
 1805  *
 1806  * XXX first should be vm_paddr_t.
 1807  */
static void
getmemsize(int first)
{
	int has_smap, off, physmap_idx, pa_indx, da_indx;
	u_long memtest;
	vm_paddr_t physmap[PHYS_AVAIL_ENTRIES];
	quad_t dcons_addr, dcons_size, physmem_tunable;
	int hasbrokenint12, i, res;
	u_int extmem;
	struct vm86frame vmf;
	struct vm86context vmc;
	vm_paddr_t pa;
	struct bios_smap *smap, *smapbase;
	caddr_t kmdp;

	has_smap = 0;
	bzero(&vmf, sizeof(vmf));
	bzero(physmap, sizeof(physmap));
	basemem = 0;

	/*
	 * Tell the physical memory allocator about pages used to store
	 * the kernel and preloaded data.  See kmem_bootstrap_free().
	 */
	vm_phys_early_add_seg((vm_paddr_t)KERNLOAD, trunc_page(first));

	TUNABLE_INT_FETCH("hw.above4g_allow", &above4g_allow);
	TUNABLE_INT_FETCH("hw.above24g_allow", &above24g_allow);

	/*
	 * Check if the loader supplied an SMAP memory map.  If so,
	 * use that and do not make any VM86 calls.
	 */
	physmap_idx = 0;
	kmdp = preload_search_by_type("elf kernel");
	if (kmdp == NULL)
		kmdp = preload_search_by_type("elf32 kernel");
	smapbase = (struct bios_smap *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_SMAP);
	if (smapbase != NULL) {
		add_smap_entries(smapbase, physmap, &physmap_idx);
		has_smap = 1;
		goto have_smap;
	}

	/*
	 * Some newer BIOSes have a broken INT 12H implementation
	 * which causes a kernel panic immediately.  In this case, we
	 * need use the SMAP to determine the base memory size.
	 */
	hasbrokenint12 = 0;
	TUNABLE_INT_FETCH("hw.hasbrokenint12", &hasbrokenint12);
	if (hasbrokenint12 == 0) {
		/* Use INT12 to determine base memory size. */
		vm86_intcall(0x12, &vmf);
		basemem = vmf.vmf_ax;
		basemem_setup();
	}

	/*
	 * Fetch the memory map with INT 15:E820.  Map page 1 R/W into
	 * the kernel page table so we can use it as a buffer.  The
	 * kernel will unmap this page later.
	 */
	vmc.npages = 0;
	smap = (void *)vm86_addpage(&vmc, 1, PMAP_MAP_LOW + ptoa(1));
	res = vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di);
	KASSERT(res != 0, ("vm86_getptr() failed: address not found"));

	vmf.vmf_ebx = 0;	/* EBX==0 starts a fresh E820 enumeration */
	do {
		vmf.vmf_eax = 0xE820;
		vmf.vmf_edx = SMAP_SIG;
		vmf.vmf_ecx = sizeof(struct bios_smap);
		i = vm86_datacall(0x15, &vmf, &vmc);
		if (i || vmf.vmf_eax != SMAP_SIG)
			break;
		has_smap = 1;
		if (!add_smap_entry(smap, physmap, &physmap_idx))
			break;
	} while (vmf.vmf_ebx != 0);	/* EBX==0 again means map complete */

have_smap:
	/*
	 * If we didn't fetch the "base memory" size from INT12,
	 * figure it out from the SMAP (or just guess).
	 */
	if (basemem == 0) {
		for (i = 0; i <= physmap_idx; i += 2) {
			if (physmap[i] == 0x00000000) {
				basemem = physmap[i + 1] / 1024;
				break;
			}
		}

		/* XXX: If we couldn't find basemem from SMAP, just guess. */
		if (basemem == 0)
			basemem = 640;
		basemem_setup();
	}

	/* A non-empty first physmap entry means the SMAP path succeeded. */
	if (physmap[1] != 0)
		goto physmap_done;

	/*
	 * If we failed to find an SMAP, figure out the extended
	 * memory size.  We will then build a simple memory map with
	 * two segments, one for "base memory" and the second for
	 * "extended memory".  Note that "extended memory" starts at a
	 * physical address of 1MB and that both basemem and extmem
	 * are in units of 1KB.
	 *
	 * First, try to fetch the extended memory size via INT 15:E801.
	 */
	vmf.vmf_ax = 0xE801;
	if (vm86_intcall(0x15, &vmf) == 0) {
		extmem = vmf.vmf_cx + vmf.vmf_dx * 64;
	} else {
		/*
		 * If INT15:E801 fails, this is our last ditch effort
		 * to determine the extended memory size.  Currently
		 * we prefer the RTC value over INT15:88.
		 */
#if 0
		vmf.vmf_ah = 0x88;
		vm86_intcall(0x15, &vmf);
		extmem = vmf.vmf_ax;
#else
		extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8);
#endif
	}

	/*
	 * Special hack for chipsets that still remap the 384k hole when
	 * there's 16MB of memory - this really confuses people that
	 * are trying to use bus mastering ISA controllers with the
	 * "16MB limit"; they only have 16MB, but the remapping puts
	 * them beyond the limit.
	 *
	 * If extended memory is between 15-16MB (16-17MB phys address range),
	 *	chop it to 15MB.
	 */
	if ((extmem > 15 * 1024) && (extmem < 16 * 1024))
		extmem = 15 * 1024;

	/* Two-segment fallback map: base memory, then extended at 1MB. */
	physmap[0] = 0;
	physmap[1] = basemem * 1024;
	physmap_idx = 2;
	physmap[physmap_idx] = 0x100000;
	physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024;

physmap_done:
	/*
	 * Now, physmap contains a map of physical memory.
	 */

#ifdef SMP
	/* make hole for AP bootstrap code */
	alloc_ap_trampoline(physmap, &physmap_idx);
#endif

	/*
	 * Maxmem isn't the "maximum memory", it's one larger than the
	 * highest page of the physical address space.  It should be
	 * called something like "Maxphyspage".  We may adjust this
	 * based on ``hw.physmem'' and the results of the memory test.
	 *
	 * This is especially confusing when it is much larger than the
	 * memory size and is displayed as "realmem".
	 */
	Maxmem = atop(physmap[physmap_idx + 1]);

#ifdef MAXMEM
	Maxmem = MAXMEM / 4;
#endif

	if (TUNABLE_QUAD_FETCH("hw.physmem", &physmem_tunable))
		Maxmem = atop(physmem_tunable);

	/*
	 * If we have an SMAP, don't allow MAXMEM or hw.physmem to extend
	 * the amount of memory in the system.
	 */
	if (has_smap && Maxmem > atop(physmap[physmap_idx + 1]))
		Maxmem = atop(physmap[physmap_idx + 1]);

	/*
	 * The boot memory test is disabled by default, as it takes a
	 * significant amount of time on large-memory systems, and is
	 * unfriendly to virtual machines as it unnecessarily touches all
	 * pages.
	 *
	 * A general name is used as the code may be extended to support
	 * additional tests beyond the current "page present" test.
	 */
	memtest = 0;
	TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest);

	if (atop(physmap[physmap_idx + 1]) != Maxmem &&
	    (boothowto & RB_VERBOSE))
		printf("Physical memory use set to %ldK\n", Maxmem * 4);

	/*
	 * If Maxmem has been increased beyond what the system has detected,
	 * extend the last memory segment to the new limit.
	 */
	if (atop(physmap[physmap_idx + 1]) < Maxmem)
		physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem);

	/* call pmap initialization to make new kernel address space */
	pmap_bootstrap(first);

	/*
	 * Size up each available chunk of physical memory.
	 */
	physmap[0] = PAGE_SIZE;		/* mask off page 0 */
	pa_indx = 0;
	da_indx = 1;
	phys_avail[pa_indx++] = physmap[0];
	phys_avail[pa_indx] = physmap[0];
	dump_avail[da_indx] = physmap[0];

	/*
	 * Get dcons buffer address
	 */
	if (getenv_quad("dcons.addr", &dcons_addr) == 0 ||
	    getenv_quad("dcons.size", &dcons_size) == 0)
		dcons_addr = 0;

	/*
	 * physmap is in bytes, so when converting to page boundaries,
	 * round up the start address and round down the end address.
	 */
	for (i = 0; i <= physmap_idx; i += 2) {
		vm_paddr_t end;

		end = ptoa((vm_paddr_t)Maxmem);
		if (physmap[i + 1] < end)
			end = trunc_page(physmap[i + 1]);
		for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
			int tmp, page_bad, full;
			int *ptr;

			full = FALSE;
			/*
			 * block out kernel memory as not available.
			 */
			if (pa >= KERNLOAD && pa < first)
				goto do_dump_avail;

			/*
			 * block out dcons buffer
			 */
			if (dcons_addr > 0
			    && pa >= trunc_page(dcons_addr)
			    && pa < dcons_addr + dcons_size)
				goto do_dump_avail;

			page_bad = FALSE;
			if (memtest == 0)
				goto skip_memtest;

			/*
			 * map page into kernel: valid, read/write,non-cacheable
			 */
			ptr = (int *)pmap_cmap3(pa, PG_V | PG_RW | PG_N);

			tmp = *(int *)ptr;
			/*
			 * Test for alternating 1's and 0's
			 */
			*(volatile int *)ptr = 0xaaaaaaaa;
			if (*(volatile int *)ptr != 0xaaaaaaaa)
				page_bad = TRUE;
			/*
			 * Test for alternating 0's and 1's
			 */
			*(volatile int *)ptr = 0x55555555;
			if (*(volatile int *)ptr != 0x55555555)
				page_bad = TRUE;
			/*
			 * Test for all 1's
			 */
			*(volatile int *)ptr = 0xffffffff;
			if (*(volatile int *)ptr != 0xffffffff)
				page_bad = TRUE;
			/*
			 * Test for all 0's
			 */
			*(volatile int *)ptr = 0x0;
			if (*(volatile int *)ptr != 0x0)
				page_bad = TRUE;
			/*
			 * Restore original value.
			 */
			*(int *)ptr = tmp;

skip_memtest:
			/*
			 * Adjust array of valid/good pages.
			 */
			if (page_bad == TRUE)
				continue;
			/*
			 * If this good page is a continuation of the
			 * previous set of good pages, then just increase
			 * the end pointer. Otherwise start a new chunk.
			 * Note that "end" points one higher than end,
			 * making the range >= start and < end.
			 * If we're also doing a speculative memory
			 * test and we at or past the end, bump up Maxmem
			 * so that we keep going. The first bad page
			 * will terminate the loop.
			 */
			if (phys_avail[pa_indx] == pa) {
				phys_avail[pa_indx] += PAGE_SIZE;
			} else {
				pa_indx++;
				if (pa_indx == PHYS_AVAIL_ENTRIES) {
					printf(
		"Too many holes in the physical address space, giving up\n");
					pa_indx--;
					full = TRUE;
					goto do_dump_avail;
				}
				phys_avail[pa_indx++] = pa;	/* start */
				phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */
			}
			physmem++;
do_dump_avail:
			/* dump_avail also covers pages excluded above. */
			if (dump_avail[da_indx] == pa) {
				dump_avail[da_indx] += PAGE_SIZE;
			} else {
				da_indx++;
				if (da_indx == PHYS_AVAIL_ENTRIES) {
					da_indx--;
					goto do_next;
				}
				dump_avail[da_indx++] = pa;	/* start */
				dump_avail[da_indx] = pa + PAGE_SIZE; /* end */
			}
do_next:
			if (full)
				break;
		}
	}
	pmap_cmap3(0, 0);

	/*
	 * XXX
	 * The last chunk must contain at least one page plus the message
	 * buffer to avoid complicating other code (message buffer address
	 * calculation, etc.).
	 */
	while (phys_avail[pa_indx - 1] + PAGE_SIZE +
	    round_page(msgbufsize) >= phys_avail[pa_indx]) {
		physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
		phys_avail[pa_indx--] = 0;
		phys_avail[pa_indx--] = 0;
	}

	Maxmem = atop(phys_avail[pa_indx]);

	/* Trim off space for the message buffer. */
	phys_avail[pa_indx] -= round_page(msgbufsize);

	/* Map the message buffer. */
	for (off = 0; off < round_page(msgbufsize); off += PAGE_SIZE)
		pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] +
		    off);
}
 2179 
/*
 * Initialize the kernel debugger: hand the bootloader-provided symbol
 * table to DDB (when compiled in), run the generic KDB setup and, if
 * the boot flags request it, drop into the debugger immediately.
 */
static void
i386_kdb_init(void)
{
#ifdef DDB
	/* Make the kernel symbol table from bootinfo available to DDB. */
	db_fetch_ksymtab(bootinfo.bi_symtab, bootinfo.bi_esymtab, 0);
#endif
	kdb_init();
#ifdef KDB
	if (boothowto & RB_KDB)
		kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
#endif
}
 2192 
/*
 * Relocate every interrupt/trap gate in the IDT by setidt_disp so the
 * gates point at the trampoline copies of the exception handlers.
 * Task gates and empty slots are skipped.  Each original offset must
 * lie inside [start_exceptions, end_exceptions) and the relocated
 * offset inside the trampoline address range.
 */
static void
fixup_idt(void)
{
	struct gate_descriptor *ip;
	uintptr_t off;
	int x;

	for (x = 0; x < NIDT; x++) {
		ip = &idt[x];
		/* Only interrupt and trap gates carry handler offsets. */
		if (ip->gd_type != SDT_SYS386IGT &&
		    ip->gd_type != SDT_SYS386TGT)
			continue;
		off = ip->gd_looffset + (((u_int)ip->gd_hioffset) << 16);
		KASSERT(off >= (uintptr_t)start_exceptions &&
		    off < (uintptr_t)end_exceptions,
		    ("IDT[%d] type %d off %#x", x, ip->gd_type, off));
		off += setidt_disp;
		MPASS(off >= PMAP_TRM_MIN_ADDRESS &&
		    off < PMAP_TRM_MAX_ADDRESS);
		/* Write the relocated offset back into the gate halves. */
		ip->gd_looffset = off;
		ip->gd_hioffset = off >> 16;
	}
}
 2216 
/*
 * First-stage IDT setup: point every vector at the 'rsvd' catch-all,
 * then install the standard x86 exception handlers, the int 0x80
 * system call gate and the optional DTrace / Xen vectors.  Gates that
 * user mode may invoke directly (breakpoint, overflow, syscall,
 * dtrace return) get DPL SEL_UPL; everything else is SEL_KPL.  The
 * double-fault vector is a task gate through GPANIC_SEL so it runs
 * with its own task state.
 */
static void
i386_setidt1(void)
{
	int x;

	/* exceptions */
	for (x = 0; x < NIDT; x++)
		setidt(x, &IDTVEC(rsvd), SDT_SYS386IGT, SEL_KPL,
		    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_DE, &IDTVEC(div), SDT_SYS386IGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_DB, &IDTVEC(dbg), SDT_SYS386IGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYS386IGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_BP, &IDTVEC(bpt), SDT_SYS386IGT, SEL_UPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_OF, &IDTVEC(ofl), SDT_SYS386IGT, SEL_UPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_BR, &IDTVEC(bnd), SDT_SYS386IGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386IGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_NM, &IDTVEC(dna), SDT_SYS386IGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	/* #DF uses a task gate (NULL handler: offset unused). */
	setidt(IDT_DF, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL,
	    SEL_KPL));
	setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYS386IGT,
	    SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_TS, &IDTVEC(tss), SDT_SYS386IGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_NP, &IDTVEC(missing), SDT_SYS386IGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_SS, &IDTVEC(stk), SDT_SYS386IGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386IGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_PF, &IDTVEC(page), SDT_SYS386IGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_MF, &IDTVEC(fpu), SDT_SYS386IGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_AC, &IDTVEC(align), SDT_SYS386IGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_MC, &IDTVEC(mchk), SDT_SYS386IGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_XF, &IDTVEC(xmm), SDT_SYS386IGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_SYSCALL, &IDTVEC(int0x80_syscall),
	    SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
#ifdef KDTRACE_HOOKS
	setidt(IDT_DTRACE_RET, &IDTVEC(dtrace_ret),
	    SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
#endif
#ifdef XENHVM
	setidt(IDT_EVTCHN, &IDTVEC(xen_intr_upcall),
	    SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
#endif
}
 2275 
/*
 * Re-install the #UD (invalid opcode) and #GP (general protection)
 * gates.  NOTE(review): presumably restores the normal handlers after
 * an earlier boot stage temporarily overrode them — confirm against
 * the caller in init386().
 */
static void
i386_setidt2(void)
{

	setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386IGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386IGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
}
 2285 
#if defined(DEV_ISA) && !defined(DEV_ATPIC)
/*
 * With ISA devices but no ATPIC driver configured, install handlers
 * for the spurious-interrupt vectors of the two 8259A PICs (I/O
 * interrupts 7 and 15) so stray AT-PIC interrupts are safely ignored.
 */
static void
i386_setidt3(void)
{

	setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint),
	    SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint),
	    SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
}
#endif
 2297 
/*
 * init386: machine-dependent boot-time initialization for i386, called
 * from locore with 'first' being the first physical address past the
 * statically-loaded kernel image.  Sets up thread0/proc0, the loader
 * metadata and static kernel environment, the boot GDT/IDT/TSS, the
 * per-CPU area, early mutexes, the clock, the console, the memory map
 * and the initial FPU state.  The order of operations here is
 * significant throughout (e.g. clock before console, console possibly
 * after getmemsize()) — do not reorder casually.
 *
 * Returns the kernel stack pointer (thread0's pcb) for locore to switch
 * onto.
 */
register_t
init386(int first)
{
        struct region_descriptor r_gdt, r_idt;  /* table descriptors */
        int gsel_tss, metadata_missing, x, pa;
        struct pcpu *pc;
        struct xstate_hdr *xhdr;
        caddr_t kmdp;
        vm_offset_t addend;
        size_t ucode_len;
        int late_console;

        thread0.td_kstack = proc0kstack;
        thread0.td_kstack_pages = TD0_KSTACK_PAGES;

        /*
         * This may be done better later if it gets more high level
         * components in it. If so just link td->td_proc here.
         */
        proc_linkup0(&proc0, &thread0);

        /*
         * Pick up the loader(8)-supplied module metadata, relocating it
         * into KVA if the loader left a physical (below-KERNBASE)
         * pointer.  Its absence is tolerated; a warning is printed
         * later once the console is up.
         */
        if (bootinfo.bi_modulep) {
                metadata_missing = 0;
                addend = (vm_paddr_t)bootinfo.bi_modulep < KERNBASE ?
                    PMAP_MAP_LOW : 0;
                preload_metadata = (caddr_t)bootinfo.bi_modulep + addend;
                preload_bootstrap_relocate(addend);
        } else {
                metadata_missing = 1;
        }

        /* Same relocation treatment for the static kernel environment. */
        if (bootinfo.bi_envp != 0) {
                addend = (vm_paddr_t)bootinfo.bi_envp < KERNBASE ?
                    PMAP_MAP_LOW : 0;
                init_static_kenv((char *)bootinfo.bi_envp + addend, 0);
        } else {
                init_static_kenv(NULL, 0);
        }

        /*
         * Re-evaluate CPU features if we loaded a microcode update.
         */
        ucode_len = ucode_load_bsp(first);
        if (ucode_len != 0) {
                identify_cpu();
                /* The update image now occupies memory at 'first'. */
                first = roundup2(first + ucode_len, PAGE_SIZE);
        }

        identify_hypervisor();

        /* Init basic tunables, hz etc */
        init_param1();

        /*
         * Make gdt memory segments.  All segments cover the full 4GB
         * of address space and permissions are enforced at page level.
         */
        gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1);
        gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1);
        gdt_segs[GUCODE_SEL].ssd_limit = atop(0 - 1);
        gdt_segs[GUDATA_SEL].ssd_limit = atop(0 - 1);
        gdt_segs[GUFS_SEL].ssd_limit = atop(0 - 1);
        gdt_segs[GUGS_SEL].ssd_limit = atop(0 - 1);

        /* BSP's private segment and TSS descriptors. */
        pc = &__pcpu[0];
        gdt_segs[GPRIV_SEL].ssd_limit = atop(0 - 1);
        gdt_segs[GPRIV_SEL].ssd_base = (int)pc;
        gdt_segs[GPROC0_SEL].ssd_base = (int)&common_tss0;

        for (x = 0; x < NGDT; x++)
                ssdtosd(&gdt_segs[x], &gdt0[x].sd);

        r_gdt.rd_limit = NGDT * sizeof(gdt0[0]) - 1;
        r_gdt.rd_base =  (int)gdt0;
        mtx_init(&dt_lock, "descriptor tables", NULL, MTX_SPIN);
        lgdt(&r_gdt);

        /* Map and initialize the BSP per-CPU and dynamic per-CPU areas. */
        pcpu_init(pc, 0, sizeof(struct pcpu));
        for (pa = first; pa < first + DPCPU_SIZE; pa += PAGE_SIZE)
                pmap_kenter(pa, pa);
        dpcpu_init((void *)first, 0);
        first += DPCPU_SIZE;
        PCPU_SET(prvspace, pc);
        PCPU_SET(curthread, &thread0);
        /* Non-late cninit() and printf() can be moved up to here. */

        /*
         * Initialize mutexes.
         *
         * icu_lock: in order to allow an interrupt to occur in a critical
         *           section, to set pcpu->ipending (etc...) properly, we
         *           must be able to get the icu lock, so it can't be
         *           under witness.
         */
        mutex_init();
        mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS | MTX_NOPROFILE);

        /* Install the bulk of the exception/trap gates, then load the IDT. */
        i386_setidt1();

        r_idt.rd_limit = sizeof(idt0) - 1;
        r_idt.rd_base = (int) idt;
        lidt(&r_idt);

        /*
         * Initialize the clock before the console so that console
         * initialization can use DELAY().
         */
        clock_init();

        finishidentcpu();       /* Final stage of CPU initialization */
        i386_setidt2();
        pmap_set_nx();
        initializecpu();        /* Initialize CPU registers */
        initializecpucache();

        /* pointer to selector slot for %fs/%gs */
        PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);

        /* Initialize the tss (except for the final esp0) early for vm86. */
        common_tss0.tss_esp0 = thread0.td_kstack + thread0.td_kstack_pages *
            PAGE_SIZE - VM86_STACK_SPACE;
        common_tss0.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
        common_tss0.tss_ioopt = sizeof(struct i386tss) << 16;
        gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
        PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd);
        PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
        ltr(gsel_tss);

        /* Initialize the PIC early for vm86 calls. */
#ifdef DEV_ISA
#ifdef DEV_ATPIC
        elcr_probe();
        atpic_startup();
#else
        /* Reset and mask the atpics and leave them shut down. */
        atpic_reset();

        /*
         * Point the ICU spurious interrupt vectors at the APIC spurious
         * interrupt handler.
         */
        i386_setidt3();
#endif
#endif

        /*
         * The console and kdb should be initialized even earlier than here,
         * but some console drivers don't work until after getmemsize().
         * Default to late console initialization to support these drivers.
         * This loses mainly printf()s in getmemsize() and early debugging.
         */
        late_console = 1;
        TUNABLE_INT_FETCH("debug.late_console", &late_console);
        if (!late_console) {
                cninit();
                i386_kdb_init();
        }

        kmdp = preload_search_by_type("elf kernel");
        link_elf_ireloc(kmdp);

        vm86_initialize();
        getmemsize(first);
        init_param2(physmem);

        /* now running on new page tables, configured, and u/iom is accessible */

        if (late_console)
                cninit();

        if (metadata_missing)
                printf("WARNING: loader(8) metadata is missing!\n");

        if (late_console)
                i386_kdb_init();

        msgbufinit(msgbufp, msgbufsize);
        npxinit(true);
        /*
         * Set up thread0 pcb after npxinit calculated pcb + fpu save
         * area size.  Zero out the extended state header in fpu save
         * area.
         */
        thread0.td_pcb = get_pcb_td(&thread0);
        thread0.td_pcb->pcb_save = get_pcb_user_save_td(&thread0);
        bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size);
        if (use_xsave) {
                xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) +
                    1);
                xhdr->xstate_bv = xsave_mask;
        }
        PCPU_SET(curpcb, thread0.td_pcb);
        /* Move esp0 in the tss to its final place. */
        /* Note: -16 is so we can grow the trapframe if we came from vm86 */
        common_tss0.tss_esp0 = (vm_offset_t)thread0.td_pcb - VM86_STACK_SPACE;
        PCPU_SET(kesp0, common_tss0.tss_esp0);
        gdt[GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;     /* clear busy bit */
        ltr(gsel_tss);

        /* transfer to user mode */

        _ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
        _udatasel = GSEL(GUDATA_SEL, SEL_UPL);

        /* setup proc 0's pcb */
        thread0.td_pcb->pcb_flags = 0;
        thread0.td_pcb->pcb_cr3 = pmap_get_kcr3();
        thread0.td_pcb->pcb_ext = 0;
        thread0.td_frame = &proc0_tf;

#ifdef FDT
        x86_init_fdt();
#endif

        /* Location of kernel stack for locore */
        return ((register_t)thread0.td_pcb);
}
 2515 
/*
 * Once the VM system is up (SI_SUB_VM), move the GDT, TSS, IDT,
 * double-fault TSS and LDT from their static boot copies into the
 * trampoline region allocated with pmap_trm_alloc(), and relocate the
 * exception trampoline code itself.  The boot-time tables set up by
 * init386() remain live until each lgdt/lidt/ltr/lldt below switches
 * the CPU over, so the sequencing here matters.
 */
static void
machdep_init_trampoline(void)
{
        struct region_descriptor r_gdt, r_idt;
        struct i386tss *tss;
        char *copyout_buf, *trampoline, *tramp_stack_base;
        int x;

        /* Per-CPU GDTs, seeded from the boot GDT, then made live. */
        gdt = pmap_trm_alloc(sizeof(union descriptor) * NGDT * mp_ncpus,
            M_NOWAIT | M_ZERO);
        bcopy(gdt0, gdt, sizeof(union descriptor) * NGDT);
        r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
        r_gdt.rd_base = (int)gdt;
        lgdt(&r_gdt);

        /* Per-CPU TSSes; rewrite GPROC0's base to the new location. */
        tss = pmap_trm_alloc(sizeof(struct i386tss) * mp_ncpus,
            M_NOWAIT | M_ZERO);
        bcopy(&common_tss0, tss, sizeof(struct i386tss));
        gdt[GPROC0_SEL].sd.sd_lobase = (int)tss;
        gdt[GPROC0_SEL].sd.sd_hibase = (u_int)tss >> 24;
        gdt[GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;

        PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);
        PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd);
        PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
        PCPU_SET(common_tssp, tss);
        ltr(GSEL(GPROC0_SEL, SEL_KPL));

        /* Copy the exception entry code and give it a trampoline stack. */
        trampoline = pmap_trm_alloc(end_exceptions - start_exceptions,
            M_NOWAIT);
        bcopy(start_exceptions, trampoline, end_exceptions - start_exceptions);
        tramp_stack_base = pmap_trm_alloc(TRAMP_STACK_SZ, M_NOWAIT);
        PCPU_SET(trampstk, (uintptr_t)tramp_stack_base + TRAMP_STACK_SZ -
            VM86_STACK_SPACE);
        tss[0].tss_esp0 = PCPU_GET(trampstk);

        idt = pmap_trm_alloc(sizeof(idt0), M_NOWAIT | M_ZERO);
        bcopy(idt0, idt, sizeof(idt0));

        /* Re-initialize new IDT since the handlers were relocated */
        setidt_disp = trampoline - start_exceptions;
        fixup_idt();

        r_idt.rd_limit = sizeof(struct gate_descriptor) * NIDT - 1;
        r_idt.rd_base = (int)idt;
        lidt(&r_idt);

        /* dblfault TSS: a separate task with its own stack so a kernel
         * stack fault can still be reported. */
        dblfault_tss = pmap_trm_alloc(sizeof(struct i386tss), M_NOWAIT | M_ZERO);
        dblfault_stack = pmap_trm_alloc(PAGE_SIZE, M_NOWAIT);
        dblfault_tss->tss_esp = dblfault_tss->tss_esp0 =
            dblfault_tss->tss_esp1 = dblfault_tss->tss_esp2 =
            (int)dblfault_stack + PAGE_SIZE;
        dblfault_tss->tss_ss = dblfault_tss->tss_ss0 = dblfault_tss->tss_ss1 =
            dblfault_tss->tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
        dblfault_tss->tss_cr3 = pmap_get_kcr3();
        dblfault_tss->tss_eip = (int)dblfault_handler;
        dblfault_tss->tss_eflags = PSL_KERNEL;
        dblfault_tss->tss_ds = dblfault_tss->tss_es =
            dblfault_tss->tss_gs = GSEL(GDATA_SEL, SEL_KPL);
        dblfault_tss->tss_fs = GSEL(GPRIV_SEL, SEL_KPL);
        dblfault_tss->tss_cs = GSEL(GCODE_SEL, SEL_KPL);
        dblfault_tss->tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
        gdt[GPANIC_SEL].sd.sd_lobase = (int)dblfault_tss;
        gdt[GPANIC_SEL].sd.sd_hibase = (u_int)dblfault_tss >> 24;

        /* make ldt memory segments */
        ldt = pmap_trm_alloc(sizeof(union descriptor) * NLDT,
            M_NOWAIT | M_ZERO);
        gdt[GLDT_SEL].sd.sd_lobase = (int)ldt;
        gdt[GLDT_SEL].sd.sd_hibase = (u_int)ldt >> 24;
        ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1);
        ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1);
        for (x = 0; x < nitems(ldt_segs); x++)
                ssdtosd(&ldt_segs[x], &ldt[x].sd);

        _default_ldt = GSEL(GLDT_SEL, SEL_KPL);
        lldt(_default_ldt);
        PCPU_SET(currentldt, _default_ldt);

        /* Per-CPU bounce buffer for copyout through the trampoline. */
        copyout_buf = pmap_trm_alloc(TRAMP_COPYOUT_SZ, M_NOWAIT);
        PCPU_SET(copyout_buf, copyout_buf);
        copyout_init_tramp();
}
SYSINIT(vm_mem, SI_SUB_VM, SI_ORDER_SECOND, machdep_init_trampoline, NULL);
 2601 
#ifdef COMPAT_43
/*
 * COMPAT_43 support: build the LDT code-segment descriptor through
 * which old binaries enter the kernel with "lcall".  The descriptor is
 * based at the lcall trampoline, which lives just below the process
 * ps_strings area (sv_psstrings - sz_lcall_tramp), and is installed at
 * slot LSYS5CALLS_SEL.  Runs at SI_SUB_EXEC so the sysvec is populated.
 */
static void
i386_setup_lcall_gate(void)
{
        struct sysentvec *sv;
        struct user_segment_descriptor desc;
        u_int lcall_addr;

        sv = &elf32_freebsd_sysvec;
        lcall_addr = (uintptr_t)sv->sv_psstrings - sz_lcall_tramp;

        /* Execute/read, user-privilege, present, 32-bit, page-granular
         * segment with a 0xfffff-page limit, based at the trampoline. */
        bzero(&desc, sizeof(desc));
        desc.sd_type = SDT_MEMERA;
        desc.sd_dpl = SEL_UPL;
        desc.sd_p = 1;
        desc.sd_def32 = 1;
        desc.sd_gran = 1;
        desc.sd_lolimit = 0xffff;
        desc.sd_hilimit = 0xf;
        desc.sd_lobase = lcall_addr;
        desc.sd_hibase = lcall_addr >> 24;
        bcopy(&desc, &ldt[LSYS5CALLS_SEL], sizeof(desc));
}
SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_ANY, i386_setup_lcall_gate, NULL);
#endif
 2627 
/*
 * Machine-dependent per-CPU structure initialization hook.  The only MD
 * field given a default here is the ACPI processor id; 0xffffffff marks
 * it as not-yet-known (presumably filled in later by ACPI CPU probing —
 * NOTE(review): confirm against the ACPI attach path).
 */
void
cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
{

        pcpu->pc_acpi_id = 0xffffffff;
}
 2634 
 2635 static int
 2636 smap_sysctl_handler(SYSCTL_HANDLER_ARGS)
 2637 {
 2638         struct bios_smap *smapbase;
 2639         struct bios_smap_xattr smap;
 2640         caddr_t kmdp;
 2641         uint32_t *smapattr;
 2642         int count, error, i;
 2643 
 2644         /* Retrieve the system memory map from the loader. */
 2645         kmdp = preload_search_by_type("elf kernel");
 2646         if (kmdp == NULL)
 2647                 kmdp = preload_search_by_type("elf32 kernel");
 2648         smapbase = (struct bios_smap *)preload_search_info(kmdp,
 2649             MODINFO_METADATA | MODINFOMD_SMAP);
 2650         if (smapbase == NULL)
 2651                 return (0);
 2652         smapattr = (uint32_t *)preload_search_info(kmdp,
 2653             MODINFO_METADATA | MODINFOMD_SMAP_XATTR);
 2654         count = *((u_int32_t *)smapbase - 1) / sizeof(*smapbase);
 2655         error = 0;
 2656         for (i = 0; i < count; i++) {
 2657                 smap.base = smapbase[i].base;
 2658                 smap.length = smapbase[i].length;
 2659                 smap.type = smapbase[i].type;
 2660                 if (smapattr != NULL)
 2661                         smap.xattr = smapattr[i];
 2662                 else
 2663                         smap.xattr = 0;
 2664                 error = SYSCTL_OUT(req, &smap, sizeof(smap));
 2665         }
 2666         return (error);
 2667 }
 2668 SYSCTL_PROC(_machdep, OID_AUTO, smap,
 2669     CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
 2670     smap_sysctl_handler, "S,bios_smap_xattr",
 2671     "Raw BIOS SMAP data");
 2672 
 2673 void
 2674 spinlock_enter(void)
 2675 {
 2676         struct thread *td;
 2677         register_t flags;
 2678 
 2679         td = curthread;
 2680         if (td->td_md.md_spinlock_count == 0) {
 2681                 flags = intr_disable();
 2682                 td->td_md.md_spinlock_count = 1;
 2683                 td->td_md.md_saved_flags = flags;
 2684                 critical_enter();
 2685         } else
 2686                 td->td_md.md_spinlock_count++;
 2687 }
 2688 
 2689 void
 2690 spinlock_exit(void)
 2691 {
 2692         struct thread *td;
 2693         register_t flags;
 2694 
 2695         td = curthread;
 2696         flags = td->td_md.md_saved_flags;
 2697         td->td_md.md_spinlock_count--;
 2698         if (td->td_md.md_spinlock_count == 0) {
 2699                 critical_exit();
 2700                 intr_restore(flags);
 2701         }
 2702 }
 2703 
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
static void f00f_hack(void *unused);
SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL);

/*
 * Workaround for the Pentium F00F erratum: relocate the IDT so that it
 * lives in a page mapped read-only (the pmap_protect() below), which
 * converts the otherwise-fatal lockup into a recoverable fault.  Three
 * pages are allocated so that after round_page() a full two-page window
 * is guaranteed to be available for the split IDT layout.
 */
static void
f00f_hack(void *unused)
{
        struct region_descriptor r_idt;
        struct gate_descriptor *new_idt;
        vm_offset_t tmp;

        if (!has_f00f_bug)
                return;

        GIANT_REQUIRED;

        printf("Intel Pentium detected, installing workaround for F00F bug\n");

        tmp = (vm_offset_t)pmap_trm_alloc(PAGE_SIZE * 3, M_NOWAIT | M_ZERO);
        if (tmp == 0)
                panic("kmem_malloc returned 0");
        tmp = round_page(tmp);

        /* Put the problematic entry (#6) at the end of the lower page. */
        new_idt = (struct gate_descriptor *)
            (tmp + PAGE_SIZE - 7 * sizeof(struct gate_descriptor));
        bcopy(idt, new_idt, sizeof(idt0));
        r_idt.rd_base = (u_int)new_idt;
        r_idt.rd_limit = sizeof(idt0) - 1;
        lidt(&r_idt);
        /* SMP machines do not need the F00F hack. */
        idt = new_idt;
        /* Entries up to and including #6 are now in a read-only page. */
        pmap_protect(kernel_pmap, tmp, tmp + PAGE_SIZE, VM_PROT_READ);
}
#endif /* defined(I586_CPU) && !NO_F00F_HACK */
 2739 
 2740 /*
 2741  * Construct a PCB from a trapframe. This is called from kdb_trap() where
 2742  * we want to start a backtrace from the function that caused us to enter
 2743  * the debugger. We have the context in the trapframe, but base the trace
 2744  * on the PCB. The PCB doesn't have to be perfect, as long as it contains
 2745  * enough for a backtrace.
 2746  */
 2747 void
 2748 makectx(struct trapframe *tf, struct pcb *pcb)
 2749 {
 2750 
 2751         pcb->pcb_edi = tf->tf_edi;
 2752         pcb->pcb_esi = tf->tf_esi;
 2753         pcb->pcb_ebp = tf->tf_ebp;
 2754         pcb->pcb_ebx = tf->tf_ebx;
 2755         pcb->pcb_eip = tf->tf_eip;
 2756         pcb->pcb_esp = (ISPL(tf->tf_cs)) ? tf->tf_esp : (int)(tf + 1) - 8;
 2757         pcb->pcb_gs = rgs();
 2758 }
 2759 
/*
 * ptrace(2) back-end: redirect the traced thread's user-mode program
 * counter to 'addr'.  Always succeeds on i386.
 */
int
ptrace_set_pc(struct thread *td, u_long addr)
{

        td->td_frame->tf_eip = addr;
        return (0);
}
 2767 
 2768 int
 2769 ptrace_single_step(struct thread *td)
 2770 {
 2771 
 2772         PROC_LOCK_ASSERT(td->td_proc, MA_OWNED);
 2773         if ((td->td_frame->tf_eflags & PSL_T) == 0) {
 2774                 td->td_frame->tf_eflags |= PSL_T;
 2775                 td->td_dbgflags |= TDB_STEP;
 2776         }
 2777         return (0);
 2778 }
 2779 
 2780 int
 2781 ptrace_clear_single_step(struct thread *td)
 2782 {
 2783 
 2784         PROC_LOCK_ASSERT(td->td_proc, MA_OWNED);
 2785         td->td_frame->tf_eflags &= ~PSL_T;
 2786         td->td_dbgflags &= ~TDB_STEP;
 2787         return (0);
 2788 }
 2789 
 2790 int
 2791 fill_regs(struct thread *td, struct reg *regs)
 2792 {
 2793         struct pcb *pcb;
 2794         struct trapframe *tp;
 2795 
 2796         tp = td->td_frame;
 2797         pcb = td->td_pcb;
 2798         regs->r_gs = pcb->pcb_gs;
 2799         return (fill_frame_regs(tp, regs));
 2800 }
 2801 
 2802 int
 2803 fill_frame_regs(struct trapframe *tp, struct reg *regs)
 2804 {
 2805 
 2806         regs->r_fs = tp->tf_fs;
 2807         regs->r_es = tp->tf_es;
 2808         regs->r_ds = tp->tf_ds;
 2809         regs->r_edi = tp->tf_edi;
 2810         regs->r_esi = tp->tf_esi;
 2811         regs->r_ebp = tp->tf_ebp;
 2812         regs->r_ebx = tp->tf_ebx;
 2813         regs->r_edx = tp->tf_edx;
 2814         regs->r_ecx = tp->tf_ecx;
 2815         regs->r_eax = tp->tf_eax;
 2816         regs->r_eip = tp->tf_eip;
 2817         regs->r_cs = tp->tf_cs;
 2818         regs->r_eflags = tp->tf_eflags;
 2819         regs->r_esp = tp->tf_esp;
 2820         regs->r_ss = tp->tf_ss;
 2821         regs->r_err = 0;
 2822         regs->r_trapno = 0;
 2823         return (0);
 2824 }
 2825 
 2826 int
 2827 set_regs(struct thread *td, struct reg *regs)
 2828 {
 2829         struct pcb *pcb;
 2830         struct trapframe *tp;
 2831 
 2832         tp = td->td_frame;
 2833         if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) ||
 2834             !CS_SECURE(regs->r_cs))
 2835                 return (EINVAL);
 2836         pcb = td->td_pcb;
 2837         tp->tf_fs = regs->r_fs;
 2838         tp->tf_es = regs->r_es;
 2839         tp->tf_ds = regs->r_ds;
 2840         tp->tf_edi = regs->r_edi;
 2841         tp->tf_esi = regs->r_esi;
 2842         tp->tf_ebp = regs->r_ebp;
 2843         tp->tf_ebx = regs->r_ebx;
 2844         tp->tf_edx = regs->r_edx;
 2845         tp->tf_ecx = regs->r_ecx;
 2846         tp->tf_eax = regs->r_eax;
 2847         tp->tf_eip = regs->r_eip;
 2848         tp->tf_cs = regs->r_cs;
 2849         tp->tf_eflags = regs->r_eflags;
 2850         tp->tf_esp = regs->r_esp;
 2851         tp->tf_ss = regs->r_ss;
 2852         pcb->pcb_gs = regs->r_gs;
 2853         return (0);
 2854 }
 2855 
/*
 * Copy the thread's floating point state into *fpregs in the legacy
 * save87 layout.  The KASSERT documents the contract: the target must
 * be curthread, suspended, or in a stopped process, so its FPU context
 * cannot change underneath us.  On FXSR-capable CPUs the FXSAVE (XMM)
 * image is converted down to save87 format; otherwise the raw 387 save
 * area is copied verbatim.
 */
int
fill_fpregs(struct thread *td, struct fpreg *fpregs)
{

        KASSERT(td == curthread || TD_IS_SUSPENDED(td) ||
            P_SHOULDSTOP(td->td_proc),
            ("not suspended thread %p", td));
        /* Sync the live FPU state into the PCB save area first. */
        npxgetregs(td);
        if (cpu_fxsr)
                npx_fill_fpregs_xmm(&get_pcb_user_save_td(td)->sv_xmm,
                    (struct save87 *)fpregs);
        else
                bcopy(&get_pcb_user_save_td(td)->sv_87, fpregs,
                    sizeof(*fpregs));
        return (0);
}
 2872 
/*
 * Install user-supplied floating point state (legacy save87 layout)
 * into the thread's FPU save area.  On FXSR-capable CPUs the save87
 * image is converted up to the FXSAVE (XMM) layout.  The critical
 * section keeps the conversion/copy and the npxuserinited() ownership
 * update atomic with respect to context switches on this CPU.
 */
int
set_fpregs(struct thread *td, struct fpreg *fpregs)
{

        critical_enter();
        if (cpu_fxsr)
                npx_set_fpregs_xmm((struct save87 *)fpregs,
                    &get_pcb_user_save_td(td)->sv_xmm);
        else
                bcopy(fpregs, &get_pcb_user_save_td(td)->sv_87,
                    sizeof(*fpregs));
        npxuserinited(td);
        critical_exit();
        return (0);
}
 2888 
/*
 * Get machine context.
 *
 * Capture the thread's current user-mode machine context into *mcp,
 * including FPU state and the %fs/%gs segment base addresses decoded
 * from the PCB descriptors.  With GET_MC_CLEAR_RET the syscall return
 * registers (%eax, %edx) and the carry flag are reported as zero —
 * presumably so a context captured mid-syscall resumes as if the call
 * succeeded returning 0 (NOTE(review): confirm against callers).
 * Always returns 0.
 */
int
get_mcontext(struct thread *td, mcontext_t *mcp, int flags)
{
        struct trapframe *tp;
        struct segment_descriptor *sdp;

        tp = td->td_frame;

        /* sigonstack() inspects signal-stack state; hold the proc lock. */
        PROC_LOCK(curthread->td_proc);
        mcp->mc_onstack = sigonstack(tp->tf_esp);
        PROC_UNLOCK(curthread->td_proc);
        mcp->mc_gs = td->td_pcb->pcb_gs;
        mcp->mc_fs = tp->tf_fs;
        mcp->mc_es = tp->tf_es;
        mcp->mc_ds = tp->tf_ds;
        mcp->mc_edi = tp->tf_edi;
        mcp->mc_esi = tp->tf_esi;
        mcp->mc_ebp = tp->tf_ebp;
        mcp->mc_isp = tp->tf_isp;
        mcp->mc_eflags = tp->tf_eflags;
        if (flags & GET_MC_CLEAR_RET) {
                mcp->mc_eax = 0;
                mcp->mc_edx = 0;
                mcp->mc_eflags &= ~PSL_C;
        } else {
                mcp->mc_eax = tp->tf_eax;
                mcp->mc_edx = tp->tf_edx;
        }
        mcp->mc_ebx = tp->tf_ebx;
        mcp->mc_ecx = tp->tf_ecx;
        mcp->mc_eip = tp->tf_eip;
        mcp->mc_cs = tp->tf_cs;
        mcp->mc_esp = tp->tf_esp;
        mcp->mc_ss = tp->tf_ss;
        mcp->mc_len = sizeof(*mcp);
        /* Basic FPU state only; no extended xsave area is exported here. */
        get_fpcontext(td, mcp, NULL, 0);
        /* Reassemble the 32-bit segment bases from the split descriptor
         * base fields (hibase holds bits 31..24, lobase bits 23..0). */
        sdp = &td->td_pcb->pcb_fsd;
        mcp->mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase;
        sdp = &td->td_pcb->pcb_gsd;
        mcp->mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase;
        mcp->mc_flags = 0;
        mcp->mc_xfpustate = 0;
        mcp->mc_xfpustate_len = 0;
        bzero(mcp->mc_spare2, sizeof(mcp->mc_spare2));
        return (0);
}
 2938 
/*
 * Set machine context.
 *
 * However, we don't set any but the user modifiable flags, and we won't
 * touch the cs selector.
 *
 * Validates mc_len and mc_flags, merges only the PSL_USERCHANGE bits of
 * the supplied eflags with the kernel-owned bits of the current frame,
 * optionally copies in an extended FPU state blob, and installs the
 * register set.  Returns EINVAL on malformed input, or the error from
 * copyin()/set_fpcontext(); on any error the trapframe is untouched.
 */
int
set_mcontext(struct thread *td, mcontext_t *mcp)
{
        struct trapframe *tp;
        char *xfpustate;
        int eflags, ret;

        tp = td->td_frame;
        if (mcp->mc_len != sizeof(*mcp) ||
            (mcp->mc_flags & ~_MC_FLAG_MASK) != 0)
                return (EINVAL);
        eflags = (mcp->mc_eflags & PSL_USERCHANGE) |
            (tp->tf_eflags & ~PSL_USERCHANGE);
        if (mcp->mc_flags & _MC_HASFPXSTATE) {
                /*
                 * Bound the user-supplied length before the alloca so
                 * the on-stack staging buffer stays small.
                 */
                if (mcp->mc_xfpustate_len > cpu_max_ext_state_size -
                    sizeof(union savefpu))
                        return (EINVAL);
                xfpustate = __builtin_alloca(mcp->mc_xfpustate_len);
                ret = copyin((void *)mcp->mc_xfpustate, xfpustate,
                    mcp->mc_xfpustate_len);
                if (ret != 0)
                        return (ret);
        } else
                xfpustate = NULL;
        /* Install FPU state before touching the integer registers. */
        ret = set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len);
        if (ret != 0)
                return (ret);
        tp->tf_fs = mcp->mc_fs;
        tp->tf_es = mcp->mc_es;
        tp->tf_ds = mcp->mc_ds;
        tp->tf_edi = mcp->mc_edi;
        tp->tf_esi = mcp->mc_esi;
        tp->tf_ebp = mcp->mc_ebp;
        tp->tf_ebx = mcp->mc_ebx;
        tp->tf_edx = mcp->mc_edx;
        tp->tf_ecx = mcp->mc_ecx;
        tp->tf_eax = mcp->mc_eax;
        tp->tf_eip = mcp->mc_eip;
        tp->tf_eflags = eflags;
        tp->tf_esp = mcp->mc_esp;
        tp->tf_ss = mcp->mc_ss;
        td->td_pcb->pcb_gs = mcp->mc_gs;
        return (0);
}
 2989 
 2990 static void
 2991 get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave,
 2992     size_t xfpusave_len)
 2993 {
 2994         size_t max_len, len;
 2995 
 2996         mcp->mc_ownedfp = npxgetregs(td);
 2997         bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0],
 2998             sizeof(mcp->mc_fpstate));
 2999         mcp->mc_fpformat = npxformat();
 3000         if (!use_xsave || xfpusave_len == 0)
 3001                 return;
 3002         max_len = cpu_max_ext_state_size - sizeof(union savefpu);
 3003         len = xfpusave_len;
 3004         if (len > max_len) {
 3005                 len = max_len;
 3006                 bzero(xfpusave + max_len, len - max_len);
 3007         }
 3008         mcp->mc_flags |= _MC_HASFPXSTATE;
 3009         mcp->mc_xfpustate_len = len;
 3010         bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len);
 3011 }
 3012 
 3013 static int
 3014 set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate,
 3015     size_t xfpustate_len)
 3016 {
 3017         int error;
 3018 
 3019         if (mcp->mc_fpformat == _MC_FPFMT_NODEV)
 3020                 return (0);
 3021         else if (mcp->mc_fpformat != _MC_FPFMT_387 &&
 3022             mcp->mc_fpformat != _MC_FPFMT_XMM)
 3023                 return (EINVAL);
 3024         else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) {
 3025                 /* We don't care what state is left in the FPU or PCB. */
 3026                 fpstate_drop(td);
 3027                 error = 0;
 3028         } else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU ||
 3029             mcp->mc_ownedfp == _MC_FPOWNED_PCB) {
 3030                 error = npxsetregs(td, (union savefpu *)&mcp->mc_fpstate,
 3031                     xfpustate, xfpustate_len);
 3032         } else
 3033                 return (EINVAL);
 3034         return (error);
 3035 }
 3036 
/*
 * Discard the thread's user FPU state: release hardware ownership if
 * this thread currently owns the FPU, then clear the "initialized"
 * PCB flags so the next FPU use starts from a clean slate.  Must only
 * be called while the pcb holds user (not kernel) FPU context, per the
 * KASSERT.
 */
static void
fpstate_drop(struct thread *td)
{

        KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu"));
        critical_enter();
        if (PCPU_GET(fpcurthread) == td)
                npxdrop();
        /*
         * XXX force a full drop of the npx.  The above only drops it if we
         * owned it.  npxgetregs() has the same bug in the !cpu_fxsr case.
         *
         * XXX I don't much like npxgetregs()'s semantics of doing a full
         * drop.  Dropping only to the pcb matches fnsave's behaviour.
         * We only need to drop to !PCB_INITDONE in sendsig().  But
         * sendsig() is the only caller of npxgetregs()... perhaps we just
         * have too many layers.
         */
        curthread->td_pcb->pcb_flags &= ~(PCB_NPXINITDONE |
            PCB_NPXUSERINITDONE);
        critical_exit();
}
 3059 
 3060 int
 3061 fill_dbregs(struct thread *td, struct dbreg *dbregs)
 3062 {
 3063         struct pcb *pcb;
 3064 
 3065         if (td == NULL) {
 3066                 dbregs->dr[0] = rdr0();
 3067                 dbregs->dr[1] = rdr1();
 3068                 dbregs->dr[2] = rdr2();
 3069                 dbregs->dr[3] = rdr3();
 3070                 dbregs->dr[6] = rdr6();
 3071                 dbregs->dr[7] = rdr7();
 3072         } else {
 3073                 pcb = td->td_pcb;
 3074                 dbregs->dr[0] = pcb->pcb_dr0;
 3075                 dbregs->dr[1] = pcb->pcb_dr1;
 3076                 dbregs->dr[2] = pcb->pcb_dr2;
 3077                 dbregs->dr[3] = pcb->pcb_dr3;
 3078                 dbregs->dr[6] = pcb->pcb_dr6;
 3079                 dbregs->dr[7] = pcb->pcb_dr7;
 3080         }
 3081         dbregs->dr[4] = 0;
 3082         dbregs->dr[5] = 0;
 3083         return (0);
 3084 }
 3085 
/*
 * Install debug register values.  A NULL thread loads the hardware
 * registers directly with no validation (kernel-internal use).  For a
 * user thread the dr7 access-type/length fields and any enabled
 * breakpoint addresses are validated first, then the values are cached
 * in the PCB and PCB_DBREGS is set so the context switch code loads
 * them.  Returns 0 on success or EINVAL on a rejected value.
 */
int
set_dbregs(struct thread *td, struct dbreg *dbregs)
{
        struct pcb *pcb;
        int i;

        if (td == NULL) {
                load_dr0(dbregs->dr[0]);
                load_dr1(dbregs->dr[1]);
                load_dr2(dbregs->dr[2]);
                load_dr3(dbregs->dr[3]);
                load_dr6(dbregs->dr[6]);
                load_dr7(dbregs->dr[7]);
        } else {
                /*
                 * Don't let an illegal value for dr7 get set.  Specifically,
                 * check for undefined settings.  Setting these bit patterns
                 * result in undefined behaviour and can lead to an unexpected
                 * TRCTRAP.
                 */
                for (i = 0; i < 4; i++) {
                        if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02)
                                return (EINVAL);
                        if (DBREG_DR7_LEN(dbregs->dr[7], i) == 0x02)
                                return (EINVAL);
                }
                
                pcb = td->td_pcb;
                
                /*
                 * Don't let a process set a breakpoint that is not within the
                 * process's address space.  If a process could do this, it
                 * could halt the system by setting a breakpoint in the kernel
                 * (if ddb was enabled).  Thus, we need to check to make sure
                 * that no breakpoints are being enabled for addresses outside
                 * process's address space.
                 *
                 * XXX - what about when the watched area of the user's
                 * address space is written into from within the kernel
                 * ... wouldn't that still cause a breakpoint to be generated
                 * from within kernel mode?
                 */

                if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) {
                        /* dr0 is enabled */
                        if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS)
                                return (EINVAL);
                }
                        
                if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) {
                        /* dr1 is enabled */
                        if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS)
                                return (EINVAL);
                }
                        
                if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) {
                        /* dr2 is enabled */
                        if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS)
                                return (EINVAL);
                }
                        
                if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) {
                        /* dr3 is enabled */
                        if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS)
                                return (EINVAL);
                }

                /* All values validated; cache them for the switch code. */
                pcb->pcb_dr0 = dbregs->dr[0];
                pcb->pcb_dr1 = dbregs->dr[1];
                pcb->pcb_dr2 = dbregs->dr[2];
                pcb->pcb_dr3 = dbregs->dr[3];
                pcb->pcb_dr6 = dbregs->dr[6];
                pcb->pcb_dr7 = dbregs->dr[7];

                pcb->pcb_flags |= PCB_DBREGS;
        }

        return (0);
}
 3165 
 3166 /*
 3167  * Return > 0 if a hardware breakpoint has been hit, and the
 3168  * breakpoint was in user space.  Return 0, otherwise.
 3169  */
 3170 int
 3171 user_dbreg_trap(register_t dr6)
 3172 {
 3173         u_int32_t dr7;
 3174         u_int32_t bp;       /* breakpoint bits extracted from dr6 */
 3175         int nbp;            /* number of breakpoints that triggered */
 3176         caddr_t addr[4];    /* breakpoint addresses */
 3177         int i;
 3178 
 3179         bp = dr6 & DBREG_DR6_BMASK;
 3180         if (bp == 0) {
 3181                 /*
 3182                  * None of the breakpoint bits are set meaning this
 3183                  * trap was not caused by any of the debug registers
 3184                  */
 3185                 return 0;
 3186         }
 3187 
 3188         dr7 = rdr7();
 3189         if ((dr7 & 0x000000ff) == 0) {
 3190                 /*
 3191                  * all GE and LE bits in the dr7 register are zero,
 3192                  * thus the trap couldn't have been caused by the
 3193                  * hardware debug registers
 3194                  */
 3195                 return 0;
 3196         }
 3197 
 3198         nbp = 0;
 3199 
 3200         /*
 3201          * at least one of the breakpoints were hit, check to see
 3202          * which ones and if any of them are user space addresses
 3203          */
 3204 
 3205         if (bp & 0x01) {
 3206                 addr[nbp++] = (caddr_t)rdr0();
 3207         }
 3208         if (bp & 0x02) {
 3209                 addr[nbp++] = (caddr_t)rdr1();
 3210         }
 3211         if (bp & 0x04) {
 3212                 addr[nbp++] = (caddr_t)rdr2();
 3213         }
 3214         if (bp & 0x08) {
 3215                 addr[nbp++] = (caddr_t)rdr3();
 3216         }
 3217 
 3218         for (i = 0; i < nbp; i++) {
 3219                 if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) {
 3220                         /*
 3221                          * addr[i] is in user space
 3222                          */
 3223                         return nbp;
 3224                 }
 3225         }
 3226 
 3227         /*
 3228          * None of the breakpoints are in user space.
 3229          */
 3230         return 0;
 3231 }
 3232 
 3233 #ifdef KDB
 3234 
 3235 /*
 3236  * Provide inb() and outb() as functions.  They are normally only available as
 3237  * inline functions, thus cannot be called from the debugger.
 3238  */
 3239 
 3240 /* silence compiler warnings */
 3241 u_char inb_(u_short);
 3242 void outb_(u_short, u_char);
 3243 
 3244 u_char
 3245 inb_(u_short port)
 3246 {
 3247         return inb(port);
 3248 }
 3249 
 3250 void
 3251 outb_(u_short port, u_char data)
 3252 {
 3253         outb(port, data);
 3254 }
 3255 
 3256 #endif /* KDB */

Cache object: c3397bc377a5642d6938de88e70cf3c4


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.