FreeBSD/Linux Kernel Cross Reference
sys/i386/i386/machdep.c


    1 /*-
    2  * Copyright (c) 1992 Terrence R. Lambert.
    3  * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
    4  * All rights reserved.
    5  *
    6  * This code is derived from software contributed to Berkeley by
    7  * William Jolitz.
    8  *
    9  * Redistribution and use in source and binary forms, with or without
   10  * modification, are permitted provided that the following conditions
   11  * are met:
   12  * 1. Redistributions of source code must retain the above copyright
   13  *    notice, this list of conditions and the following disclaimer.
   14  * 2. Redistributions in binary form must reproduce the above copyright
   15  *    notice, this list of conditions and the following disclaimer in the
   16  *    documentation and/or other materials provided with the distribution.
   17  * 3. All advertising materials mentioning features or use of this software
   18  *    must display the following acknowledgement:
   19  *      This product includes software developed by the University of
   20  *      California, Berkeley and its contributors.
   21  * 4. Neither the name of the University nor the names of its contributors
   22  *    may be used to endorse or promote products derived from this software
   23  *    without specific prior written permission.
   24  *
   25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   35  * SUCH DAMAGE.
   36  *
   37  *      from: @(#)machdep.c     7.4 (Berkeley) 6/3/91
   38  */
   39 
   40 #include <sys/cdefs.h>
   41 __FBSDID("$FreeBSD: releng/11.0/sys/i386/i386/machdep.c 298308 2016-04-19 23:41:46Z pfg $");
   42 
   43 #include "opt_apic.h"
   44 #include "opt_atpic.h"
   45 #include "opt_compat.h"
   46 #include "opt_cpu.h"
   47 #include "opt_ddb.h"
   48 #include "opt_inet.h"
   49 #include "opt_isa.h"
   50 #include "opt_kstack_pages.h"
   51 #include "opt_maxmem.h"
   52 #include "opt_mp_watchdog.h"
   53 #include "opt_npx.h"
   54 #include "opt_perfmon.h"
   55 #include "opt_platform.h"
   56 #include "opt_xbox.h"
   57 
   58 #include <sys/param.h>
   59 #include <sys/proc.h>
   60 #include <sys/systm.h>
   61 #include <sys/bio.h>
   62 #include <sys/buf.h>
   63 #include <sys/bus.h>
   64 #include <sys/callout.h>
   65 #include <sys/cons.h>
   66 #include <sys/cpu.h>
   67 #include <sys/eventhandler.h>
   68 #include <sys/exec.h>
   69 #include <sys/imgact.h>
   70 #include <sys/kdb.h>
   71 #include <sys/kernel.h>
   72 #include <sys/ktr.h>
   73 #include <sys/linker.h>
   74 #include <sys/lock.h>
   75 #include <sys/malloc.h>
   76 #include <sys/memrange.h>
   77 #include <sys/msgbuf.h>
   78 #include <sys/mutex.h>
   79 #include <sys/pcpu.h>
   80 #include <sys/ptrace.h>
   81 #include <sys/reboot.h>
   82 #include <sys/rwlock.h>
   83 #include <sys/sched.h>
   84 #include <sys/signalvar.h>
   85 #ifdef SMP
   86 #include <sys/smp.h>
   87 #endif
   88 #include <sys/syscallsubr.h>
   89 #include <sys/sysctl.h>
   90 #include <sys/sysent.h>
   91 #include <sys/sysproto.h>
   92 #include <sys/ucontext.h>
   93 #include <sys/vmmeter.h>
   94 
   95 #include <vm/vm.h>
   96 #include <vm/vm_extern.h>
   97 #include <vm/vm_kern.h>
   98 #include <vm/vm_page.h>
   99 #include <vm/vm_map.h>
  100 #include <vm/vm_object.h>
  101 #include <vm/vm_pager.h>
  102 #include <vm/vm_param.h>
  103 
  104 #ifdef DDB
  105 #ifndef KDB
  106 #error KDB must be enabled in order for DDB to work!
  107 #endif
  108 #include <ddb/ddb.h>
  109 #include <ddb/db_sym.h>
  110 #endif
  111 
  112 #ifdef PC98
  113 #include <pc98/pc98/pc98_machdep.h>
  114 #else
  115 #include <isa/rtc.h>
  116 #endif
  117 
  118 #include <net/netisr.h>
  119 
  120 #include <machine/bootinfo.h>
  121 #include <machine/clock.h>
  122 #include <machine/cpu.h>
  123 #include <machine/cputypes.h>
  124 #include <machine/intr_machdep.h>
  125 #include <x86/mca.h>
  126 #include <machine/md_var.h>
  127 #include <machine/metadata.h>
  128 #include <machine/mp_watchdog.h>
  129 #include <machine/pc/bios.h>
  130 #include <machine/pcb.h>
  131 #include <machine/pcb_ext.h>
  132 #include <machine/proc.h>
  133 #include <machine/reg.h>
  134 #include <machine/sigframe.h>
  135 #include <machine/specialreg.h>
  136 #include <machine/vm86.h>
  137 #include <x86/init.h>
  138 #ifdef PERFMON
  139 #include <machine/perfmon.h>
  140 #endif
  141 #ifdef SMP
  142 #include <machine/smp.h>
  143 #endif
  144 #ifdef FDT
  145 #include <x86/fdt.h>
  146 #endif
  147 
  148 #ifdef DEV_APIC
  149 #include <x86/apicvar.h>
  150 #endif
  151 
  152 #ifdef DEV_ISA
  153 #include <x86/isa/icu.h>
  154 #endif
  155 
  156 #ifdef XBOX
  157 #include <machine/xbox.h>
  158 
  159 int arch_i386_is_xbox = 0;
  160 uint32_t arch_i386_xbox_memsize = 0;
  161 #endif
  162 
  163 /* Sanity check for __curthread() */
  164 CTASSERT(offsetof(struct pcpu, pc_curthread) == 0);
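       /*
        * A minimal sketch of why the assertion above must hold: on
        * i386, __curthread() (machine/pcpu.h) reads the current thread
        * pointer straight from %fs:0, so pc_curthread has to be the
        * first member of struct pcpu.  Roughly:
        *
        *     struct thread *td;
        *     __asm("movl %%fs:0,%0" : "=r" (td));
        */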
  165 
  166 extern register_t init386(int first);
  167 extern void dblfault_handler(void);
  168 
  169 #if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
  170 #define CPU_ENABLE_SSE
  171 #endif
  172 
  173 static void cpu_startup(void *);
  174 static void fpstate_drop(struct thread *td);
  175 static void get_fpcontext(struct thread *td, mcontext_t *mcp,
  176     char *xfpusave, size_t xfpusave_len);
  177 static int  set_fpcontext(struct thread *td, mcontext_t *mcp,
  178     char *xfpustate, size_t xfpustate_len);
  179 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
  180 
  181 /* Intel ICH registers */
  182 #define ICH_PMBASE      0x400
   183 #define ICH_SMI_EN      (ICH_PMBASE + 0x30)
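       /*
        * Bit 3 of SMI_EN is the LEGACY_USB_EN bit that cpu_startup()
        * clears below.  A named mask would make that intent explicit;
        * a sketch (the name is hypothetical, not part of this file):
        *
        *     #define ICH_SMI_EN_LEGACY_USB   0x8
        *     outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~ICH_SMI_EN_LEGACY_USB);
        */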
  184 
  185 int     _udatasel, _ucodesel;
  186 u_int   basemem;
  187 
  188 #ifdef PC98
   189 int     need_pre_dma_flush;     /* If 1, use wbinvd before DMA transfer. */
  190 int     need_post_dma_flush;    /* If 1, use invd after DMA transfer. */
  191 
  192 static int      ispc98 = 1;
  193 SYSCTL_INT(_machdep, OID_AUTO, ispc98, CTLFLAG_RD, &ispc98, 0, "");
  194 #endif
  195 
  196 int cold = 1;
  197 
  198 #ifdef COMPAT_43
  199 static void osendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask);
  200 #endif
  201 #ifdef COMPAT_FREEBSD4
  202 static void freebsd4_sendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask);
  203 #endif
  204 
  205 long Maxmem = 0;
  206 long realmem = 0;
  207 
  208 #ifdef PAE
  209 FEATURE(pae, "Physical Address Extensions");
  210 #endif
  211 
  212 /*
  213  * The number of PHYSMAP entries must be one less than the number of
  214  * PHYSSEG entries because the PHYSMAP entry that spans the largest
  215  * physical address that is accessible by ISA DMA is split into two
  216  * PHYSSEG entries.
  217  */
  218 #define PHYSMAP_SIZE    (2 * (VM_PHYSSEG_MAX - 1))
  219 
  220 vm_paddr_t phys_avail[PHYSMAP_SIZE + 2];
  221 vm_paddr_t dump_avail[PHYSMAP_SIZE + 2];
  222 
   223 /* Must be 2 less so a terminating 0,0 pair can signal the end of the chunks. */
  224 #define PHYS_AVAIL_ARRAY_END (nitems(phys_avail) - 2)
  225 #define DUMP_AVAIL_ARRAY_END (nitems(dump_avail) - 2)
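       /*
        * A sketch of the layout these macros guard (addresses are
        * illustrative only): the arrays hold (start, end) pairs of
        * physical addresses, and a pair of zeroes ends the list:
        *
        *     phys_avail[0] = 0x00001000;  phys_avail[1] = 0x0009f000;
        *     phys_avail[2] = 0x00100000;  phys_avail[3] = 0x3fff0000;
        *     phys_avail[4] = 0;           phys_avail[5] = 0;
        *
        * cpu_startup() below walks phys_avail[] with exactly this
        * termination test.
        */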
  226 
  227 struct kva_md_info kmi;
  228 
  229 static struct trapframe proc0_tf;
  230 struct pcpu __pcpu[MAXCPU];
  231 
  232 struct mtx icu_lock;
  233 
  234 struct mem_range_softc mem_range_softc;
  235 
   236 /* Default init_ops implementation. */
   237 struct init_ops init_ops = {
  238         .early_clock_source_init =      i8254_init,
  239         .early_delay =                  i8254_delay,
  240 #ifdef DEV_APIC
  241         .msi_init =                     msi_init,
  242 #endif
   243 };
  244 
  245 static void
  246 cpu_startup(dummy)
  247         void *dummy;
  248 {
  249         uintmax_t memsize;
  250         char *sysenv;
  251 
  252 #ifndef PC98
  253         /*
  254          * On MacBooks, we need to disallow the legacy USB circuit to
  255          * generate an SMI# because this can cause several problems,
  256          * namely: incorrect CPU frequency detection and failure to
  257          * start the APs.
  258          * We do this by disabling a bit in the SMI_EN (SMI Control and
  259          * Enable register) of the Intel ICH LPC Interface Bridge.
  260          */
  261         sysenv = kern_getenv("smbios.system.product");
  262         if (sysenv != NULL) {
  263                 if (strncmp(sysenv, "MacBook1,1", 10) == 0 ||
  264                     strncmp(sysenv, "MacBook3,1", 10) == 0 ||
  265                     strncmp(sysenv, "MacBook4,1", 10) == 0 ||
  266                     strncmp(sysenv, "MacBookPro1,1", 13) == 0 ||
  267                     strncmp(sysenv, "MacBookPro1,2", 13) == 0 ||
  268                     strncmp(sysenv, "MacBookPro3,1", 13) == 0 ||
  269                     strncmp(sysenv, "MacBookPro4,1", 13) == 0 ||
  270                     strncmp(sysenv, "Macmini1,1", 10) == 0) {
  271                         if (bootverbose)
  272                                 printf("Disabling LEGACY_USB_EN bit on "
  273                                     "Intel ICH.\n");
  274                         outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8);
  275                 }
  276                 freeenv(sysenv);
  277         }
  278 #endif /* !PC98 */
  279 
  280         /*
  281          * Good {morning,afternoon,evening,night}.
  282          */
  283         startrtclock();
  284         printcpuinfo();
  285         panicifcpuunsupported();
  286 #ifdef PERFMON
  287         perfmon_init();
  288 #endif
  289 
  290         /*
   291          * Display physical memory if SMBIOS reports a reasonable amount.
  292          */
  293         memsize = 0;
  294         sysenv = kern_getenv("smbios.memory.enabled");
  295         if (sysenv != NULL) {
  296                 memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10;
  297                 freeenv(sysenv);
  298         }
  299         if (memsize < ptoa((uintmax_t)vm_cnt.v_free_count))
  300                 memsize = ptoa((uintmax_t)Maxmem);
  301         printf("real memory  = %ju (%ju MB)\n", memsize, memsize >> 20);
  302         realmem = atop(memsize);
  303 
  304         /*
  305          * Display any holes after the first chunk of extended memory.
  306          */
  307         if (bootverbose) {
  308                 int indx;
  309 
  310                 printf("Physical memory chunk(s):\n");
  311                 for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
  312                         vm_paddr_t size;
  313 
  314                         size = phys_avail[indx + 1] - phys_avail[indx];
  315                         printf(
  316                             "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n",
  317                             (uintmax_t)phys_avail[indx],
  318                             (uintmax_t)phys_avail[indx + 1] - 1,
  319                             (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
  320                 }
  321         }
  322 
  323         vm_ksubmap_init(&kmi);
  324 
  325         printf("avail memory = %ju (%ju MB)\n",
  326             ptoa((uintmax_t)vm_cnt.v_free_count),
  327             ptoa((uintmax_t)vm_cnt.v_free_count) / 1048576);
  328 
  329         /*
  330          * Set up buffers, so they can be used to read disk labels.
  331          */
  332         bufinit();
  333         vm_pager_bufferinit();
  334         cpu_setregs();
  335 }
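       /*
        * A note on the unit handling in cpu_startup() above:
        * "smbios.memory.enabled" is expressed in kilobytes, so the
        * "<< 10" converts it to bytes.  For example, a kenv value of
        * "4194304" (4 GB in KB) becomes 0x100000000 bytes, which the
        * ">> 20" in the printf then reports as 4096 MB.
        */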
  336 
  337 /*
   338  * Send a signal to a process.
   339  *
   340  * The stack is set up so that the sigcode stored at the top
   341  * can call the signal handler, followed by a call to the
   342  * sigreturn routine below.  After sigreturn resets the signal
   343  * mask, the stack, and the frame pointer, it returns to the
   344  * user-specified pc and psl.  See the sigreturn routines below
   345  * for the checks applied when the handler returns.
  346  */
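       /*
        * A userland-view sketch (assumption, not compiled here) of the
        * contract the sendsig() family implements: the frame it builds
        * is what a handler installed with SA_SIGINFO receives as its
        * three arguments.
        */
       #if 0
       /* userland: installed via sigaction() with sa_flags = SA_SIGINFO */
       static void
       handler(int sig, siginfo_t *si, void *ucp)
       {
               /* sig == sf_signum, si == &sfp->sf_si, ucp == &sfp->sf_uc */
       }
       #endif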
  347 #ifdef COMPAT_43
  348 static void
  349 osendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
  350 {
  351         struct osigframe sf, *fp;
  352         struct proc *p;
  353         struct thread *td;
  354         struct sigacts *psp;
  355         struct trapframe *regs;
  356         int sig;
  357         int oonstack;
  358 
  359         td = curthread;
  360         p = td->td_proc;
  361         PROC_LOCK_ASSERT(p, MA_OWNED);
  362         sig = ksi->ksi_signo;
  363         psp = p->p_sigacts;
  364         mtx_assert(&psp->ps_mtx, MA_OWNED);
  365         regs = td->td_frame;
  366         oonstack = sigonstack(regs->tf_esp);
  367 
  368         /* Allocate space for the signal handler context. */
  369         if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
  370             SIGISMEMBER(psp->ps_sigonstack, sig)) {
  371                 fp = (struct osigframe *)((uintptr_t)td->td_sigstk.ss_sp +
  372                     td->td_sigstk.ss_size - sizeof(struct osigframe));
  373 #if defined(COMPAT_43)
  374                 td->td_sigstk.ss_flags |= SS_ONSTACK;
  375 #endif
  376         } else
  377                 fp = (struct osigframe *)regs->tf_esp - 1;
  378 
  379         /* Build the argument list for the signal handler. */
  380         sf.sf_signum = sig;
  381         sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc;
  382         bzero(&sf.sf_siginfo, sizeof(sf.sf_siginfo));
  383         if (SIGISMEMBER(psp->ps_siginfo, sig)) {
  384                 /* Signal handler installed with SA_SIGINFO. */
  385                 sf.sf_arg2 = (register_t)&fp->sf_siginfo;
  386                 sf.sf_siginfo.si_signo = sig;
  387                 sf.sf_siginfo.si_code = ksi->ksi_code;
  388                 sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher;
  389                 sf.sf_addr = 0;
  390         } else {
  391                 /* Old FreeBSD-style arguments. */
  392                 sf.sf_arg2 = ksi->ksi_code;
  393                 sf.sf_addr = (register_t)ksi->ksi_addr;
  394                 sf.sf_ahu.sf_handler = catcher;
  395         }
  396         mtx_unlock(&psp->ps_mtx);
  397         PROC_UNLOCK(p);
  398 
  399         /* Save most if not all of trap frame. */
  400         sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax;
  401         sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx;
  402         sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx;
  403         sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx;
  404         sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi;
  405         sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi;
  406         sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs;
  407         sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds;
  408         sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss;
  409         sf.sf_siginfo.si_sc.sc_es = regs->tf_es;
  410         sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs;
  411         sf.sf_siginfo.si_sc.sc_gs = rgs();
  412         sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp;
  413 
  414         /* Build the signal context to be used by osigreturn(). */
  415         sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0;
  416         SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask);
  417         sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp;
  418         sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp;
  419         sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip;
  420         sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags;
  421         sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno;
  422         sf.sf_siginfo.si_sc.sc_err = regs->tf_err;
  423 
  424         /*
  425          * If we're a vm86 process, we want to save the segment registers.
  426          * We also change eflags to be our emulated eflags, not the actual
  427          * eflags.
  428          */
  429         if (regs->tf_eflags & PSL_VM) {
  430                 /* XXX confusing names: `tf' isn't a trapframe; `regs' is. */
  431                 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
  432                 struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86;
  433 
  434                 sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs;
  435                 sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs;
  436                 sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es;
  437                 sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds;
  438 
  439                 if (vm86->vm86_has_vme == 0)
  440                         sf.sf_siginfo.si_sc.sc_ps =
  441                             (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
  442                             (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
  443 
  444                 /* See sendsig() for comments. */
  445                 tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
  446         }
  447 
  448         /*
  449          * Copy the sigframe out to the user's stack.
  450          */
  451         if (copyout(&sf, fp, sizeof(*fp)) != 0) {
  452 #ifdef DEBUG
  453                 printf("process %ld has trashed its stack\n", (long)p->p_pid);
  454 #endif
  455                 PROC_LOCK(p);
  456                 sigexit(td, SIGILL);
  457         }
  458 
  459         regs->tf_esp = (int)fp;
  460         if (p->p_sysent->sv_sigcode_base != 0) {
  461                 regs->tf_eip = p->p_sysent->sv_sigcode_base + szsigcode -
  462                     szosigcode;
  463         } else {
  464                 /* a.out sysentvec does not use shared page */
  465                 regs->tf_eip = p->p_sysent->sv_psstrings - szosigcode;
  466         }
  467         regs->tf_eflags &= ~(PSL_T | PSL_D);
  468         regs->tf_cs = _ucodesel;
  469         regs->tf_ds = _udatasel;
  470         regs->tf_es = _udatasel;
  471         regs->tf_fs = _udatasel;
  472         load_gs(_udatasel);
  473         regs->tf_ss = _udatasel;
  474         PROC_LOCK(p);
  475         mtx_lock(&psp->ps_mtx);
  476 }
  477 #endif /* COMPAT_43 */
  478 
  479 #ifdef COMPAT_FREEBSD4
  480 static void
  481 freebsd4_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
  482 {
  483         struct sigframe4 sf, *sfp;
  484         struct proc *p;
  485         struct thread *td;
  486         struct sigacts *psp;
  487         struct trapframe *regs;
  488         int sig;
  489         int oonstack;
  490 
  491         td = curthread;
  492         p = td->td_proc;
  493         PROC_LOCK_ASSERT(p, MA_OWNED);
  494         sig = ksi->ksi_signo;
  495         psp = p->p_sigacts;
  496         mtx_assert(&psp->ps_mtx, MA_OWNED);
  497         regs = td->td_frame;
  498         oonstack = sigonstack(regs->tf_esp);
  499 
  500         /* Save user context. */
  501         bzero(&sf, sizeof(sf));
  502         sf.sf_uc.uc_sigmask = *mask;
  503         sf.sf_uc.uc_stack = td->td_sigstk;
  504         sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
  505             ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
  506         sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
  507         sf.sf_uc.uc_mcontext.mc_gs = rgs();
  508         bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs));
  509         bzero(sf.sf_uc.uc_mcontext.mc_fpregs,
  510             sizeof(sf.sf_uc.uc_mcontext.mc_fpregs));
  511         bzero(sf.sf_uc.uc_mcontext.__spare__,
  512             sizeof(sf.sf_uc.uc_mcontext.__spare__));
  513         bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__));
  514 
  515         /* Allocate space for the signal handler context. */
  516         if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
  517             SIGISMEMBER(psp->ps_sigonstack, sig)) {
  518                 sfp = (struct sigframe4 *)((uintptr_t)td->td_sigstk.ss_sp +
  519                     td->td_sigstk.ss_size - sizeof(struct sigframe4));
  520 #if defined(COMPAT_43)
  521                 td->td_sigstk.ss_flags |= SS_ONSTACK;
  522 #endif
  523         } else
  524                 sfp = (struct sigframe4 *)regs->tf_esp - 1;
  525 
  526         /* Build the argument list for the signal handler. */
  527         sf.sf_signum = sig;
  528         sf.sf_ucontext = (register_t)&sfp->sf_uc;
  529         bzero(&sf.sf_si, sizeof(sf.sf_si));
  530         if (SIGISMEMBER(psp->ps_siginfo, sig)) {
  531                 /* Signal handler installed with SA_SIGINFO. */
  532                 sf.sf_siginfo = (register_t)&sfp->sf_si;
  533                 sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;
  534 
  535                 /* Fill in POSIX parts */
  536                 sf.sf_si.si_signo = sig;
  537                 sf.sf_si.si_code = ksi->ksi_code;
  538                 sf.sf_si.si_addr = ksi->ksi_addr;
  539         } else {
  540                 /* Old FreeBSD-style arguments. */
  541                 sf.sf_siginfo = ksi->ksi_code;
  542                 sf.sf_addr = (register_t)ksi->ksi_addr;
  543                 sf.sf_ahu.sf_handler = catcher;
  544         }
  545         mtx_unlock(&psp->ps_mtx);
  546         PROC_UNLOCK(p);
  547 
  548         /*
  549          * If we're a vm86 process, we want to save the segment registers.
  550          * We also change eflags to be our emulated eflags, not the actual
  551          * eflags.
  552          */
  553         if (regs->tf_eflags & PSL_VM) {
  554                 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
  555                 struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86;
  556 
  557                 sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs;
  558                 sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs;
  559                 sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es;
  560                 sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds;
  561 
  562                 if (vm86->vm86_has_vme == 0)
  563                         sf.sf_uc.uc_mcontext.mc_eflags =
  564                             (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
  565                             (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
  566 
  567                 /*
  568                  * Clear PSL_NT to inhibit T_TSSFLT faults on return from
  569                  * syscalls made by the signal handler.  This just avoids
  570                  * wasting time for our lazy fixup of such faults.  PSL_NT
  571                  * does nothing in vm86 mode, but vm86 programs can set it
  572                  * almost legitimately in probes for old cpu types.
  573                  */
  574                 tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
  575         }
  576 
  577         /*
  578          * Copy the sigframe out to the user's stack.
  579          */
  580         if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
  581 #ifdef DEBUG
  582                 printf("process %ld has trashed its stack\n", (long)p->p_pid);
  583 #endif
  584                 PROC_LOCK(p);
  585                 sigexit(td, SIGILL);
  586         }
  587 
  588         regs->tf_esp = (int)sfp;
  589         regs->tf_eip = p->p_sysent->sv_sigcode_base + szsigcode -
  590             szfreebsd4_sigcode;
  591         regs->tf_eflags &= ~(PSL_T | PSL_D);
  592         regs->tf_cs = _ucodesel;
  593         regs->tf_ds = _udatasel;
  594         regs->tf_es = _udatasel;
  595         regs->tf_fs = _udatasel;
  596         regs->tf_ss = _udatasel;
  597         PROC_LOCK(p);
  598         mtx_lock(&psp->ps_mtx);
  599 }
  600 #endif  /* COMPAT_FREEBSD4 */
  601 
  602 void
  603 sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
  604 {
  605         struct sigframe sf, *sfp;
  606         struct proc *p;
  607         struct thread *td;
  608         struct sigacts *psp;
  609         char *sp;
  610         struct trapframe *regs;
  611         struct segment_descriptor *sdp;
  612         char *xfpusave;
  613         size_t xfpusave_len;
  614         int sig;
  615         int oonstack;
  616 
  617         td = curthread;
  618         p = td->td_proc;
  619         PROC_LOCK_ASSERT(p, MA_OWNED);
  620         sig = ksi->ksi_signo;
  621         psp = p->p_sigacts;
  622         mtx_assert(&psp->ps_mtx, MA_OWNED);
  623 #ifdef COMPAT_FREEBSD4
  624         if (SIGISMEMBER(psp->ps_freebsd4, sig)) {
  625                 freebsd4_sendsig(catcher, ksi, mask);
  626                 return;
  627         }
  628 #endif
  629 #ifdef COMPAT_43
  630         if (SIGISMEMBER(psp->ps_osigset, sig)) {
  631                 osendsig(catcher, ksi, mask);
  632                 return;
  633         }
  634 #endif
  635         regs = td->td_frame;
  636         oonstack = sigonstack(regs->tf_esp);
  637 
  638 #ifdef CPU_ENABLE_SSE
  639         if (cpu_max_ext_state_size > sizeof(union savefpu) && use_xsave) {
  640                 xfpusave_len = cpu_max_ext_state_size - sizeof(union savefpu);
  641                 xfpusave = __builtin_alloca(xfpusave_len);
  642         } else {
  643 #else
  644         {
  645 #endif
  646                 xfpusave_len = 0;
  647                 xfpusave = NULL;
  648         }
  649 
  650         /* Save user context. */
  651         bzero(&sf, sizeof(sf));
  652         sf.sf_uc.uc_sigmask = *mask;
  653         sf.sf_uc.uc_stack = td->td_sigstk;
  654         sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
  655             ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
  656         sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
  657         sf.sf_uc.uc_mcontext.mc_gs = rgs();
  658         bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs));
  659         sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
  660         get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len);
  661         fpstate_drop(td);
  662         /*
   663          * Unconditionally fill in the fsbase and gsbase fields of the mcontext.
  664          */
  665         sdp = &td->td_pcb->pcb_fsd;
  666         sf.sf_uc.uc_mcontext.mc_fsbase = sdp->sd_hibase << 24 |
  667             sdp->sd_lobase;
  668         sdp = &td->td_pcb->pcb_gsd;
  669         sf.sf_uc.uc_mcontext.mc_gsbase = sdp->sd_hibase << 24 |
  670             sdp->sd_lobase;
  671         bzero(sf.sf_uc.uc_mcontext.mc_spare2,
  672             sizeof(sf.sf_uc.uc_mcontext.mc_spare2));
  673         bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__));
  674 
  675         /* Allocate space for the signal handler context. */
  676         if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
  677             SIGISMEMBER(psp->ps_sigonstack, sig)) {
  678                 sp = (char *)td->td_sigstk.ss_sp + td->td_sigstk.ss_size;
  679 #if defined(COMPAT_43)
  680                 td->td_sigstk.ss_flags |= SS_ONSTACK;
  681 #endif
  682         } else
  683                 sp = (char *)regs->tf_esp - 128;
  684         if (xfpusave != NULL) {
  685                 sp -= xfpusave_len;
  686                 sp = (char *)((unsigned int)sp & ~0x3F);
  687                 sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp;
  688         }
  689         sp -= sizeof(struct sigframe);
  690 
  691         /* Align to 16 bytes. */
  692         sfp = (struct sigframe *)((unsigned int)sp & ~0xF);
  693 
  694         /* Build the argument list for the signal handler. */
  695         sf.sf_signum = sig;
  696         sf.sf_ucontext = (register_t)&sfp->sf_uc;
  697         bzero(&sf.sf_si, sizeof(sf.sf_si));
  698         if (SIGISMEMBER(psp->ps_siginfo, sig)) {
  699                 /* Signal handler installed with SA_SIGINFO. */
  700                 sf.sf_siginfo = (register_t)&sfp->sf_si;
  701                 sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;
  702 
  703                 /* Fill in POSIX parts */
  704                 sf.sf_si = ksi->ksi_info;
  705                 sf.sf_si.si_signo = sig; /* maybe a translated signal */
  706         } else {
  707                 /* Old FreeBSD-style arguments. */
  708                 sf.sf_siginfo = ksi->ksi_code;
  709                 sf.sf_addr = (register_t)ksi->ksi_addr;
  710                 sf.sf_ahu.sf_handler = catcher;
  711         }
  712         mtx_unlock(&psp->ps_mtx);
  713         PROC_UNLOCK(p);
  714 
  715         /*
  716          * If we're a vm86 process, we want to save the segment registers.
  717          * We also change eflags to be our emulated eflags, not the actual
  718          * eflags.
  719          */
  720         if (regs->tf_eflags & PSL_VM) {
  721                 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
  722                 struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86;
  723 
  724                 sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs;
  725                 sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs;
  726                 sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es;
  727                 sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds;
  728 
  729                 if (vm86->vm86_has_vme == 0)
  730                         sf.sf_uc.uc_mcontext.mc_eflags =
  731                             (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
  732                             (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
  733 
  734                 /*
  735                  * Clear PSL_NT to inhibit T_TSSFLT faults on return from
  736                  * syscalls made by the signal handler.  This just avoids
  737                  * wasting time for our lazy fixup of such faults.  PSL_NT
  738                  * does nothing in vm86 mode, but vm86 programs can set it
  739                  * almost legitimately in probes for old cpu types.
  740                  */
  741                 tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
  742         }
  743 
  744         /*
  745          * Copy the sigframe out to the user's stack.
  746          */
  747         if (copyout(&sf, sfp, sizeof(*sfp)) != 0 ||
  748             (xfpusave != NULL && copyout(xfpusave,
  749             (void *)sf.sf_uc.uc_mcontext.mc_xfpustate, xfpusave_len)
  750             != 0)) {
  751 #ifdef DEBUG
  752                 printf("process %ld has trashed its stack\n", (long)p->p_pid);
  753 #endif
  754                 PROC_LOCK(p);
  755                 sigexit(td, SIGILL);
  756         }
  757 
  758         regs->tf_esp = (int)sfp;
  759         regs->tf_eip = p->p_sysent->sv_sigcode_base;
  760         if (regs->tf_eip == 0)
  761                 regs->tf_eip = p->p_sysent->sv_psstrings - szsigcode;
  762         regs->tf_eflags &= ~(PSL_T | PSL_D);
  763         regs->tf_cs = _ucodesel;
  764         regs->tf_ds = _udatasel;
  765         regs->tf_es = _udatasel;
  766         regs->tf_fs = _udatasel;
  767         regs->tf_ss = _udatasel;
  768         PROC_LOCK(p);
  769         mtx_lock(&psp->ps_mtx);
  770 }
  771 
  772 /*
  773  * System call to cleanup state after a signal
  774  * has been taken.  Reset signal mask and
  775  * stack state from context left by sendsig (above).
  776  * Return to previous pc and psl as specified by
  777  * context left by sendsig. Check carefully to
  778  * make sure that the user has not modified the
  779  * state to gain improper privileges.
  780  *
  781  * MPSAFE
  782  */
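       /*
        * The EFL_SECURE() and CS_SECURE() checks used below are defined
        * in the i386 headers; their intent, roughly (a sketch, not the
        * verbatim definitions), is:
        *
        *     EFL_SECURE(ef, oef): the new eflags may differ from the
        *         old ones only in the PSL_USERCHANGE bits;
        *     CS_SECURE(cs): the requested %cs selector must be a user
        *         privilege selector, i.e. ISPL(cs) == SEL_UPL.
        */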
  783 #ifdef COMPAT_43
  784 int
  785 osigreturn(td, uap)
  786         struct thread *td;
  787         struct osigreturn_args /* {
  788                 struct osigcontext *sigcntxp;
  789         } */ *uap;
  790 {
  791         struct osigcontext sc;
  792         struct trapframe *regs;
  793         struct osigcontext *scp;
  794         int eflags, error;
  795         ksiginfo_t ksi;
  796 
  797         regs = td->td_frame;
  798         error = copyin(uap->sigcntxp, &sc, sizeof(sc));
  799         if (error != 0)
  800                 return (error);
  801         scp = &sc;
  802         eflags = scp->sc_ps;
  803         if (eflags & PSL_VM) {
  804                 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
  805                 struct vm86_kernel *vm86;
  806 
  807                 /*
  808                  * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
  809                  * set up the vm86 area, and we can't enter vm86 mode.
  810                  */
  811                 if (td->td_pcb->pcb_ext == 0)
  812                         return (EINVAL);
  813                 vm86 = &td->td_pcb->pcb_ext->ext_vm86;
  814                 if (vm86->vm86_inited == 0)
  815                         return (EINVAL);
  816 
  817                 /* Go back to user mode if both flags are set. */
  818                 if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) {
  819                         ksiginfo_init_trap(&ksi);
  820                         ksi.ksi_signo = SIGBUS;
  821                         ksi.ksi_code = BUS_OBJERR;
  822                         ksi.ksi_addr = (void *)regs->tf_eip;
  823                         trapsignal(td, &ksi);
  824                 }
  825 
  826                 if (vm86->vm86_has_vme) {
  827                         eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
  828                             (eflags & VME_USERCHANGE) | PSL_VM;
  829                 } else {
  830                         vm86->vm86_eflags = eflags;     /* save VIF, VIP */
  831                         eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
  832                             (eflags & VM_USERCHANGE) | PSL_VM;
  833                 }
  834                 tf->tf_vm86_ds = scp->sc_ds;
  835                 tf->tf_vm86_es = scp->sc_es;
  836                 tf->tf_vm86_fs = scp->sc_fs;
  837                 tf->tf_vm86_gs = scp->sc_gs;
  838                 tf->tf_ds = _udatasel;
  839                 tf->tf_es = _udatasel;
  840                 tf->tf_fs = _udatasel;
  841         } else {
  842                 /*
  843                  * Don't allow users to change privileged or reserved flags.
  844                  */
  845                 if (!EFL_SECURE(eflags, regs->tf_eflags)) {
  846                         return (EINVAL);
  847                 }
  848 
  849                 /*
  850                  * Don't allow users to load a valid privileged %cs.  Let the
  851                  * hardware check for invalid selectors, excess privilege in
  852                  * other selectors, invalid %eip's and invalid %esp's.
  853                  */
  854                 if (!CS_SECURE(scp->sc_cs)) {
  855                         ksiginfo_init_trap(&ksi);
  856                         ksi.ksi_signo = SIGBUS;
  857                         ksi.ksi_code = BUS_OBJERR;
  858                         ksi.ksi_trapno = T_PROTFLT;
  859                         ksi.ksi_addr = (void *)regs->tf_eip;
  860                         trapsignal(td, &ksi);
  861                         return (EINVAL);
  862                 }
  863                 regs->tf_ds = scp->sc_ds;
  864                 regs->tf_es = scp->sc_es;
  865                 regs->tf_fs = scp->sc_fs;
  866         }
  867 
  868         /* Restore remaining registers. */
  869         regs->tf_eax = scp->sc_eax;
  870         regs->tf_ebx = scp->sc_ebx;
  871         regs->tf_ecx = scp->sc_ecx;
  872         regs->tf_edx = scp->sc_edx;
  873         regs->tf_esi = scp->sc_esi;
  874         regs->tf_edi = scp->sc_edi;
  875         regs->tf_cs = scp->sc_cs;
  876         regs->tf_ss = scp->sc_ss;
  877         regs->tf_isp = scp->sc_isp;
  878         regs->tf_ebp = scp->sc_fp;
  879         regs->tf_esp = scp->sc_sp;
  880         regs->tf_eip = scp->sc_pc;
  881         regs->tf_eflags = eflags;
  882 
  883 #if defined(COMPAT_43)
  884         if (scp->sc_onstack & 1)
  885                 td->td_sigstk.ss_flags |= SS_ONSTACK;
  886         else
  887                 td->td_sigstk.ss_flags &= ~SS_ONSTACK;
  888 #endif
  889         kern_sigprocmask(td, SIG_SETMASK, (sigset_t *)&scp->sc_mask, NULL,
  890             SIGPROCMASK_OLD);
  891         return (EJUSTRETURN);
  892 }
  893 #endif /* COMPAT_43 */
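       /*
        * A note on the vm86 eflags merge in the sigreturn family: an
        * expression of the form
        *
        *     (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE)
        *
        * keeps the kernel-owned bits from the live frame and takes only
        * the user-changeable bits from the user-supplied context; with
        * VME support, VIF and VIP are user-changeable as well, which is
        * what the wider VME_USERCHANGE mask expresses.
        */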
  894 
  895 #ifdef COMPAT_FREEBSD4
  896 /*
  897  * MPSAFE
  898  */
  899 int
  900 freebsd4_sigreturn(td, uap)
  901         struct thread *td;
  902         struct freebsd4_sigreturn_args /* {
  903                 const ucontext4 *sigcntxp;
  904         } */ *uap;
  905 {
  906         struct ucontext4 uc;
  907         struct trapframe *regs;
  908         struct ucontext4 *ucp;
  909         int cs, eflags, error;
  910         ksiginfo_t ksi;
  911 
  912         error = copyin(uap->sigcntxp, &uc, sizeof(uc));
  913         if (error != 0)
  914                 return (error);
  915         ucp = &uc;
  916         regs = td->td_frame;
  917         eflags = ucp->uc_mcontext.mc_eflags;
  918         if (eflags & PSL_VM) {
  919                 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
  920                 struct vm86_kernel *vm86;
  921 
  922                 /*
  923                  * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
  924                  * set up the vm86 area, and we can't enter vm86 mode.
  925                  */
  926                 if (td->td_pcb->pcb_ext == 0)
  927                         return (EINVAL);
  928                 vm86 = &td->td_pcb->pcb_ext->ext_vm86;
  929                 if (vm86->vm86_inited == 0)
  930                         return (EINVAL);
  931 
  932                 /* Go back to user mode if both flags are set. */
  933                 if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) {
  934                         ksiginfo_init_trap(&ksi);
  935                         ksi.ksi_signo = SIGBUS;
  936                         ksi.ksi_code = BUS_OBJERR;
  937                         ksi.ksi_addr = (void *)regs->tf_eip;
  938                         trapsignal(td, &ksi);
  939                 }
  940                 if (vm86->vm86_has_vme) {
  941                         eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
  942                             (eflags & VME_USERCHANGE) | PSL_VM;
  943                 } else {
  944                         vm86->vm86_eflags = eflags;     /* save VIF, VIP */
  945                         eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
  946                             (eflags & VM_USERCHANGE) | PSL_VM;
  947                 }
  948                 bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe));
  949                 tf->tf_eflags = eflags;
  950                 tf->tf_vm86_ds = tf->tf_ds;
  951                 tf->tf_vm86_es = tf->tf_es;
  952                 tf->tf_vm86_fs = tf->tf_fs;
  953                 tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs;
  954                 tf->tf_ds = _udatasel;
  955                 tf->tf_es = _udatasel;
  956                 tf->tf_fs = _udatasel;
  957         } else {
  958                 /*
  959                  * Don't allow users to change privileged or reserved flags.
  960                  */
  961                 if (!EFL_SECURE(eflags, regs->tf_eflags)) {
  962                         uprintf("pid %d (%s): freebsd4_sigreturn eflags = 0x%x\n",
  963                             td->td_proc->p_pid, td->td_name, eflags);
  964                         return (EINVAL);
  965                 }
  966 
  967                 /*
  968                  * Don't allow users to load a valid privileged %cs.  Let the
  969                  * hardware check for invalid selectors, excess privilege in
  970                  * other selectors, invalid %eip's and invalid %esp's.
  971                  */
  972                 cs = ucp->uc_mcontext.mc_cs;
  973                 if (!CS_SECURE(cs)) {
  974                         uprintf("pid %d (%s): freebsd4_sigreturn cs = 0x%x\n",
  975                             td->td_proc->p_pid, td->td_name, cs);
  976                         ksiginfo_init_trap(&ksi);
  977                         ksi.ksi_signo = SIGBUS;
  978                         ksi.ksi_code = BUS_OBJERR;
  979                         ksi.ksi_trapno = T_PROTFLT;
  980                         ksi.ksi_addr = (void *)regs->tf_eip;
  981                         trapsignal(td, &ksi);
  982                         return (EINVAL);
  983                 }
  984 
  985                 bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
  986         }
  987 
  988 #if defined(COMPAT_43)
  989         if (ucp->uc_mcontext.mc_onstack & 1)
  990                 td->td_sigstk.ss_flags |= SS_ONSTACK;
  991         else
  992                 td->td_sigstk.ss_flags &= ~SS_ONSTACK;
  993 #endif
  994         kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0);
  995         return (EJUSTRETURN);
  996 }
  997 #endif  /* COMPAT_FREEBSD4 */
  998 
  999 /*
 1000  * MPSAFE
 1001  */
 1002 int
 1003 sys_sigreturn(td, uap)
 1004         struct thread *td;
 1005         struct sigreturn_args /* {
 1006                 const struct __ucontext *sigcntxp;
 1007         } */ *uap;
 1008 {
 1009         ucontext_t uc;
 1010         struct proc *p;
 1011         struct trapframe *regs;
 1012         ucontext_t *ucp;
 1013         char *xfpustate;
 1014         size_t xfpustate_len;
 1015         int cs, eflags, error, ret;
 1016         ksiginfo_t ksi;
 1017 
 1018         p = td->td_proc;
 1019 
 1020         error = copyin(uap->sigcntxp, &uc, sizeof(uc));
 1021         if (error != 0)
 1022                 return (error);
 1023         ucp = &uc;
 1024         if ((ucp->uc_mcontext.mc_flags & ~_MC_FLAG_MASK) != 0) {
 1025                 uprintf("pid %d (%s): sigreturn mc_flags %x\n", p->p_pid,
 1026                     td->td_name, ucp->uc_mcontext.mc_flags);
 1027                 return (EINVAL);
 1028         }
 1029         regs = td->td_frame;
 1030         eflags = ucp->uc_mcontext.mc_eflags;
 1031         if (eflags & PSL_VM) {
 1032                 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 1033                 struct vm86_kernel *vm86;
 1034 
 1035                 /*
 1036                  * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
 1037                  * set up the vm86 area, and we can't enter vm86 mode.
 1038                  */
 1039                 if (td->td_pcb->pcb_ext == 0)
 1040                         return (EINVAL);
 1041                 vm86 = &td->td_pcb->pcb_ext->ext_vm86;
 1042                 if (vm86->vm86_inited == 0)
 1043                         return (EINVAL);
 1044 
 1045                 /* Go back to user mode if both flags are set. */
 1046                 if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) {
 1047                         ksiginfo_init_trap(&ksi);
 1048                         ksi.ksi_signo = SIGBUS;
 1049                         ksi.ksi_code = BUS_OBJERR;
 1050                         ksi.ksi_addr = (void *)regs->tf_eip;
 1051                         trapsignal(td, &ksi);
 1052                 }
 1053 
 1054                 if (vm86->vm86_has_vme) {
 1055                         eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
 1056                             (eflags & VME_USERCHANGE) | PSL_VM;
 1057                 } else {
 1058                         vm86->vm86_eflags = eflags;     /* save VIF, VIP */
 1059                         eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
 1060                             (eflags & VM_USERCHANGE) | PSL_VM;
 1061                 }
 1062                 bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe));
 1063                 tf->tf_eflags = eflags;
 1064                 tf->tf_vm86_ds = tf->tf_ds;
 1065                 tf->tf_vm86_es = tf->tf_es;
 1066                 tf->tf_vm86_fs = tf->tf_fs;
 1067                 tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs;
 1068                 tf->tf_ds = _udatasel;
 1069                 tf->tf_es = _udatasel;
 1070                 tf->tf_fs = _udatasel;
 1071         } else {
 1072                 /*
 1073                  * Don't allow users to change privileged or reserved flags.
 1074                  */
 1075                 if (!EFL_SECURE(eflags, regs->tf_eflags)) {
 1076                         uprintf("pid %d (%s): sigreturn eflags = 0x%x\n",
 1077                             td->td_proc->p_pid, td->td_name, eflags);
 1078                         return (EINVAL);
 1079                 }
 1080 
 1081                 /*
 1082                  * Don't allow users to load a valid privileged %cs.  Let the
 1083                  * hardware check for invalid selectors, excess privilege in
 1084                  * other selectors, invalid %eip's and invalid %esp's.
 1085                  */
 1086                 cs = ucp->uc_mcontext.mc_cs;
 1087                 if (!CS_SECURE(cs)) {
 1088                         uprintf("pid %d (%s): sigreturn cs = 0x%x\n",
 1089                             td->td_proc->p_pid, td->td_name, cs);
 1090                         ksiginfo_init_trap(&ksi);
 1091                         ksi.ksi_signo = SIGBUS;
 1092                         ksi.ksi_code = BUS_OBJERR;
 1093                         ksi.ksi_trapno = T_PROTFLT;
 1094                         ksi.ksi_addr = (void *)regs->tf_eip;
 1095                         trapsignal(td, &ksi);
 1096                         return (EINVAL);
 1097                 }
 1098 
 1099                 if ((uc.uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) {
 1100                         xfpustate_len = uc.uc_mcontext.mc_xfpustate_len;
 1101                         if (xfpustate_len > cpu_max_ext_state_size -
 1102                             sizeof(union savefpu)) {
 1103                                 uprintf(
 1104                             "pid %d (%s): sigreturn xfpusave_len = 0x%zx\n",
 1105                                     p->p_pid, td->td_name, xfpustate_len);
 1106                                 return (EINVAL);
 1107                         }
 1108                         xfpustate = __builtin_alloca(xfpustate_len);
 1109                         error = copyin((const void *)uc.uc_mcontext.mc_xfpustate,
 1110                             xfpustate, xfpustate_len);
 1111                         if (error != 0) {
 1112                                 uprintf(
 1113         "pid %d (%s): sigreturn copying xfpustate failed\n",
 1114                                     p->p_pid, td->td_name);
 1115                                 return (error);
 1116                         }
 1117                 } else {
 1118                         xfpustate = NULL;
 1119                         xfpustate_len = 0;
 1120                 }
 1121                 ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate,
 1122                     xfpustate_len);
 1123                 if (ret != 0)
 1124                         return (ret);
 1125                 bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
 1126         }
 1127 
 1128 #if defined(COMPAT_43)
 1129         if (ucp->uc_mcontext.mc_onstack & 1)
 1130                 td->td_sigstk.ss_flags |= SS_ONSTACK;
 1131         else
 1132                 td->td_sigstk.ss_flags &= ~SS_ONSTACK;
 1133 #endif
 1134 
 1135         kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0);
 1136         return (EJUSTRETURN);
 1137 }
 1138 
 1139 /*
 1140  * Reset registers to default values on exec.
 1141  */
 1142 void
 1143 exec_setregs(struct thread *td, struct image_params *imgp, u_long stack)
 1144 {
 1145         struct trapframe *regs = td->td_frame;
 1146         struct pcb *pcb = td->td_pcb;
 1147 
  1148         /* Reset pcb->pcb_gs and %gs before possibly invalidating it. */
 1149         pcb->pcb_gs = _udatasel;
 1150         load_gs(_udatasel);
 1151 
 1152         mtx_lock_spin(&dt_lock);
 1153         if (td->td_proc->p_md.md_ldt)
 1154                 user_ldt_free(td);
 1155         else
 1156                 mtx_unlock_spin(&dt_lock);
 1157   
 1158         bzero((char *)regs, sizeof(struct trapframe));
 1159         regs->tf_eip = imgp->entry_addr;
 1160         regs->tf_esp = stack;
 1161         regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T);
 1162         regs->tf_ss = _udatasel;
 1163         regs->tf_ds = _udatasel;
 1164         regs->tf_es = _udatasel;
 1165         regs->tf_fs = _udatasel;
 1166         regs->tf_cs = _ucodesel;
 1167 
 1168         /* PS_STRINGS value for BSD/OS binaries.  It is 0 for non-BSD/OS. */
 1169         regs->tf_ebx = imgp->ps_strings;
 1170 
 1171         /*
 1172          * Reset the hardware debug registers if they were in use.
 1173          * They won't have any meaning for the newly exec'd process.  
 1174          */
 1175         if (pcb->pcb_flags & PCB_DBREGS) {
 1176                 pcb->pcb_dr0 = 0;
 1177                 pcb->pcb_dr1 = 0;
 1178                 pcb->pcb_dr2 = 0;
 1179                 pcb->pcb_dr3 = 0;
 1180                 pcb->pcb_dr6 = 0;
 1181                 pcb->pcb_dr7 = 0;
 1182                 if (pcb == curpcb) {
 1183                         /*
 1184                          * Clear the debug registers on the running
 1185                          * CPU, otherwise they will end up affecting
 1186                          * the next process we switch to.
 1187                          */
 1188                         reset_dbregs();
 1189                 }
 1190                 pcb->pcb_flags &= ~PCB_DBREGS;
 1191         }
 1192 
 1193         pcb->pcb_initial_npxcw = __INITIAL_NPXCW__;
 1194 
 1195         /*
 1196          * Drop the FP state if we hold it, so that the process gets a
 1197          * clean FP state if it uses the FPU again.
 1198          */
 1199         fpstate_drop(td);
 1200 
 1201         /*
 1202          * XXX - Linux emulator
  1203          * Make sure edx is 0x0 on entry. Linux binaries depend
 1204          * on it.
 1205          */
 1206         td->td_retval[1] = 0;
 1207 }
 1208 
 1209 void
 1210 cpu_setregs(void)
 1211 {
 1212         unsigned int cr0;
 1213 
 1214         cr0 = rcr0();
 1215 
 1216         /*
 1217          * CR0_MP, CR0_NE and CR0_TS are set for NPX (FPU) support:
 1218          *
 1219          * Prepare to trap all ESC (i.e., NPX) instructions and all WAIT
 1220          * instructions.  We must set the CR0_MP bit and use the CR0_TS
 1221          * bit to control the trap, because setting the CR0_EM bit does
 1222          * not cause WAIT instructions to trap.  It's important to trap
 1223          * WAIT instructions - otherwise the "wait" variants of no-wait
 1224          * control instructions would degenerate to the "no-wait" variants
 1225          * after FP context switches but work correctly otherwise.  It's
 1226          * particularly important to trap WAITs when there is no NPX -
 1227          * otherwise the "wait" variants would always degenerate.
 1228          *
 1229          * Try setting CR0_NE to get correct error reporting on 486DX's.
 1230          * Setting it should fail or do nothing on lesser processors.
 1231          */
 1232         cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM;
 1233         load_cr0(cr0);
 1234         load_gs(_udatasel);
 1235 }
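       /*
        * A sketch of the flow the CR0 settings above produce: with
        * CR0_TS set, the first FPU (or WAIT) instruction a thread
        * executes raises the device-not-available trap (T_DNA).  The
        * npx code then loads that thread's FP state and clears TS, so
        * the FPU runs without trapping until the next context switch
        * sets TS again.  Clearing TS amounts to:
        *
        *     load_cr0(rcr0() & ~CR0_TS);     (what the clts insn does)
        */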
 1236 
 1237 u_long bootdev;         /* not a struct cdev *- encoding is different */
 1238 SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev,
  1239         CTLFLAG_RD, &bootdev, 0, "Guessed boot device (not in struct cdev * format)");
 1240 
 1241 static char bootmethod[16] = "BIOS";
 1242 SYSCTL_STRING(_machdep, OID_AUTO, bootmethod, CTLFLAG_RD, bootmethod, 0,
 1243     "System firmware boot method");
 1244 
 1245 /*
 1246  * Initialize 386 and configure to run kernel
 1247  */
 1248 
 1249 /*
 1250  * Initialize segments & interrupt table
 1251  */
 1252 
 1253 int _default_ldt;
 1254 
 1255 union descriptor gdt[NGDT * MAXCPU];    /* global descriptor table */
 1256 union descriptor ldt[NLDT];             /* local descriptor table */
 1257 static struct gate_descriptor idt0[NIDT];
 1258 struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */
 1259 struct region_descriptor r_gdt, r_idt;  /* table descriptors */
 1260 struct mtx dt_lock;                     /* lock for GDT and LDT */
 1261 
 1262 static struct i386tss dblfault_tss;
 1263 static char dblfault_stack[PAGE_SIZE];
 1264 
 1265 extern  vm_offset_t     proc0kstack;
 1266 
 1267 
 1268 /*
 1269  * software prototypes -- in more palatable form.
 1270  *
 1271  * GCODE_SEL through GUDATA_SEL must be in this order for syscall/sysret
 1272  * GUFS_SEL and GUGS_SEL must be in this order (swtch.s knows it)
 1273  */
 1274 struct soft_segment_descriptor gdt_segs[] = {
 1275 /* GNULL_SEL    0 Null Descriptor */
 1276 {       .ssd_base = 0x0,
 1277         .ssd_limit = 0x0,
 1278         .ssd_type = 0,
 1279         .ssd_dpl = SEL_KPL,
 1280         .ssd_p = 0,
 1281         .ssd_xx = 0, .ssd_xx1 = 0,
 1282         .ssd_def32 = 0,
 1283         .ssd_gran = 0           },
 1284 /* GPRIV_SEL    1 SMP Per-Processor Private Data Descriptor */
 1285 {       .ssd_base = 0x0,
 1286         .ssd_limit = 0xfffff,
 1287         .ssd_type = SDT_MEMRWA,
 1288         .ssd_dpl = SEL_KPL,
 1289         .ssd_p = 1,
 1290         .ssd_xx = 0, .ssd_xx1 = 0,
 1291         .ssd_def32 = 1,
 1292         .ssd_gran = 1           },
 1293 /* GUFS_SEL     2 %fs Descriptor for user */
 1294 {       .ssd_base = 0x0,
 1295         .ssd_limit = 0xfffff,
 1296         .ssd_type = SDT_MEMRWA,
 1297         .ssd_dpl = SEL_UPL,
 1298         .ssd_p = 1,
 1299         .ssd_xx = 0, .ssd_xx1 = 0,
 1300         .ssd_def32 = 1,
 1301         .ssd_gran = 1           },
 1302 /* GUGS_SEL     3 %gs Descriptor for user */
 1303 {       .ssd_base = 0x0,
 1304         .ssd_limit = 0xfffff,
 1305         .ssd_type = SDT_MEMRWA,
 1306         .ssd_dpl = SEL_UPL,
 1307         .ssd_p = 1,
 1308         .ssd_xx = 0, .ssd_xx1 = 0,
 1309         .ssd_def32 = 1,
 1310         .ssd_gran = 1           },
 1311 /* GCODE_SEL    4 Code Descriptor for kernel */
 1312 {       .ssd_base = 0x0,
 1313         .ssd_limit = 0xfffff,
 1314         .ssd_type = SDT_MEMERA,
 1315         .ssd_dpl = SEL_KPL,
 1316         .ssd_p = 1,
 1317         .ssd_xx = 0, .ssd_xx1 = 0,
 1318         .ssd_def32 = 1,
 1319         .ssd_gran = 1           },
 1320 /* GDATA_SEL    5 Data Descriptor for kernel */
 1321 {       .ssd_base = 0x0,
 1322         .ssd_limit = 0xfffff,
 1323         .ssd_type = SDT_MEMRWA,
 1324         .ssd_dpl = SEL_KPL,
 1325         .ssd_p = 1,
 1326         .ssd_xx = 0, .ssd_xx1 = 0,
 1327         .ssd_def32 = 1,
 1328         .ssd_gran = 1           },
 1329 /* GUCODE_SEL   6 Code Descriptor for user */
 1330 {       .ssd_base = 0x0,
 1331         .ssd_limit = 0xfffff,
 1332         .ssd_type = SDT_MEMERA,
 1333         .ssd_dpl = SEL_UPL,
 1334         .ssd_p = 1,
 1335         .ssd_xx = 0, .ssd_xx1 = 0,
 1336         .ssd_def32 = 1,
 1337         .ssd_gran = 1           },
 1338 /* GUDATA_SEL   7 Data Descriptor for user */
 1339 {       .ssd_base = 0x0,
 1340         .ssd_limit = 0xfffff,
 1341         .ssd_type = SDT_MEMRWA,
 1342         .ssd_dpl = SEL_UPL,
 1343         .ssd_p = 1,
 1344         .ssd_xx = 0, .ssd_xx1 = 0,
 1345         .ssd_def32 = 1,
 1346         .ssd_gran = 1           },
 1347 /* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */
 1348 {       .ssd_base = 0x400,
 1349         .ssd_limit = 0xfffff,
 1350         .ssd_type = SDT_MEMRWA,
 1351         .ssd_dpl = SEL_KPL,
 1352         .ssd_p = 1,
 1353         .ssd_xx = 0, .ssd_xx1 = 0,
 1354         .ssd_def32 = 1,
 1355         .ssd_gran = 1           },
 1356 /* GPROC0_SEL   9 Proc 0 Tss Descriptor */
 1357 {
 1358         .ssd_base = 0x0,
 1359         .ssd_limit = sizeof(struct i386tss)-1,
 1360         .ssd_type = SDT_SYS386TSS,
 1361         .ssd_dpl = 0,
 1362         .ssd_p = 1,
 1363         .ssd_xx = 0, .ssd_xx1 = 0,
 1364         .ssd_def32 = 0,
 1365         .ssd_gran = 0           },
 1366 /* GLDT_SEL     10 LDT Descriptor */
 1367 {       .ssd_base = (int) ldt,
 1368         .ssd_limit = sizeof(ldt)-1,
 1369         .ssd_type = SDT_SYSLDT,
 1370         .ssd_dpl = SEL_UPL,
 1371         .ssd_p = 1,
 1372         .ssd_xx = 0, .ssd_xx1 = 0,
 1373         .ssd_def32 = 0,
 1374         .ssd_gran = 0           },
 1375 /* GUSERLDT_SEL 11 User LDT Descriptor per process */
 1376 {       .ssd_base = (int) ldt,
 1377         .ssd_limit = (512 * sizeof(union descriptor)-1),
 1378         .ssd_type = SDT_SYSLDT,
 1379         .ssd_dpl = 0,
 1380         .ssd_p = 1,
 1381         .ssd_xx = 0, .ssd_xx1 = 0,
 1382         .ssd_def32 = 0,
 1383         .ssd_gran = 0           },
 1384 /* GPANIC_SEL   12 Panic Tss Descriptor */
 1385 {       .ssd_base = (int) &dblfault_tss,
 1386         .ssd_limit = sizeof(struct i386tss)-1,
 1387         .ssd_type = SDT_SYS386TSS,
 1388         .ssd_dpl = 0,
 1389         .ssd_p = 1,
 1390         .ssd_xx = 0, .ssd_xx1 = 0,
 1391         .ssd_def32 = 0,
 1392         .ssd_gran = 0           },
 1393 /* GBIOSCODE32_SEL 13 BIOS 32-bit interface (32bit Code) */
 1394 {       .ssd_base = 0,
 1395         .ssd_limit = 0xfffff,
 1396         .ssd_type = SDT_MEMERA,
 1397         .ssd_dpl = 0,
 1398         .ssd_p = 1,
 1399         .ssd_xx = 0, .ssd_xx1 = 0,
 1400         .ssd_def32 = 0,
 1401         .ssd_gran = 1           },
 1402 /* GBIOSCODE16_SEL 14 BIOS 32-bit interface (16bit Code) */
 1403 {       .ssd_base = 0,
 1404         .ssd_limit = 0xfffff,
 1405         .ssd_type = SDT_MEMERA,
 1406         .ssd_dpl = 0,
 1407         .ssd_p = 1,
 1408         .ssd_xx = 0, .ssd_xx1 = 0,
 1409         .ssd_def32 = 0,
 1410         .ssd_gran = 1           },
 1411 /* GBIOSDATA_SEL 15 BIOS 32-bit interface (Data) */
 1412 {       .ssd_base = 0,
 1413         .ssd_limit = 0xfffff,
 1414         .ssd_type = SDT_MEMRWA,
 1415         .ssd_dpl = 0,
 1416         .ssd_p = 1,
 1417         .ssd_xx = 0, .ssd_xx1 = 0,
 1418         .ssd_def32 = 1,
 1419         .ssd_gran = 1           },
 1420 /* GBIOSUTIL_SEL 16 BIOS 16-bit interface (Utility) */
 1421 {       .ssd_base = 0,
 1422         .ssd_limit = 0xfffff,
 1423         .ssd_type = SDT_MEMRWA,
 1424         .ssd_dpl = 0,
 1425         .ssd_p = 1,
 1426         .ssd_xx = 0, .ssd_xx1 = 0,
 1427         .ssd_def32 = 0,
 1428         .ssd_gran = 1           },
 1429 /* GBIOSARGS_SEL 17 BIOS 16-bit interface (Arguments) */
 1430 {       .ssd_base = 0,
 1431         .ssd_limit = 0xfffff,
 1432         .ssd_type = SDT_MEMRWA,
 1433         .ssd_dpl = 0,
 1434         .ssd_p = 1,
 1435         .ssd_xx = 0, .ssd_xx1 = 0,
 1436         .ssd_def32 = 0,
 1437         .ssd_gran = 1           },
 1438 /* GNDIS_SEL    18 NDIS Descriptor */
 1439 {       .ssd_base = 0x0,
 1440         .ssd_limit = 0x0,
 1441         .ssd_type = 0,
 1442         .ssd_dpl = 0,
 1443         .ssd_p = 0,
 1444         .ssd_xx = 0, .ssd_xx1 = 0,
 1445         .ssd_def32 = 0,
 1446         .ssd_gran = 0           },
 1447 };
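
/*
 * A minimal illustrative helper (a sketch for clarity; not referenced
 * anywhere in this file): how the 20-bit ssd_limit and the granularity
 * bit above combine into the byte limit the CPU enforces.  With
 * .ssd_gran = 1 the limit counts 4K pages, so 0xfffff covers the full
 * 4GB address space.
 */
static __inline vm_offset_t
ssd_effective_limit(const struct soft_segment_descriptor *ssd)
{

        /* Page-granular: the low 12 address bits are implicitly ones. */
        if (ssd->ssd_gran)
                return (((vm_offset_t)ssd->ssd_limit << 12) | 0xfff);
        return (ssd->ssd_limit);        /* Byte-granular. */
}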
 1448 
 1449 static struct soft_segment_descriptor ldt_segs[] = {
 1450         /* Null Descriptor - overwritten by call gate */
 1451 {       .ssd_base = 0x0,
 1452         .ssd_limit = 0x0,
 1453         .ssd_type = 0,
 1454         .ssd_dpl = 0,
 1455         .ssd_p = 0,
 1456         .ssd_xx = 0, .ssd_xx1 = 0,
 1457         .ssd_def32 = 0,
 1458         .ssd_gran = 0           },
 1459         /* Null Descriptor - overwritten by call gate */
 1460 {       .ssd_base = 0x0,
 1461         .ssd_limit = 0x0,
 1462         .ssd_type = 0,
 1463         .ssd_dpl = 0,
 1464         .ssd_p = 0,
 1465         .ssd_xx = 0, .ssd_xx1 = 0,
 1466         .ssd_def32 = 0,
 1467         .ssd_gran = 0           },
 1468         /* Null Descriptor - overwritten by call gate */
 1469 {       .ssd_base = 0x0,
 1470         .ssd_limit = 0x0,
 1471         .ssd_type = 0,
 1472         .ssd_dpl = 0,
 1473         .ssd_p = 0,
 1474         .ssd_xx = 0, .ssd_xx1 = 0,
 1475         .ssd_def32 = 0,
 1476         .ssd_gran = 0           },
 1477         /* Code Descriptor for user */
 1478 {       .ssd_base = 0x0,
 1479         .ssd_limit = 0xfffff,
 1480         .ssd_type = SDT_MEMERA,
 1481         .ssd_dpl = SEL_UPL,
 1482         .ssd_p = 1,
 1483         .ssd_xx = 0, .ssd_xx1 = 0,
 1484         .ssd_def32 = 1,
 1485         .ssd_gran = 1           },
 1486         /* Null Descriptor - overwritten by call gate */
 1487 {       .ssd_base = 0x0,
 1488         .ssd_limit = 0x0,
 1489         .ssd_type = 0,
 1490         .ssd_dpl = 0,
 1491         .ssd_p = 0,
 1492         .ssd_xx = 0, .ssd_xx1 = 0,
 1493         .ssd_def32 = 0,
 1494         .ssd_gran = 0           },
 1495         /* Data Descriptor for user */
 1496 {       .ssd_base = 0x0,
 1497         .ssd_limit = 0xfffff,
 1498         .ssd_type = SDT_MEMRWA,
 1499         .ssd_dpl = SEL_UPL,
 1500         .ssd_p = 1,
 1501         .ssd_xx = 0, .ssd_xx1 = 0,
 1502         .ssd_def32 = 1,
 1503         .ssd_gran = 1           },
 1504 };
 1505 
 1506 void
 1507 setidt(idx, func, typ, dpl, selec)
 1508         int idx;
 1509         inthand_t *func;
 1510         int typ;
 1511         int dpl;
 1512         int selec;
 1513 {
 1514         struct gate_descriptor *ip;
 1515 
 1516         ip = idt + idx;
 1517         ip->gd_looffset = (int)func;
 1518         ip->gd_selector = selec;
 1519         ip->gd_stkcpy = 0;
 1520         ip->gd_xx = 0;
 1521         ip->gd_type = typ;
 1522         ip->gd_dpl = dpl;
 1523         ip->gd_p = 1;
 1524         ip->gd_hioffset = ((int)func) >> 16;
 1525 }
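
/*
 * For illustration, the inverse of the packing setidt() just performed:
 * a gate's handler address is reassembled from its split 16-bit halves,
 * exactly as the DDB "show idt" command below does.  (Sketch only; the
 * kernel itself does not use this helper.)
 */
static __inline uintptr_t
idt_gate_offset(const struct gate_descriptor *ip)
{

        return (((uintptr_t)ip->gd_hioffset << 16) | ip->gd_looffset);
}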
 1526 
 1527 extern inthand_t
 1528         IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
 1529         IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
 1530         IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
 1531         IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
 1532         IDTVEC(xmm),
 1533 #ifdef KDTRACE_HOOKS
 1534         IDTVEC(dtrace_ret),
 1535 #endif
 1536 #ifdef XENHVM
 1537         IDTVEC(xen_intr_upcall),
 1538 #endif
 1539         IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);
 1540 
 1541 #ifdef DDB
 1542 /*
 1543  * Display the index and function name of any IDT entries that don't use
 1544  * the default 'rsvd' entry point.
 1545  */
 1546 DB_SHOW_COMMAND(idt, db_show_idt)
 1547 {
 1548         struct gate_descriptor *ip;
 1549         int idx;
 1550         uintptr_t func;
 1551 
 1552         ip = idt;
 1553         for (idx = 0; idx < NIDT && !db_pager_quit; idx++) {
 1554                 func = (ip->gd_hioffset << 16 | ip->gd_looffset);
 1555                 if (func != (uintptr_t)&IDTVEC(rsvd)) {
 1556                         db_printf("%3d\t", idx);
 1557                         db_printsym(func, DB_STGY_PROC);
 1558                         db_printf("\n");
 1559                 }
 1560                 ip++;
 1561         }
 1562 }
 1563 
 1564 /* Show privileged registers. */
 1565 DB_SHOW_COMMAND(sysregs, db_show_sysregs)
 1566 {
 1567         uint64_t idtr, gdtr;
 1568 
 1569         idtr = ridt();
 1570         db_printf("idtr\t0x%08x/%04x\n",
 1571             (u_int)(idtr >> 16), (u_int)idtr & 0xffff);
 1572         gdtr = rgdt();
 1573         db_printf("gdtr\t0x%08x/%04x\n",
 1574             (u_int)(gdtr >> 16), (u_int)gdtr & 0xffff);
 1575         db_printf("ldtr\t0x%04x\n", rldt());
 1576         db_printf("tr\t0x%04x\n", rtr());
 1577         db_printf("cr0\t0x%08x\n", rcr0());
 1578         db_printf("cr2\t0x%08x\n", rcr2());
 1579         db_printf("cr3\t0x%08x\n", rcr3());
 1580         db_printf("cr4\t0x%08x\n", rcr4());
 1581         if (rcr4() & CR4_XSAVE)
 1582                 db_printf("xcr0\t0x%016llx\n", rxcr(0));
 1583         if (amd_feature & (AMDID_NX | AMDID_LM))
 1584                 db_printf("EFER\t0x%016llx\n", rdmsr(MSR_EFER));
 1585         if (cpu_feature2 & (CPUID2_VMX | CPUID2_SMX))
 1586                 db_printf("FEATURES_CTL\t0x%016llx\n",
 1587                     rdmsr(MSR_IA32_FEATURE_CONTROL));
 1588         if ((cpu_vendor_id == CPU_VENDOR_INTEL ||
 1589             cpu_vendor_id == CPU_VENDOR_AMD) && CPUID_TO_FAMILY(cpu_id) >= 6)
 1590                 db_printf("DEBUG_CTL\t0x%016llx\n", rdmsr(MSR_DEBUGCTLMSR));
 1591         if (cpu_feature & CPUID_PAT)
 1592                 db_printf("PAT\t0x%016llx\n", rdmsr(MSR_PAT));
 1593 }
 1594 
 1595 DB_SHOW_COMMAND(dbregs, db_show_dbregs)
 1596 {
 1597 
 1598         db_printf("dr0\t0x%08x\n", rdr0());
 1599         db_printf("dr1\t0x%08x\n", rdr1());
 1600         db_printf("dr2\t0x%08x\n", rdr2());
 1601         db_printf("dr3\t0x%08x\n", rdr3());
 1602         db_printf("dr6\t0x%08x\n", rdr6());
 1603         db_printf("dr7\t0x%08x\n", rdr7());     
 1604 }
 1605 #endif
 1606 
 1607 void
 1608 sdtossd(sd, ssd)
 1609         struct segment_descriptor *sd;
 1610         struct soft_segment_descriptor *ssd;
 1611 {
 1612         ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
 1613         ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
 1614         ssd->ssd_type  = sd->sd_type;
 1615         ssd->ssd_dpl   = sd->sd_dpl;
 1616         ssd->ssd_p     = sd->sd_p;
 1617         ssd->ssd_def32 = sd->sd_def32;
 1618         ssd->ssd_gran  = sd->sd_gran;
 1619 }
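
/*
 * The inverse conversion, ssdtosd(), is used from init386() below but is
 * defined elsewhere.  A rough C sketch of what it must do, for
 * illustration only -- the hardware descriptor splits the base into
 * 24 + 8 bits and the limit into 16 + 4 bits:
 */
#if 0
static void
ssdtosd_sketch(struct soft_segment_descriptor *ssd,
    struct segment_descriptor *sd)
{

        sd->sd_lobase  = ssd->ssd_base & 0xffffff;
        sd->sd_hibase  = (ssd->ssd_base >> 24) & 0xff;
        sd->sd_lolimit = ssd->ssd_limit & 0xffff;
        sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf;
        sd->sd_type    = ssd->ssd_type;
        sd->sd_dpl     = ssd->ssd_dpl;
        sd->sd_p       = ssd->ssd_p;
        sd->sd_def32   = ssd->ssd_def32;
        sd->sd_gran    = ssd->ssd_gran;
}
#endif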
 1620 
 1621 #if !defined(PC98)
 1622 static int
 1623 add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap,
 1624     int *physmap_idxp)
 1625 {
 1626         int i, insert_idx, physmap_idx;
 1627 
 1628         physmap_idx = *physmap_idxp;
 1629         
 1630         if (length == 0)
 1631                 return (1);
 1632 
 1633 #ifndef PAE
 1634         if (base > 0xffffffff) {
 1635                 printf("%uK of memory above 4GB ignored\n",
 1636                     (u_int)(length / 1024));
 1637                 return (1);
 1638         }
 1639 #endif
 1640 
 1641         /*
 1642          * Find insertion point while checking for overlap.  Start off by
 1643          * assuming the new entry will be added to the end.
 1644          */
 1645         insert_idx = physmap_idx + 2;
 1646         for (i = 0; i <= physmap_idx; i += 2) {
 1647                 if (base < physmap[i + 1]) {
 1648                         if (base + length <= physmap[i]) {
 1649                                 insert_idx = i;
 1650                                 break;
 1651                         }
 1652                         if (boothowto & RB_VERBOSE)
 1653                                 printf(
 1654                     "Overlapping memory regions, ignoring second region\n");
 1655                         return (1);
 1656                 }
 1657         }
 1658 
 1659         /* See if we can prepend to the next entry. */
 1660         if (insert_idx <= physmap_idx && base + length == physmap[insert_idx]) {
 1661                 physmap[insert_idx] = base;
 1662                 return (1);
 1663         }
 1664 
 1665         /* See if we can append to the previous entry. */
 1666         if (insert_idx > 0 && base == physmap[insert_idx - 1]) {
 1667                 physmap[insert_idx - 1] += length;
 1668                 return (1);
 1669         }
 1670 
 1671         physmap_idx += 2;
 1672         *physmap_idxp = physmap_idx;
 1673         if (physmap_idx == PHYSMAP_SIZE) {
 1674                 printf(
 1675                 "Too many segments in the physical address map, giving up\n");
 1676                 return (0);
 1677         }
 1678 
 1679         /*
 1680          * Move the last 'N' entries down to make room for the new
 1681          * entry if needed.
 1682          */
 1683         for (i = physmap_idx; i > insert_idx; i -= 2) {
 1684                 physmap[i] = physmap[i - 2];
 1685                 physmap[i + 1] = physmap[i - 1];
 1686         }
 1687 
 1688         /* Insert the new entry. */
 1689         physmap[insert_idx] = base;
 1690         physmap[insert_idx + 1] = base + length;
 1691         return (1);
 1692 }
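
/*
 * A worked example of the physmap layout: entries are (base, end) pairs.
 * Inserting [0, 639K) and then [1M, 32M) leaves
 *
 *      physmap[] = { 0x0, 0x9fc00, 0x100000, 0x2000000 }
 *
 * with physmap_idx == 2 (the index of the last pair's base).  A further
 * region starting exactly at 32M would be coalesced into the second pair
 * by the append check above instead of consuming a new slot.
 */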
 1693 
 1694 static int
 1695 add_smap_entry(struct bios_smap *smap, vm_paddr_t *physmap, int *physmap_idxp)
 1696 {
 1697         if (boothowto & RB_VERBOSE)
 1698                 printf("SMAP type=%02x base=%016llx len=%016llx\n",
 1699                     smap->type, smap->base, smap->length);
 1700 
 1701         if (smap->type != SMAP_TYPE_MEMORY)
 1702                 return (1);
 1703 
 1704         return (add_physmap_entry(smap->base, smap->length, physmap,
 1705             physmap_idxp));
 1706 }
 1707 
 1708 static void
 1709 add_smap_entries(struct bios_smap *smapbase, vm_paddr_t *physmap,
 1710     int *physmap_idxp)
 1711 {
 1712         struct bios_smap *smap, *smapend;
 1713         u_int32_t smapsize;
 1714         /*
 1715          * Memory map from INT 15:E820.
 1716          *
 1717          * subr_module.c says:
 1718          * "Consumer may safely assume that size value precedes data."
 1719          * i.e., a u_int32_t immediately precedes the SMAP data.
 1720          */
 1721         smapsize = *((u_int32_t *)smapbase - 1);
 1722         smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize);
 1723 
 1724         for (smap = smapbase; smap < smapend; smap++)
 1725                 if (!add_smap_entry(smap, physmap, physmap_idxp))
 1726                         break;
 1727 }
 1728 #endif /* !PC98 */
 1729 
 1730 static void
 1731 basemem_setup(void)
 1732 {
 1733         vm_paddr_t pa;
 1734         pt_entry_t *pte;
 1735         int i;
 1736 
 1737         if (basemem > 640) {
 1738                 printf("Preposterous BIOS basemem of %uK, truncating to 640K\n",
 1739                         basemem);
 1740                 basemem = 640;
 1741         }
 1742 
 1743         /*
 1744          * XXX if biosbasemem is now < 640, there is a `hole'
 1745          * between the end of base memory and the start of
 1746          * ISA memory.  The hole may be empty or it may
 1747          * contain BIOS code or data.  Map it read/write so
 1748          * that the BIOS can write to it.  (Memory from 0 to
 1749          * the physical end of the kernel is mapped read-only
 1750          * to begin with and then parts of it are remapped.
 1751          * The parts that aren't remapped form holes that
 1752          * remain read-only and are unused by the kernel.
 1753          * The base memory area is below the physical end of
 1754          * the kernel and right now forms a read-only hole.
 1755          * The part of it from PAGE_SIZE to
 1756          * (trunc_page(biosbasemem * 1024) - 1) will be
 1757          * remapped and used by the kernel later.)
 1758          *
 1759          * This code is similar to the code used in
 1760          * pmap_mapdev, but since no memory needs to be
 1761          * allocated we simply change the mapping.
 1762          */
 1763         for (pa = trunc_page(basemem * 1024);
 1764              pa < ISA_HOLE_START; pa += PAGE_SIZE)
 1765                 pmap_kenter(KERNBASE + pa, pa);
 1766 
 1767         /*
 1768          * Map pages between basemem and ISA_HOLE_START, if any, r/w into
 1769          * the vm86 page table so that vm86 can scribble on them using
 1770          * the vm86 map too.  XXX: why 2 ways for this and only 1 way for
 1771          * page 0, at least as initialized here?
 1772          */
 1773         pte = (pt_entry_t *)vm86paddr;
 1774         for (i = basemem / 4; i < 160; i++)
 1775                 pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U;
 1776 }
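
/*
 * Concretely: vm86 page-table slot i maps the 4K page at i << PAGE_SHIFT,
 * and slot 160 corresponds to ISA_HOLE_START (0xa0000, i.e. 640K).  So
 * with basemem = 512 the loop above makes 0x80000-0x9ffff writable from
 * vm86 mode, while a full 640K of base memory (i starting at 160) maps
 * nothing extra.
 */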
 1777 
 1778 /*
 1779  * Populate the (physmap) array with base/bound pairs describing the
 1780  * available physical memory in the system, then test this memory and
 1781  * build the phys_avail array describing the actually-available memory.
 1782  *
 1783  * If we cannot accurately determine the physical memory map, then use
 1784  * the value from the 0xE801 call and, failing that, the RTC.
 1785  *
 1786  * Total memory size may be set by the kernel environment variable
 1787  * hw.physmem or the compile-time define MAXMEM.
 1788  *
 1789  * XXX first should be vm_paddr_t.
 1790  */
 1791 #ifdef PC98
 1792 static void
 1793 getmemsize(int first)
 1794 {
 1795         int off, physmap_idx, pa_indx, da_indx;
 1796         u_long physmem_tunable, memtest;
 1797         vm_paddr_t physmap[PHYSMAP_SIZE];
 1798         pt_entry_t *pte;
 1799         quad_t dcons_addr, dcons_size;
 1800         int i;
 1801         int pg_n;
 1802         u_int extmem;
 1803         u_int under16;
 1804         vm_paddr_t pa;
 1805 
 1806         bzero(physmap, sizeof(physmap));
 1807 
 1808         /* XXX - some EPSON machines can't use PG_N */
 1809         pg_n = PG_N;
 1810         if (pc98_machine_type & M_EPSON_PC98) {
 1811                 switch (epson_machine_id) {
 1812 #ifdef WB_CACHE
 1813                 default:
 1814 #endif
 1815                 case EPSON_PC486_HX:
 1816                 case EPSON_PC486_HG:
 1817                 case EPSON_PC486_HA:
 1818                         pg_n = 0;
 1819                         break;
 1820                 }
 1821         }
 1822 
 1823         under16 = pc98_getmemsize(&basemem, &extmem);
 1824         basemem_setup();
 1825 
 1826         physmap[0] = 0;
 1827         physmap[1] = basemem * 1024;
 1828         physmap_idx = 2;
 1829         physmap[physmap_idx] = 0x100000;
 1830         physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024;
 1831 
 1832         /*
 1833          * Now, physmap contains a map of physical memory.
 1834          */
 1835 
 1836 #ifdef SMP
 1837         /* make hole for AP bootstrap code */
 1838         physmap[1] = mp_bootaddress(physmap[1]);
 1839 #endif
 1840 
 1841         /*
 1842          * Maxmem isn't the "maximum memory", it's one larger than the
 1843          * highest page of the physical address space.  It should be
 1844          * called something like "Maxphyspage".  We may adjust this 
 1845          * based on ``hw.physmem'' and the results of the memory test.
 1846          */
 1847         Maxmem = atop(physmap[physmap_idx + 1]);
 1848 
 1849 #ifdef MAXMEM
 1850         Maxmem = MAXMEM / 4;
 1851 #endif
 1852 
 1853         if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable))
 1854                 Maxmem = atop(physmem_tunable);
 1855 
 1856         /*
 1857          * By default keep the memtest enabled.  Use a general name so that
 1858          * one could eventually do more with the code than just disable it.
 1859          */
 1860         memtest = 1;
 1861         TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest);
 1862 
 1863         if (atop(physmap[physmap_idx + 1]) != Maxmem &&
 1864             (boothowto & RB_VERBOSE))
 1865                 printf("Physical memory use set to %ldK\n", Maxmem * 4);
 1866 
 1867         /*
 1868          * If Maxmem has been increased beyond what the system has detected,
 1869          * extend the last memory segment to the new limit.
 1870          */ 
 1871         if (atop(physmap[physmap_idx + 1]) < Maxmem)
 1872                 physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem);
 1873 
 1874         /*
 1875          * We need to divide the chunk if Maxmem is larger than 16MB
 1876          * and the under-16MB area is not full of memory.
 1877          * (1) system area (15-16MB region) is cut off
 1878          * (2) extended memory is only over 16MB area (ex. Melco "HYPERMEMORY")
 1879          */
 1880         if ((under16 != 16 * 1024) && (extmem > 15 * 1024)) {
 1881                 /* 15M - 16M region is cut off, so need to divide chunk */
 1882                 physmap[physmap_idx + 1] = under16 * 1024;
 1883                 physmap_idx += 2;
 1884                 physmap[physmap_idx] = 0x1000000;
 1885                 physmap[physmap_idx + 1] = physmap[2] + extmem * 1024;
 1886         }
 1887 
 1888         /* call pmap initialization to make new kernel address space */
 1889         pmap_bootstrap(first);
 1890 
 1891         /*
 1892          * Size up each available chunk of physical memory.
 1893          */
 1894         physmap[0] = PAGE_SIZE;         /* mask off page 0 */
 1895         pa_indx = 0;
 1896         da_indx = 1;
 1897         phys_avail[pa_indx++] = physmap[0];
 1898         phys_avail[pa_indx] = physmap[0];
 1899         dump_avail[da_indx] = physmap[0];
 1900         pte = CMAP3;
 1901 
 1902         /*
 1903          * Get dcons buffer address
 1904          */
 1905         if (getenv_quad("dcons.addr", &dcons_addr) == 0 ||
 1906             getenv_quad("dcons.size", &dcons_size) == 0)
 1907                 dcons_addr = 0;
 1908 
 1909         /*
 1910          * physmap is in bytes, so when converting to page boundaries,
 1911          * round up the start address and round down the end address.
 1912          */
 1913         for (i = 0; i <= physmap_idx; i += 2) {
 1914                 vm_paddr_t end;
 1915 
 1916                 end = ptoa((vm_paddr_t)Maxmem);
 1917                 if (physmap[i + 1] < end)
 1918                         end = trunc_page(physmap[i + 1]);
 1919                 for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
 1920                         int tmp, page_bad, full;
 1921                         int *ptr = (int *)CADDR3;
 1922 
 1923                         full = FALSE;
 1924                         /*
 1925                          * block out kernel memory as not available.
 1926                          */
 1927                         if (pa >= KERNLOAD && pa < first)
 1928                                 goto do_dump_avail;
 1929 
 1930                         /*
 1931                          * block out dcons buffer
 1932                          */
 1933                         if (dcons_addr > 0
 1934                             && pa >= trunc_page(dcons_addr)
 1935                             && pa < dcons_addr + dcons_size)
 1936                                 goto do_dump_avail;
 1937 
 1938                         page_bad = FALSE;
 1939                         if (memtest == 0)
 1940                                 goto skip_memtest;
 1941 
 1942                         /*
 1943                          * map page into kernel: valid, read/write, non-cacheable
 1944                          */
 1945                         *pte = pa | PG_V | PG_RW | pg_n;
 1946                         invltlb();
 1947 
 1948                         tmp = *(int *)ptr;
 1949                         /*
 1950                          * Test for alternating 1's and 0's
 1951                          */
 1952                         *(volatile int *)ptr = 0xaaaaaaaa;
 1953                         if (*(volatile int *)ptr != 0xaaaaaaaa)
 1954                                 page_bad = TRUE;
 1955                         /*
 1956                          * Test for alternating 0's and 1's
 1957                          */
 1958                         *(volatile int *)ptr = 0x55555555;
 1959                         if (*(volatile int *)ptr != 0x55555555)
 1960                                 page_bad = TRUE;
 1961                         /*
 1962                          * Test for all 1's
 1963                          */
 1964                         *(volatile int *)ptr = 0xffffffff;
 1965                         if (*(volatile int *)ptr != 0xffffffff)
 1966                                 page_bad = TRUE;
 1967                         /*
 1968                          * Test for all 0's
 1969                          */
 1970                         *(volatile int *)ptr = 0x0;
 1971                         if (*(volatile int *)ptr != 0x0)
 1972                                 page_bad = TRUE;
 1973                         /*
 1974                          * Restore original value.
 1975                          */
 1976                         *(int *)ptr = tmp;
 1977 
 1978 skip_memtest:
 1979                         /*
 1980                          * Adjust array of valid/good pages.
 1981                          */
 1982                         if (page_bad == TRUE)
 1983                                 continue;
 1984                         /*
 1985                          * If this good page is a continuation of the
 1986                          * previous set of good pages, then just increase
 1987                          * the end pointer. Otherwise start a new chunk.
 1988                          * Note that "end" points one past the last byte,
 1989                          * making the range >= start and < end.
 1990                          * If we're also doing a speculative memory
 1991                          * test and we're at or past the end, bump up
 1992                          * Maxmem so that we keep going. The first bad
 1993                          * page will terminate the loop.
 1994                          */
 1995                         if (phys_avail[pa_indx] == pa) {
 1996                                 phys_avail[pa_indx] += PAGE_SIZE;
 1997                         } else {
 1998                                 pa_indx++;
 1999                                 if (pa_indx == PHYS_AVAIL_ARRAY_END) {
 2000                                         printf(
 2001                 "Too many holes in the physical address space, giving up\n");
 2002                                         pa_indx--;
 2003                                         full = TRUE;
 2004                                         goto do_dump_avail;
 2005                                 }
 2006                                 phys_avail[pa_indx++] = pa;     /* start */
 2007                                 phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */
 2008                         }
 2009                         physmem++;
 2010 do_dump_avail:
 2011                         if (dump_avail[da_indx] == pa) {
 2012                                 dump_avail[da_indx] += PAGE_SIZE;
 2013                         } else {
 2014                                 da_indx++;
 2015                                 if (da_indx == DUMP_AVAIL_ARRAY_END) {
 2016                                         da_indx--;
 2017                                         goto do_next;
 2018                                 }
 2019                                 dump_avail[da_indx++] = pa;     /* start */
 2020                                 dump_avail[da_indx] = pa + PAGE_SIZE; /* end */
 2021                         }
 2022 do_next:
 2023                         if (full)
 2024                                 break;
 2025                 }
 2026         }
 2027         *pte = 0;
 2028         invltlb();
 2029         
 2030         /*
 2031          * XXX
 2032          * The last chunk must contain at least one page plus the message
 2033          * buffer to avoid complicating other code (message buffer address
 2034          * calculation, etc.).
 2035          */
 2036         while (phys_avail[pa_indx - 1] + PAGE_SIZE +
 2037             round_page(msgbufsize) >= phys_avail[pa_indx]) {
 2038                 physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
 2039                 phys_avail[pa_indx--] = 0;
 2040                 phys_avail[pa_indx--] = 0;
 2041         }
 2042 
 2043         Maxmem = atop(phys_avail[pa_indx]);
 2044 
 2045         /* Trim off space for the message buffer. */
 2046         phys_avail[pa_indx] -= round_page(msgbufsize);
 2047 
 2048         /* Map the message buffer. */
 2049         for (off = 0; off < round_page(msgbufsize); off += PAGE_SIZE)
 2050                 pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] +
 2051                     off);
 2052 }
 2053 #else /* PC98 */
 2054 static void
 2055 getmemsize(int first)
 2056 {
 2057         int has_smap, off, physmap_idx, pa_indx, da_indx;
 2058         u_long memtest;
 2059         vm_paddr_t physmap[PHYSMAP_SIZE];
 2060         pt_entry_t *pte;
 2061         quad_t dcons_addr, dcons_size, physmem_tunable;
 2062         int hasbrokenint12, i, res;
 2063         u_int extmem;
 2064         struct vm86frame vmf;
 2065         struct vm86context vmc;
 2066         vm_paddr_t pa;
 2067         struct bios_smap *smap, *smapbase;
 2068         caddr_t kmdp;
 2069 
 2070         has_smap = 0;
 2071 #ifdef XBOX
 2072         if (arch_i386_is_xbox) {
 2073                 /*
 2074                  * We queried the memory size before, so chop off 4MB for
 2075                  * the framebuffer and inform the OS of this.
 2076                  */
 2077                 physmap[0] = 0;
 2078                 physmap[1] = (arch_i386_xbox_memsize * 1024 * 1024) - XBOX_FB_SIZE;
 2079                 physmap_idx = 0;
 2080                 goto physmap_done;
 2081         }
 2082 #endif
 2083         bzero(&vmf, sizeof(vmf));
 2084         bzero(physmap, sizeof(physmap));
 2085         basemem = 0;
 2086 
 2087         /*
 2088          * Check if the loader supplied an SMAP memory map.  If so,
 2089          * use that and do not make any VM86 calls.
 2090          */
 2091         physmap_idx = 0;
 2092         smapbase = NULL;
 2093         kmdp = preload_search_by_type("elf kernel");
 2094         if (kmdp == NULL)
 2095                 kmdp = preload_search_by_type("elf32 kernel");
 2096         smapbase = (struct bios_smap *)preload_search_info(kmdp,
 2097             MODINFO_METADATA | MODINFOMD_SMAP);
 2098         if (smapbase != NULL) {
 2099                 add_smap_entries(smapbase, physmap, &physmap_idx);
 2100                 has_smap = 1;
 2101                 goto have_smap;
 2102         }
 2103 
 2104         /*
 2105          * Some newer BIOSes have a broken INT 12H implementation
 2106          * which causes a kernel panic immediately.  In this case, we
 2107          * need to use the SMAP to determine the base memory size.
 2108          */
 2109         hasbrokenint12 = 0;
 2110         TUNABLE_INT_FETCH("hw.hasbrokenint12", &hasbrokenint12);
 2111         if (hasbrokenint12 == 0) {
 2112                 /* Use INT12 to determine base memory size. */
 2113                 vm86_intcall(0x12, &vmf);
 2114                 basemem = vmf.vmf_ax;
 2115                 basemem_setup();
 2116         }
 2117 
 2118         /*
 2119          * Fetch the memory map with INT 15:E820.  Map page 1 R/W into
 2120          * the kernel page table so we can use it as a buffer.  The
 2121          * kernel will unmap this page later.
 2122          */
 2123         pmap_kenter(KERNBASE + (1 << PAGE_SHIFT), 1 << PAGE_SHIFT);
 2124         vmc.npages = 0;
 2125         smap = (void *)vm86_addpage(&vmc, 1, KERNBASE + (1 << PAGE_SHIFT));
 2126         res = vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di);
 2127         KASSERT(res != 0, ("vm86_getptr() failed: address not found"));
 2128 
 2129         vmf.vmf_ebx = 0;
 2130         do {
 2131                 vmf.vmf_eax = 0xE820;
 2132                 vmf.vmf_edx = SMAP_SIG;
 2133                 vmf.vmf_ecx = sizeof(struct bios_smap);
 2134                 i = vm86_datacall(0x15, &vmf, &vmc);
 2135                 if (i || vmf.vmf_eax != SMAP_SIG)
 2136                         break;
 2137                 has_smap = 1;
 2138                 if (!add_smap_entry(smap, physmap, &physmap_idx))
 2139                         break;
 2140         } while (vmf.vmf_ebx != 0);
 2141 
 2142 have_smap:
 2143         /*
 2144          * If we didn't fetch the "base memory" size from INT12,
 2145          * figure it out from the SMAP (or just guess).
 2146          */
 2147         if (basemem == 0) {
 2148                 for (i = 0; i <= physmap_idx; i += 2) {
 2149                         if (physmap[i] == 0x00000000) {
 2150                                 basemem = physmap[i + 1] / 1024;
 2151                                 break;
 2152                         }
 2153                 }
 2154 
 2155                 /* XXX: If we couldn't find basemem from SMAP, just guess. */
 2156                 if (basemem == 0)
 2157                         basemem = 640;
 2158                 basemem_setup();
 2159         }
 2160 
 2161         if (physmap[1] != 0)
 2162                 goto physmap_done;
 2163 
 2164         /*
 2165          * If we failed to find an SMAP, figure out the extended
 2166          * memory size.  We will then build a simple memory map with
 2167          * two segments, one for "base memory" and the second for
 2168          * "extended memory".  Note that "extended memory" starts at a
 2169          * physical address of 1MB and that both basemem and extmem
 2170          * are in units of 1KB.
 2171          *
 2172          * First, try to fetch the extended memory size via INT 15:E801.
 2173          */
 2174         vmf.vmf_ax = 0xE801;
 2175         if (vm86_intcall(0x15, &vmf) == 0) {
 2176                 extmem = vmf.vmf_cx + vmf.vmf_dx * 64;
 2177         } else {
 2178                 /*
 2179                  * If INT15:E801 fails, this is our last-ditch effort
 2180                  * to determine the extended memory size.  Currently
 2181                  * we prefer the RTC value over INT15:88.
 2182                  */
 2183 #if 0
 2184                 vmf.vmf_ah = 0x88;
 2185                 vm86_intcall(0x15, &vmf);
 2186                 extmem = vmf.vmf_ax;
 2187 #else
 2188                 extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8);
 2189 #endif
 2190         }
 2191 
 2192         /*
 2193          * Special hack for chipsets that still remap the 384k hole when
 2194          * there's 16MB of memory - this really confuses people who are
 2195          * trying to use bus-mastering ISA controllers with the
 2196          * "16MB limit"; they only have 16MB, but the remapping puts
 2197          * them beyond the limit.
 2198          *
 2199          * If extended memory is between 15-16MB (16-17MB phys address range),
 2200          *      chop it to 15MB.
 2201          */
 2202         if ((extmem > 15 * 1024) && (extmem < 16 * 1024))
 2203                 extmem = 15 * 1024;
 2204 
 2205         physmap[0] = 0;
 2206         physmap[1] = basemem * 1024;
 2207         physmap_idx = 2;
 2208         physmap[physmap_idx] = 0x100000;
 2209         physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024;
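
        /*
         * For example, on a hypothetical machine reporting 640K of base
         * memory and 63M (extmem = 64512K) of extended memory this
         * produces two segments:
         * physmap[] = { 0x0, 0xa0000, 0x100000, 0x4000000 }, physmap_idx = 2.
         */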
 2210 
 2211 physmap_done:
 2212         /*
 2213          * Now, physmap contains a map of physical memory.
 2214          */
 2215 
 2216 #ifdef SMP
 2217         /* make hole for AP bootstrap code */
 2218         physmap[1] = mp_bootaddress(physmap[1]);
 2219 #endif
 2220 
 2221         /*
 2222          * Maxmem isn't the "maximum memory", it's one larger than the
 2223          * highest page of the physical address space.  It should be
 2224          * called something like "Maxphyspage".  We may adjust this 
 2225          * based on ``hw.physmem'' and the results of the memory test.
 2226          */
 2227         Maxmem = atop(physmap[physmap_idx + 1]);
 2228 
 2229 #ifdef MAXMEM
 2230         Maxmem = MAXMEM / 4;
 2231 #endif
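
        /*
         * A note on units: atop() is a byte-to-page shift (PAGE_SHIFT is
         * 12 on i386), so a top of memory at 512MB gives Maxmem =
         * 0x20000000 >> 12 = 131072 pages, reported below as
         * Maxmem * 4 = 524288K.  MAXMEM is specified in kilobytes, hence
         * the division by 4 above to convert it to pages.
         */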
 2232 
 2233         if (TUNABLE_QUAD_FETCH("hw.physmem", &physmem_tunable))
 2234                 Maxmem = atop(physmem_tunable);
 2235 
 2236         /*
 2237          * If we have an SMAP, don't allow MAXMEM or hw.physmem to extend
 2238          * the amount of memory in the system.
 2239          */
 2240         if (has_smap && Maxmem > atop(physmap[physmap_idx + 1]))
 2241                 Maxmem = atop(physmap[physmap_idx + 1]);
 2242 
 2243         /*
 2244          * By default enable the memory test on real hardware, and disable
 2245          * it if we appear to be running in a VM.  This avoids touching all
 2246          * pages unnecessarily, which doesn't matter on real hardware but is
 2247          * bad for shared VM hosts.  Use a general name so that
 2248          * one could eventually do more with the code than just disable it.
 2249          */
 2250         memtest = (vm_guest > VM_GUEST_NO) ? 0 : 1;
 2251         TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest);
 2252 
 2253         if (atop(physmap[physmap_idx + 1]) != Maxmem &&
 2254             (boothowto & RB_VERBOSE))
 2255                 printf("Physical memory use set to %ldK\n", Maxmem * 4);
 2256 
 2257         /*
 2258          * If Maxmem has been increased beyond what the system has detected,
 2259          * extend the last memory segment to the new limit.
 2260          */ 
 2261         if (atop(physmap[physmap_idx + 1]) < Maxmem)
 2262                 physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem);
 2263 
 2264         /* call pmap initialization to make new kernel address space */
 2265         pmap_bootstrap(first);
 2266 
 2267         /*
 2268          * Size up each available chunk of physical memory.
 2269          */
 2270         physmap[0] = PAGE_SIZE;         /* mask off page 0 */
 2271         pa_indx = 0;
 2272         da_indx = 1;
 2273         phys_avail[pa_indx++] = physmap[0];
 2274         phys_avail[pa_indx] = physmap[0];
 2275         dump_avail[da_indx] = physmap[0];
 2276         pte = CMAP3;
 2277 
 2278         /*
 2279          * Get dcons buffer address
 2280          */
 2281         if (getenv_quad("dcons.addr", &dcons_addr) == 0 ||
 2282             getenv_quad("dcons.size", &dcons_size) == 0)
 2283                 dcons_addr = 0;
 2284 
 2285         /*
 2286          * physmap is in bytes, so when converting to page boundaries,
 2287          * round up the start address and round down the end address.
 2288          */
 2289         for (i = 0; i <= physmap_idx; i += 2) {
 2290                 vm_paddr_t end;
 2291 
 2292                 end = ptoa((vm_paddr_t)Maxmem);
 2293                 if (physmap[i + 1] < end)
 2294                         end = trunc_page(physmap[i + 1]);
 2295                 for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
 2296                         int tmp, page_bad, full;
 2297                         int *ptr = (int *)CADDR3;
 2298 
 2299                         full = FALSE;
 2300                         /*
 2301                          * block out kernel memory as not available.
 2302                          */
 2303                         if (pa >= KERNLOAD && pa < first)
 2304                                 goto do_dump_avail;
 2305 
 2306                         /*
 2307                          * block out dcons buffer
 2308                          */
 2309                         if (dcons_addr > 0
 2310                             && pa >= trunc_page(dcons_addr)
 2311                             && pa < dcons_addr + dcons_size)
 2312                                 goto do_dump_avail;
 2313 
 2314                         page_bad = FALSE;
 2315                         if (memtest == 0)
 2316                                 goto skip_memtest;
 2317 
 2318                         /*
 2319                          * map page into kernel: valid, read/write, non-cacheable
 2320                          */
 2321                         *pte = pa | PG_V | PG_RW | PG_N;
 2322                         invltlb();
 2323 
 2324                         tmp = *(int *)ptr;
 2325                         /*
 2326                          * Test for alternating 1's and 0's
 2327                          */
 2328                         *(volatile int *)ptr = 0xaaaaaaaa;
 2329                         if (*(volatile int *)ptr != 0xaaaaaaaa)
 2330                                 page_bad = TRUE;
 2331                         /*
 2332                          * Test for alternating 0's and 1's
 2333                          */
 2334                         *(volatile int *)ptr = 0x55555555;
 2335                         if (*(volatile int *)ptr != 0x55555555)
 2336                                 page_bad = TRUE;
 2337                         /*
 2338                          * Test for all 1's
 2339                          */
 2340                         *(volatile int *)ptr = 0xffffffff;
 2341                         if (*(volatile int *)ptr != 0xffffffff)
 2342                                 page_bad = TRUE;
 2343                         /*
 2344                          * Test for all 0's
 2345                          */
 2346                         *(volatile int *)ptr = 0x0;
 2347                         if (*(volatile int *)ptr != 0x0)
 2348                                 page_bad = TRUE;
 2349                         /*
 2350                          * Restore original value.
 2351                          */
 2352                         *(int *)ptr = tmp;
 2353 
 2354 skip_memtest:
 2355                         /*
 2356                          * Adjust array of valid/good pages.
 2357                          */
 2358                         if (page_bad == TRUE)
 2359                                 continue;
 2360                         /*
 2361                          * If this good page is a continuation of the
 2362                          * previous set of good pages, then just increase
 2363                          * the end pointer. Otherwise start a new chunk.
 2364                          * Note that "end" points one past the last byte,
 2365                          * making the range >= start and < end.
 2366                          * If we're also doing a speculative memory
 2367                          * test and we're at or past the end, bump up
 2368                          * Maxmem so that we keep going. The first bad
 2369                          * page will terminate the loop.
 2370                          */
 2371                         if (phys_avail[pa_indx] == pa) {
 2372                                 phys_avail[pa_indx] += PAGE_SIZE;
 2373                         } else {
 2374                                 pa_indx++;
 2375                                 if (pa_indx == PHYS_AVAIL_ARRAY_END) {
 2376                                         printf(
 2377                 "Too many holes in the physical address space, giving up\n");
 2378                                         pa_indx--;
 2379                                         full = TRUE;
 2380                                         goto do_dump_avail;
 2381                                 }
 2382                                 phys_avail[pa_indx++] = pa;     /* start */
 2383                                 phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */
 2384                         }
 2385                         physmem++;
 2386 do_dump_avail:
 2387                         if (dump_avail[da_indx] == pa) {
 2388                                 dump_avail[da_indx] += PAGE_SIZE;
 2389                         } else {
 2390                                 da_indx++;
 2391                                 if (da_indx == DUMP_AVAIL_ARRAY_END) {
 2392                                         da_indx--;
 2393                                         goto do_next;
 2394                                 }
 2395                                 dump_avail[da_indx++] = pa;     /* start */
 2396                                 dump_avail[da_indx] = pa + PAGE_SIZE; /* end */
 2397                         }
 2398 do_next:
 2399                         if (full)
 2400                                 break;
 2401                 }
 2402         }
 2403         *pte = 0;
 2404         invltlb();
 2405         
 2406         /*
 2407          * XXX
 2408          * The last chunk must contain at least one page plus the message
 2409          * buffer to avoid complicating other code (message buffer address
 2410          * calculation, etc.).
 2411          */
 2412         while (phys_avail[pa_indx - 1] + PAGE_SIZE +
 2413             round_page(msgbufsize) >= phys_avail[pa_indx]) {
 2414                 physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
 2415                 phys_avail[pa_indx--] = 0;
 2416                 phys_avail[pa_indx--] = 0;
 2417         }
 2418 
 2419         Maxmem = atop(phys_avail[pa_indx]);
 2420 
 2421         /* Trim off space for the message buffer. */
 2422         phys_avail[pa_indx] -= round_page(msgbufsize);
 2423 
 2424         /* Map the message buffer. */
 2425         for (off = 0; off < round_page(msgbufsize); off += PAGE_SIZE)
 2426                 pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] +
 2427                     off);
 2428 }
 2429 #endif /* PC98 */
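
/*
 * The probe loop in getmemsize() boils down to the following pattern
 * test, shown here as a stand-alone sketch for clarity (the in-line
 * version above additionally tracks the phys_avail/dump_avail chunks):
 */
#if 0
static int
page_word_tests_ok(volatile int *p)
{
        static const int patterns[] =
            { 0xaaaaaaaa, 0x55555555, 0xffffffff, 0x0 };
        int saved, tmp;
        u_int i;

        saved = *p;                     /* Preserve the original word. */
        for (i = 0; i < nitems(patterns); i++) {
                *p = patterns[i];       /* Write the pattern... */
                tmp = *p;               /* ...and read it back. */
                if (tmp != patterns[i]) {
                        *p = saved;
                        return (0);     /* Page is bad. */
                }
        }
        *p = saved;
        return (1);                     /* All four patterns held. */
}
#endif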
 2430 
 2431 register_t
 2432 init386(first)
 2433         int first;
 2434 {
 2435         struct gate_descriptor *gdp;
 2436         int gsel_tss, metadata_missing, x, pa;
 2437         struct pcpu *pc;
 2438 #ifdef CPU_ENABLE_SSE
 2439         struct xstate_hdr *xhdr;
 2440 #endif
 2441 
 2442         thread0.td_kstack = proc0kstack;
 2443         thread0.td_kstack_pages = TD0_KSTACK_PAGES;
 2444 
 2445         /*
 2446          * This may be done better later if it gets more high level
 2447          * components in it. If so just link td->td_proc here.
 2448          */
 2449         proc_linkup0(&proc0, &thread0);
 2450 
 2451 #ifdef PC98
 2452         /*
 2453          * Initialize DMAC
 2454          */
 2455         pc98_init_dmac();
 2456 #endif
 2457 
 2458         metadata_missing = 0;
 2459         if (bootinfo.bi_modulep) {
 2460                 preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE;
 2461                 preload_bootstrap_relocate(KERNBASE);
 2462         } else {
 2463                 metadata_missing = 1;
 2464         }
 2465 
 2466         if (bootinfo.bi_envp != 0)
 2467                 init_static_kenv((char *)bootinfo.bi_envp + KERNBASE, 0);
 2468         else
 2469                 init_static_kenv(NULL, 0);
 2470 
 2471         /* Init basic tunables, hz etc */
 2472         init_param1();
 2473 
 2474         /*
 2475          * Make gdt memory segments.  All segments cover the full 4GB
 2476          * of address space and permissions are enforced at page level.
 2477          */
 2478         gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1);
 2479         gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1);
 2480         gdt_segs[GUCODE_SEL].ssd_limit = atop(0 - 1);
 2481         gdt_segs[GUDATA_SEL].ssd_limit = atop(0 - 1);
 2482         gdt_segs[GUFS_SEL].ssd_limit = atop(0 - 1);
 2483         gdt_segs[GUGS_SEL].ssd_limit = atop(0 - 1);
 2484 
 2485         pc = &__pcpu[0];
 2486         gdt_segs[GPRIV_SEL].ssd_limit = atop(0 - 1);
 2487         gdt_segs[GPRIV_SEL].ssd_base = (int) pc;
 2488         gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;
 2489 
 2490         for (x = 0; x < NGDT; x++)
 2491                 ssdtosd(&gdt_segs[x], &gdt[x].sd);
 2492 
 2493         r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
 2494         r_gdt.rd_base =  (int) gdt;
 2495         mtx_init(&dt_lock, "descriptor tables", NULL, MTX_SPIN);
 2496         lgdt(&r_gdt);
 2497 
 2498         pcpu_init(pc, 0, sizeof(struct pcpu));
 2499         for (pa = first; pa < first + DPCPU_SIZE; pa += PAGE_SIZE)
 2500                 pmap_kenter(pa + KERNBASE, pa);
 2501         dpcpu_init((void *)(first + KERNBASE), 0);
 2502         first += DPCPU_SIZE;
 2503         PCPU_SET(prvspace, pc);
 2504         PCPU_SET(curthread, &thread0);
 2505 
 2506         /*
 2507          * Initialize mutexes.
 2508          *
 2509          * icu_lock: in order to allow an interrupt to occur in a critical
 2510          *           section, to set pcpu->ipending (etc...) properly, we
 2511          *           must be able to get the icu lock, so it can't be
 2512          *           under witness.
 2513          */
 2514         mutex_init();
 2515         mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS | MTX_NOPROFILE);
 2516 
 2517         /* make ldt memory segments */
 2518         ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1);
 2519         ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1);
 2520         for (x = 0; x < nitems(ldt_segs); x++)
 2521                 ssdtosd(&ldt_segs[x], &ldt[x].sd);
 2522 
 2523         _default_ldt = GSEL(GLDT_SEL, SEL_KPL);
 2524         lldt(_default_ldt);
 2525         PCPU_SET(currentldt, _default_ldt);
 2526 
 2527         /* exceptions */
 2528         for (x = 0; x < NIDT; x++)
 2529                 setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL,
 2530                     GSEL(GCODE_SEL, SEL_KPL));
 2531         setidt(IDT_DE, &IDTVEC(div),  SDT_SYS386TGT, SEL_KPL,
 2532             GSEL(GCODE_SEL, SEL_KPL));
 2533         setidt(IDT_DB, &IDTVEC(dbg),  SDT_SYS386IGT, SEL_KPL,
 2534             GSEL(GCODE_SEL, SEL_KPL));
 2535         setidt(IDT_NMI, &IDTVEC(nmi),  SDT_SYS386IGT, SEL_KPL,
 2536             GSEL(GCODE_SEL, SEL_KPL));
 2537         setidt(IDT_BP, &IDTVEC(bpt),  SDT_SYS386IGT, SEL_UPL,
 2538             GSEL(GCODE_SEL, SEL_KPL));
 2539         setidt(IDT_OF, &IDTVEC(ofl),  SDT_SYS386TGT, SEL_UPL,
 2540             GSEL(GCODE_SEL, SEL_KPL));
 2541         setidt(IDT_BR, &IDTVEC(bnd),  SDT_SYS386TGT, SEL_KPL,
 2542             GSEL(GCODE_SEL, SEL_KPL));
 2543         setidt(IDT_UD, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL,
 2544             GSEL(GCODE_SEL, SEL_KPL));
 2545         setidt(IDT_NM, &IDTVEC(dna),  SDT_SYS386TGT, SEL_KPL,
 2546             GSEL(GCODE_SEL, SEL_KPL));
 2547         setidt(IDT_DF, 0,  SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL));
 2548         setidt(IDT_FPUGP, &IDTVEC(fpusegm),  SDT_SYS386TGT, SEL_KPL,
 2549             GSEL(GCODE_SEL, SEL_KPL));
 2550         setidt(IDT_TS, &IDTVEC(tss),  SDT_SYS386TGT, SEL_KPL,
 2551             GSEL(GCODE_SEL, SEL_KPL));
 2552         setidt(IDT_NP, &IDTVEC(missing),  SDT_SYS386TGT, SEL_KPL,
 2553             GSEL(GCODE_SEL, SEL_KPL));
 2554         setidt(IDT_SS, &IDTVEC(stk),  SDT_SYS386TGT, SEL_KPL,
 2555             GSEL(GCODE_SEL, SEL_KPL));
 2556         setidt(IDT_GP, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL,
 2557             GSEL(GCODE_SEL, SEL_KPL));
 2558         setidt(IDT_PF, &IDTVEC(page),  SDT_SYS386IGT, SEL_KPL,
 2559             GSEL(GCODE_SEL, SEL_KPL));
 2560         setidt(IDT_MF, &IDTVEC(fpu),  SDT_SYS386TGT, SEL_KPL,
 2561             GSEL(GCODE_SEL, SEL_KPL));
 2562         setidt(IDT_AC, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL,
 2563             GSEL(GCODE_SEL, SEL_KPL));
 2564         setidt(IDT_MC, &IDTVEC(mchk),  SDT_SYS386TGT, SEL_KPL,
 2565             GSEL(GCODE_SEL, SEL_KPL));
 2566         setidt(IDT_XF, &IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL,
 2567             GSEL(GCODE_SEL, SEL_KPL));
 2568         setidt(IDT_SYSCALL, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL,
 2569             GSEL(GCODE_SEL, SEL_KPL));
 2570 #ifdef KDTRACE_HOOKS
 2571         setidt(IDT_DTRACE_RET, &IDTVEC(dtrace_ret), SDT_SYS386TGT, SEL_UPL,
 2572             GSEL(GCODE_SEL, SEL_KPL));
 2573 #endif
 2574 #ifdef XENHVM
 2575         setidt(IDT_EVTCHN, &IDTVEC(xen_intr_upcall), SDT_SYS386IGT, SEL_UPL,
 2576             GSEL(GCODE_SEL, SEL_KPL));
 2577 #endif
 2578 
 2579         r_idt.rd_limit = sizeof(idt0) - 1;
 2580         r_idt.rd_base = (int) idt;
 2581         lidt(&r_idt);
 2582 
 2583 #ifdef XBOX
 2584         /*
 2585          * The following code queries the PCI ID of 0:0:0. For the XBOX,
 2586          * this should be 0x10de / 0x02a5.
 2587          *
 2588          * This is exactly what Linux does.
 2589          */
 2590         outl(0xcf8, 0x80000000);
 2591         if (inl(0xcfc) == 0x02a510de) {
 2592                 arch_i386_is_xbox = 1;
 2593                 pic16l_setled(XBOX_LED_GREEN);
 2594 
 2595                 /*
 2596                  * We are an XBOX, but we may have either 64MB or 128MB of
 2597                  * memory. The PCI host bridge should be programmed for this,
 2598                  * so we just query it. 
 2599                  */
 2600                 outl(0xcf8, 0x80000084);
 2601                 arch_i386_xbox_memsize = (inl(0xcfc) == 0x7FFFFFF) ? 128 : 64;
 2602         }
 2603 #endif /* XBOX */
 2604 
 2605         /*
 2606          * Initialize the clock before the console so that console
 2607          * initialization can use DELAY().
 2608          */
 2609         clock_init();
 2610 
 2611         finishidentcpu();       /* Final stage of CPU initialization */
 2612         setidt(IDT_UD, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL,
 2613             GSEL(GCODE_SEL, SEL_KPL));
 2614         setidt(IDT_GP, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL,
 2615             GSEL(GCODE_SEL, SEL_KPL));
 2616         initializecpu();        /* Initialize CPU registers */
 2617         initializecpucache();
 2618 
 2619         /* pointer to selector slot for %fs/%gs */
 2620         PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);
 2621 
 2622         dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 =
 2623             dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)];
 2624         dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
 2625             dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
 2626 #if defined(PAE) || defined(PAE_TABLES)
 2627         dblfault_tss.tss_cr3 = (int)IdlePDPT;
 2628 #else
 2629         dblfault_tss.tss_cr3 = (int)IdlePTD;
 2630 #endif
 2631         dblfault_tss.tss_eip = (int)dblfault_handler;
 2632         dblfault_tss.tss_eflags = PSL_KERNEL;
 2633         dblfault_tss.tss_ds = dblfault_tss.tss_es =
 2634             dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL);
 2635         dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL);
 2636         dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
 2637         dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
 2638 
 2639         vm86_initialize();
 2640         getmemsize(first);
 2641         init_param2(physmem);
 2642 
 2643         /* now running on new page tables, configured, and u/iom is accessible */
 2644 
 2645         /*
 2646          * Initialize the console before we print anything out.
 2647          */
 2648         cninit();
 2649 
 2650         if (metadata_missing)
 2651                 printf("WARNING: loader(8) metadata is missing!\n");
 2652 
 2653 #ifdef DEV_ISA
 2654 #ifdef DEV_ATPIC
 2655 #ifndef PC98
 2656         elcr_probe();
 2657 #endif
 2658         atpic_startup();
 2659 #else
 2660         /* Reset and mask the atpics and leave them shut down. */
 2661         atpic_reset();
 2662 
 2663         /*
 2664          * Point the ICU spurious interrupt vectors at the APIC spurious
 2665          * interrupt handler.
 2666          */
 2667         setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL,
 2668             GSEL(GCODE_SEL, SEL_KPL));
 2669         setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL,
 2670             GSEL(GCODE_SEL, SEL_KPL));
 2671 #endif
 2672 #endif
 2673 
 2674 #ifdef DDB
 2675         db_fetch_ksymtab(bootinfo.bi_symtab, bootinfo.bi_esymtab);
 2676 #endif
 2677 
 2678         kdb_init();
 2679 
 2680 #ifdef KDB
 2681         if (boothowto & RB_KDB)
 2682                 kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
 2683 #endif
 2684 
 2685         msgbufinit(msgbufp, msgbufsize);
 2686 #ifdef DEV_NPX
 2687         npxinit(true);
 2688 #endif
 2689         /*
 2690          * Set up thread0 pcb after npxinit calculated pcb + fpu save
 2691          * area size.  Zero out the extended state header in fpu save
 2692          * area.
 2693          */
 2694         thread0.td_pcb = get_pcb_td(&thread0);
 2695         bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size);
 2696 #ifdef CPU_ENABLE_SSE
 2697         if (use_xsave) {
 2698                 xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) +
 2699                     1);
 2700                 xhdr->xstate_bv = xsave_mask;
 2701         }
 2702 #endif
 2703         PCPU_SET(curpcb, thread0.td_pcb);
 2704         /* make an initial tss so cpu can get interrupt stack on syscall! */
 2705         /* Note: -16 is so we can grow the trapframe if we came from vm86 */
 2706         PCPU_SET(common_tss.tss_esp0, (vm_offset_t)thread0.td_pcb - 16);
 2707         PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
 2708         gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
 2709         PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd);
 2710         PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
 2711         PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
 2712         ltr(gsel_tss);
 2713 
 2714         /* make a call gate to reenter kernel with */
 2715         gdp = &ldt[LSYS5CALLS_SEL].gd;
 2716 
 2717         x = (int) &IDTVEC(lcall_syscall);
 2718         gdp->gd_looffset = x;
 2719         gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL);
 2720         gdp->gd_stkcpy = 1;
 2721         gdp->gd_type = SDT_SYS386CGT;
 2722         gdp->gd_dpl = SEL_UPL;
 2723         gdp->gd_p = 1;
 2724         gdp->gd_hioffset = x >> 16;
 2725 
 2726         /* Reuse the SysV call gate for the BSDI and Solaris 2.6 entries. */
 2728         ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL];
 2729         ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL];
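        /*
         * Illustrative note, not from the original source: LSYS5CALLS_SEL
         * is LDT slot 0, so a traditional SysV binary would enter through
         * this gate with selector LSEL(LSYS5CALLS_SEL, SEL_UPL) == 7:
         *
         *      lcall   $7, $0
         *
         * The offset of a call gate is ignored; control transfers to the
         * IDTVEC(lcall_syscall) entry point installed above.
         */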
 2730 
 2731         /* transfer to user mode */
 2732 
 2733         _ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
 2734         _udatasel = GSEL(GUDATA_SEL, SEL_UPL);
 2735 
 2736         /* setup proc 0's pcb */
 2737         thread0.td_pcb->pcb_flags = 0;
 2738 #if defined(PAE) || defined(PAE_TABLES)
 2739         thread0.td_pcb->pcb_cr3 = (int)IdlePDPT;
 2740 #else
 2741         thread0.td_pcb->pcb_cr3 = (int)IdlePTD;
 2742 #endif
 2743         thread0.td_pcb->pcb_ext = 0;
 2744         thread0.td_frame = &proc0_tf;
 2745 
 2746         cpu_probe_amdc1e();
 2747 
 2748 #ifdef FDT
 2749         x86_init_fdt();
 2750 #endif
 2751 
 2752         /* Location of kernel stack for locore */
 2753         return ((register_t)thread0.td_pcb);
 2754 }
 2755 
 2756 void
 2757 cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
 2758 {
 2759 
 2760         pcpu->pc_acpi_id = 0xffffffff;
 2761 }
 2762 
 2763 #ifndef PC98
 2764 static int
 2765 smap_sysctl_handler(SYSCTL_HANDLER_ARGS)
 2766 {
 2767         struct bios_smap *smapbase;
 2768         struct bios_smap_xattr smap;
 2769         caddr_t kmdp;
 2770         uint32_t *smapattr;
 2771         int count, error, i;
 2772 
 2773         /* Retrieve the system memory map from the loader. */
 2774         kmdp = preload_search_by_type("elf kernel");
 2775         if (kmdp == NULL)
 2776                 kmdp = preload_search_by_type("elf32 kernel");
 2777         smapbase = (struct bios_smap *)preload_search_info(kmdp,
 2778             MODINFO_METADATA | MODINFOMD_SMAP);
 2779         if (smapbase == NULL)
 2780                 return (0);
 2781         smapattr = (uint32_t *)preload_search_info(kmdp,
 2782             MODINFO_METADATA | MODINFOMD_SMAP_XATTR);
 2783         count = *((u_int32_t *)smapbase - 1) / sizeof(*smapbase);
 2784         error = 0;
 2785         for (i = 0; i < count; i++) {
 2786                 smap.base = smapbase[i].base;
 2787                 smap.length = smapbase[i].length;
 2788                 smap.type = smapbase[i].type;
 2789                 if (smapattr != NULL)
 2790                         smap.xattr = smapattr[i];
 2791                 else
 2792                         smap.xattr = 0;
 2793                 error = SYSCTL_OUT(req, &smap, sizeof(smap));
 2794         }
 2795         return (error);
 2796 }
 2797 SYSCTL_PROC(_machdep, OID_AUTO, smap, CTLTYPE_OPAQUE|CTLFLAG_RD, NULL, 0,
 2798     smap_sysctl_handler, "S,bios_smap_xattr", "Raw BIOS SMAP data");
 2799 #endif /* !PC98 */
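/*
 * Illustrative example, not part of machdep.c: a minimal userland
 * consumer of the machdep.smap sysctl defined above.  Assumes the
 * struct bios_smap_xattr layout from <machine/pc/bios.h>.
 */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <machine/pc/bios.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static int
print_smap(void)
{
        struct bios_smap_xattr *sm;
        size_t len, i;

        /* First call sizes the buffer, second call fills it. */
        if (sysctlbyname("machdep.smap", NULL, &len, NULL, 0) == -1)
                return (-1);
        if ((sm = malloc(len)) == NULL)
                return (-1);
        if (sysctlbyname("machdep.smap", sm, &len, NULL, 0) == -1) {
                free(sm);
                return (-1);
        }
        for (i = 0; i < len / sizeof(*sm); i++)
                printf("base %#jx length %#jx type %u xattr %#x\n",
                    (uintmax_t)sm[i].base, (uintmax_t)sm[i].length,
                    sm[i].type, sm[i].xattr);
        free(sm);
        return (0);
}
/* End of illustrative example. */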
 2800 
 2801 void
 2802 spinlock_enter(void)
 2803 {
 2804         struct thread *td;
 2805         register_t flags;
 2806 
 2807         td = curthread;
 2808         if (td->td_md.md_spinlock_count == 0) {
 2809                 flags = intr_disable();
 2810                 td->td_md.md_spinlock_count = 1;
 2811                 td->td_md.md_saved_flags = flags;
 2812         } else
 2813                 td->td_md.md_spinlock_count++;
 2814         critical_enter();
 2815 }
 2816 
 2817 void
 2818 spinlock_exit(void)
 2819 {
 2820         struct thread *td;
 2821         register_t flags;
 2822 
 2823         td = curthread;
 2824         critical_exit();
 2825         flags = td->td_md.md_saved_flags;
 2826         td->td_md.md_spinlock_count--;
 2827         if (td->td_md.md_spinlock_count == 0)
 2828                 intr_restore(flags);
 2829 }
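/*
 * Illustrative note, not from the original source: the pair above nests,
 * so only the outermost spinlock_enter() disables interrupts and saves
 * EFLAGS, and only the matching outermost spinlock_exit() restores them:
 *
 *      spinlock_enter();       count 0 -> 1, interrupts off, flags saved
 *      spinlock_enter();       count 1 -> 2, no flag changes
 *      spinlock_exit();        count 2 -> 1, interrupts stay off
 *      spinlock_exit();        count 1 -> 0, saved flags restored
 */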
 2830 
 2831 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
 2832 static void f00f_hack(void *unused);
 2833 SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL);
 2834 
 2835 static void
 2836 f00f_hack(void *unused)
 2837 {
 2838         struct gate_descriptor *new_idt;
 2839         vm_offset_t tmp;
 2840 
 2841         if (!has_f00f_bug)
 2842                 return;
 2843 
 2844         GIANT_REQUIRED;
 2845 
 2846         printf("Intel Pentium detected, installing workaround for F00F bug\n");
 2847 
 2848         tmp = kmem_malloc(kernel_arena, PAGE_SIZE * 2, M_WAITOK | M_ZERO);
 2849         if (tmp == 0)
 2850                 panic("kmem_malloc returned 0");
 2851 
 2852         /* Put the problematic entry (#6) at the end of the lower page. */
 2853         new_idt = (struct gate_descriptor*)
 2854             (tmp + PAGE_SIZE - 7 * sizeof(struct gate_descriptor));
 2855         bcopy(idt, new_idt, sizeof(idt0));
 2856         r_idt.rd_base = (u_int)new_idt;
 2857         lidt(&r_idt);
 2858         idt = new_idt;
 2859         pmap_protect(kernel_pmap, tmp, tmp + PAGE_SIZE, VM_PROT_READ);
 2860 }
 2861 #endif /* defined(I586_CPU) && !defined(NO_F00F_HACK) */
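/*
 * Illustrative note, not from the original source: with PAGE_SIZE == 4096
 * and 8-byte gate descriptors, new_idt above is tmp + 4096 - 56 =
 * tmp + 4040, so entry 6 (#UD, the vector the erratum must deliver)
 * occupies tmp + 4088 .. tmp + 4095, the tail of the first page, which
 * pmap_protect() then maps read-only.  The erratum's locked descriptor
 * fetch behaves as a read-modify-write cycle, so it faults on the
 * read-only page instead of wedging the bus, and the page-fault path in
 * trap.c recognizes faults inside the IDT range and delivers the trap
 * the CPU could not.
 */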
 2862 
 2863 /*
 2864  * Construct a PCB from a trapframe. This is called from kdb_trap() where
 2865  * we want to start a backtrace from the function that caused us to enter
 2866  * the debugger. We have the context in the trapframe, but base the trace
 2867  * on the PCB. The PCB doesn't have to be perfect, as long as it contains
 2868  * enough for a backtrace.
 2869  */
 2870 void
 2871 makectx(struct trapframe *tf, struct pcb *pcb)
 2872 {
 2873 
 2874         pcb->pcb_edi = tf->tf_edi;
 2875         pcb->pcb_esi = tf->tf_esi;
 2876         pcb->pcb_ebp = tf->tf_ebp;
 2877         pcb->pcb_ebx = tf->tf_ebx;
 2878         pcb->pcb_eip = tf->tf_eip;
 2879         pcb->pcb_esp = (ISPL(tf->tf_cs)) ? tf->tf_esp : (int)(tf + 1) - 8;
 2880         pcb->pcb_gs = rgs();
 2881 }
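/*
 * Illustrative note, not from the original source: the tf_esp expression
 * above reflects i386 hardware behaviour.  A trap from user mode
 * (ISPL(tf_cs) != 0) pushes esp and ss, so tf_esp is valid; a trap from
 * kernel mode pushes neither, so the pre-trap stack pointer is the end
 * of the trapframe minus the 8 bytes of the never-pushed esp/ss slots,
 * i.e. (int)(tf + 1) - 8.
 */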
 2882 
 2883 int
 2884 ptrace_set_pc(struct thread *td, u_long addr)
 2885 {
 2886 
 2887         td->td_frame->tf_eip = addr;
 2888         return (0);
 2889 }
 2890 
 2891 int
 2892 ptrace_single_step(struct thread *td)
 2893 {
 2894         td->td_frame->tf_eflags |= PSL_T;
 2895         return (0);
 2896 }
 2897 
 2898 int
 2899 ptrace_clear_single_step(struct thread *td)
 2900 {
 2901         td->td_frame->tf_eflags &= ~PSL_T;
 2902         return (0);
 2903 }
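/*
 * Illustrative example, not part of machdep.c: a userland sketch of
 * driving the PSL_T machinery above through ptrace(2).  PT_STEP ends up
 * in ptrace_single_step() for the stopped thread.
 */
#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/wait.h>

static int
step_once(pid_t pid)
{
        int status;

        /* An addr of (caddr_t)1 means "resume where the thread stopped". */
        if (ptrace(PT_STEP, pid, (caddr_t)1, 0) == -1)
                return (-1);
        if (waitpid(pid, &status, 0) == -1)
                return (-1);
        return (WIFSTOPPED(status) ? 0 : -1);
}
/* End of illustrative example. */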
 2904 
 2905 int
 2906 fill_regs(struct thread *td, struct reg *regs)
 2907 {
 2908         struct pcb *pcb;
 2909         struct trapframe *tp;
 2910 
 2911         tp = td->td_frame;
 2912         pcb = td->td_pcb;
 2913         regs->r_gs = pcb->pcb_gs;
 2914         return (fill_frame_regs(tp, regs));
 2915 }
 2916 
 2917 int
 2918 fill_frame_regs(struct trapframe *tp, struct reg *regs)
 2919 {
 2920         regs->r_fs = tp->tf_fs;
 2921         regs->r_es = tp->tf_es;
 2922         regs->r_ds = tp->tf_ds;
 2923         regs->r_edi = tp->tf_edi;
 2924         regs->r_esi = tp->tf_esi;
 2925         regs->r_ebp = tp->tf_ebp;
 2926         regs->r_ebx = tp->tf_ebx;
 2927         regs->r_edx = tp->tf_edx;
 2928         regs->r_ecx = tp->tf_ecx;
 2929         regs->r_eax = tp->tf_eax;
 2930         regs->r_eip = tp->tf_eip;
 2931         regs->r_cs = tp->tf_cs;
 2932         regs->r_eflags = tp->tf_eflags;
 2933         regs->r_esp = tp->tf_esp;
 2934         regs->r_ss = tp->tf_ss;
 2935         return (0);
 2936 }
 2937 
 2938 int
 2939 set_regs(struct thread *td, struct reg *regs)
 2940 {
 2941         struct pcb *pcb;
 2942         struct trapframe *tp;
 2943 
 2944         tp = td->td_frame;
 2945         if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) ||
 2946             !CS_SECURE(regs->r_cs))
 2947                 return (EINVAL);
 2948         pcb = td->td_pcb;
 2949         tp->tf_fs = regs->r_fs;
 2950         tp->tf_es = regs->r_es;
 2951         tp->tf_ds = regs->r_ds;
 2952         tp->tf_edi = regs->r_edi;
 2953         tp->tf_esi = regs->r_esi;
 2954         tp->tf_ebp = regs->r_ebp;
 2955         tp->tf_ebx = regs->r_ebx;
 2956         tp->tf_edx = regs->r_edx;
 2957         tp->tf_ecx = regs->r_ecx;
 2958         tp->tf_eax = regs->r_eax;
 2959         tp->tf_eip = regs->r_eip;
 2960         tp->tf_cs = regs->r_cs;
 2961         tp->tf_eflags = regs->r_eflags;
 2962         tp->tf_esp = regs->r_esp;
 2963         tp->tf_ss = regs->r_ss;
 2964         pcb->pcb_gs = regs->r_gs;
 2965         return (0);
 2966 }
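/*
 * Illustrative example, not part of machdep.c: the userland path into
 * fill_regs()/set_regs() above is PT_GETREGS/PT_SETREGS, which operate
 * on the same struct reg.  Clearing PSL_C is accepted because it is one
 * of the PSL_USERCHANGE bits that EFL_SECURE() lets a debugger modify.
 */
#include <sys/types.h>
#include <sys/ptrace.h>
#include <machine/reg.h>

static int
clear_carry(pid_t pid)
{
        struct reg r;

        if (ptrace(PT_GETREGS, pid, (caddr_t)&r, 0) == -1)
                return (-1);
        r.r_eflags &= ~0x1;     /* PSL_C */
        return (ptrace(PT_SETREGS, pid, (caddr_t)&r, 0));
}
/* End of illustrative example. */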
 2967 
 2968 int
 2969 fill_fpregs(struct thread *td, struct fpreg *fpregs)
 2970 {
 2971 
 2972         KASSERT(td == curthread || TD_IS_SUSPENDED(td) ||
 2973             P_SHOULDSTOP(td->td_proc),
 2974             ("not suspended thread %p", td));
 2975 #ifdef DEV_NPX
 2976         npxgetregs(td);
 2977 #else
 2978         bzero(fpregs, sizeof(*fpregs));
 2979 #endif
 2980 #ifdef CPU_ENABLE_SSE
 2981         if (cpu_fxsr)
 2982                 npx_fill_fpregs_xmm(&get_pcb_user_save_td(td)->sv_xmm,
 2983                     (struct save87 *)fpregs);
 2984         else
 2985 #endif /* CPU_ENABLE_SSE */
 2986                 bcopy(&get_pcb_user_save_td(td)->sv_87, fpregs,
 2987                     sizeof(*fpregs));
 2988         return (0);
 2989 }
 2990 
 2991 int
 2992 set_fpregs(struct thread *td, struct fpreg *fpregs)
 2993 {
 2994 
 2995 #ifdef CPU_ENABLE_SSE
 2996         if (cpu_fxsr)
 2997                 npx_set_fpregs_xmm((struct save87 *)fpregs,
 2998                     &get_pcb_user_save_td(td)->sv_xmm);
 2999         else
 3000 #endif /* CPU_ENABLE_SSE */
 3001                 bcopy(fpregs, &get_pcb_user_save_td(td)->sv_87,
 3002                     sizeof(*fpregs));
 3003 #ifdef DEV_NPX
 3004         npxuserinited(td);
 3005 #endif
 3006         return (0);
 3007 }
 3008 
 3009 /*
 3010  * Get machine context.
 3011  */
 3012 int
 3013 get_mcontext(struct thread *td, mcontext_t *mcp, int flags)
 3014 {
 3015         struct trapframe *tp;
 3016         struct segment_descriptor *sdp;
 3017 
 3018         tp = td->td_frame;
 3019 
 3020         PROC_LOCK(curthread->td_proc);
 3021         mcp->mc_onstack = sigonstack(tp->tf_esp);
 3022         PROC_UNLOCK(curthread->td_proc);
 3023         mcp->mc_gs = td->td_pcb->pcb_gs;
 3024         mcp->mc_fs = tp->tf_fs;
 3025         mcp->mc_es = tp->tf_es;
 3026         mcp->mc_ds = tp->tf_ds;
 3027         mcp->mc_edi = tp->tf_edi;
 3028         mcp->mc_esi = tp->tf_esi;
 3029         mcp->mc_ebp = tp->tf_ebp;
 3030         mcp->mc_isp = tp->tf_isp;
 3031         mcp->mc_eflags = tp->tf_eflags;
 3032         if (flags & GET_MC_CLEAR_RET) {
 3033                 mcp->mc_eax = 0;
 3034                 mcp->mc_edx = 0;
 3035                 mcp->mc_eflags &= ~PSL_C;
 3036         } else {
 3037                 mcp->mc_eax = tp->tf_eax;
 3038                 mcp->mc_edx = tp->tf_edx;
 3039         }
 3040         mcp->mc_ebx = tp->tf_ebx;
 3041         mcp->mc_ecx = tp->tf_ecx;
 3042         mcp->mc_eip = tp->tf_eip;
 3043         mcp->mc_cs = tp->tf_cs;
 3044         mcp->mc_esp = tp->tf_esp;
 3045         mcp->mc_ss = tp->tf_ss;
 3046         mcp->mc_len = sizeof(*mcp);
 3047         get_fpcontext(td, mcp, NULL, 0);
 3048         sdp = &td->td_pcb->pcb_fsd;
 3049         mcp->mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase;
 3050         sdp = &td->td_pcb->pcb_gsd;
 3051         mcp->mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase;
 3052         mcp->mc_flags = 0;
 3053         mcp->mc_xfpustate = 0;
 3054         mcp->mc_xfpustate_len = 0;
 3055         bzero(mcp->mc_spare2, sizeof(mcp->mc_spare2));
 3056         return (0);
 3057 }
 3058 
 3059 /*
 3060  * Set machine context.
 3061  *
 3062  * Note that only the user-modifiable eflags bits are honored here, and
 3063  * the cs selector is left untouched.
 3064  */
 3065 int
 3066 set_mcontext(struct thread *td, mcontext_t *mcp)
 3067 {
 3068         struct trapframe *tp;
 3069         char *xfpustate;
 3070         int eflags, ret;
 3071 
 3072         tp = td->td_frame;
 3073         if (mcp->mc_len != sizeof(*mcp) ||
 3074             (mcp->mc_flags & ~_MC_FLAG_MASK) != 0)
 3075                 return (EINVAL);
 3076         eflags = (mcp->mc_eflags & PSL_USERCHANGE) |
 3077             (tp->tf_eflags & ~PSL_USERCHANGE);
 3078         if (mcp->mc_flags & _MC_HASFPXSTATE) {
 3079                 if (mcp->mc_xfpustate_len > cpu_max_ext_state_size -
 3080                     sizeof(union savefpu))
 3081                         return (EINVAL);
 3082                 xfpustate = __builtin_alloca(mcp->mc_xfpustate_len);
 3083                 ret = copyin((void *)mcp->mc_xfpustate, xfpustate,
 3084                     mcp->mc_xfpustate_len);
 3085                 if (ret != 0)
 3086                         return (ret);
 3087         } else
 3088                 xfpustate = NULL;
 3089         ret = set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len);
 3090         if (ret != 0)
 3091                 return (ret);
 3092         tp->tf_fs = mcp->mc_fs;
 3093         tp->tf_es = mcp->mc_es;
 3094         tp->tf_ds = mcp->mc_ds;
 3095         tp->tf_edi = mcp->mc_edi;
 3096         tp->tf_esi = mcp->mc_esi;
 3097         tp->tf_ebp = mcp->mc_ebp;
 3098         tp->tf_ebx = mcp->mc_ebx;
 3099         tp->tf_edx = mcp->mc_edx;
 3100         tp->tf_ecx = mcp->mc_ecx;
 3101         tp->tf_eax = mcp->mc_eax;
 3102         tp->tf_eip = mcp->mc_eip;
 3103         tp->tf_eflags = eflags;
 3104         tp->tf_esp = mcp->mc_esp;
 3105         tp->tf_ss = mcp->mc_ss;
 3106         td->td_pcb->pcb_gs = mcp->mc_gs;
 3107         return (0);
 3108 }
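/*
 * Illustrative example, not part of machdep.c: get_mcontext() and
 * set_mcontext() above are the machine-dependent halves of
 * getcontext(2) and setcontext(2).  A minimal userland round trip:
 */
#include <ucontext.h>
#include <stdio.h>

static volatile int passes;

static void
bounce(void)
{
        ucontext_t uc;

        getcontext(&uc);                /* reaches get_mcontext() */
        if (passes++ == 0)
                setcontext(&uc);        /* reaches set_mcontext() */
        printf("resumed, passes = %d\n", passes);
}
/* End of illustrative example. */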
 3109 
 3110 static void
 3111 get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave,
 3112     size_t xfpusave_len)
 3113 {
 3114 #ifdef CPU_ENABLE_SSE
 3115         size_t max_len, len;
 3116 #endif
 3117 
 3118 #ifndef DEV_NPX
 3119         mcp->mc_fpformat = _MC_FPFMT_NODEV;
 3120         mcp->mc_ownedfp = _MC_FPOWNED_NONE;
 3121         bzero(mcp->mc_fpstate, sizeof(mcp->mc_fpstate));
 3122 #else
 3123         mcp->mc_ownedfp = npxgetregs(td);
 3124         bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0],
 3125             sizeof(mcp->mc_fpstate));
 3126         mcp->mc_fpformat = npxformat();
 3127 #ifdef CPU_ENABLE_SSE
 3128         if (!use_xsave || xfpusave_len == 0)
 3129                 return;
 3130         max_len = cpu_max_ext_state_size - sizeof(union savefpu);
 3131         len = xfpusave_len;
 3132         if (len > max_len) {
 3133                 bzero(xfpusave + max_len, len - max_len);
 3134                 len = max_len;
 3135         }
 3136         mcp->mc_flags |= _MC_HASFPXSTATE;
 3137         mcp->mc_xfpustate_len = len;
 3138         bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len);
 3139 #endif
 3140 #endif
 3141 }
 3142 
 3143 static int
 3144 set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate,
 3145     size_t xfpustate_len)
 3146 {
 3147         union savefpu *fpstate;
 3148         int error;
 3149 
 3150         if (mcp->mc_fpformat == _MC_FPFMT_NODEV)
 3151                 return (0);
 3152         else if (mcp->mc_fpformat != _MC_FPFMT_387 &&
 3153             mcp->mc_fpformat != _MC_FPFMT_XMM)
 3154                 return (EINVAL);
 3155         else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) {
 3156                 /* We don't care what state is left in the FPU or PCB. */
 3157                 fpstate_drop(td);
 3158                 error = 0;
 3159         } else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU ||
 3160             mcp->mc_ownedfp == _MC_FPOWNED_PCB) {
 3161 #ifdef DEV_NPX
 3162                 fpstate = (union savefpu *)&mcp->mc_fpstate;
 3163 #ifdef CPU_ENABLE_SSE
 3164                 if (cpu_fxsr)
 3165                         fpstate->sv_xmm.sv_env.en_mxcsr &= cpu_mxcsr_mask;
 3166 #endif
 3167                 error = npxsetregs(td, fpstate, xfpustate, xfpustate_len);
 3168 #else
 3169                 error = EINVAL;
 3170 #endif
 3171         } else
 3172                 return (EINVAL);
 3173         return (error);
 3174 }
 3175 
 3176 static void
 3177 fpstate_drop(struct thread *td)
 3178 {
 3179 
 3180         KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu"));
 3181         critical_enter();
 3182 #ifdef DEV_NPX
 3183         if (PCPU_GET(fpcurthread) == td)
 3184                 npxdrop();
 3185 #endif
 3186         /*
 3187          * XXX force a full drop of the npx.  The above only drops it if we
 3188          * owned it.  npxgetregs() has the same bug in the !cpu_fxsr case.
 3189          *
 3190          * XXX I don't much like npxgetregs()'s semantics of doing a full
 3191          * drop.  Dropping only to the pcb matches fnsave's behaviour.
 3192          * We only need to drop to !PCB_INITDONE in sendsig().  But
 3193          * sendsig() is the only caller of npxgetregs()... perhaps we just
 3194          * have too many layers.
 3195          */
 3196         curthread->td_pcb->pcb_flags &= ~(PCB_NPXINITDONE |
 3197             PCB_NPXUSERINITDONE);
 3198         critical_exit();
 3199 }
 3200 
 3201 int
 3202 fill_dbregs(struct thread *td, struct dbreg *dbregs)
 3203 {
 3204         struct pcb *pcb;
 3205 
 3206         if (td == NULL) {
 3207                 dbregs->dr[0] = rdr0();
 3208                 dbregs->dr[1] = rdr1();
 3209                 dbregs->dr[2] = rdr2();
 3210                 dbregs->dr[3] = rdr3();
 3211                 dbregs->dr[4] = rdr4();
 3212                 dbregs->dr[5] = rdr5();
 3213                 dbregs->dr[6] = rdr6();
 3214                 dbregs->dr[7] = rdr7();
 3215         } else {
 3216                 pcb = td->td_pcb;
 3217                 dbregs->dr[0] = pcb->pcb_dr0;
 3218                 dbregs->dr[1] = pcb->pcb_dr1;
 3219                 dbregs->dr[2] = pcb->pcb_dr2;
 3220                 dbregs->dr[3] = pcb->pcb_dr3;
 3221                 dbregs->dr[4] = 0;
 3222                 dbregs->dr[5] = 0;
 3223                 dbregs->dr[6] = pcb->pcb_dr6;
 3224                 dbregs->dr[7] = pcb->pcb_dr7;
 3225         }
 3226         return (0);
 3227 }
 3228 
 3229 int
 3230 set_dbregs(struct thread *td, struct dbreg *dbregs)
 3231 {
 3232         struct pcb *pcb;
 3233         int i;
 3234 
 3235         if (td == NULL) {
 3236                 load_dr0(dbregs->dr[0]);
 3237                 load_dr1(dbregs->dr[1]);
 3238                 load_dr2(dbregs->dr[2]);
 3239                 load_dr3(dbregs->dr[3]);
 3240                 load_dr4(dbregs->dr[4]);
 3241                 load_dr5(dbregs->dr[5]);
 3242                 load_dr6(dbregs->dr[6]);
 3243                 load_dr7(dbregs->dr[7]);
 3244         } else {
 3245                 /*
 3246                  * Don't let an illegal value for dr7 get set.  Specifically,
 3247                  * check for undefined settings.  Setting these bit patterns
 3248                  * results in undefined behaviour and can lead to an unexpected
 3249                  * TRCTRAP.
 3250                  */
 3251                 for (i = 0; i < 4; i++) {
 3252                         if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02)
 3253                                 return (EINVAL);
 3254                         if (DBREG_DR7_LEN(dbregs->dr[7], i) == 0x02)
 3255                                 return (EINVAL);
 3256                 }
 3257                 
 3258                 pcb = td->td_pcb;
 3259                 
 3260                 /*
 3261                  * Don't let a process set a breakpoint that is not within the
 3262                  * process's address space.  If a process could do this, it
 3263                  * could halt the system by setting a breakpoint in the kernel
 3264                  * (if ddb was enabled).  Thus, we need to check to make sure
 3265                  * that no breakpoints are being enabled for addresses outside
 3266                  * process's address space.
 3267                  *
 3268                  * XXX - what about when the watched area of the user's
 3269                  * address space is written into from within the kernel
 3270                  * ... wouldn't that still cause a breakpoint to be generated
 3271                  * from within kernel mode?
 3272                  */
 3273 
 3274                 if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) {
 3275                         /* dr0 is enabled */
 3276                         if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS)
 3277                                 return (EINVAL);
 3278                 }
 3279                         
 3280                 if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) {
 3281                         /* dr1 is enabled */
 3282                         if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS)
 3283                                 return (EINVAL);
 3284                 }
 3285                         
 3286                 if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) {
 3287                         /* dr2 is enabled */
 3288                         if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS)
 3289                                 return (EINVAL);
 3290                 }
 3291                         
 3292                 if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) {
 3293                         /* dr3 is enabled */
 3294                         if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS)
 3295                                 return (EINVAL);
 3296                 }
 3297 
 3298                 pcb->pcb_dr0 = dbregs->dr[0];
 3299                 pcb->pcb_dr1 = dbregs->dr[1];
 3300                 pcb->pcb_dr2 = dbregs->dr[2];
 3301                 pcb->pcb_dr3 = dbregs->dr[3];
 3302                 pcb->pcb_dr6 = dbregs->dr[6];
 3303                 pcb->pcb_dr7 = dbregs->dr[7];
 3304 
 3305                 pcb->pcb_flags |= PCB_DBREGS;
 3306         }
 3307 
 3308         return (0);
 3309 }
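/*
 * Illustrative example, not part of machdep.c: a worked dr7 encoding for
 * the validation above (values hypothetical).  Arming slot 0 as a 4-byte
 * write watchpoint needs L0 (bit 0), R/W0 = 01 (write, bits 16-17) and
 * LEN0 = 11 (4 bytes, bits 18-19):
 *
 *      0x1 | (0x1 << 16) | (0x3 << 18) == 0x000d0001
 *
 * A debugger would install it with PT_SETDBREGS:
 */
#include <sys/types.h>
#include <sys/ptrace.h>
#include <machine/reg.h>

static int
watch_write(pid_t pid, void *uaddr)
{
        struct dbreg d = { .dr = { 0 } };

        d.dr[0] = (unsigned int)uaddr;  /* must be below VM_MAXUSER_ADDRESS */
        d.dr[7] = 0x000d0001;
        return (ptrace(PT_SETDBREGS, pid, (caddr_t)&d, 0));
}
/* End of illustrative example. */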
 3310 
 3311 /*
 3312  * Return > 0 if a hardware breakpoint has been hit, and the
 3313  * breakpoint was in user space.  Return 0, otherwise.
 3314  */
 3315 int
 3316 user_dbreg_trap(void)
 3317 {
 3318         u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */
 3319         u_int32_t bp;       /* breakpoint bits extracted from dr6 */
 3320         int nbp;            /* number of breakpoints that triggered */
 3321         caddr_t addr[4];    /* breakpoint addresses */
 3322         int i;
 3323         
 3324         dr7 = rdr7();
 3325         if ((dr7 & 0x000000ff) == 0) {
 3326                 /*
 3327                  * None of the enable bits (L0-L3, G0-G3) in the low
 3328                  * byte of dr7 are set, so the trap couldn't have been
 3329                  * caused by the hardware debug registers.
 3330                  */
 3331                 return (0);
 3332         }
 3333 
 3334         nbp = 0;
 3335         dr6 = rdr6();
 3336         bp = dr6 & 0x0000000f;
 3337 
 3338         if (!bp) {
 3339                 /*
 3340                  * None of the breakpoint status bits are set, meaning
 3341                  * this trap was not caused by any of the debug registers.
 3342                  */
 3343                 return (0);
 3344         }
 3345 
 3346         /*
 3347          * At least one of the breakpoints was hit; check which ones,
 3348          * and whether any of the addresses are in user space.
 3349          */
 3350 
 3351         if (bp & 0x01) {
 3352                 addr[nbp++] = (caddr_t)rdr0();
 3353         }
 3354         if (bp & 0x02) {
 3355                 addr[nbp++] = (caddr_t)rdr1();
 3356         }
 3357         if (bp & 0x04) {
 3358                 addr[nbp++] = (caddr_t)rdr2();
 3359         }
 3360         if (bp & 0x08) {
 3361                 addr[nbp++] = (caddr_t)rdr3();
 3362         }
 3363 
 3364         for (i = 0; i < nbp; i++) {
 3365                 if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) {
 3366                         /*
 3367                          * addr[i] is in user space
 3368                          */
 3369                         return (nbp);
 3370                 }
 3371         }
 3372 
 3373         /*
 3374          * None of the breakpoints are in user space.
 3375          */
 3376         return (0);
 3377 }
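/*
 * Illustrative note, not from the original source: the 0x0000000f mask
 * above selects the B0-B3 status bits of dr6, one per debug register;
 * the loop then maps each set bit to its breakpoint address so that
 * kernel-space hits can be filtered out against VM_MAXUSER_ADDRESS.
 */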
 3378 
 3379 #ifdef KDB
 3380 
 3381 /*
 3382  * Provide inb() and outb() as functions.  They are normally only available as
 3383  * inline functions, thus cannot be called from the debugger.
 3384  */
 3385 
 3386 /* silence compiler warnings */
 3387 u_char inb_(u_short);
 3388 void outb_(u_short, u_char);
 3389 
 3390 u_char
 3391 inb_(u_short port)
 3392 {
 3393         return inb(port);
 3394 }
 3395 
 3396 void
 3397 outb_(u_short port, u_char data)
 3398 {
 3399         outb(port, data);
 3400 }
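/*
 * Illustrative note, not from the original source: from the ddb(4)
 * prompt these can be invoked with the debugger's call command, e.g.
 * (port and value hypothetical):
 *
 *      db> call outb_(0x80, 0xff)
 *      db> call inb_(0x60)
 */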
 3401 
 3402 #endif /* KDB */
