The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/i386/i386/machdep.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-4-Clause
    3  *
    4  * Copyright (c) 2018 The FreeBSD Foundation
    5  * Copyright (c) 1992 Terrence R. Lambert.
    6  * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
    7  * All rights reserved.
    8  *
    9  * This code is derived from software contributed to Berkeley by
   10  * William Jolitz.
   11  *
   12  * Portions of this software were developed by A. Joseph Koshy under
   13  * sponsorship from the FreeBSD Foundation and Google, Inc.
   14  *
   15  * Redistribution and use in source and binary forms, with or without
   16  * modification, are permitted provided that the following conditions
   17  * are met:
   18  * 1. Redistributions of source code must retain the above copyright
   19  *    notice, this list of conditions and the following disclaimer.
   20  * 2. Redistributions in binary form must reproduce the above copyright
   21  *    notice, this list of conditions and the following disclaimer in the
   22  *    documentation and/or other materials provided with the distribution.
   23  * 3. All advertising materials mentioning features or use of this software
   24  *    must display the following acknowledgement:
   25  *      This product includes software developed by the University of
   26  *      California, Berkeley and its contributors.
   27  * 4. Neither the name of the University nor the names of its contributors
   28  *    may be used to endorse or promote products derived from this software
   29  *    without specific prior written permission.
   30  *
   31  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   32  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   33  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   34  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   35  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   36  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   37  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   38  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   39  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   40  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   41  * SUCH DAMAGE.
   42  *
   43  *      from: @(#)machdep.c     7.4 (Berkeley) 6/3/91
   44  */
   45 
   46 #include <sys/cdefs.h>
   47 __FBSDID("$FreeBSD$");
   48 
   49 #include "opt_apic.h"
   50 #include "opt_atpic.h"
   51 #include "opt_cpu.h"
   52 #include "opt_ddb.h"
   53 #include "opt_inet.h"
   54 #include "opt_isa.h"
   55 #include "opt_kstack_pages.h"
   56 #include "opt_maxmem.h"
   57 #include "opt_mp_watchdog.h"
   58 #include "opt_perfmon.h"
   59 #include "opt_platform.h"
   60 
   61 #include <sys/param.h>
   62 #include <sys/proc.h>
   63 #include <sys/systm.h>
   64 #include <sys/bio.h>
   65 #include <sys/buf.h>
   66 #include <sys/bus.h>
   67 #include <sys/callout.h>
   68 #include <sys/cons.h>
   69 #include <sys/cpu.h>
   70 #include <sys/eventhandler.h>
   71 #include <sys/exec.h>
   72 #include <sys/imgact.h>
   73 #include <sys/kdb.h>
   74 #include <sys/kernel.h>
   75 #include <sys/ktr.h>
   76 #include <sys/linker.h>
   77 #include <sys/lock.h>
   78 #include <sys/malloc.h>
   79 #include <sys/memrange.h>
   80 #include <sys/msgbuf.h>
   81 #include <sys/mutex.h>
   82 #include <sys/pcpu.h>
   83 #include <sys/ptrace.h>
   84 #include <sys/reboot.h>
   85 #include <sys/rwlock.h>
   86 #include <sys/sched.h>
   87 #include <sys/signalvar.h>
   88 #include <sys/smp.h>
   89 #include <sys/syscallsubr.h>
   90 #include <sys/sysctl.h>
   91 #include <sys/sysent.h>
   92 #include <sys/sysproto.h>
   93 #include <sys/ucontext.h>
   94 #include <sys/vmmeter.h>
   95 
   96 #include <vm/vm.h>
   97 #include <vm/vm_extern.h>
   98 #include <vm/vm_kern.h>
   99 #include <vm/vm_page.h>
  100 #include <vm/vm_map.h>
  101 #include <vm/vm_object.h>
  102 #include <vm/vm_pager.h>
  103 #include <vm/vm_param.h>
  104 #include <vm/vm_phys.h>
  105 
  106 #ifdef DDB
  107 #ifndef KDB
  108 #error KDB must be enabled in order for DDB to work!
  109 #endif
  110 #include <ddb/ddb.h>
  111 #include <ddb/db_sym.h>
  112 #endif
  113 
  114 #include <isa/rtc.h>
  115 
  116 #include <net/netisr.h>
  117 
  118 #include <machine/bootinfo.h>
  119 #include <machine/clock.h>
  120 #include <machine/cpu.h>
  121 #include <machine/cputypes.h>
  122 #include <machine/intr_machdep.h>
  123 #include <x86/mca.h>
  124 #include <machine/md_var.h>
  125 #include <machine/metadata.h>
  126 #include <machine/mp_watchdog.h>
  127 #include <machine/pc/bios.h>
  128 #include <machine/pcb.h>
  129 #include <machine/pcb_ext.h>
  130 #include <machine/proc.h>
  131 #include <machine/reg.h>
  132 #include <machine/sigframe.h>
  133 #include <machine/specialreg.h>
  134 #include <machine/sysarch.h>
  135 #include <machine/trap.h>
  136 #include <x86/ucode.h>
  137 #include <machine/vm86.h>
  138 #include <x86/init.h>
  139 #ifdef PERFMON
  140 #include <machine/perfmon.h>
  141 #endif
  142 #ifdef SMP
  143 #include <machine/smp.h>
  144 #endif
  145 #ifdef FDT
  146 #include <x86/fdt.h>
  147 #endif
  148 
  149 #ifdef DEV_APIC
  150 #include <x86/apicvar.h>
  151 #endif
  152 
  153 #ifdef DEV_ISA
  154 #include <x86/isa/icu.h>
  155 #endif
  156 
/*
 * Sanity check for __curthread(): the CTASSERT enforces that
 * pc_curthread is the first member of struct pcpu (offset 0).
 */
CTASSERT(offsetof(struct pcpu, pc_curthread) == 0);

/* Forward declarations; definitions are elsewhere in this file. */
register_t init386(int first);
void dblfault_handler(void);
void identify_cpu(void);

/* Local helpers used by sendsig()/sigreturn() paths below. */
static void cpu_startup(void *);
static void fpstate_drop(struct thread *td);
static void get_fpcontext(struct thread *td, mcontext_t *mcp,
    char *xfpusave, size_t xfpusave_len);
static int  set_fpcontext(struct thread *td, mcontext_t *mcp,
    char *xfpustate, size_t xfpustate_len);
/* Run cpu_startup() once, early in boot, at SI_SUB_CPU. */
SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
  172 /* Intel ICH registers */
  173 #define ICH_PMBASE      0x400
  174 #define ICH_SMI_EN      ICH_PMBASE + 0x30
  175 
int     _udatasel, _ucodesel;   /* user data/code segment selectors; set during init (not shown here) */
u_int   basemem;                /* base (conventional) memory size; filled in elsewhere — TODO confirm units */

int cold = 1;                   /* nonzero during early boot; cleared elsewhere once the system is up */

#ifdef COMPAT_43
static void osendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask);
#endif
#ifdef COMPAT_FREEBSD4
static void freebsd4_sendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask);
#endif

/* Physical memory totals; updated in cpu_startup() and init code. */
long Maxmem = 0;
long realmem = 0;

#ifdef PAE
FEATURE(pae, "Physical Address Extensions");
#endif

/*
 * The number of PHYSMAP entries must be one less than the number of
 * PHYSSEG entries because the PHYSMAP entry that spans the largest
 * physical address that is accessible by ISA DMA is split into two
 * PHYSSEG entries.
 */
#define PHYSMAP_SIZE    (2 * (VM_PHYSSEG_MAX - 1))

/* Usable/dumpable physical memory, as (start, end) pairs terminated by 0,0. */
vm_paddr_t phys_avail[PHYSMAP_SIZE + 2];
vm_paddr_t dump_avail[PHYSMAP_SIZE + 2];

/* must be 2 less so 0 0 can signal end of chunks */
#define PHYS_AVAIL_ARRAY_END (nitems(phys_avail) - 2)
#define DUMP_AVAIL_ARRAY_END (nitems(dump_avail) - 2)

/* Kernel virtual address submap info, initialized in cpu_startup(). */
struct kva_md_info kmi;

static struct trapframe proc0_tf;       /* trapframe for proc0/thread0 */
struct pcpu __pcpu[MAXCPU];             /* per-CPU data, one slot per possible CPU */

struct mtx icu_lock;                    /* protects the legacy interrupt controller */

struct mem_range_softc mem_range_softc;

extern char start_exceptions[], end_exceptions[];

extern struct sysentvec elf32_freebsd_sysvec;

/* Default init_ops implementation. */
struct init_ops init_ops = {
        .early_clock_source_init =      i8254_init,
        .early_delay =                  i8254_delay,
#ifdef DEV_APIC
        .msi_init =                     msi_init,
#endif
};
  231 
  232 static void
  233 cpu_startup(dummy)
  234         void *dummy;
  235 {
  236         uintmax_t memsize;
  237         char *sysenv;
  238 
  239         /*
  240          * On MacBooks, we need to disallow the legacy USB circuit to
  241          * generate an SMI# because this can cause several problems,
  242          * namely: incorrect CPU frequency detection and failure to
  243          * start the APs.
  244          * We do this by disabling a bit in the SMI_EN (SMI Control and
  245          * Enable register) of the Intel ICH LPC Interface Bridge.
  246          */
  247         sysenv = kern_getenv("smbios.system.product");
  248         if (sysenv != NULL) {
  249                 if (strncmp(sysenv, "MacBook1,1", 10) == 0 ||
  250                     strncmp(sysenv, "MacBook3,1", 10) == 0 ||
  251                     strncmp(sysenv, "MacBook4,1", 10) == 0 ||
  252                     strncmp(sysenv, "MacBookPro1,1", 13) == 0 ||
  253                     strncmp(sysenv, "MacBookPro1,2", 13) == 0 ||
  254                     strncmp(sysenv, "MacBookPro3,1", 13) == 0 ||
  255                     strncmp(sysenv, "MacBookPro4,1", 13) == 0 ||
  256                     strncmp(sysenv, "Macmini1,1", 10) == 0) {
  257                         if (bootverbose)
  258                                 printf("Disabling LEGACY_USB_EN bit on "
  259                                     "Intel ICH.\n");
  260                         outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8);
  261                 }
  262                 freeenv(sysenv);
  263         }
  264 
  265         /*
  266          * Good {morning,afternoon,evening,night}.
  267          */
  268         startrtclock();
  269         printcpuinfo();
  270         panicifcpuunsupported();
  271 #ifdef PERFMON
  272         perfmon_init();
  273 #endif
  274 
  275         /*
  276          * Display physical memory if SMBIOS reports reasonable amount.
  277          */
  278         memsize = 0;
  279         sysenv = kern_getenv("smbios.memory.enabled");
  280         if (sysenv != NULL) {
  281                 memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10;
  282                 freeenv(sysenv);
  283         }
  284         if (memsize < ptoa((uintmax_t)vm_free_count()))
  285                 memsize = ptoa((uintmax_t)Maxmem);
  286         printf("real memory  = %ju (%ju MB)\n", memsize, memsize >> 20);
  287         realmem = atop(memsize);
  288 
  289         /*
  290          * Display any holes after the first chunk of extended memory.
  291          */
  292         if (bootverbose) {
  293                 int indx;
  294 
  295                 printf("Physical memory chunk(s):\n");
  296                 for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
  297                         vm_paddr_t size;
  298 
  299                         size = phys_avail[indx + 1] - phys_avail[indx];
  300                         printf(
  301                             "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n",
  302                             (uintmax_t)phys_avail[indx],
  303                             (uintmax_t)phys_avail[indx + 1] - 1,
  304                             (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
  305                 }
  306         }
  307 
  308         vm_ksubmap_init(&kmi);
  309 
  310         printf("avail memory = %ju (%ju MB)\n",
  311             ptoa((uintmax_t)vm_free_count()),
  312             ptoa((uintmax_t)vm_free_count()) / 1048576);
  313 
  314         /*
  315          * Set up buffers, so they can be used to read disk labels.
  316          */
  317         bufinit();
  318         vm_pager_bufferinit();
  319         cpu_setregs();
  320 }
  321 
  322 /*
  323  * Send an interrupt to process.
  324  *
  325  * Stack is set up to allow sigcode stored
  326  * at top to call routine, followed by call
  327  * to sigreturn routine below.  After sigreturn
  328  * resets the signal mask, the stack, and the
  329  * frame pointer, it returns to the user
  330  * specified pc, psl.
  331  */
  332 #ifdef COMPAT_43
/*
 * COMPAT_43 variant of sendsig(): deliver a signal to the current
 * thread using the old 4.3BSD osigframe/osigcontext layout, for
 * handlers installed through the old signal interface.
 *
 * Entered with the proc lock and psp->ps_mtx held; both are dropped
 * around the copyout() of the frame and re-acquired before return.
 */
static void
osendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
	struct osigframe sf, *fp;
	struct proc *p;
	struct thread *td;
	struct sigacts *psp;
	struct trapframe *regs;
	int sig;
	int oonstack;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	sig = ksi->ksi_signo;
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	regs = td->td_frame;
	oonstack = sigonstack(regs->tf_esp);

	/*
	 * Allocate space for the signal handler context: at the top of
	 * the alternate stack when one is configured (and we're not
	 * already on it), otherwise just below the current user %esp.
	 */
	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		fp = (struct osigframe *)((uintptr_t)td->td_sigstk.ss_sp +
		    td->td_sigstk.ss_size - sizeof(struct osigframe));
#if defined(COMPAT_43)
		td->td_sigstk.ss_flags |= SS_ONSTACK;
#endif
	} else
		fp = (struct osigframe *)regs->tf_esp - 1;

	/* Build the argument list for the signal handler. */
	sf.sf_signum = sig;
	sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc;
	bzero(&sf.sf_siginfo, sizeof(sf.sf_siginfo));
	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
		/* Signal handler installed with SA_SIGINFO. */
		sf.sf_arg2 = (register_t)&fp->sf_siginfo;
		sf.sf_siginfo.si_signo = sig;
		sf.sf_siginfo.si_code = ksi->ksi_code;
		sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher;
		sf.sf_addr = 0;
	} else {
		/* Old FreeBSD-style arguments. */
		sf.sf_arg2 = ksi->ksi_code;
		sf.sf_addr = (register_t)ksi->ksi_addr;
		sf.sf_ahu.sf_handler = catcher;
	}
	/* Drop the locks before touching user memory via copyout(). */
	mtx_unlock(&psp->ps_mtx);
	PROC_UNLOCK(p);

	/* Save most if not all of trap frame. */
	sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax;
	sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx;
	sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx;
	sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx;
	sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi;
	sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi;
	sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs;
	sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds;
	sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss;
	sf.sf_siginfo.si_sc.sc_es = regs->tf_es;
	sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs;
	sf.sf_siginfo.si_sc.sc_gs = rgs();	/* %gs isn't in the trapframe; read it directly */
	sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp;

	/* Build the signal context to be used by osigreturn(). */
	sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0;
	SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask);
	sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp;
	sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp;
	sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip;
	sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags;
	sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno;
	sf.sf_siginfo.si_sc.sc_err = regs->tf_err;

	/*
	 * If we're a vm86 process, we want to save the segment registers.
	 * We also change eflags to be our emulated eflags, not the actual
	 * eflags.
	 */
	if (regs->tf_eflags & PSL_VM) {
		/* XXX confusing names: `tf' isn't a trapframe; `regs' is. */
		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
		struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86;

		sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs;
		sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs;
		sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es;
		sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds;

		if (vm86->vm86_has_vme == 0)
			sf.sf_siginfo.si_sc.sc_ps =
			    (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
			    (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));

		/* See sendsig() for comments. */
		tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
	}

	/*
	 * Copy the sigframe out to the user's stack.  If that fails the
	 * process has an unusable stack; terminate it with SIGILL.
	 */
	if (copyout(&sf, fp, sizeof(*fp)) != 0) {
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	/* Point the trapframe at the frame and the signal trampoline. */
	regs->tf_esp = (int)fp;
	if (p->p_sysent->sv_sigcode_base != 0) {
		regs->tf_eip = p->p_sysent->sv_sigcode_base + szsigcode -
		    szosigcode;
	} else {
		/* a.out sysentvec does not use shared page */
		regs->tf_eip = p->p_sysent->sv_psstrings - szosigcode;
	}
	regs->tf_eflags &= ~(PSL_T | PSL_D);
	regs->tf_cs = _ucodesel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_fs = _udatasel;
	load_gs(_udatasel);
	regs->tf_ss = _udatasel;
	/* Re-acquire the locks the caller expects to still hold. */
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
  459 #endif /* COMPAT_43 */
  460 
  461 #ifdef COMPAT_FREEBSD4
/*
 * COMPAT_FREEBSD4 variant of sendsig(): deliver a signal using the
 * FreeBSD 4.x sigframe4 layout, for handlers installed through the
 * FreeBSD 4 signal interface.
 *
 * Entered with the proc lock and psp->ps_mtx held; both are dropped
 * around the copyout() of the frame and re-acquired before return.
 */
static void
freebsd4_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
	struct sigframe4 sf, *sfp;
	struct proc *p;
	struct thread *td;
	struct sigacts *psp;
	struct trapframe *regs;
	int sig;
	int oonstack;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	sig = ksi->ksi_signo;
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	regs = td->td_frame;
	oonstack = sigonstack(regs->tf_esp);

	/* Save user context. */
	bzero(&sf, sizeof(sf));
	sf.sf_uc.uc_sigmask = *mask;
	sf.sf_uc.uc_stack = td->td_sigstk;
	sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
	    ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
	sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
	sf.sf_uc.uc_mcontext.mc_gs = rgs();	/* %gs isn't in the trapframe */
	/* Copy the whole trapframe into the mcontext, starting at mc_fs. */
	bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs));
	bzero(sf.sf_uc.uc_mcontext.mc_fpregs,
	    sizeof(sf.sf_uc.uc_mcontext.mc_fpregs));
	bzero(sf.sf_uc.uc_mcontext.__spare__,
	    sizeof(sf.sf_uc.uc_mcontext.__spare__));
	bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__));

	/* Allocate space for the signal handler context. */
	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		sfp = (struct sigframe4 *)((uintptr_t)td->td_sigstk.ss_sp +
		    td->td_sigstk.ss_size - sizeof(struct sigframe4));
#if defined(COMPAT_43)
		td->td_sigstk.ss_flags |= SS_ONSTACK;
#endif
	} else
		sfp = (struct sigframe4 *)regs->tf_esp - 1;

	/* Build the argument list for the signal handler. */
	sf.sf_signum = sig;
	sf.sf_ucontext = (register_t)&sfp->sf_uc;
	bzero(&sf.sf_si, sizeof(sf.sf_si));
	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
		/* Signal handler installed with SA_SIGINFO. */
		sf.sf_siginfo = (register_t)&sfp->sf_si;
		sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;

		/* Fill in POSIX parts */
		sf.sf_si.si_signo = sig;
		sf.sf_si.si_code = ksi->ksi_code;
		sf.sf_si.si_addr = ksi->ksi_addr;
	} else {
		/* Old FreeBSD-style arguments. */
		sf.sf_siginfo = ksi->ksi_code;
		sf.sf_addr = (register_t)ksi->ksi_addr;
		sf.sf_ahu.sf_handler = catcher;
	}
	/* Drop the locks before touching user memory via copyout(). */
	mtx_unlock(&psp->ps_mtx);
	PROC_UNLOCK(p);

	/*
	 * If we're a vm86 process, we want to save the segment registers.
	 * We also change eflags to be our emulated eflags, not the actual
	 * eflags.
	 */
	if (regs->tf_eflags & PSL_VM) {
		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
		struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86;

		sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs;
		sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs;
		sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es;
		sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds;

		if (vm86->vm86_has_vme == 0)
			sf.sf_uc.uc_mcontext.mc_eflags =
			    (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
			    (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));

		/*
		 * Clear PSL_NT to inhibit T_TSSFLT faults on return from
		 * syscalls made by the signal handler.  This just avoids
		 * wasting time for our lazy fixup of such faults.  PSL_NT
		 * does nothing in vm86 mode, but vm86 programs can set it
		 * almost legitimately in probes for old cpu types.
		 */
		tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
	}

	/*
	 * Copy the sigframe out to the user's stack.  If that fails the
	 * process has an unusable stack; terminate it with SIGILL.
	 */
	if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	/* Point the trapframe at the frame and the FreeBSD 4 trampoline. */
	regs->tf_esp = (int)sfp;
	regs->tf_eip = p->p_sysent->sv_sigcode_base + szsigcode -
	    szfreebsd4_sigcode;
	regs->tf_eflags &= ~(PSL_T | PSL_D);
	regs->tf_cs = _ucodesel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_fs = _udatasel;
	regs->tf_ss = _udatasel;
	/* Re-acquire the locks the caller expects to still hold. */
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
  579 #endif  /* COMPAT_FREEBSD4 */
  580 
/*
 * Deliver a signal to the current thread: build a struct sigframe
 * (including machine context, FPU state, and optional extended XSAVE
 * state) on the user stack or alternate signal stack, then redirect
 * the trapframe to the signal trampoline.  Dispatches to the
 * COMPAT_FREEBSD4 / COMPAT_43 variants when the handler was installed
 * through those older interfaces.
 *
 * Entered with the proc lock and psp->ps_mtx held; both are dropped
 * around the copyout()s and re-acquired before return.
 */
void
sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
	struct sigframe sf, *sfp;
	struct proc *p;
	struct thread *td;
	struct sigacts *psp;
	char *sp;
	struct trapframe *regs;
	struct segment_descriptor *sdp;
	char *xfpusave;
	size_t xfpusave_len;
	int sig;
	int oonstack;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	sig = ksi->ksi_signo;
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
#ifdef COMPAT_FREEBSD4
	if (SIGISMEMBER(psp->ps_freebsd4, sig)) {
		freebsd4_sendsig(catcher, ksi, mask);
		return;
	}
#endif
#ifdef COMPAT_43
	if (SIGISMEMBER(psp->ps_osigset, sig)) {
		osendsig(catcher, ksi, mask);
		return;
	}
#endif
	regs = td->td_frame;
	oonstack = sigonstack(regs->tf_esp);

	/*
	 * When XSAVE is in use and the extended state exceeds the legacy
	 * save area, stage the extra state in a temporary on the kernel
	 * stack before copying it out.
	 */
	if (cpu_max_ext_state_size > sizeof(union savefpu) && use_xsave) {
		xfpusave_len = cpu_max_ext_state_size - sizeof(union savefpu);
		xfpusave = __builtin_alloca(xfpusave_len);
	} else {
		xfpusave_len = 0;
		xfpusave = NULL;
	}

	/* Save user context. */
	bzero(&sf, sizeof(sf));
	sf.sf_uc.uc_sigmask = *mask;
	sf.sf_uc.uc_stack = td->td_sigstk;
	sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
	    ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
	sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
	sf.sf_uc.uc_mcontext.mc_gs = rgs();	/* %gs isn't in the trapframe */
	/* Copy the whole trapframe into the mcontext, starting at mc_fs. */
	bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs));
	sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
	get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len);
	fpstate_drop(td);
	/*
	 * Unconditionally fill the fsbase and gsbase into the mcontext.
	 */
	sdp = &td->td_pcb->pcb_fsd;
	sf.sf_uc.uc_mcontext.mc_fsbase = sdp->sd_hibase << 24 |
	    sdp->sd_lobase;
	sdp = &td->td_pcb->pcb_gsd;
	sf.sf_uc.uc_mcontext.mc_gsbase = sdp->sd_hibase << 24 |
	    sdp->sd_lobase;
	bzero(sf.sf_uc.uc_mcontext.mc_spare2,
	    sizeof(sf.sf_uc.uc_mcontext.mc_spare2));
	bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__));

	/* Allocate space for the signal handler context. */
	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		sp = (char *)td->td_sigstk.ss_sp + td->td_sigstk.ss_size;
#if defined(COMPAT_43)
		td->td_sigstk.ss_flags |= SS_ONSTACK;
#endif
	} else
		/* Leave a 128-byte gap below the interrupted %esp. */
		sp = (char *)regs->tf_esp - 128;
	if (xfpusave != NULL) {
		/* Extended FPU state goes first, aligned to 64 bytes. */
		sp -= xfpusave_len;
		sp = (char *)((unsigned int)sp & ~0x3F);
		sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp;
	}
	sp -= sizeof(struct sigframe);

	/* Align to 16 bytes. */
	sfp = (struct sigframe *)((unsigned int)sp & ~0xF);

	/* Build the argument list for the signal handler. */
	sf.sf_signum = sig;
	sf.sf_ucontext = (register_t)&sfp->sf_uc;
	bzero(&sf.sf_si, sizeof(sf.sf_si));
	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
		/* Signal handler installed with SA_SIGINFO. */
		sf.sf_siginfo = (register_t)&sfp->sf_si;
		sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;

		/* Fill in POSIX parts */
		sf.sf_si = ksi->ksi_info;
		sf.sf_si.si_signo = sig; /* maybe a translated signal */
	} else {
		/* Old FreeBSD-style arguments. */
		sf.sf_siginfo = ksi->ksi_code;
		sf.sf_addr = (register_t)ksi->ksi_addr;
		sf.sf_ahu.sf_handler = catcher;
	}
	/* Drop the locks before touching user memory via copyout(). */
	mtx_unlock(&psp->ps_mtx);
	PROC_UNLOCK(p);

	/*
	 * If we're a vm86 process, we want to save the segment registers.
	 * We also change eflags to be our emulated eflags, not the actual
	 * eflags.
	 */
	if (regs->tf_eflags & PSL_VM) {
		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
		struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86;

		sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs;
		sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs;
		sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es;
		sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds;

		if (vm86->vm86_has_vme == 0)
			sf.sf_uc.uc_mcontext.mc_eflags =
			    (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
			    (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));

		/*
		 * Clear PSL_NT to inhibit T_TSSFLT faults on return from
		 * syscalls made by the signal handler.  This just avoids
		 * wasting time for our lazy fixup of such faults.  PSL_NT
		 * does nothing in vm86 mode, but vm86 programs can set it
		 * almost legitimately in probes for old cpu types.
		 */
		tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
	}

	/*
	 * Copy the sigframe out to the user's stack.  If either copyout
	 * fails the process has an unusable stack; terminate it with
	 * SIGILL.
	 */
	if (copyout(&sf, sfp, sizeof(*sfp)) != 0 ||
	    (xfpusave != NULL && copyout(xfpusave,
	    (void *)sf.sf_uc.uc_mcontext.mc_xfpustate, xfpusave_len)
	    != 0)) {
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	/* Point the trapframe at the frame and the signal trampoline. */
	regs->tf_esp = (int)sfp;
	regs->tf_eip = p->p_sysent->sv_sigcode_base;
	if (regs->tf_eip == 0)
		regs->tf_eip = p->p_sysent->sv_psstrings - szsigcode;
	regs->tf_eflags &= ~(PSL_T | PSL_D);
	regs->tf_cs = _ucodesel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_fs = _udatasel;
	regs->tf_ss = _udatasel;
	/* Re-acquire the locks the caller expects to still hold. */
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
  743 
  744 /*
  745  * System call to cleanup state after a signal
  746  * has been taken.  Reset signal mask and
  747  * stack state from context left by sendsig (above).
  748  * Return to previous pc and psl as specified by
  749  * context left by sendsig. Check carefully to
  750  * make sure that the user has not modified the
  751  * state to gain improper privileges.
  752  *
  753  * MPSAFE
  754  */
  755 #ifdef COMPAT_43
  756 int
  757 osigreturn(td, uap)
  758         struct thread *td;
  759         struct osigreturn_args /* {
  760                 struct osigcontext *sigcntxp;
  761         } */ *uap;
  762 {
  763         struct osigcontext sc;
  764         struct trapframe *regs;
  765         struct osigcontext *scp;
  766         int eflags, error;
  767         ksiginfo_t ksi;
  768 
  769         regs = td->td_frame;
  770         error = copyin(uap->sigcntxp, &sc, sizeof(sc));
  771         if (error != 0)
  772                 return (error);
  773         scp = &sc;
  774         eflags = scp->sc_ps;
  775         if (eflags & PSL_VM) {
  776                 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
  777                 struct vm86_kernel *vm86;
  778 
  779                 /*
  780                  * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
  781                  * set up the vm86 area, and we can't enter vm86 mode.
  782                  */
  783                 if (td->td_pcb->pcb_ext == 0)
  784                         return (EINVAL);
  785                 vm86 = &td->td_pcb->pcb_ext->ext_vm86;
  786                 if (vm86->vm86_inited == 0)
  787                         return (EINVAL);
  788 
  789                 /* Go back to user mode if both flags are set. */
  790                 if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) {
  791                         ksiginfo_init_trap(&ksi);
  792                         ksi.ksi_signo = SIGBUS;
  793                         ksi.ksi_code = BUS_OBJERR;
  794                         ksi.ksi_addr = (void *)regs->tf_eip;
  795                         trapsignal(td, &ksi);
  796                 }
  797 
  798                 if (vm86->vm86_has_vme) {
  799                         eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
  800                             (eflags & VME_USERCHANGE) | PSL_VM;
  801                 } else {
  802                         vm86->vm86_eflags = eflags;     /* save VIF, VIP */
  803                         eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
  804                             (eflags & VM_USERCHANGE) | PSL_VM;
  805                 }
  806                 tf->tf_vm86_ds = scp->sc_ds;
  807                 tf->tf_vm86_es = scp->sc_es;
  808                 tf->tf_vm86_fs = scp->sc_fs;
  809                 tf->tf_vm86_gs = scp->sc_gs;
  810                 tf->tf_ds = _udatasel;
  811                 tf->tf_es = _udatasel;
  812                 tf->tf_fs = _udatasel;
  813         } else {
  814                 /*
  815                  * Don't allow users to change privileged or reserved flags.
  816                  */
  817                 if (!EFL_SECURE(eflags, regs->tf_eflags)) {
  818                         return (EINVAL);
  819                 }
  820 
  821                 /*
  822                  * Don't allow users to load a valid privileged %cs.  Let the
  823                  * hardware check for invalid selectors, excess privilege in
  824                  * other selectors, invalid %eip's and invalid %esp's.
  825                  */
  826                 if (!CS_SECURE(scp->sc_cs)) {
  827                         ksiginfo_init_trap(&ksi);
  828                         ksi.ksi_signo = SIGBUS;
  829                         ksi.ksi_code = BUS_OBJERR;
  830                         ksi.ksi_trapno = T_PROTFLT;
  831                         ksi.ksi_addr = (void *)regs->tf_eip;
  832                         trapsignal(td, &ksi);
  833                         return (EINVAL);
  834                 }
  835                 regs->tf_ds = scp->sc_ds;
  836                 regs->tf_es = scp->sc_es;
  837                 regs->tf_fs = scp->sc_fs;
  838         }
  839 
  840         /* Restore remaining registers. */
  841         regs->tf_eax = scp->sc_eax;
  842         regs->tf_ebx = scp->sc_ebx;
  843         regs->tf_ecx = scp->sc_ecx;
  844         regs->tf_edx = scp->sc_edx;
  845         regs->tf_esi = scp->sc_esi;
  846         regs->tf_edi = scp->sc_edi;
  847         regs->tf_cs = scp->sc_cs;
  848         regs->tf_ss = scp->sc_ss;
  849         regs->tf_isp = scp->sc_isp;
  850         regs->tf_ebp = scp->sc_fp;
  851         regs->tf_esp = scp->sc_sp;
  852         regs->tf_eip = scp->sc_pc;
  853         regs->tf_eflags = eflags;
  854 
  855 #if defined(COMPAT_43)
  856         if (scp->sc_onstack & 1)
  857                 td->td_sigstk.ss_flags |= SS_ONSTACK;
  858         else
  859                 td->td_sigstk.ss_flags &= ~SS_ONSTACK;
  860 #endif
  861         kern_sigprocmask(td, SIG_SETMASK, (sigset_t *)&scp->sc_mask, NULL,
  862             SIGPROCMASK_OLD);
  863         return (EJUSTRETURN);
  864 }
  865 #endif /* COMPAT_43 */
  866 
  867 #ifdef COMPAT_FREEBSD4
  868 /*
  869  * MPSAFE
  870  */
  871 int
  872 freebsd4_sigreturn(td, uap)
  873         struct thread *td;
  874         struct freebsd4_sigreturn_args /* {
  875                 const ucontext4 *sigcntxp;
  876         } */ *uap;
  877 {
  878         struct ucontext4 uc;
  879         struct trapframe *regs;
  880         struct ucontext4 *ucp;
  881         int cs, eflags, error;
  882         ksiginfo_t ksi;
  883 
  884         error = copyin(uap->sigcntxp, &uc, sizeof(uc));
  885         if (error != 0)
  886                 return (error);
  887         ucp = &uc;
  888         regs = td->td_frame;
  889         eflags = ucp->uc_mcontext.mc_eflags;
  890         if (eflags & PSL_VM) {
  891                 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
  892                 struct vm86_kernel *vm86;
  893 
  894                 /*
  895                  * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
  896                  * set up the vm86 area, and we can't enter vm86 mode.
  897                  */
  898                 if (td->td_pcb->pcb_ext == 0)
  899                         return (EINVAL);
  900                 vm86 = &td->td_pcb->pcb_ext->ext_vm86;
  901                 if (vm86->vm86_inited == 0)
  902                         return (EINVAL);
  903 
  904                 /* Go back to user mode if both flags are set. */
  905                 if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) {
  906                         ksiginfo_init_trap(&ksi);
  907                         ksi.ksi_signo = SIGBUS;
  908                         ksi.ksi_code = BUS_OBJERR;
  909                         ksi.ksi_addr = (void *)regs->tf_eip;
  910                         trapsignal(td, &ksi);
  911                 }
  912                 if (vm86->vm86_has_vme) {
  913                         eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
  914                             (eflags & VME_USERCHANGE) | PSL_VM;
  915                 } else {
  916                         vm86->vm86_eflags = eflags;     /* save VIF, VIP */
  917                         eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
  918                             (eflags & VM_USERCHANGE) | PSL_VM;
  919                 }
  920                 bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe));
  921                 tf->tf_eflags = eflags;
  922                 tf->tf_vm86_ds = tf->tf_ds;
  923                 tf->tf_vm86_es = tf->tf_es;
  924                 tf->tf_vm86_fs = tf->tf_fs;
  925                 tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs;
  926                 tf->tf_ds = _udatasel;
  927                 tf->tf_es = _udatasel;
  928                 tf->tf_fs = _udatasel;
  929         } else {
  930                 /*
  931                  * Don't allow users to change privileged or reserved flags.
  932                  */
  933                 if (!EFL_SECURE(eflags, regs->tf_eflags)) {
  934                         uprintf("pid %d (%s): freebsd4_sigreturn eflags = 0x%x\n",
  935                             td->td_proc->p_pid, td->td_name, eflags);
  936                         return (EINVAL);
  937                 }
  938 
  939                 /*
  940                  * Don't allow users to load a valid privileged %cs.  Let the
  941                  * hardware check for invalid selectors, excess privilege in
  942                  * other selectors, invalid %eip's and invalid %esp's.
  943                  */
  944                 cs = ucp->uc_mcontext.mc_cs;
  945                 if (!CS_SECURE(cs)) {
  946                         uprintf("pid %d (%s): freebsd4_sigreturn cs = 0x%x\n",
  947                             td->td_proc->p_pid, td->td_name, cs);
  948                         ksiginfo_init_trap(&ksi);
  949                         ksi.ksi_signo = SIGBUS;
  950                         ksi.ksi_code = BUS_OBJERR;
  951                         ksi.ksi_trapno = T_PROTFLT;
  952                         ksi.ksi_addr = (void *)regs->tf_eip;
  953                         trapsignal(td, &ksi);
  954                         return (EINVAL);
  955                 }
  956 
  957                 bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
  958         }
  959 
  960 #if defined(COMPAT_43)
  961         if (ucp->uc_mcontext.mc_onstack & 1)
  962                 td->td_sigstk.ss_flags |= SS_ONSTACK;
  963         else
  964                 td->td_sigstk.ss_flags &= ~SS_ONSTACK;
  965 #endif
  966         kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0);
  967         return (EJUSTRETURN);
  968 }
  969 #endif  /* COMPAT_FREEBSD4 */
  970 
  971 /*
  972  * MPSAFE
  973  */
  974 int
  975 sys_sigreturn(td, uap)
  976         struct thread *td;
  977         struct sigreturn_args /* {
  978                 const struct __ucontext *sigcntxp;
  979         } */ *uap;
  980 {
  981         ucontext_t uc;
  982         struct proc *p;
  983         struct trapframe *regs;
  984         ucontext_t *ucp;
  985         char *xfpustate;
  986         size_t xfpustate_len;
  987         int cs, eflags, error, ret;
  988         ksiginfo_t ksi;
  989 
  990         p = td->td_proc;
  991 
  992         error = copyin(uap->sigcntxp, &uc, sizeof(uc));
  993         if (error != 0)
  994                 return (error);
  995         ucp = &uc;
  996         if ((ucp->uc_mcontext.mc_flags & ~_MC_FLAG_MASK) != 0) {
  997                 uprintf("pid %d (%s): sigreturn mc_flags %x\n", p->p_pid,
  998                     td->td_name, ucp->uc_mcontext.mc_flags);
  999                 return (EINVAL);
 1000         }
 1001         regs = td->td_frame;
 1002         eflags = ucp->uc_mcontext.mc_eflags;
 1003         if (eflags & PSL_VM) {
 1004                 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 1005                 struct vm86_kernel *vm86;
 1006 
 1007                 /*
 1008                  * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
 1009                  * set up the vm86 area, and we can't enter vm86 mode.
 1010                  */
 1011                 if (td->td_pcb->pcb_ext == 0)
 1012                         return (EINVAL);
 1013                 vm86 = &td->td_pcb->pcb_ext->ext_vm86;
 1014                 if (vm86->vm86_inited == 0)
 1015                         return (EINVAL);
 1016 
 1017                 /* Go back to user mode if both flags are set. */
 1018                 if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) {
 1019                         ksiginfo_init_trap(&ksi);
 1020                         ksi.ksi_signo = SIGBUS;
 1021                         ksi.ksi_code = BUS_OBJERR;
 1022                         ksi.ksi_addr = (void *)regs->tf_eip;
 1023                         trapsignal(td, &ksi);
 1024                 }
 1025 
 1026                 if (vm86->vm86_has_vme) {
 1027                         eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
 1028                             (eflags & VME_USERCHANGE) | PSL_VM;
 1029                 } else {
 1030                         vm86->vm86_eflags = eflags;     /* save VIF, VIP */
 1031                         eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
 1032                             (eflags & VM_USERCHANGE) | PSL_VM;
 1033                 }
 1034                 bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe));
 1035                 tf->tf_eflags = eflags;
 1036                 tf->tf_vm86_ds = tf->tf_ds;
 1037                 tf->tf_vm86_es = tf->tf_es;
 1038                 tf->tf_vm86_fs = tf->tf_fs;
 1039                 tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs;
 1040                 tf->tf_ds = _udatasel;
 1041                 tf->tf_es = _udatasel;
 1042                 tf->tf_fs = _udatasel;
 1043         } else {
 1044                 /*
 1045                  * Don't allow users to change privileged or reserved flags.
 1046                  */
 1047                 if (!EFL_SECURE(eflags, regs->tf_eflags)) {
 1048                         uprintf("pid %d (%s): sigreturn eflags = 0x%x\n",
 1049                             td->td_proc->p_pid, td->td_name, eflags);
 1050                         return (EINVAL);
 1051                 }
 1052 
 1053                 /*
 1054                  * Don't allow users to load a valid privileged %cs.  Let the
 1055                  * hardware check for invalid selectors, excess privilege in
 1056                  * other selectors, invalid %eip's and invalid %esp's.
 1057                  */
 1058                 cs = ucp->uc_mcontext.mc_cs;
 1059                 if (!CS_SECURE(cs)) {
 1060                         uprintf("pid %d (%s): sigreturn cs = 0x%x\n",
 1061                             td->td_proc->p_pid, td->td_name, cs);
 1062                         ksiginfo_init_trap(&ksi);
 1063                         ksi.ksi_signo = SIGBUS;
 1064                         ksi.ksi_code = BUS_OBJERR;
 1065                         ksi.ksi_trapno = T_PROTFLT;
 1066                         ksi.ksi_addr = (void *)regs->tf_eip;
 1067                         trapsignal(td, &ksi);
 1068                         return (EINVAL);
 1069                 }
 1070 
 1071                 if ((uc.uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) {
 1072                         xfpustate_len = uc.uc_mcontext.mc_xfpustate_len;
 1073                         if (xfpustate_len > cpu_max_ext_state_size -
 1074                             sizeof(union savefpu)) {
 1075                                 uprintf(
 1076                             "pid %d (%s): sigreturn xfpusave_len = 0x%zx\n",
 1077                                     p->p_pid, td->td_name, xfpustate_len);
 1078                                 return (EINVAL);
 1079                         }
 1080                         xfpustate = __builtin_alloca(xfpustate_len);
 1081                         error = copyin((const void *)uc.uc_mcontext.mc_xfpustate,
 1082                             xfpustate, xfpustate_len);
 1083                         if (error != 0) {
 1084                                 uprintf(
 1085         "pid %d (%s): sigreturn copying xfpustate failed\n",
 1086                                     p->p_pid, td->td_name);
 1087                                 return (error);
 1088                         }
 1089                 } else {
 1090                         xfpustate = NULL;
 1091                         xfpustate_len = 0;
 1092                 }
 1093                 ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate,
 1094                     xfpustate_len);
 1095                 if (ret != 0)
 1096                         return (ret);
 1097                 bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
 1098         }
 1099 
 1100 #if defined(COMPAT_43)
 1101         if (ucp->uc_mcontext.mc_onstack & 1)
 1102                 td->td_sigstk.ss_flags |= SS_ONSTACK;
 1103         else
 1104                 td->td_sigstk.ss_flags &= ~SS_ONSTACK;
 1105 #endif
 1106 
 1107         kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0);
 1108         return (EJUSTRETURN);
 1109 }
 1110 
 1111 #ifdef COMPAT_43
 1112 static void
 1113 setup_priv_lcall_gate(struct proc *p)
 1114 {
 1115         struct i386_ldt_args uap;
 1116         union descriptor desc;
 1117         u_int lcall_addr;
 1118 
 1119         bzero(&uap, sizeof(uap));
 1120         uap.start = 0;
 1121         uap.num = 1;
 1122         lcall_addr = p->p_sysent->sv_psstrings - sz_lcall_tramp;
 1123         bzero(&desc, sizeof(desc));
 1124         desc.sd.sd_type = SDT_MEMERA;
 1125         desc.sd.sd_dpl = SEL_UPL;
 1126         desc.sd.sd_p = 1;
 1127         desc.sd.sd_def32 = 1;
 1128         desc.sd.sd_gran = 1;
 1129         desc.sd.sd_lolimit = 0xffff;
 1130         desc.sd.sd_hilimit = 0xf;
 1131         desc.sd.sd_lobase = lcall_addr;
 1132         desc.sd.sd_hibase = lcall_addr >> 24;
 1133         i386_set_ldt(curthread, &uap, &desc);
 1134 }
 1135 #endif
 1136 
 1137 /*
 1138  * Reset the hardware debug registers if they were in use.
 1139  * They won't have any meaning for the newly exec'd process.
 1140  */
 1141 void
 1142 x86_clear_dbregs(struct pcb *pcb)
 1143 {
 1144         if ((pcb->pcb_flags & PCB_DBREGS) == 0)
 1145                 return;
 1146 
 1147         pcb->pcb_dr0 = 0;
 1148         pcb->pcb_dr1 = 0;
 1149         pcb->pcb_dr2 = 0;
 1150         pcb->pcb_dr3 = 0;
 1151         pcb->pcb_dr6 = 0;
 1152         pcb->pcb_dr7 = 0;
 1153 
 1154         if (pcb == curpcb) {
 1155                 /*
 1156                  * Clear the debug registers on the running CPU,
 1157                  * otherwise they will end up affecting the next
 1158                  * process we switch to.
 1159                  */
 1160                 reset_dbregs();
 1161         }
 1162         pcb->pcb_flags &= ~PCB_DBREGS;
 1163 }
 1164 
 1165 /*
 1166  * Reset registers to default values on exec.
 1167  */
void
exec_setregs(struct thread *td, struct image_params *imgp, u_long stack)
{
	struct trapframe *regs;
	struct pcb *pcb;
	register_t saved_eflags;

	regs = td->td_frame;
	pcb = td->td_pcb;

	/* Reset pc->pcb_gs and %gs before possibly invalidating it. */
	pcb->pcb_gs = _udatasel;
	load_gs(_udatasel);

	/*
	 * Drop any per-process LDT left over from the previous image.
	 * NOTE(review): user_ldt_free() appears to release dt_lock
	 * itself -- only the else branch unlocks here.
	 */
	mtx_lock_spin(&dt_lock);
	if (td->td_proc->p_md.md_ldt != NULL)
		user_ldt_free(td);
	else
		mtx_unlock_spin(&dt_lock);

#ifdef COMPAT_43
	/*
	 * For sysvecs other than the native ELF one (compared via
	 * sv_psstrings), install the privileged lcall gate; see
	 * setup_priv_lcall_gate().
	 */
	if (td->td_proc->p_sysent->sv_psstrings !=
	    elf32_freebsd_sysvec.sv_psstrings)
		setup_priv_lcall_gate(td->td_proc);
#endif
  
	/*
	 * Reset the fs and gs bases.  The values from the old address
	 * space do not make sense for the new program.  In particular,
	 * gsbase might be the TLS base for the old program but the new
	 * program has no TLS now.
	 */
	set_fsbase(td, 0);
	set_gsbase(td, 0);

	/* Make sure edx is 0x0 on entry. Linux binaries depend on it. */
	saved_eflags = regs->tf_eflags & PSL_T;	/* keep single-step only */
	bzero((char *)regs, sizeof(struct trapframe));
	regs->tf_eip = imgp->entry_addr;
	regs->tf_esp = stack;
	regs->tf_eflags = PSL_USER | saved_eflags;
	regs->tf_ss = _udatasel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_fs = _udatasel;
	regs->tf_cs = _ucodesel;

	/* PS_STRINGS value for BSD/OS binaries.  It is 0 for non-BSD/OS. */
	regs->tf_ebx = imgp->ps_strings;

	/* Stale hardware breakpoints must not fire in the new image. */
	x86_clear_dbregs(pcb);

	pcb->pcb_initial_npxcw = __INITIAL_NPXCW__;

	/*
	 * Drop the FP state if we hold it, so that the process gets a
	 * clean FP state if it uses the FPU again.
	 */
	fpstate_drop(td);
}
 1228 
 1229 void
 1230 cpu_setregs(void)
 1231 {
 1232         unsigned int cr0;
 1233 
 1234         cr0 = rcr0();
 1235 
 1236         /*
 1237          * CR0_MP, CR0_NE and CR0_TS are set for NPX (FPU) support:
 1238          *
 1239          * Prepare to trap all ESC (i.e., NPX) instructions and all WAIT
 1240          * instructions.  We must set the CR0_MP bit and use the CR0_TS
 1241          * bit to control the trap, because setting the CR0_EM bit does
 1242          * not cause WAIT instructions to trap.  It's important to trap
 1243          * WAIT instructions - otherwise the "wait" variants of no-wait
 1244          * control instructions would degenerate to the "no-wait" variants
 1245          * after FP context switches but work correctly otherwise.  It's
 1246          * particularly important to trap WAITs when there is no NPX -
 1247          * otherwise the "wait" variants would always degenerate.
 1248          *
 1249          * Try setting CR0_NE to get correct error reporting on 486DX's.
 1250          * Setting it should fail or do nothing on lesser processors.
 1251          */
 1252         cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM;
 1253         load_cr0(cr0);
 1254         load_gs(_udatasel);
 1255 }
 1256 
u_long bootdev;		/* not a struct cdev *- encoding is different */
SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev,
	CTLFLAG_RD, &bootdev, 0, "Maybe the Boot device (not in struct cdev *format)");

/*
 * Firmware interface used to boot; defaults to "BIOS".  NOTE(review):
 * presumably overwritten elsewhere when booting by another method
 * (e.g. UEFI) -- not visible in this chunk.
 */
static char bootmethod[16] = "BIOS";
SYSCTL_STRING(_machdep, OID_AUTO, bootmethod, CTLFLAG_RD, bootmethod, 0,
    "System firmware boot method");
 1264 
 1265 /*
 1266  * Initialize 386 and configure to run kernel
 1267  */
 1268 
 1269 /*
 1270  * Initialize segments & interrupt table
 1271  */
 1272 
int _default_ldt;		/* selector of the default LDT */

struct mtx dt_lock;			/* lock for GDT and LDT */

union descriptor gdt0[NGDT];	/* initial global descriptor table */
union descriptor *gdt = gdt0;	/* global descriptor table */

union descriptor *ldt;		/* local descriptor table */

static struct gate_descriptor idt0[NIDT];
struct gate_descriptor *idt = &idt0[0];	/* interrupt descriptor table */

/* Dedicated TSS and stack used for taking double-fault exceptions. */
static struct i386tss *dblfault_tss;
static char *dblfault_stack;

static struct i386tss common_tss0;

vm_offset_t proc0kstack;	/* kernel stack of proc0 */
 1291 
/*
 * software prototypes -- in more palatable form.
 *
 * GCODE_SEL through GUDATA_SEL must be in this order for syscall/sysret
 * GUFS_SEL and GUGS_SEL must be in this order (swtch.s knows it)
 *
 * Note: entries with .ssd_gran = 1 use page (4K) granularity, so a
 * limit of 0xfffff spans the full 4G address space; .ssd_dpl selects
 * kernel (SEL_KPL) or user (SEL_UPL) privilege, and .ssd_p = 0 marks
 * a slot not-present until it is filled in at runtime.
 */
struct soft_segment_descriptor gdt_segs[] = {
/* GNULL_SEL    0 Null Descriptor */
{	.ssd_base = 0x0,
	.ssd_limit = 0x0,
	.ssd_type = 0,
	.ssd_dpl = SEL_KPL,
	.ssd_p = 0,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 0,
	.ssd_gran = 0		},
/* GPRIV_SEL    1 SMP Per-Processor Private Data Descriptor */
{	.ssd_base = 0x0,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMRWA,
	.ssd_dpl = SEL_KPL,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 1,
	.ssd_gran = 1		},
/* GUFS_SEL     2 %fs Descriptor for user */
{	.ssd_base = 0x0,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMRWA,
	.ssd_dpl = SEL_UPL,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 1,
	.ssd_gran = 1		},
/* GUGS_SEL     3 %gs Descriptor for user */
{	.ssd_base = 0x0,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMRWA,
	.ssd_dpl = SEL_UPL,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 1,
	.ssd_gran = 1		},
/* GCODE_SEL    4 Code Descriptor for kernel */
{	.ssd_base = 0x0,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMERA,
	.ssd_dpl = SEL_KPL,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 1,
	.ssd_gran = 1		},
/* GDATA_SEL    5 Data Descriptor for kernel */
{	.ssd_base = 0x0,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMRWA,
	.ssd_dpl = SEL_KPL,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 1,
	.ssd_gran = 1		},
/* GUCODE_SEL   6 Code Descriptor for user */
{	.ssd_base = 0x0,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMERA,
	.ssd_dpl = SEL_UPL,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 1,
	.ssd_gran = 1		},
/* GUDATA_SEL   7 Data Descriptor for user */
{	.ssd_base = 0x0,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMRWA,
	.ssd_dpl = SEL_UPL,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 1,
	.ssd_gran = 1		},
/* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */
{	.ssd_base = 0x400,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMRWA,
	.ssd_dpl = SEL_KPL,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 1,
	.ssd_gran = 1		},
/* GPROC0_SEL   9 Proc 0 Tss Descriptor */
{
	.ssd_base = 0x0,
	.ssd_limit = sizeof(struct i386tss)-1,
	.ssd_type = SDT_SYS386TSS,
	.ssd_dpl = 0,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 0,
	.ssd_gran = 0		},
/* GLDT_SEL     10 LDT Descriptor */
{	.ssd_base = 0,
	.ssd_limit = sizeof(union descriptor) * NLDT - 1,
	.ssd_type = SDT_SYSLDT,
	.ssd_dpl = SEL_UPL,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 0,
	.ssd_gran = 0		},
/* GUSERLDT_SEL 11 User LDT Descriptor per process */
{	.ssd_base = 0,
	.ssd_limit = (512 * sizeof(union descriptor)-1),
	.ssd_type = SDT_SYSLDT,
	.ssd_dpl = 0,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 0,
	.ssd_gran = 0		},
/* GPANIC_SEL   12 Panic Tss Descriptor */
{	.ssd_base = 0,
	.ssd_limit = sizeof(struct i386tss)-1,
	.ssd_type = SDT_SYS386TSS,
	.ssd_dpl = 0,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 0,
	.ssd_gran = 0		},
/* GBIOSCODE32_SEL 13 BIOS 32-bit interface (32bit Code) */
{	.ssd_base = 0,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMERA,
	.ssd_dpl = 0,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 0,
	.ssd_gran = 1		},
/* GBIOSCODE16_SEL 14 BIOS 32-bit interface (16bit Code) */
{	.ssd_base = 0,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMERA,
	.ssd_dpl = 0,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 0,
	.ssd_gran = 1		},
/* GBIOSDATA_SEL 15 BIOS 32-bit interface (Data) */
{	.ssd_base = 0,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMRWA,
	.ssd_dpl = 0,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 1,
	.ssd_gran = 1		},
/* GBIOSUTIL_SEL 16 BIOS 16-bit interface (Utility) */
{	.ssd_base = 0,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMRWA,
	.ssd_dpl = 0,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 0,
	.ssd_gran = 1		},
/* GBIOSARGS_SEL 17 BIOS 16-bit interface (Arguments) */
{	.ssd_base = 0,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMRWA,
	.ssd_dpl = 0,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 0,
	.ssd_gran = 1		},
/* GNDIS_SEL    18 NDIS Descriptor */
{	.ssd_base = 0x0,
	.ssd_limit = 0x0,
	.ssd_type = 0,
	.ssd_dpl = 0,
	.ssd_p = 0,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 0,
	.ssd_gran = 0		},
};
 1472 
/*
 * Prototype default LDT: user code and data segments plus slots that
 * start out as null (not-present) descriptors and are overwritten by
 * call gates at runtime, per the per-entry comments.
 */
static struct soft_segment_descriptor ldt_segs[] = {
	/* Null Descriptor - overwritten by call gate */
{	.ssd_base = 0x0,
	.ssd_limit = 0x0,
	.ssd_type = 0,
	.ssd_dpl = 0,
	.ssd_p = 0,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 0,
	.ssd_gran = 0		},
	/* Null Descriptor - overwritten by call gate */
{	.ssd_base = 0x0,
	.ssd_limit = 0x0,
	.ssd_type = 0,
	.ssd_dpl = 0,
	.ssd_p = 0,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 0,
	.ssd_gran = 0		},
	/* Null Descriptor - overwritten by call gate */
{	.ssd_base = 0x0,
	.ssd_limit = 0x0,
	.ssd_type = 0,
	.ssd_dpl = 0,
	.ssd_p = 0,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 0,
	.ssd_gran = 0		},
	/* Code Descriptor for user */
{	.ssd_base = 0x0,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMERA,
	.ssd_dpl = SEL_UPL,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 1,
	.ssd_gran = 1		},
	/* Null Descriptor - overwritten by call gate */
{	.ssd_base = 0x0,
	.ssd_limit = 0x0,
	.ssd_type = 0,
	.ssd_dpl = 0,
	.ssd_p = 0,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 0,
	.ssd_gran = 0		},
	/* Data Descriptor for user */
{	.ssd_base = 0x0,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMRWA,
	.ssd_dpl = SEL_UPL,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 1,
	.ssd_gran = 1		},
};
 1529 
 1530 size_t setidt_disp;
 1531 
 1532 void
 1533 setidt(int idx, inthand_t *func, int typ, int dpl, int selec)
 1534 {
 1535         uintptr_t off;
 1536 
 1537         off = func != NULL ? (uintptr_t)func + setidt_disp : 0;
 1538         setidt_nodisp(idx, off, typ, dpl, selec);
 1539 }
 1540 
 1541 void
 1542 setidt_nodisp(int idx, uintptr_t off, int typ, int dpl, int selec)
 1543 {
 1544         struct gate_descriptor *ip;
 1545 
 1546         ip = idt + idx;
 1547         ip->gd_looffset = off;
 1548         ip->gd_selector = selec;
 1549         ip->gd_stkcpy = 0;
 1550         ip->gd_xx = 0;
 1551         ip->gd_type = typ;
 1552         ip->gd_dpl = dpl;
 1553         ip->gd_p = 1;
 1554         ip->gd_hioffset = ((u_int)off) >> 16 ;
 1555 }
 1556 
/*
 * Assembler entry points for CPU traps/exceptions and the int 0x80
 * syscall vector; presumably installed into the IDT via setidt() --
 * the installing code is not visible in this chunk.
 */
extern inthand_t
	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
	IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
	IDTVEC(xmm),
#ifdef KDTRACE_HOOKS
	IDTVEC(dtrace_ret),
#endif
#ifdef XENHVM
	IDTVEC(xen_intr_upcall),
#endif
	IDTVEC(int0x80_syscall);
 1570 
 1571 #ifdef DDB
 1572 /*
 1573  * Display the index and function name of any IDT entries that don't use
 1574  * the default 'rsvd' entry point.
 1575  */
 1576 DB_SHOW_COMMAND(idt, db_show_idt)
 1577 {
 1578         struct gate_descriptor *ip;
 1579         int idx;
 1580         uintptr_t func, func_trm;
 1581         bool trm;
 1582 
 1583         ip = idt;
 1584         for (idx = 0; idx < NIDT && !db_pager_quit; idx++) {
 1585                 if (ip->gd_type == SDT_SYSTASKGT) {
 1586                         db_printf("%3d\t<TASK>\n", idx);
 1587                 } else {
 1588                         func = (ip->gd_hioffset << 16 | ip->gd_looffset);
 1589                         if (func >= PMAP_TRM_MIN_ADDRESS) {
 1590                                 func_trm = func;
 1591                                 func -= setidt_disp;
 1592                                 trm = true;
 1593                         } else
 1594                                 trm = false;
 1595                         if (func != (uintptr_t)&IDTVEC(rsvd)) {
 1596                                 db_printf("%3d\t", idx);
 1597                                 db_printsym(func, DB_STGY_PROC);
 1598                                 if (trm)
 1599                                         db_printf(" (trampoline %#x)",
 1600                                             func_trm);
 1601                                 db_printf("\n");
 1602                         }
 1603                 }
 1604                 ip++;
 1605         }
 1606 }
 1607 
/*
 * Show privileged registers: descriptor table registers, control
 * registers, and a few MSRs when the CPU advertises the corresponding
 * feature.  The idtr/gdtr values pack base and limit into one 64-bit
 * word (base in the upper 32 bits, limit in the low 16).
 */
DB_SHOW_COMMAND(sysregs, db_show_sysregs)
{
        uint64_t idtr, gdtr;

        idtr = ridt();
        db_printf("idtr\t0x%08x/%04x\n",
            (u_int)(idtr >> 16), (u_int)idtr & 0xffff);
        gdtr = rgdt();
        db_printf("gdtr\t0x%08x/%04x\n",
            (u_int)(gdtr >> 16), (u_int)gdtr & 0xffff);
        db_printf("ldtr\t0x%04x\n", rldt());
        db_printf("tr\t0x%04x\n", rtr());
        db_printf("cr0\t0x%08x\n", rcr0());
        db_printf("cr2\t0x%08x\n", rcr2());
        db_printf("cr3\t0x%08x\n", rcr3());
        db_printf("cr4\t0x%08x\n", rcr4());
        /* xcr0 is only readable when CR4.OSXSAVE is enabled. */
        if (rcr4() & CR4_XSAVE)
                db_printf("xcr0\t0x%016llx\n", rxcr(0));
        if (amd_feature & (AMDID_NX | AMDID_LM))
                db_printf("EFER\t0x%016llx\n", rdmsr(MSR_EFER));
        if (cpu_feature2 & (CPUID2_VMX | CPUID2_SMX))
                db_printf("FEATURES_CTL\t0x%016llx\n",
                    rdmsr(MSR_IA32_FEATURE_CONTROL));
        /* DEBUG_CTL exists on family >= 6 Intel/AMD, and on Hygon. */
        if (((cpu_vendor_id == CPU_VENDOR_INTEL ||
            cpu_vendor_id == CPU_VENDOR_AMD) && CPUID_TO_FAMILY(cpu_id) >= 6) ||
            cpu_vendor_id == CPU_VENDOR_HYGON)
                db_printf("DEBUG_CTL\t0x%016llx\n", rdmsr(MSR_DEBUGCTLMSR));
        if (cpu_feature & CPUID_PAT)
                db_printf("PAT\t0x%016llx\n", rdmsr(MSR_PAT));
}
 1639 
/* Show the hardware debug registers (dr4/dr5 are aliases and skipped). */
DB_SHOW_COMMAND(dbregs, db_show_dbregs)
{

        db_printf("dr0\t0x%08x\n", rdr0());
        db_printf("dr1\t0x%08x\n", rdr1());
        db_printf("dr2\t0x%08x\n", rdr2());
        db_printf("dr3\t0x%08x\n", rdr3());
        db_printf("dr6\t0x%08x\n", rdr6());
        db_printf("dr7\t0x%08x\n", rdr7());     
}
 1650 
/*
 * Dump a trap frame.  With an address argument, that address is taken as
 * a struct trapframe pointer; otherwise the current thread's frame is
 * shown.
 */
DB_SHOW_COMMAND(frame, db_show_frame)
{
        struct trapframe *frame;

        frame = have_addr ? (struct trapframe *)addr : curthread->td_frame;
        printf("ss %#x esp %#x efl %#x cs %#x eip %#x\n",
            frame->tf_ss, frame->tf_esp, frame->tf_eflags, frame->tf_cs,
            frame->tf_eip);
        printf("err %#x trapno %d\n", frame->tf_err, frame->tf_trapno);
        printf("ds %#x es %#x fs %#x\n",
            frame->tf_ds, frame->tf_es, frame->tf_fs);
        printf("eax %#x ecx %#x edx %#x ebx %#x\n",
            frame->tf_eax, frame->tf_ecx, frame->tf_edx, frame->tf_ebx);
        printf("ebp %#x esi %#x edi %#x\n",
            frame->tf_ebp, frame->tf_esi, frame->tf_edi);

}
 1668 #endif
 1669 
 1670 void
 1671 sdtossd(sd, ssd)
 1672         struct segment_descriptor *sd;
 1673         struct soft_segment_descriptor *ssd;
 1674 {
 1675         ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
 1676         ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
 1677         ssd->ssd_type  = sd->sd_type;
 1678         ssd->ssd_dpl   = sd->sd_dpl;
 1679         ssd->ssd_p     = sd->sd_p;
 1680         ssd->ssd_def32 = sd->sd_def32;
 1681         ssd->ssd_gran  = sd->sd_gran;
 1682 }
 1683 
/*
 * Insert the range [base, base + length) into the physmap array, which
 * holds base/end pairs sorted by address ([2i] = start, [2i+1] = end,
 * with *physmap_idxp indexing the start of the last pair).  Adjacent
 * ranges are coalesced; overlapping ranges are dropped.
 *
 * Returns 1 on success (including the benign "ignored" cases) and 0 only
 * when the physmap array is full, which callers treat as fatal.
 */
static int
add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap,
    int *physmap_idxp)
{
        int i, insert_idx, physmap_idx;

        physmap_idx = *physmap_idxp;
        
        if (length == 0)
                return (1);

#ifndef PAE
        /* Without PAE only 32-bit physical addresses are usable. */
        if (base > 0xffffffff) {
                printf("%uK of memory above 4GB ignored\n",
                    (u_int)(length / 1024));
                return (1);
        }
#endif

        /*
         * Find insertion point while checking for overlap.  Start off by
         * assuming the new entry will be added to the end.
         */
        insert_idx = physmap_idx + 2;
        for (i = 0; i <= physmap_idx; i += 2) {
                if (base < physmap[i + 1]) {
                        if (base + length <= physmap[i]) {
                                insert_idx = i;
                                break;
                        }
                        if (boothowto & RB_VERBOSE)
                                printf(
                    "Overlapping memory regions, ignoring second region\n");
                        return (1);
                }
        }

        /* See if we can prepend to the next entry. */
        if (insert_idx <= physmap_idx && base + length == physmap[insert_idx]) {
                physmap[insert_idx] = base;
                return (1);
        }

        /* See if we can append to the previous entry. */
        if (insert_idx > 0 && base == physmap[insert_idx - 1]) {
                physmap[insert_idx - 1] += length;
                return (1);
        }

        physmap_idx += 2;
        *physmap_idxp = physmap_idx;
        if (physmap_idx == PHYSMAP_SIZE) {
                printf(
                "Too many segments in the physical address map, giving up\n");
                return (0);
        }

        /*
         * Move the last 'N' entries down to make room for the new
         * entry if needed.
         */
        for (i = physmap_idx; i > insert_idx; i -= 2) {
                physmap[i] = physmap[i - 2];
                physmap[i + 1] = physmap[i - 1];
        }

        /* Insert the new entry. */
        physmap[insert_idx] = base;
        physmap[insert_idx + 1] = base + length;
        return (1);
}
 1755 
 1756 static int
 1757 add_smap_entry(struct bios_smap *smap, vm_paddr_t *physmap, int *physmap_idxp)
 1758 {
 1759         if (boothowto & RB_VERBOSE)
 1760                 printf("SMAP type=%02x base=%016llx len=%016llx\n",
 1761                     smap->type, smap->base, smap->length);
 1762 
 1763         if (smap->type != SMAP_TYPE_MEMORY)
 1764                 return (1);
 1765 
 1766         return (add_physmap_entry(smap->base, smap->length, physmap,
 1767             physmap_idxp));
 1768 }
 1769 
 1770 static void
 1771 add_smap_entries(struct bios_smap *smapbase, vm_paddr_t *physmap,
 1772     int *physmap_idxp)
 1773 {
 1774         struct bios_smap *smap, *smapend;
 1775         u_int32_t smapsize;
 1776         /*
 1777          * Memory map from INT 15:E820.
 1778          *
 1779          * subr_module.c says:
 1780          * "Consumer may safely assume that size value precedes data."
 1781          * ie: an int32_t immediately precedes SMAP.
 1782          */
 1783         smapsize = *((u_int32_t *)smapbase - 1);
 1784         smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize);
 1785 
 1786         for (smap = smapbase; smap < smapend; smap++)
 1787                 if (!add_smap_entry(smap, physmap, physmap_idxp))
 1788                         break;
 1789 }
 1790 
/*
 * Sanity-check the BIOS-reported base memory size (in KB) and map the
 * pages between basemem and the start of the ISA hole into the vm86
 * page table.
 */
static void
basemem_setup(void)
{
        pt_entry_t *pte;
        int i;

        /* Base memory can never exceed the traditional 640K. */
        if (basemem > 640) {
                printf("Preposterous BIOS basemem of %uK, truncating to 640K\n",
                        basemem);
                basemem = 640;
        }

        /*
         * Map pages between basemem and ISA_HOLE_START, if any, r/w into
         * the vm86 page table so that vm86 can scribble on them using
         * the vm86 map too.  XXX: why 2 ways for this and only 1 way for
         * page 0, at least as initialized here?
         *
         * NOTE(review): the bounds 'basemem / 4' and 160 appear to be
         * page indices (4K pages; 160 * 4K = 640K = ISA_HOLE_START) --
         * confirm against ISA_HOLE_START's definition.
         */
        pte = (pt_entry_t *)vm86paddr;
        for (i = basemem / 4; i < 160; i++)
                pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U;
}
 1813 
 1814 /*
 1815  * Populate the (physmap) array with base/bound pairs describing the
 1816  * available physical memory in the system, then test this memory and
 1817  * build the phys_avail array describing the actually-available memory.
 1818  *
 1819  * If we cannot accurately determine the physical memory map, then use
 1820  * value from the 0xE801 call, and failing that, the RTC.
 1821  *
 1822  * Total memory size may be set by the kernel environment variable
 1823  * hw.physmem or the compile-time define MAXMEM.
 1824  *
 1825  * XXX first should be vm_paddr_t.
 1826  */
 1827 static void
 1828 getmemsize(int first)
 1829 {
 1830         int has_smap, off, physmap_idx, pa_indx, da_indx;
 1831         u_long memtest;
 1832         vm_paddr_t physmap[PHYSMAP_SIZE];
 1833         pt_entry_t *pte;
 1834         quad_t dcons_addr, dcons_size, physmem_tunable;
 1835         int hasbrokenint12, i, res;
 1836         u_int extmem;
 1837         struct vm86frame vmf;
 1838         struct vm86context vmc;
 1839         vm_paddr_t pa;
 1840         struct bios_smap *smap, *smapbase;
 1841         caddr_t kmdp;
 1842 
 1843         has_smap = 0;
 1844         bzero(&vmf, sizeof(vmf));
 1845         bzero(physmap, sizeof(physmap));
 1846         basemem = 0;
 1847 
 1848         /*
 1849          * Tell the physical memory allocator about pages used to store
 1850          * the kernel and preloaded data.  See kmem_bootstrap_free().
 1851          */
 1852         vm_phys_add_seg((vm_paddr_t)KERNLOAD, trunc_page(first));
 1853 
 1854         /*
 1855          * Check if the loader supplied an SMAP memory map.  If so,
 1856          * use that and do not make any VM86 calls.
 1857          */
 1858         physmap_idx = 0;
 1859         kmdp = preload_search_by_type("elf kernel");
 1860         if (kmdp == NULL)
 1861                 kmdp = preload_search_by_type("elf32 kernel");
 1862         smapbase = (struct bios_smap *)preload_search_info(kmdp,
 1863             MODINFO_METADATA | MODINFOMD_SMAP);
 1864         if (smapbase != NULL) {
 1865                 add_smap_entries(smapbase, physmap, &physmap_idx);
 1866                 has_smap = 1;
 1867                 goto have_smap;
 1868         }
 1869 
 1870         /*
 1871          * Some newer BIOSes have a broken INT 12H implementation
 1872          * which causes a kernel panic immediately.  In this case, we
 1873          * need use the SMAP to determine the base memory size.
 1874          */
 1875         hasbrokenint12 = 0;
 1876         TUNABLE_INT_FETCH("hw.hasbrokenint12", &hasbrokenint12);
 1877         if (hasbrokenint12 == 0) {
 1878                 /* Use INT12 to determine base memory size. */
 1879                 vm86_intcall(0x12, &vmf);
 1880                 basemem = vmf.vmf_ax;
 1881                 basemem_setup();
 1882         }
 1883 
 1884         /*
 1885          * Fetch the memory map with INT 15:E820.  Map page 1 R/W into
 1886          * the kernel page table so we can use it as a buffer.  The
 1887          * kernel will unmap this page later.
 1888          */
 1889         vmc.npages = 0;
 1890         smap = (void *)vm86_addpage(&vmc, 1, PMAP_MAP_LOW + ptoa(1));
 1891         res = vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di);
 1892         KASSERT(res != 0, ("vm86_getptr() failed: address not found"));
 1893 
 1894         vmf.vmf_ebx = 0;
 1895         do {
 1896                 vmf.vmf_eax = 0xE820;
 1897                 vmf.vmf_edx = SMAP_SIG;
 1898                 vmf.vmf_ecx = sizeof(struct bios_smap);
 1899                 i = vm86_datacall(0x15, &vmf, &vmc);
 1900                 if (i || vmf.vmf_eax != SMAP_SIG)
 1901                         break;
 1902                 has_smap = 1;
 1903                 if (!add_smap_entry(smap, physmap, &physmap_idx))
 1904                         break;
 1905         } while (vmf.vmf_ebx != 0);
 1906 
 1907 have_smap:
 1908         /*
 1909          * If we didn't fetch the "base memory" size from INT12,
 1910          * figure it out from the SMAP (or just guess).
 1911          */
 1912         if (basemem == 0) {
 1913                 for (i = 0; i <= physmap_idx; i += 2) {
 1914                         if (physmap[i] == 0x00000000) {
 1915                                 basemem = physmap[i + 1] / 1024;
 1916                                 break;
 1917                         }
 1918                 }
 1919 
 1920                 /* XXX: If we couldn't find basemem from SMAP, just guess. */
 1921                 if (basemem == 0)
 1922                         basemem = 640;
 1923                 basemem_setup();
 1924         }
 1925 
 1926         if (physmap[1] != 0)
 1927                 goto physmap_done;
 1928 
 1929         /*
 1930          * If we failed to find an SMAP, figure out the extended
 1931          * memory size.  We will then build a simple memory map with
 1932          * two segments, one for "base memory" and the second for
 1933          * "extended memory".  Note that "extended memory" starts at a
 1934          * physical address of 1MB and that both basemem and extmem
 1935          * are in units of 1KB.
 1936          *
 1937          * First, try to fetch the extended memory size via INT 15:E801.
 1938          */
 1939         vmf.vmf_ax = 0xE801;
 1940         if (vm86_intcall(0x15, &vmf) == 0) {
 1941                 extmem = vmf.vmf_cx + vmf.vmf_dx * 64;
 1942         } else {
 1943                 /*
 1944                  * If INT15:E801 fails, this is our last ditch effort
 1945                  * to determine the extended memory size.  Currently
 1946                  * we prefer the RTC value over INT15:88.
 1947                  */
 1948 #if 0
 1949                 vmf.vmf_ah = 0x88;
 1950                 vm86_intcall(0x15, &vmf);
 1951                 extmem = vmf.vmf_ax;
 1952 #else
 1953                 extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8);
 1954 #endif
 1955         }
 1956 
 1957         /*
 1958          * Special hack for chipsets that still remap the 384k hole when
 1959          * there's 16MB of memory - this really confuses people that
 1960          * are trying to use bus mastering ISA controllers with the
 1961          * "16MB limit"; they only have 16MB, but the remapping puts
 1962          * them beyond the limit.
 1963          *
 1964          * If extended memory is between 15-16MB (16-17MB phys address range),
 1965          *      chop it to 15MB.
 1966          */
 1967         if ((extmem > 15 * 1024) && (extmem < 16 * 1024))
 1968                 extmem = 15 * 1024;
 1969 
 1970         physmap[0] = 0;
 1971         physmap[1] = basemem * 1024;
 1972         physmap_idx = 2;
 1973         physmap[physmap_idx] = 0x100000;
 1974         physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024;
 1975 
 1976 physmap_done:
 1977         /*
 1978          * Now, physmap contains a map of physical memory.
 1979          */
 1980 
 1981 #ifdef SMP
 1982         /* make hole for AP bootstrap code */
 1983         alloc_ap_trampoline(physmap, &physmap_idx);
 1984 #endif
 1985 
 1986         /*
 1987          * Maxmem isn't the "maximum memory", it's one larger than the
 1988          * highest page of the physical address space.  It should be
 1989          * called something like "Maxphyspage".  We may adjust this 
 1990          * based on ``hw.physmem'' and the results of the memory test.
 1991          *
 1992          * This is especially confusing when it is much larger than the
 1993          * memory size and is displayed as "realmem".
 1994          */
 1995         Maxmem = atop(physmap[physmap_idx + 1]);
 1996 
 1997 #ifdef MAXMEM
 1998         Maxmem = MAXMEM / 4;
 1999 #endif
 2000 
 2001         if (TUNABLE_QUAD_FETCH("hw.physmem", &physmem_tunable))
 2002                 Maxmem = atop(physmem_tunable);
 2003 
 2004         /*
 2005          * If we have an SMAP, don't allow MAXMEM or hw.physmem to extend
 2006          * the amount of memory in the system.
 2007          */
 2008         if (has_smap && Maxmem > atop(physmap[physmap_idx + 1]))
 2009                 Maxmem = atop(physmap[physmap_idx + 1]);
 2010 
 2011         /*
 2012          * By default enable the memory test on real hardware, and disable
 2013          * it if we appear to be running in a VM.  This avoids touching all
 2014          * pages unnecessarily, which doesn't matter on real hardware but is
 2015          * bad for shared VM hosts.  Use a general name so that
 2016          * one could eventually do more with the code than just disable it.
 2017          */
 2018         memtest = (vm_guest > VM_GUEST_NO) ? 0 : 1;
 2019         TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest);
 2020 
 2021         if (atop(physmap[physmap_idx + 1]) != Maxmem &&
 2022             (boothowto & RB_VERBOSE))
 2023                 printf("Physical memory use set to %ldK\n", Maxmem * 4);
 2024 
 2025         /*
 2026          * If Maxmem has been increased beyond what the system has detected,
 2027          * extend the last memory segment to the new limit.
 2028          */ 
 2029         if (atop(physmap[physmap_idx + 1]) < Maxmem)
 2030                 physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem);
 2031 
 2032         /* call pmap initialization to make new kernel address space */
 2033         pmap_bootstrap(first);
 2034 
 2035         /*
 2036          * Size up each available chunk of physical memory.
 2037          */
 2038         physmap[0] = PAGE_SIZE;         /* mask off page 0 */
 2039         pa_indx = 0;
 2040         da_indx = 1;
 2041         phys_avail[pa_indx++] = physmap[0];
 2042         phys_avail[pa_indx] = physmap[0];
 2043         dump_avail[da_indx] = physmap[0];
 2044         pte = CMAP3;
 2045 
 2046         /*
 2047          * Get dcons buffer address
 2048          */
 2049         if (getenv_quad("dcons.addr", &dcons_addr) == 0 ||
 2050             getenv_quad("dcons.size", &dcons_size) == 0)
 2051                 dcons_addr = 0;
 2052 
 2053         /*
 2054          * physmap is in bytes, so when converting to page boundaries,
 2055          * round up the start address and round down the end address.
 2056          */
 2057         for (i = 0; i <= physmap_idx; i += 2) {
 2058                 vm_paddr_t end;
 2059 
 2060                 end = ptoa((vm_paddr_t)Maxmem);
 2061                 if (physmap[i + 1] < end)
 2062                         end = trunc_page(physmap[i + 1]);
 2063                 for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
 2064                         int tmp, page_bad, full;
 2065                         int *ptr = (int *)CADDR3;
 2066 
 2067                         full = FALSE;
 2068                         /*
 2069                          * block out kernel memory as not available.
 2070                          */
 2071                         if (pa >= KERNLOAD && pa < first)
 2072                                 goto do_dump_avail;
 2073 
 2074                         /*
 2075                          * block out dcons buffer
 2076                          */
 2077                         if (dcons_addr > 0
 2078                             && pa >= trunc_page(dcons_addr)
 2079                             && pa < dcons_addr + dcons_size)
 2080                                 goto do_dump_avail;
 2081 
 2082                         page_bad = FALSE;
 2083                         if (memtest == 0)
 2084                                 goto skip_memtest;
 2085 
 2086                         /*
 2087                          * map page into kernel: valid, read/write,non-cacheable
 2088                          */
 2089                         *pte = pa | PG_V | PG_RW | PG_N;
 2090                         invltlb();
 2091 
 2092                         tmp = *(int *)ptr;
 2093                         /*
 2094                          * Test for alternating 1's and 0's
 2095                          */
 2096                         *(volatile int *)ptr = 0xaaaaaaaa;
 2097                         if (*(volatile int *)ptr != 0xaaaaaaaa)
 2098                                 page_bad = TRUE;
 2099                         /*
 2100                          * Test for alternating 0's and 1's
 2101                          */
 2102                         *(volatile int *)ptr = 0x55555555;
 2103                         if (*(volatile int *)ptr != 0x55555555)
 2104                                 page_bad = TRUE;
 2105                         /*
 2106                          * Test for all 1's
 2107                          */
 2108                         *(volatile int *)ptr = 0xffffffff;
 2109                         if (*(volatile int *)ptr != 0xffffffff)
 2110                                 page_bad = TRUE;
 2111                         /*
 2112                          * Test for all 0's
 2113                          */
 2114                         *(volatile int *)ptr = 0x0;
 2115                         if (*(volatile int *)ptr != 0x0)
 2116                                 page_bad = TRUE;
 2117                         /*
 2118                          * Restore original value.
 2119                          */
 2120                         *(int *)ptr = tmp;
 2121 
 2122 skip_memtest:
 2123                         /*
 2124                          * Adjust array of valid/good pages.
 2125                          */
 2126                         if (page_bad == TRUE)
 2127                                 continue;
 2128                         /*
 2129                          * If this good page is a continuation of the
 2130                          * previous set of good pages, then just increase
 2131                          * the end pointer. Otherwise start a new chunk.
 2132                          * Note that "end" points one higher than end,
 2133                          * making the range >= start and < end.
 2134                          * If we're also doing a speculative memory
 2135                          * test and we at or past the end, bump up Maxmem
 2136                          * so that we keep going. The first bad page
 2137                          * will terminate the loop.
 2138                          */
 2139                         if (phys_avail[pa_indx] == pa) {
 2140                                 phys_avail[pa_indx] += PAGE_SIZE;
 2141                         } else {
 2142                                 pa_indx++;
 2143                                 if (pa_indx == PHYS_AVAIL_ARRAY_END) {
 2144                                         printf(
 2145                 "Too many holes in the physical address space, giving up\n");
 2146                                         pa_indx--;
 2147                                         full = TRUE;
 2148                                         goto do_dump_avail;
 2149                                 }
 2150                                 phys_avail[pa_indx++] = pa;     /* start */
 2151                                 phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */
 2152                         }
 2153                         physmem++;
 2154 do_dump_avail:
 2155                         if (dump_avail[da_indx] == pa) {
 2156                                 dump_avail[da_indx] += PAGE_SIZE;
 2157                         } else {
 2158                                 da_indx++;
 2159                                 if (da_indx == DUMP_AVAIL_ARRAY_END) {
 2160                                         da_indx--;
 2161                                         goto do_next;
 2162                                 }
 2163                                 dump_avail[da_indx++] = pa;     /* start */
 2164                                 dump_avail[da_indx] = pa + PAGE_SIZE; /* end */
 2165                         }
 2166 do_next:
 2167                         if (full)
 2168                                 break;
 2169                 }
 2170         }
 2171         *pte = 0;
 2172         invltlb();
 2173         
 2174         /*
 2175          * XXX
 2176          * The last chunk must contain at least one page plus the message
 2177          * buffer to avoid complicating other code (message buffer address
 2178          * calculation, etc.).
 2179          */
 2180         while (phys_avail[pa_indx - 1] + PAGE_SIZE +
 2181             round_page(msgbufsize) >= phys_avail[pa_indx]) {
 2182                 physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
 2183                 phys_avail[pa_indx--] = 0;
 2184                 phys_avail[pa_indx--] = 0;
 2185         }
 2186 
 2187         Maxmem = atop(phys_avail[pa_indx]);
 2188 
 2189         /* Trim off space for the message buffer. */
 2190         phys_avail[pa_indx] -= round_page(msgbufsize);
 2191 
 2192         /* Map the message buffer. */
 2193         for (off = 0; off < round_page(msgbufsize); off += PAGE_SIZE)
 2194                 pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] +
 2195                     off);
 2196 }
 2197 
/*
 * Initialize the kernel debugger framework: load the kernel symbol
 * table for DDB, run generic kdb initialization, and enter the debugger
 * immediately if the boot flags (-d) requested it.
 */
static void
i386_kdb_init(void)
{
#ifdef DDB
        db_fetch_ksymtab(bootinfo.bi_symtab, bootinfo.bi_esymtab);
#endif
        kdb_init();
#ifdef KDB
        if (boothowto & RB_KDB)
                kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
#endif
}
 2210 
/*
 * Relocate all interrupt/trap gate entries in the IDT by setidt_disp,
 * moving the handler offsets into the trampoline region.  Entries of
 * other gate types (e.g. the double-fault task gate) carry no usable
 * offset and are left alone.  Assertions verify each offset lies within
 * the exception section before relocation and within the trampoline
 * range afterwards.
 */
static void
fixup_idt(void)
{
        struct gate_descriptor *ip;
        uintptr_t off;
        int x;

        for (x = 0; x < NIDT; x++) {
                ip = &idt[x];
                if (ip->gd_type != SDT_SYS386IGT &&
                    ip->gd_type != SDT_SYS386TGT)
                        continue;
                /* Reassemble the 32-bit offset from its split halves. */
                off = ip->gd_looffset + (((u_int)ip->gd_hioffset) << 16);
                KASSERT(off >= (uintptr_t)start_exceptions &&
                    off < (uintptr_t)end_exceptions,
                    ("IDT[%d] type %d off %#x", x, ip->gd_type, off));
                off += setidt_disp;
                MPASS(off >= PMAP_TRM_MIN_ADDRESS &&
                    off < PMAP_TRM_MAX_ADDRESS);
                ip->gd_looffset = off;
                ip->gd_hioffset = off >> 16;
        }
}
 2234 
/*
 * First-stage IDT population: fill every slot with the 'rsvd' catch-all
 * handler, then install the architecture-defined exception vectors and
 * the int 0x80 syscall gate.  Gates reachable from user mode (breakpoint,
 * overflow, syscall, dtrace return) use SEL_UPL; all others are SEL_KPL.
 */
static void
i386_setidt1(void)
{
        int x;

        /* exceptions */
        for (x = 0; x < NIDT; x++)
                setidt(x, &IDTVEC(rsvd), SDT_SYS386IGT, SEL_KPL,
                    GSEL(GCODE_SEL, SEL_KPL));
        setidt(IDT_DE, &IDTVEC(div), SDT_SYS386IGT, SEL_KPL,
            GSEL(GCODE_SEL, SEL_KPL));
        setidt(IDT_DB, &IDTVEC(dbg), SDT_SYS386IGT, SEL_KPL,
            GSEL(GCODE_SEL, SEL_KPL));
        setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYS386IGT, SEL_KPL,
            GSEL(GCODE_SEL, SEL_KPL));
        setidt(IDT_BP, &IDTVEC(bpt), SDT_SYS386IGT, SEL_UPL,
            GSEL(GCODE_SEL, SEL_KPL));
        setidt(IDT_OF, &IDTVEC(ofl), SDT_SYS386IGT, SEL_UPL,
            GSEL(GCODE_SEL, SEL_KPL));
        setidt(IDT_BR, &IDTVEC(bnd), SDT_SYS386IGT, SEL_KPL,
            GSEL(GCODE_SEL, SEL_KPL));
        setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386IGT, SEL_KPL,
            GSEL(GCODE_SEL, SEL_KPL));
        setidt(IDT_NM, &IDTVEC(dna), SDT_SYS386IGT, SEL_KPL,
            GSEL(GCODE_SEL, SEL_KPL));
        /*
         * Double fault uses a task gate via GPANIC_SEL (NULL handler:
         * a task gate's offset field is ignored).
         */
        setidt(IDT_DF, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL,
            SEL_KPL));
        setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYS386IGT,
            SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
        setidt(IDT_TS, &IDTVEC(tss), SDT_SYS386IGT, SEL_KPL,
            GSEL(GCODE_SEL, SEL_KPL));
        setidt(IDT_NP, &IDTVEC(missing), SDT_SYS386IGT, SEL_KPL,
            GSEL(GCODE_SEL, SEL_KPL));
        setidt(IDT_SS, &IDTVEC(stk), SDT_SYS386IGT, SEL_KPL,
            GSEL(GCODE_SEL, SEL_KPL));
        setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386IGT, SEL_KPL,
            GSEL(GCODE_SEL, SEL_KPL));
        setidt(IDT_PF, &IDTVEC(page), SDT_SYS386IGT, SEL_KPL,
            GSEL(GCODE_SEL, SEL_KPL));
        setidt(IDT_MF, &IDTVEC(fpu), SDT_SYS386IGT, SEL_KPL,
            GSEL(GCODE_SEL, SEL_KPL));
        setidt(IDT_AC, &IDTVEC(align), SDT_SYS386IGT, SEL_KPL,
            GSEL(GCODE_SEL, SEL_KPL));
        setidt(IDT_MC, &IDTVEC(mchk), SDT_SYS386IGT, SEL_KPL,
            GSEL(GCODE_SEL, SEL_KPL));
        setidt(IDT_XF, &IDTVEC(xmm), SDT_SYS386IGT, SEL_KPL,
            GSEL(GCODE_SEL, SEL_KPL));
        /* int 0x80 must be callable from user mode. */
        setidt(IDT_SYSCALL, &IDTVEC(int0x80_syscall),
            SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
#ifdef KDTRACE_HOOKS
        setidt(IDT_DTRACE_RET, &IDTVEC(dtrace_ret),
            SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
#endif
#ifdef XENHVM
        setidt(IDT_EVTCHN, &IDTVEC(xen_intr_upcall),
            SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
#endif
}
 2293 
/*
 * Second-stage IDT fixup: re-install the #UD and #GP vectors.
 * NOTE(review): presumably called after an intermediate boot phase
 * temporarily repointed these two gates -- confirm against the caller
 * in init386().
 */
static void
i386_setidt2(void)
{

        setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386IGT, SEL_KPL,
            GSEL(GCODE_SEL, SEL_KPL));
        setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386IGT, SEL_KPL,
            GSEL(GCODE_SEL, SEL_KPL));
}
 2303 
#if defined(DEV_ISA) && !defined(DEV_ATPIC)
/*
 * With ISA but no AT-PIC driver configured, install spurious-interrupt
 * handlers on the two legacy PIC spurious vectors (IRQ 7 and IRQ 15).
 */
static void
i386_setidt3(void)
{

        setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint),
            SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
        setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint),
            SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
}
#endif
 2315 
/*
 * Machine-dependent bootstrap for the BSP.  'first' is the address of
 * the first free page after the kernel (used here to carve out the
 * dynamic per-CPU area and possibly advanced past a loaded microcode
 * update).  Sets up thread0, the GDT/IDT/TSS, per-CPU data, early
 * mutexes, the clock, console, PICs and FPU, and returns the address
 * of thread0's pcb for locore to use as the kernel stack location.
 */
register_t
init386(int first)
{
        struct region_descriptor r_gdt, r_idt;  /* table descriptors */
        int gsel_tss, metadata_missing, x, pa;
        struct pcpu *pc;
        struct xstate_hdr *xhdr;
        caddr_t kmdp;
        vm_offset_t addend;
        size_t ucode_len;
        int late_console;

        thread0.td_kstack = proc0kstack;
        thread0.td_kstack_pages = TD0_KSTACK_PAGES;

        /*
         * This may be done better later if it gets more high level
         * components in it. If so just link td->td_proc here.
         */
        proc_linkup0(&proc0, &thread0);

        /*
         * Locate the loader(8) metadata and environment.  Physical
         * addresses below KERNBASE need the PMAP_MAP_LOW offset to be
         * addressable from the kernel.
         */
        if (bootinfo.bi_modulep) {
                metadata_missing = 0;
                addend = (vm_paddr_t)bootinfo.bi_modulep < KERNBASE ?
                    PMAP_MAP_LOW : 0;
                preload_metadata = (caddr_t)bootinfo.bi_modulep + addend;
                preload_bootstrap_relocate(addend);
        } else {
                metadata_missing = 1;
        }

        if (bootinfo.bi_envp != 0) {
                addend = (vm_paddr_t)bootinfo.bi_envp < KERNBASE ?
                    PMAP_MAP_LOW : 0;
                init_static_kenv((char *)bootinfo.bi_envp + addend, 0);
        } else {
                init_static_kenv(NULL, 0);
        }

        /*
         * Re-evaluate CPU features if we loaded a microcode update.
         */
        ucode_len = ucode_load_bsp(first);
        if (ucode_len != 0) {
                identify_cpu();
                /* The update was staged at 'first'; skip past it. */
                first = roundup2(first + ucode_len, PAGE_SIZE);
        }

        identify_hypervisor();

        /* Init basic tunables, hz etc */
        init_param1();

        /*
         * Make gdt memory segments.  All segments cover the full 4GB
         * of address space and permissions are enforced at page level.
         */
        gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1);
        gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1);
        gdt_segs[GUCODE_SEL].ssd_limit = atop(0 - 1);
        gdt_segs[GUDATA_SEL].ssd_limit = atop(0 - 1);
        gdt_segs[GUFS_SEL].ssd_limit = atop(0 - 1);
        gdt_segs[GUGS_SEL].ssd_limit = atop(0 - 1);

        /* Boot CPU's static per-CPU data, addressed via GPRIV_SEL. */
        pc = &__pcpu[0];
        gdt_segs[GPRIV_SEL].ssd_limit = atop(0 - 1);
        gdt_segs[GPRIV_SEL].ssd_base = (int)pc;
        gdt_segs[GPROC0_SEL].ssd_base = (int)&common_tss0;

        for (x = 0; x < NGDT; x++)
                ssdtosd(&gdt_segs[x], &gdt0[x].sd);

        r_gdt.rd_limit = NGDT * sizeof(gdt0[0]) - 1;
        r_gdt.rd_base =  (int)gdt0;
        mtx_init(&dt_lock, "descriptor tables", NULL, MTX_SPIN);
        lgdt(&r_gdt);

        /* Map and initialize the dynamic per-CPU area at 'first'. */
        pcpu_init(pc, 0, sizeof(struct pcpu));
        for (pa = first; pa < first + DPCPU_SIZE; pa += PAGE_SIZE)
                pmap_kenter(pa, pa);
        dpcpu_init((void *)first, 0);
        first += DPCPU_SIZE;
        PCPU_SET(prvspace, pc);
        PCPU_SET(curthread, &thread0);
        /* Non-late cninit() and printf() can be moved up to here. */

        /*
         * Initialize mutexes.
         *
         * icu_lock: in order to allow an interrupt to occur in a critical
         *           section, to set pcpu->ipending (etc...) properly, we
         *           must be able to get the icu lock, so it can't be
         *           under witness.
         */
        mutex_init();
        mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS | MTX_NOPROFILE);

        /* Install the exception and system-call gates, then load the IDT. */
        i386_setidt1();

        r_idt.rd_limit = sizeof(idt0) - 1;
        r_idt.rd_base = (int) idt;
        lidt(&r_idt);

        /*
         * Initialize the clock before the console so that console
         * initialization can use DELAY().
         */
        clock_init();

        finishidentcpu();       /* Final stage of CPU initialization */
        i386_setidt2();
        initializecpu();        /* Initialize CPU registers */
        initializecpucache();

        /* pointer to selector slot for %fs/%gs */
        PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);

        /* Initialize the tss (except for the final esp0) early for vm86. */
        common_tss0.tss_esp0 = thread0.td_kstack + thread0.td_kstack_pages *
            PAGE_SIZE - VM86_STACK_SPACE;
        common_tss0.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
        /* I/O map base set past the TSS limit => no I/O permission bitmap. */
        common_tss0.tss_ioopt = sizeof(struct i386tss) << 16;
        gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
        PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd);
        PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
        ltr(gsel_tss);

        /* Initialize the PIC early for vm86 calls. */
#ifdef DEV_ISA
#ifdef DEV_ATPIC
        elcr_probe();
        atpic_startup();
#else
        /* Reset and mask the atpics and leave them shut down. */
        atpic_reset();

        /*
         * Point the ICU spurious interrupt vectors at the APIC spurious
         * interrupt handler.
         */
        i386_setidt3();
#endif
#endif

        /*
         * The console and kdb should be initialized even earlier than here,
         * but some console drivers don't work until after getmemsize().
         * Default to late console initialization to support these drivers.
         * This loses mainly printf()s in getmemsize() and early debugging.
         */
        late_console = 1;
        TUNABLE_INT_FETCH("debug.late_console", &late_console);
        if (!late_console) {
                cninit();
                i386_kdb_init();
        }

        /* Apply kernel ifunc relocations (presumably now that CPU
         * features are final — confirm against link_elf_ireloc()). */
        kmdp = preload_search_by_type("elf kernel");
        link_elf_ireloc(kmdp);

        vm86_initialize();
        getmemsize(first);
        init_param2(physmem);

        /* now running on new page tables, configured, and u/iom is accessible */

        if (late_console)
                cninit();

        if (metadata_missing)
                printf("WARNING: loader(8) metadata is missing!\n");

        if (late_console)
                i386_kdb_init();

        msgbufinit(msgbufp, msgbufsize);
        npxinit(true);
        /*
         * Set up thread0 pcb after npxinit calculated pcb + fpu save
         * area size.  Zero out the extended state header in fpu save
         * area.
         */
        thread0.td_pcb = get_pcb_td(&thread0);
        thread0.td_pcb->pcb_save = get_pcb_user_save_td(&thread0);
        bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size);
        if (use_xsave) {
                xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) +
                    1);
                xhdr->xstate_bv = xsave_mask;
        }
        PCPU_SET(curpcb, thread0.td_pcb);
        /* Move esp0 in the tss to its final place. */
        /* Note: -16 is so we can grow the trapframe if we came from vm86 */
        common_tss0.tss_esp0 = (vm_offset_t)thread0.td_pcb - VM86_STACK_SPACE;
        PCPU_SET(kesp0, common_tss0.tss_esp0);
        gdt[GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;     /* clear busy bit */
        ltr(gsel_tss);

        /* transfer to user mode */

        _ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
        _udatasel = GSEL(GUDATA_SEL, SEL_UPL);

        /* setup proc 0's pcb */
        thread0.td_pcb->pcb_flags = 0;
#if defined(PAE) || defined(PAE_TABLES)
        thread0.td_pcb->pcb_cr3 = (int)IdlePDPT;
#else
        thread0.td_pcb->pcb_cr3 = (int)IdlePTD;
#endif
        thread0.td_pcb->pcb_ext = 0;
        thread0.td_frame = &proc0_tf;

#ifdef FDT
        x86_init_fdt();
#endif

        /* Location of kernel stack for locore */
        return ((register_t)thread0.td_pcb);
}
 2536 
/*
 * Relocate the descriptor tables and low-level exception machinery into
 * trampoline memory obtained from pmap_trm_alloc(): the GDT (with room
 * for all CPUs), the common TSSs, the exception trampoline code and its
 * stack, the IDT (fixed up for the relocated handlers), the double
 * fault TSS with its private stack, the default LDT, and the per-CPU
 * copyout buffer.  Runs as a SYSINIT at SI_SUB_VM/SI_ORDER_SECOND,
 * i.e. once the VM system can service pmap_trm_alloc().
 */
static void
machdep_init_trampoline(void)
{
        struct region_descriptor r_gdt, r_idt;
        struct i386tss *tss;
        char *copyout_buf, *trampoline, *tramp_stack_base;
        int x;

        /* New GDT sized for every CPU; seed it from the boot GDT. */
        gdt = pmap_trm_alloc(sizeof(union descriptor) * NGDT * mp_ncpus,
            M_NOWAIT | M_ZERO);
        bcopy(gdt0, gdt, sizeof(union descriptor) * NGDT);
        r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
        r_gdt.rd_base = (int)gdt;
        lgdt(&r_gdt);

        /* Relocate the common TSS and retarget GPROC0_SEL at it. */
        tss = pmap_trm_alloc(sizeof(struct i386tss) * mp_ncpus,
            M_NOWAIT | M_ZERO);
        bcopy(&common_tss0, tss, sizeof(struct i386tss));
        gdt[GPROC0_SEL].sd.sd_lobase = (int)tss;
        gdt[GPROC0_SEL].sd.sd_hibase = (u_int)tss >> 24;
        /* Reset the type: loading a TSS marks its descriptor busy. */
        gdt[GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;

        PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);
        PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd);
        PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
        PCPU_SET(common_tssp, tss);
        ltr(GSEL(GPROC0_SEL, SEL_KPL));

        /* Copy the exception entry code and give it a stack. */
        trampoline = pmap_trm_alloc(end_exceptions - start_exceptions,
            M_NOWAIT);
        bcopy(start_exceptions, trampoline, end_exceptions - start_exceptions);
        tramp_stack_base = pmap_trm_alloc(TRAMP_STACK_SZ, M_NOWAIT);
        PCPU_SET(trampstk, (uintptr_t)tramp_stack_base + TRAMP_STACK_SZ -
            VM86_STACK_SPACE);
        tss[0].tss_esp0 = PCPU_GET(trampstk);

        idt = pmap_trm_alloc(sizeof(idt0), M_NOWAIT | M_ZERO);
        bcopy(idt0, idt, sizeof(idt0));

        /* Re-initialize new IDT since the handlers were relocated */
        setidt_disp = trampoline - start_exceptions;
        fixup_idt();

        r_idt.rd_limit = sizeof(struct gate_descriptor) * NIDT - 1;
        r_idt.rd_base = (int)idt;
        lidt(&r_idt);

        /* dblfault TSS */
        dblfault_tss = pmap_trm_alloc(sizeof(struct i386tss), M_NOWAIT | M_ZERO);
        dblfault_stack = pmap_trm_alloc(PAGE_SIZE, M_NOWAIT);
        dblfault_tss->tss_esp = dblfault_tss->tss_esp0 =
            dblfault_tss->tss_esp1 = dblfault_tss->tss_esp2 =
            (int)dblfault_stack + PAGE_SIZE;
        dblfault_tss->tss_ss = dblfault_tss->tss_ss0 = dblfault_tss->tss_ss1 =
            dblfault_tss->tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
#if defined(PAE) || defined(PAE_TABLES)
        dblfault_tss->tss_cr3 = (int)IdlePDPT;
#else
        dblfault_tss->tss_cr3 = (int)IdlePTD;
#endif
        dblfault_tss->tss_eip = (int)dblfault_handler;
        dblfault_tss->tss_eflags = PSL_KERNEL;
        dblfault_tss->tss_ds = dblfault_tss->tss_es =
            dblfault_tss->tss_gs = GSEL(GDATA_SEL, SEL_KPL);
        dblfault_tss->tss_fs = GSEL(GPRIV_SEL, SEL_KPL);
        dblfault_tss->tss_cs = GSEL(GCODE_SEL, SEL_KPL);
        dblfault_tss->tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
        gdt[GPANIC_SEL].sd.sd_lobase = (int)dblfault_tss;
        gdt[GPANIC_SEL].sd.sd_hibase = (u_int)dblfault_tss >> 24;

        /* make ldt memory segments */
        ldt = pmap_trm_alloc(sizeof(union descriptor) * NLDT,
            M_NOWAIT | M_ZERO);
        gdt[GLDT_SEL].sd.sd_lobase = (int)ldt;
        gdt[GLDT_SEL].sd.sd_hibase = (u_int)ldt >> 24;
        ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1);
        ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1);
        for (x = 0; x < nitems(ldt_segs); x++)
                ssdtosd(&ldt_segs[x], &ldt[x].sd);

        _default_ldt = GSEL(GLDT_SEL, SEL_KPL);
        lldt(_default_ldt);
        PCPU_SET(currentldt, _default_ldt);

        /* Per-CPU bounce buffer used by the copyout trampoline. */
        copyout_buf = pmap_trm_alloc(TRAMP_COPYOUT_SZ, M_NOWAIT);
        PCPU_SET(copyout_buf, copyout_buf);
        copyout_init_tramp();
}
SYSINIT(vm_mem, SI_SUB_VM, SI_ORDER_SECOND, machdep_init_trampoline, NULL);
 2626 
#ifdef COMPAT_43
/*
 * Install the LDT code descriptor (LSYS5CALLS_SEL) used by the old
 * 4.3BSD-style lcall system-call convention.  The descriptor is an
 * execute/read, 32-bit, page-granular segment based at the lcall
 * trampoline, which lives sz_lcall_tramp bytes below the process's
 * ps_strings area.
 */
static void
i386_setup_lcall_gate(void)
{
        struct sysentvec *sv;
        struct user_segment_descriptor desc;
        u_int lcall_addr;

        sv = &elf32_freebsd_sysvec;
        lcall_addr = (uintptr_t)sv->sv_psstrings - sz_lcall_tramp;

        bzero(&desc, sizeof(desc));
        desc.sd_type = SDT_MEMERA;      /* memory, execute/read, accessed */
        desc.sd_dpl = SEL_UPL;          /* callable from user mode */
        desc.sd_p = 1;
        desc.sd_def32 = 1;
        desc.sd_gran = 1;
        desc.sd_lolimit = 0xffff;
        desc.sd_hilimit = 0xf;
        desc.sd_lobase = lcall_addr;
        desc.sd_hibase = lcall_addr >> 24;
        bcopy(&desc, &ldt[LSYS5CALLS_SEL], sizeof(desc));
}
SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_ANY, i386_setup_lcall_gate, NULL);
#endif
 2652 
/*
 * Machine-dependent per-CPU structure initialization.  On i386 this
 * only seeds pc_acpi_id with an invalid sentinel; the real ACPI id is
 * presumably filled in later by platform enumeration code — not
 * visible from this file.
 */
void
cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
{

        pcpu->pc_acpi_id = 0xffffffff;
}
 2659 
 2660 static int
 2661 smap_sysctl_handler(SYSCTL_HANDLER_ARGS)
 2662 {
 2663         struct bios_smap *smapbase;
 2664         struct bios_smap_xattr smap;
 2665         caddr_t kmdp;
 2666         uint32_t *smapattr;
 2667         int count, error, i;
 2668 
 2669         /* Retrieve the system memory map from the loader. */
 2670         kmdp = preload_search_by_type("elf kernel");
 2671         if (kmdp == NULL)
 2672                 kmdp = preload_search_by_type("elf32 kernel");
 2673         smapbase = (struct bios_smap *)preload_search_info(kmdp,
 2674             MODINFO_METADATA | MODINFOMD_SMAP);
 2675         if (smapbase == NULL)
 2676                 return (0);
 2677         smapattr = (uint32_t *)preload_search_info(kmdp,
 2678             MODINFO_METADATA | MODINFOMD_SMAP_XATTR);
 2679         count = *((u_int32_t *)smapbase - 1) / sizeof(*smapbase);
 2680         error = 0;
 2681         for (i = 0; i < count; i++) {
 2682                 smap.base = smapbase[i].base;
 2683                 smap.length = smapbase[i].length;
 2684                 smap.type = smapbase[i].type;
 2685                 if (smapattr != NULL)
 2686                         smap.xattr = smapattr[i];
 2687                 else
 2688                         smap.xattr = 0;
 2689                 error = SYSCTL_OUT(req, &smap, sizeof(smap));
 2690         }
 2691         return (error);
 2692 }
 2693 SYSCTL_PROC(_machdep, OID_AUTO, smap, CTLTYPE_OPAQUE|CTLFLAG_RD, NULL, 0,
 2694     smap_sysctl_handler, "S,bios_smap_xattr", "Raw BIOS SMAP data");
 2695 
/*
 * Enter a spinlock section: disable interrupts and enter a critical
 * section, keeping a per-thread nesting count.  Interrupts are
 * disabled (and the prior eflags saved) only on the outermost entry;
 * nested calls just bump the count.  Paired with spinlock_exit().
 */
void
spinlock_enter(void)
{
        struct thread *td;
        register_t flags;

        td = curthread;
        if (td->td_md.md_spinlock_count == 0) {
                /* Outermost entry: disable interrupts, remember old state. */
                flags = intr_disable();
                td->td_md.md_spinlock_count = 1;
                td->td_md.md_saved_flags = flags;
        } else
                td->td_md.md_spinlock_count++;
        critical_enter();
}
 2711 
/*
 * Leave a spinlock section: undo one level of spinlock_enter().
 * Operations run in the reverse order of spinlock_enter(); the saved
 * interrupt state is restored only when the outermost level is exited.
 */
void
spinlock_exit(void)
{
        struct thread *td;
        register_t flags;

        td = curthread;
        critical_exit();
        /* Snapshot the saved flags before dropping the last count. */
        flags = td->td_md.md_saved_flags;
        td->td_md.md_spinlock_count--;
        if (td->td_md.md_spinlock_count == 0)
                intr_restore(flags);
}
 2725 
 2726 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
 2727 static void f00f_hack(void *unused);
 2728 SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL);
 2729 
 2730 static void
 2731 f00f_hack(void *unused)
 2732 {
 2733         struct region_descriptor r_idt;
 2734         struct gate_descriptor *new_idt;
 2735         vm_offset_t tmp;
 2736 
 2737         if (!has_f00f_bug)
 2738                 return;
 2739 
 2740         GIANT_REQUIRED;
 2741 
 2742         printf("Intel Pentium detected, installing workaround for F00F bug\n");
 2743 
 2744         tmp = (vm_offset_t)pmap_trm_alloc(PAGE_SIZE * 3, M_NOWAIT | M_ZERO);
 2745         if (tmp == 0)
 2746                 panic("kmem_malloc returned 0");
 2747         tmp = round_page(tmp);
 2748 
 2749         /* Put the problematic entry (#6) at the end of the lower page. */
 2750         new_idt = (struct gate_descriptor *)
 2751             (tmp + PAGE_SIZE - 7 * sizeof(struct gate_descriptor));
 2752         bcopy(idt, new_idt, sizeof(idt0));
 2753         r_idt.rd_base = (u_int)new_idt;
 2754         r_idt.rd_limit = sizeof(idt0) - 1;
 2755         lidt(&r_idt);
 2756         /* SMP machines do not need the F00F hack. */
 2757         idt = new_idt;
 2758         pmap_protect(kernel_pmap, tmp, tmp + PAGE_SIZE, VM_PROT_READ);
 2759 }
 2760 #endif /* defined(I586_CPU) && !NO_F00F_HACK */
 2761 
 2762 /*
 2763  * Construct a PCB from a trapframe. This is called from kdb_trap() where
 2764  * we want to start a backtrace from the function that caused us to enter
 2765  * the debugger. We have the context in the trapframe, but base the trace
 2766  * on the PCB. The PCB doesn't have to be perfect, as long as it contains
 2767  * enough for a backtrace.
 2768  */
void
makectx(struct trapframe *tf, struct pcb *pcb)
{

        /* Copy the callee-saved registers and pc from the trapframe. */
        pcb->pcb_edi = tf->tf_edi;
        pcb->pcb_esi = tf->tf_esi;
        pcb->pcb_ebp = tf->tf_ebp;
        pcb->pcb_ebx = tf->tf_ebx;
        pcb->pcb_eip = tf->tf_eip;
        /*
         * A trap from user mode (ISPL(cs) != 0) pushed esp/ss, so use
         * the saved esp.  A trap from kernel mode did not, so the stack
         * pointer at trap time is the end of the frame minus the two
         * 4-byte esp/ss slots that were never pushed.
         */
        pcb->pcb_esp = (ISPL(tf->tf_cs)) ? tf->tf_esp : (int)(tf + 1) - 8;
        /* %gs is not in the frame; read the live register. */
        pcb->pcb_gs = rgs();
}
 2781 
/*
 * ptrace support: set the thread's user instruction pointer to 'addr'.
 * Always succeeds.
 */
int
ptrace_set_pc(struct thread *td, u_long addr)
{

        td->td_frame->tf_eip = addr;
        return (0);
}
 2789 
/*
 * ptrace support: arm single-stepping for the thread by setting the
 * trap flag (PSL_T) in its saved eflags and marking the thread with
 * TDB_STEP.  Idempotent if the flag is already set.
 */
int
ptrace_single_step(struct thread *td)
{

        PROC_LOCK_ASSERT(td->td_proc, MA_OWNED);
        if ((td->td_frame->tf_eflags & PSL_T) == 0) {
                td->td_frame->tf_eflags |= PSL_T;
                td->td_dbgflags |= TDB_STEP;
        }
        return (0);
}
 2801 
/*
 * ptrace support: disarm single-stepping by clearing the trap flag
 * (PSL_T) and the TDB_STEP marker.
 */
int
ptrace_clear_single_step(struct thread *td)
{

        PROC_LOCK_ASSERT(td->td_proc, MA_OWNED);
        td->td_frame->tf_eflags &= ~PSL_T;
        td->td_dbgflags &= ~TDB_STEP;
        return (0);
}
 2811 
/*
 * Copy the thread's user register state into *regs.  %gs is kept in
 * the pcb rather than the trapframe, so it is fetched separately; the
 * rest comes from the trapframe via fill_frame_regs().
 */
int
fill_regs(struct thread *td, struct reg *regs)
{
        struct pcb *pcb;
        struct trapframe *tp;

        tp = td->td_frame;
        pcb = td->td_pcb;
        regs->r_gs = pcb->pcb_gs;
        return (fill_frame_regs(tp, regs));
}
 2823 
 2824 int
 2825 fill_frame_regs(struct trapframe *tp, struct reg *regs)
 2826 {
 2827 
 2828         regs->r_fs = tp->tf_fs;
 2829         regs->r_es = tp->tf_es;
 2830         regs->r_ds = tp->tf_ds;
 2831         regs->r_edi = tp->tf_edi;
 2832         regs->r_esi = tp->tf_esi;
 2833         regs->r_ebp = tp->tf_ebp;
 2834         regs->r_ebx = tp->tf_ebx;
 2835         regs->r_edx = tp->tf_edx;
 2836         regs->r_ecx = tp->tf_ecx;
 2837         regs->r_eax = tp->tf_eax;
 2838         regs->r_eip = tp->tf_eip;
 2839         regs->r_cs = tp->tf_cs;
 2840         regs->r_eflags = tp->tf_eflags;
 2841         regs->r_esp = tp->tf_esp;
 2842         regs->r_ss = tp->tf_ss;
 2843         regs->r_err = 0;
 2844         regs->r_trapno = 0;
 2845         return (0);
 2846 }
 2847 
/*
 * Install a new user register set for the thread.  The new eflags and
 * %cs are validated first (EFL_SECURE / CS_SECURE) so a debugger
 * cannot hand the thread privileged flag bits or a kernel code
 * selector; returns EINVAL if either check fails.  %gs goes to the
 * pcb, everything else to the trapframe.
 */
int
set_regs(struct thread *td, struct reg *regs)
{
        struct pcb *pcb;
        struct trapframe *tp;

        tp = td->td_frame;
        if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) ||
            !CS_SECURE(regs->r_cs))
                return (EINVAL);
        pcb = td->td_pcb;
        tp->tf_fs = regs->r_fs;
        tp->tf_es = regs->r_es;
        tp->tf_ds = regs->r_ds;
        tp->tf_edi = regs->r_edi;
        tp->tf_esi = regs->r_esi;
        tp->tf_ebp = regs->r_ebp;
        tp->tf_ebx = regs->r_ebx;
        tp->tf_edx = regs->r_edx;
        tp->tf_ecx = regs->r_ecx;
        tp->tf_eax = regs->r_eax;
        tp->tf_eip = regs->r_eip;
        tp->tf_cs = regs->r_cs;
        tp->tf_eflags = regs->r_eflags;
        tp->tf_esp = regs->r_esp;
        tp->tf_ss = regs->r_ss;
        pcb->pcb_gs = regs->r_gs;
        return (0);
}
 2877 
/*
 * Copy the thread's FPU register state into *fpregs.  The thread must
 * be stopped (curthread, suspended, or in a stopped process).  When
 * the CPU uses the fxsave format (cpu_fxsr), the XMM-layout save area
 * is converted to the legacy save87 layout expected by struct fpreg.
 */
int
fill_fpregs(struct thread *td, struct fpreg *fpregs)
{

        KASSERT(td == curthread || TD_IS_SUSPENDED(td) ||
            P_SHOULDSTOP(td->td_proc),
            ("not suspended thread %p", td));
        /* Flush live FPU contents to the pcb save area first. */
        npxgetregs(td);
        if (cpu_fxsr)
                npx_fill_fpregs_xmm(&get_pcb_user_save_td(td)->sv_xmm,
                    (struct save87 *)fpregs);
        else
                bcopy(&get_pcb_user_save_td(td)->sv_87, fpregs,
                    sizeof(*fpregs));
        return (0);
}
 2894 
/*
 * Install new FPU register state for the thread from *fpregs,
 * converting from the legacy save87 layout into the XMM layout when
 * the CPU uses fxsave (cpu_fxsr).  Done in a critical section so the
 * save area cannot change underneath us; npxuserinited() marks the
 * thread's user FPU state as initialized.
 */
int
set_fpregs(struct thread *td, struct fpreg *fpregs)
{

        critical_enter();
        if (cpu_fxsr)
                npx_set_fpregs_xmm((struct save87 *)fpregs,
                    &get_pcb_user_save_td(td)->sv_xmm);
        else
                bcopy(fpregs, &get_pcb_user_save_td(td)->sv_87,
                    sizeof(*fpregs));
        npxuserinited(td);
        critical_exit();
        return (0);
}
 2910 
 2911 /*
 2912  * Get machine context.
 2913  */
/*
 * Fill *mcp from the thread's trapframe, pcb and FPU state.  With
 * GET_MC_CLEAR_RET set, the syscall return registers (eax/edx) and
 * the carry flag are reported as zero/cleared instead of their frame
 * values.  The extended-FPU fields are zeroed here; callers wanting
 * xsave state use get_fpcontext() with a buffer directly.  Returns 0.
 */
int
get_mcontext(struct thread *td, mcontext_t *mcp, int flags)
{
        struct trapframe *tp;
        struct segment_descriptor *sdp;

        tp = td->td_frame;

        /* sigonstack() requires the proc lock. */
        PROC_LOCK(curthread->td_proc);
        mcp->mc_onstack = sigonstack(tp->tf_esp);
        PROC_UNLOCK(curthread->td_proc);
        /* %gs lives in the pcb, not the trapframe. */
        mcp->mc_gs = td->td_pcb->pcb_gs;
        mcp->mc_fs = tp->tf_fs;
        mcp->mc_es = tp->tf_es;
        mcp->mc_ds = tp->tf_ds;
        mcp->mc_edi = tp->tf_edi;
        mcp->mc_esi = tp->tf_esi;
        mcp->mc_ebp = tp->tf_ebp;
        mcp->mc_isp = tp->tf_isp;
        mcp->mc_eflags = tp->tf_eflags;
        if (flags & GET_MC_CLEAR_RET) {
                mcp->mc_eax = 0;
                mcp->mc_edx = 0;
                mcp->mc_eflags &= ~PSL_C;
        } else {
                mcp->mc_eax = tp->tf_eax;
                mcp->mc_edx = tp->tf_edx;
        }
        mcp->mc_ebx = tp->tf_ebx;
        mcp->mc_ecx = tp->tf_ecx;
        mcp->mc_eip = tp->tf_eip;
        mcp->mc_cs = tp->tf_cs;
        mcp->mc_esp = tp->tf_esp;
        mcp->mc_ss = tp->tf_ss;
        mcp->mc_len = sizeof(*mcp);
        /* Legacy FPU state only; no extended-state buffer supplied. */
        get_fpcontext(td, mcp, NULL, 0);
        /* Reconstruct the %fs/%gs segment bases from their descriptors. */
        sdp = &td->td_pcb->pcb_fsd;
        mcp->mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase;
        sdp = &td->td_pcb->pcb_gsd;
        mcp->mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase;
        mcp->mc_flags = 0;
        mcp->mc_xfpustate = 0;
        mcp->mc_xfpustate_len = 0;
        bzero(mcp->mc_spare2, sizeof(mcp->mc_spare2));
        return (0);
}
 2960 
 2961 /*
 2962  * Set machine context.
 2963  *
 2964  * However, we don't set any but the user modifiable flags, and we won't
 2965  * touch the cs selector.
 2966  */
/*
 * Install the machine context *mcp into the thread.  Validates the
 * structure length and flag bits, restricts eflags changes to the
 * user-modifiable bits (PSL_USERCHANGE), and leaves %cs untouched per
 * the comment above.  Extended FPU state, when flagged, is copied in
 * from userspace before being handed to set_fpcontext().  Returns
 * EINVAL on malformed input or any copyin/FPU failure code.
 */
int
set_mcontext(struct thread *td, mcontext_t *mcp)
{
        struct trapframe *tp;
        char *xfpustate;
        int eflags, ret;

        tp = td->td_frame;
        if (mcp->mc_len != sizeof(*mcp) ||
            (mcp->mc_flags & ~_MC_FLAG_MASK) != 0)
                return (EINVAL);
        /* Merge only user-changeable flag bits into the current eflags. */
        eflags = (mcp->mc_eflags & PSL_USERCHANGE) |
            (tp->tf_eflags & ~PSL_USERCHANGE);
        if (mcp->mc_flags & _MC_HASFPXSTATE) {
                if (mcp->mc_xfpustate_len > cpu_max_ext_state_size -
                    sizeof(union savefpu))
                        return (EINVAL);
                /* alloca is safe here: length was bounded just above. */
                xfpustate = __builtin_alloca(mcp->mc_xfpustate_len);
                ret = copyin((void *)mcp->mc_xfpustate, xfpustate,
                    mcp->mc_xfpustate_len);
                if (ret != 0)
                        return (ret);
        } else
                xfpustate = NULL;
        ret = set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len);
        if (ret != 0)
                return (ret);
        tp->tf_fs = mcp->mc_fs;
        tp->tf_es = mcp->mc_es;
        tp->tf_ds = mcp->mc_ds;
        tp->tf_edi = mcp->mc_edi;
        tp->tf_esi = mcp->mc_esi;
        tp->tf_ebp = mcp->mc_ebp;
        tp->tf_ebx = mcp->mc_ebx;
        tp->tf_edx = mcp->mc_edx;
        tp->tf_ecx = mcp->mc_ecx;
        tp->tf_eax = mcp->mc_eax;
        tp->tf_eip = mcp->mc_eip;
        tp->tf_eflags = eflags;
        tp->tf_esp = mcp->mc_esp;
        tp->tf_ss = mcp->mc_ss;
        td->td_pcb->pcb_gs = mcp->mc_gs;
        return (0);
}
 3011 
 3012 static void
 3013 get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave,
 3014     size_t xfpusave_len)
 3015 {
 3016         size_t max_len, len;
 3017 
 3018         mcp->mc_ownedfp = npxgetregs(td);
 3019         bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0],
 3020             sizeof(mcp->mc_fpstate));
 3021         mcp->mc_fpformat = npxformat();
 3022         if (!use_xsave || xfpusave_len == 0)
 3023                 return;
 3024         max_len = cpu_max_ext_state_size - sizeof(union savefpu);
 3025         len = xfpusave_len;
 3026         if (len > max_len) {
 3027                 len = max_len;
 3028                 bzero(xfpusave + max_len, len - max_len);
 3029         }
 3030         mcp->mc_flags |= _MC_HASFPXSTATE;
 3031         mcp->mc_xfpustate_len = len;
 3032         bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len);
 3033 }
 3034 
/*
 * Install FPU state from *mcp (plus optional extended state in
 * xfpustate) into the thread.  _MC_FPFMT_NODEV means there is no
 * state to restore; unknown formats are rejected.  If the context
 * owned no FPU state (_MC_FPOWNED_NONE) the thread's current state
 * is simply dropped.  Returns 0, EINVAL, or an npxsetregs() error.
 */
static int
set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate,
    size_t xfpustate_len)
{
        int error;

        if (mcp->mc_fpformat == _MC_FPFMT_NODEV)
                return (0);
        else if (mcp->mc_fpformat != _MC_FPFMT_387 &&
            mcp->mc_fpformat != _MC_FPFMT_XMM)
                return (EINVAL);
        else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) {
                /* We don't care what state is left in the FPU or PCB. */
                fpstate_drop(td);
                error = 0;
        } else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU ||
            mcp->mc_ownedfp == _MC_FPOWNED_PCB) {
                error = npxsetregs(td, (union savefpu *)&mcp->mc_fpstate,
                    xfpustate, xfpustate_len);
        } else
                return (EINVAL);
        return (error);
}
 3058 
/*
 * Discard the thread's user FPU state: release the FPU if this thread
 * owns it and clear the pcb flags that mark the state as initialized,
 * so the next FPU use starts from a clean slate.
 */
static void
fpstate_drop(struct thread *td)
{

        KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu"));
        critical_enter();
        if (PCPU_GET(fpcurthread) == td)
                npxdrop();
        /*
         * XXX force a full drop of the npx.  The above only drops it if we
         * owned it.  npxgetregs() has the same bug in the !cpu_fxsr case.
         *
         * XXX I don't much like npxgetregs()'s semantics of doing a full
         * drop.  Dropping only to the pcb matches fnsave's behaviour.
         * We only need to drop to !PCB_INITDONE in sendsig().  But
         * sendsig() is the only caller of npxgetregs()... perhaps we just
         * have too many layers.
         *
         * NOTE(review): the flags below are cleared on curthread's pcb
         * rather than td's; this is only equivalent when callers pass
         * td == curthread — confirm against the call sites.
         */
        curthread->td_pcb->pcb_flags &= ~(PCB_NPXINITDONE |
            PCB_NPXUSERINITDONE);
        critical_exit();
}
 3081 
/*
 * Read the debug registers into *dbregs.  With td == NULL the live
 * hardware registers are read (dr0-dr3, dr6, dr7); otherwise the
 * values saved in the thread's pcb are used.  dr4/dr5 are reserved on
 * x86 and always reported as 0.  Always succeeds.
 */
int
fill_dbregs(struct thread *td, struct dbreg *dbregs)
{
        struct pcb *pcb;

        if (td == NULL) {
                dbregs->dr[0] = rdr0();
                dbregs->dr[1] = rdr1();
                dbregs->dr[2] = rdr2();
                dbregs->dr[3] = rdr3();
                dbregs->dr[6] = rdr6();
                dbregs->dr[7] = rdr7();
        } else {
                pcb = td->td_pcb;
                dbregs->dr[0] = pcb->pcb_dr0;
                dbregs->dr[1] = pcb->pcb_dr1;
                dbregs->dr[2] = pcb->pcb_dr2;
                dbregs->dr[3] = pcb->pcb_dr3;
                dbregs->dr[6] = pcb->pcb_dr6;
                dbregs->dr[7] = pcb->pcb_dr7;
        }
        /* dr4/dr5 are reserved; never expose hardware contents. */
        dbregs->dr[4] = 0;
        dbregs->dr[5] = 0;
        return (0);
}
 3107 
/*
 * Write the debug registers from *dbregs.  With td == NULL the live
 * hardware registers are loaded directly (trusted, kernel-internal
 * use); otherwise the values are validated and stored in the thread's
 * pcb, and PCB_DBREGS is set so the context switch code loads them.
 * Returns EINVAL for undefined dr7 encodings or for enabled
 * breakpoints outside the user address space, else 0.
 */
int
set_dbregs(struct thread *td, struct dbreg *dbregs)
{
        struct pcb *pcb;
        int i;

        if (td == NULL) {
                load_dr0(dbregs->dr[0]);
                load_dr1(dbregs->dr[1]);
                load_dr2(dbregs->dr[2]);
                load_dr3(dbregs->dr[3]);
                load_dr6(dbregs->dr[6]);
                load_dr7(dbregs->dr[7]);
        } else {
                /*
                 * Don't let an illegal value for dr7 get set.  Specifically,
                 * check for undefined settings.  Setting these bit patterns
                 * result in undefined behaviour and can lead to an unexpected
                 * TRCTRAP.
                 */
                for (i = 0; i < 4; i++) {
                        if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02)
                                return (EINVAL);
                        if (DBREG_DR7_LEN(dbregs->dr[7], i) == 0x02)
                                return (EINVAL);
                }

                pcb = td->td_pcb;

                /*
                 * Don't let a process set a breakpoint that is not within the
                 * process's address space.  If a process could do this, it
                 * could halt the system by setting a breakpoint in the kernel
                 * (if ddb was enabled).  Thus, we need to check to make sure
                 * that no breakpoints are being enabled for addresses outside
                 * process's address space.
                 *
                 * XXX - what about when the watched area of the user's
                 * address space is written into from within the kernel
                 * ... wouldn't that still cause a breakpoint to be generated
                 * from within kernel mode?
                 */

                if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) {
                        /* dr0 is enabled */
                        if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS)
                                return (EINVAL);
                }

                if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) {
                        /* dr1 is enabled */
                        if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS)
                                return (EINVAL);
                }

                if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) {
                        /* dr2 is enabled */
                        if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS)
                                return (EINVAL);
                }

                if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) {
                        /* dr3 is enabled */
                        if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS)
                                return (EINVAL);
                }

                pcb->pcb_dr0 = dbregs->dr[0];
                pcb->pcb_dr1 = dbregs->dr[1];
                pcb->pcb_dr2 = dbregs->dr[2];
                pcb->pcb_dr3 = dbregs->dr[3];
                pcb->pcb_dr6 = dbregs->dr[6];
                pcb->pcb_dr7 = dbregs->dr[7];

                /* Tell cpu_switch() to load these registers for td. */
                pcb->pcb_flags |= PCB_DBREGS;
        }

        return (0);
}
 3187 
 3188 /*
 3189  * Return > 0 if a hardware breakpoint has been hit, and the
 3190  * breakpoint was in user space.  Return 0, otherwise.
 3191  */
 3192 int
 3193 user_dbreg_trap(register_t dr6)
 3194 {
 3195         u_int32_t dr7;
 3196         u_int32_t bp;       /* breakpoint bits extracted from dr6 */
 3197         int nbp;            /* number of breakpoints that triggered */
 3198         caddr_t addr[4];    /* breakpoint addresses */
 3199         int i;
 3200 
 3201         bp = dr6 & DBREG_DR6_BMASK;
 3202         if (bp == 0) {
 3203                 /*
 3204                  * None of the breakpoint bits are set meaning this
 3205                  * trap was not caused by any of the debug registers
 3206                  */
 3207                 return 0;
 3208         }
 3209 
 3210         dr7 = rdr7();
 3211         if ((dr7 & 0x000000ff) == 0) {
 3212                 /*
 3213                  * all GE and LE bits in the dr7 register are zero,
 3214                  * thus the trap couldn't have been caused by the
 3215                  * hardware debug registers
 3216                  */
 3217                 return 0;
 3218         }
 3219 
 3220         nbp = 0;
 3221 
 3222         /*
 3223          * at least one of the breakpoints were hit, check to see
 3224          * which ones and if any of them are user space addresses
 3225          */
 3226 
 3227         if (bp & 0x01) {
 3228                 addr[nbp++] = (caddr_t)rdr0();
 3229         }
 3230         if (bp & 0x02) {
 3231                 addr[nbp++] = (caddr_t)rdr1();
 3232         }
 3233         if (bp & 0x04) {
 3234                 addr[nbp++] = (caddr_t)rdr2();
 3235         }
 3236         if (bp & 0x08) {
 3237                 addr[nbp++] = (caddr_t)rdr3();
 3238         }
 3239 
 3240         for (i = 0; i < nbp; i++) {
 3241                 if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) {
 3242                         /*
 3243                          * addr[i] is in user space
 3244                          */
 3245                         return nbp;
 3246                 }
 3247         }
 3248 
 3249         /*
 3250          * None of the breakpoints are in user space.
 3251          */
 3252         return 0;
 3253 }
 3254 
 3255 #ifdef KDB
 3256 
 3257 /*
 3258  * Provide inb() and outb() as functions.  They are normally only available as
 3259  * inline functions, thus cannot be called from the debugger.
 3260  */
 3261 
 3262 /* silence compiler warnings */
 3263 u_char inb_(u_short);
 3264 void outb_(u_short, u_char);
 3265 
 3266 u_char
 3267 inb_(u_short port)
 3268 {
 3269         return inb(port);
 3270 }
 3271 
 3272 void
 3273 outb_(u_short port, u_char data)
 3274 {
 3275         outb(port, data);
 3276 }
 3277 
 3278 #endif /* KDB */

Cache object: 7e4e89d42f8a347f6c5877c860c4bfda


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.