The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/i386/i386/machdep.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-4-Clause
    3  *
    4  * Copyright (c) 2018 The FreeBSD Foundation
    5  * Copyright (c) 1992 Terrence R. Lambert.
    6  * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
    7  * All rights reserved.
    8  *
    9  * This code is derived from software contributed to Berkeley by
   10  * William Jolitz.
   11  *
   12  * Portions of this software were developed by A. Joseph Koshy under
   13  * sponsorship from the FreeBSD Foundation and Google, Inc.
   14  *
   15  * Redistribution and use in source and binary forms, with or without
   16  * modification, are permitted provided that the following conditions
   17  * are met:
   18  * 1. Redistributions of source code must retain the above copyright
   19  *    notice, this list of conditions and the following disclaimer.
   20  * 2. Redistributions in binary form must reproduce the above copyright
   21  *    notice, this list of conditions and the following disclaimer in the
   22  *    documentation and/or other materials provided with the distribution.
   23  * 3. All advertising materials mentioning features or use of this software
   24  *    must display the following acknowledgement:
   25  *      This product includes software developed by the University of
   26  *      California, Berkeley and its contributors.
   27  * 4. Neither the name of the University nor the names of its contributors
   28  *    may be used to endorse or promote products derived from this software
   29  *    without specific prior written permission.
   30  *
   31  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   32  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   33  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   34  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   35  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   36  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   37  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   38  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   39  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   40  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   41  * SUCH DAMAGE.
   42  *
   43  *      from: @(#)machdep.c     7.4 (Berkeley) 6/3/91
   44  */
   45 
   46 #include <sys/cdefs.h>
   47 __FBSDID("$FreeBSD: releng/12.0/sys/i386/i386/machdep.c 341603 2018-12-05 19:16:12Z gjb $");
   48 
   49 #include "opt_apic.h"
   50 #include "opt_atpic.h"
   51 #include "opt_cpu.h"
   52 #include "opt_ddb.h"
   53 #include "opt_inet.h"
   54 #include "opt_isa.h"
   55 #include "opt_kstack_pages.h"
   56 #include "opt_maxmem.h"
   57 #include "opt_mp_watchdog.h"
   58 #include "opt_perfmon.h"
   59 #include "opt_platform.h"
   60 
   61 #include <sys/param.h>
   62 #include <sys/proc.h>
   63 #include <sys/systm.h>
   64 #include <sys/bio.h>
   65 #include <sys/buf.h>
   66 #include <sys/bus.h>
   67 #include <sys/callout.h>
   68 #include <sys/cons.h>
   69 #include <sys/cpu.h>
   70 #include <sys/eventhandler.h>
   71 #include <sys/exec.h>
   72 #include <sys/imgact.h>
   73 #include <sys/kdb.h>
   74 #include <sys/kernel.h>
   75 #include <sys/ktr.h>
   76 #include <sys/linker.h>
   77 #include <sys/lock.h>
   78 #include <sys/malloc.h>
   79 #include <sys/memrange.h>
   80 #include <sys/msgbuf.h>
   81 #include <sys/mutex.h>
   82 #include <sys/pcpu.h>
   83 #include <sys/ptrace.h>
   84 #include <sys/reboot.h>
   85 #include <sys/rwlock.h>
   86 #include <sys/sched.h>
   87 #include <sys/signalvar.h>
   88 #include <sys/smp.h>
   89 #include <sys/syscallsubr.h>
   90 #include <sys/sysctl.h>
   91 #include <sys/sysent.h>
   92 #include <sys/sysproto.h>
   93 #include <sys/ucontext.h>
   94 #include <sys/vmmeter.h>
   95 
   96 #include <vm/vm.h>
   97 #include <vm/vm_extern.h>
   98 #include <vm/vm_kern.h>
   99 #include <vm/vm_page.h>
  100 #include <vm/vm_map.h>
  101 #include <vm/vm_object.h>
  102 #include <vm/vm_pager.h>
  103 #include <vm/vm_param.h>
  104 #include <vm/vm_phys.h>
  105 
  106 #ifdef DDB
  107 #ifndef KDB
  108 #error KDB must be enabled in order for DDB to work!
  109 #endif
  110 #include <ddb/ddb.h>
  111 #include <ddb/db_sym.h>
  112 #endif
  113 
  114 #include <isa/rtc.h>
  115 
  116 #include <net/netisr.h>
  117 
  118 #include <machine/bootinfo.h>
  119 #include <machine/clock.h>
  120 #include <machine/cpu.h>
  121 #include <machine/cputypes.h>
  122 #include <machine/intr_machdep.h>
  123 #include <x86/mca.h>
  124 #include <machine/md_var.h>
  125 #include <machine/metadata.h>
  126 #include <machine/mp_watchdog.h>
  127 #include <machine/pc/bios.h>
  128 #include <machine/pcb.h>
  129 #include <machine/pcb_ext.h>
  130 #include <machine/proc.h>
  131 #include <machine/reg.h>
  132 #include <machine/sigframe.h>
  133 #include <machine/specialreg.h>
  134 #include <machine/sysarch.h>
  135 #include <machine/trap.h>
  136 #include <x86/ucode.h>
  137 #include <machine/vm86.h>
  138 #include <x86/init.h>
  139 #ifdef PERFMON
  140 #include <machine/perfmon.h>
  141 #endif
  142 #ifdef SMP
  143 #include <machine/smp.h>
  144 #endif
  145 #ifdef FDT
  146 #include <x86/fdt.h>
  147 #endif
  148 
  149 #ifdef DEV_APIC
  150 #include <x86/apicvar.h>
  151 #endif
  152 
  153 #ifdef DEV_ISA
  154 #include <x86/isa/icu.h>
  155 #endif
  156 
  157 /* Sanity check for __curthread() */
      /* Compile-time check that pc_curthread sits at offset 0 of struct pcpu. */
  158 CTASSERT(offsetof(struct pcpu, pc_curthread) == 0);
  159
      /* Prototypes for functions with external linkage. */
  160 register_t init386(int first);
  161 void dblfault_handler(void);
  162 void identify_cpu(void);
  163
      /* Prototypes for file-local helpers. */
  164 static void cpu_startup(void *);
  165 static void fpstate_drop(struct thread *td);
  166 static void get_fpcontext(struct thread *td, mcontext_t *mcp,
  167     char *xfpusave, size_t xfpusave_len);
  168 static int  set_fpcontext(struct thread *td, mcontext_t *mcp,
  169     char *xfpustate, size_t xfpustate_len);
      /*
       * Register cpu_startup() as the SI_SUB_CPU / SI_ORDER_FIRST
       * initialization hook; it is handed NULL as its argument.
       */
  170 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
  171 
  172 /*
       * Intel ICH registers.  ICH_SMI_EN is the I/O port that cpu_startup()
       * reads with inl() and writes back with outl().  Its replacement list
       * is parenthesized so that the macro always behaves as a single
       * operand, whatever operators surround it at the point of use.
       */
  173 #define ICH_PMBASE      0x400
  174 #define ICH_SMI_EN      (ICH_PMBASE + 0x30)
  175 
      /*
       * User data and code segment selectors.  The sendsig routines in this
       * file store them into the trapframe's segment slots before returning
       * to the signal handler.
       */
  176 int     _udatasel, _ucodesel;
  177 u_int   basemem;
  178
      /* Starts out as 1; presumably cleared once boot has progressed - confirm. */
  179 int cold = 1;
  180
  181 #ifdef COMPAT_43
  182 static void osendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask);
  183 #endif
  184 #ifdef COMPAT_FREEBSD4
  185 static void freebsd4_sendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask);
  186 #endif
  187
      /*
       * Memory sizes in pages: cpu_startup() converts Maxmem with ptoa() and
       * stores atop(memsize) into realmem.
       */
  188 long Maxmem = 0;
  189 long realmem = 0;
  190
  191 #ifdef PAE
  192 FEATURE(pae, "Physical Address Extensions");
  193 #endif
  194
  195 /*
  196  * The number of PHYSMAP entries must be one less than the number of
  197  * PHYSSEG entries because the PHYSMAP entry that spans the largest
  198  * physical address that is accessible by ISA DMA is split into two
  199  * PHYSSEG entries.
  200  */
  201 #define PHYSMAP_SIZE    (2 * (VM_PHYSSEG_MAX - 1))
  202
      /*
       * Physical memory ranges stored as (start, end) pairs; cpu_startup()
       * walks phys_avail two entries at a time until it finds an end of 0.
       * The two extra slots leave room for that terminating pair.
       */
  203 vm_paddr_t phys_avail[PHYSMAP_SIZE + 2];
  204 vm_paddr_t dump_avail[PHYSMAP_SIZE + 2];
  205
  206 /* must be 2 less so 0 0 can signal end of chunks */
  207 #define PHYS_AVAIL_ARRAY_END (nitems(phys_avail) - 2)
  208 #define DUMP_AVAIL_ARRAY_END (nitems(dump_avail) - 2)
  209
      /* Passed to vm_ksubmap_init() by cpu_startup(). */
  210 struct kva_md_info kmi;
  211
  212 static struct trapframe proc0_tf;
      /* One struct pcpu per possible CPU. */
  213 struct pcpu __pcpu[MAXCPU];
  214
  215 struct mtx icu_lock;
  216
  217 struct mem_range_softc mem_range_softc;
  218
      /* Symbols defined outside this file. */
  219 extern char start_exceptions[], end_exceptions[];
  220
  221 extern struct sysentvec elf32_freebsd_sysvec;
  222 
  223 /*
       * Default init_ops implementation.  The early clock source and early
       * delay hooks point at the i8254 routines; the MSI initialization hook
       * is only filled in when the kernel is configured with DEV_APIC.
       */
  224 struct init_ops init_ops = {
  225         .early_clock_source_init =      i8254_init,
  226         .early_delay =                  i8254_delay,
  227 #ifdef DEV_APIC
  228         .msi_init =                     msi_init,
  229 #endif
  230 };
  231
      /*
       * cpu_startup: machine-dependent startup hook, registered through
       * SYSINIT at SI_SUB_CPU / SI_ORDER_FIRST.
       *
       * In order, it: applies the legacy-USB SMI workaround on the listed
       * Apple models, calls startrtclock(), prints CPU information and
       * panics on unsupported CPUs, reports the real memory size, optionally
       * lists the physical memory chunks, initializes the kernel submaps,
       * reports available memory, sets up the buffer subsystem and finally
       * calls cpu_setregs().
       *
       * dummy: SYSINIT argument; not used.
       */
  232 static void
  233 cpu_startup(void *dummy)
  235 {
  236         uintmax_t memsize;
  237         char *sysenv;
  238
  239         /*
  240          * On MacBooks, we need to disallow the legacy USB circuit to
  241          * generate an SMI# because this can cause several problems,
  242          * namely: incorrect CPU frequency detection and failure to
  243          * start the APs.
  244          * We do this by disabling a bit in the SMI_EN (SMI Control and
  245          * Enable register) of the Intel ICH LPC Interface Bridge.
  246          */
  247         sysenv = kern_getenv("smbios.system.product");
  248         if (sysenv != NULL) {
                      /* Prefix match, so longer product strings also hit. */
  249                 if (strncmp(sysenv, "MacBook1,1", 10) == 0 ||
  250                     strncmp(sysenv, "MacBook3,1", 10) == 0 ||
  251                     strncmp(sysenv, "MacBook4,1", 10) == 0 ||
  252                     strncmp(sysenv, "MacBookPro1,1", 13) == 0 ||
  253                     strncmp(sysenv, "MacBookPro1,2", 13) == 0 ||
  254                     strncmp(sysenv, "MacBookPro3,1", 13) == 0 ||
  255                     strncmp(sysenv, "MacBookPro4,1", 13) == 0 ||
  256                     strncmp(sysenv, "Macmini1,1", 10) == 0) {
  257                         if (bootverbose)
  258                                 printf("Disabling LEGACY_USB_EN bit on "
  259                                     "Intel ICH.\n");
                              /* Read-modify-write: clear bit 3 only. */
  260                         outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8);
  261                 }
                      /* kern_getenv() results are released with freeenv(). */
  262                 freeenv(sysenv);
  263         }
  264
  265         /*
  266          * Good {morning,afternoon,evening,night}.
  267          */
  268         startrtclock();
  269         printcpuinfo();
  270         panicifcpuunsupported();
  271 #ifdef PERFMON
  272         perfmon_init();
  273 #endif
  274
  275         /*
  276          * Display physical memory if SMBIOS reports reasonable amount.
  277          */
  278         memsize = 0;
  279         sysenv = kern_getenv("smbios.memory.enabled");
  280         if (sysenv != NULL) {
                      /* The value is shifted left by 10, i.e. scaled by 1024. */
  281                 memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10;
  282                 freeenv(sysenv);
  283         }
              /*
               * No SMBIOS value, or one smaller than the memory currently
               * free: fall back to Maxmem pages converted to bytes.
               */
  284         if (memsize < ptoa((uintmax_t)vm_free_count()))
  285                 memsize = ptoa((uintmax_t)Maxmem);
  286         printf("real memory  = %ju (%ju MB)\n", memsize, memsize >> 20);
  287         realmem = atop(memsize);
  288
  289         /*
  290          * Display any holes after the first chunk of extended memory.
  291          */
  292         if (bootverbose) {
  293                 int indx;
  294
  295                 printf("Physical memory chunk(s):\n");
                      /* phys_avail holds (start, end) pairs; end == 0 stops. */
  296                 for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
  297                         vm_paddr_t size;
  298
  299                         size = phys_avail[indx + 1] - phys_avail[indx];
  300                         printf(
  301                             "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n",
  302                             (uintmax_t)phys_avail[indx],
  303                             (uintmax_t)phys_avail[indx + 1] - 1,
  304                             (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
  305                 }
  306         }
  307
  308         vm_ksubmap_init(&kmi);
  309
  310         printf("avail memory = %ju (%ju MB)\n",
  311             ptoa((uintmax_t)vm_free_count()),
  312             ptoa((uintmax_t)vm_free_count()) / 1048576);
  313
  314         /*
  315          * Set up buffers, so they can be used to read disk labels.
  316          */
  317         bufinit();
  318         vm_pager_bufferinit();
  319         cpu_setregs();
  320 }
  321 
  322 /*
  323  * Send an interrupt to process.
  324  *
  325  * Stack is set up to allow sigcode stored
  326  * at top to call routine, followed by call
  327  * to sigreturn routine below.  After sigreturn
  328  * resets the signal mask, the stack, and the
  329  * frame pointer, it returns to the user
  330  * specified pc, psl.
       *
       * osendsig() is the COMPAT_43 variant.  It builds a struct osigframe
       * on the kernel stack, copies it out to the user stack (or to the
       * alternate signal stack) and redirects the trapframe so that the
       * thread resumes in the old-style sigcode.
       *
       * catcher: user handler to invoke.
       * ksi:     signal information (number, code, address).
       * mask:    signal mask saved in the frame, converted with SIG2OSIG().
       *
       * Locking: entered with the process lock and ps_mtx held (both are
       * asserted); both are dropped while the frame is filled in and copied
       * out, and both are re-acquired before returning.
  331  */
  332 #ifdef COMPAT_43
  333 static void
  334 osendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
  335 {
  336         struct osigframe sf, *fp;
  337         struct proc *p;
  338         struct thread *td;
  339         struct sigacts *psp;
  340         struct trapframe *regs;
  341         int sig;
  342         int oonstack;
  343
  344         td = curthread;
  345         p = td->td_proc;
  346         PROC_LOCK_ASSERT(p, MA_OWNED);
  347         sig = ksi->ksi_signo;
  348         psp = p->p_sigacts;
  349         mtx_assert(&psp->ps_mtx, MA_OWNED);
  350         regs = td->td_frame;
              /* Remember whether the interrupted code was on the signal stack. */
  351         oonstack = sigonstack(regs->tf_esp);
  352
  353         /* Allocate space for the signal handler context. */
              /*
               * Use the top of the alternate stack when one is configured, we
               * are not already on it, and this signal asked for it; otherwise
               * place the frame one osigframe below the current user %esp.
               */
  354         if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
  355             SIGISMEMBER(psp->ps_sigonstack, sig)) {
  356                 fp = (struct osigframe *)((uintptr_t)td->td_sigstk.ss_sp +
  357                     td->td_sigstk.ss_size - sizeof(struct osigframe));
                      /* Always true here: the whole function is under COMPAT_43. */
  358 #if defined(COMPAT_43)
  359                 td->td_sigstk.ss_flags |= SS_ONSTACK;
  360 #endif
  361         } else
  362                 fp = (struct osigframe *)regs->tf_esp - 1;
  363
  364         /* Build the argument list for the signal handler. */
  365         sf.sf_signum = sig;
              /* User-space address of the saved context inside the frame. */
  366         sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc;
  367         bzero(&sf.sf_siginfo, sizeof(sf.sf_siginfo));
  368         if (SIGISMEMBER(psp->ps_siginfo, sig)) {
  369                 /* Signal handler installed with SA_SIGINFO. */
  370                 sf.sf_arg2 = (register_t)&fp->sf_siginfo;
  371                 sf.sf_siginfo.si_signo = sig;
  372                 sf.sf_siginfo.si_code = ksi->ksi_code;
  373                 sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher;
  374                 sf.sf_addr = 0;
  375         } else {
  376                 /* Old FreeBSD-style arguments. */
  377                 sf.sf_arg2 = ksi->ksi_code;
  378                 sf.sf_addr = (register_t)ksi->ksi_addr;
  379                 sf.sf_ahu.sf_handler = catcher;
  380         }
              /* Done with sigacts; drop both locks before touching user memory. */
  381         mtx_unlock(&psp->ps_mtx);
  382         PROC_UNLOCK(p);
  383
  384         /* Save most if not all of trap frame. */
  385         sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax;
  386         sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx;
  387         sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx;
  388         sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx;
  389         sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi;
  390         sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi;
  391         sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs;
  392         sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds;
  393         sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss;
  394         sf.sf_siginfo.si_sc.sc_es = regs->tf_es;
  395         sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs;
              /* %gs is read from the CPU with rgs(), not from the trapframe. */
  396         sf.sf_siginfo.si_sc.sc_gs = rgs();
  397         sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp;
  398
  399         /* Build the signal context to be used by osigreturn(). */
  400         sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0;
  401         SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask);
  402         sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp;
  403         sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp;
  404         sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip;
  405         sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags;
  406         sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno;
  407         sf.sf_siginfo.si_sc.sc_err = regs->tf_err;
  408
  409         /*
  410          * If we're a vm86 process, we want to save the segment registers.
  411          * We also change eflags to be our emulated eflags, not the actual
  412          * eflags.
  413          */
  414         if (regs->tf_eflags & PSL_VM) {
  415                 /* XXX confusing names: `tf' isn't a trapframe; `regs' is. */
  416                 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
  417                 struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86;
  418
                      /* Overwrite the segment values saved above with the vm86 ones. */
  419                 sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs;
  420                 sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs;
  421                 sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es;
  422                 sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds;
  423
                      /* Without VME, VIF/VIP are taken from the emulated eflags. */
  424                 if (vm86->vm86_has_vme == 0)
  425                         sf.sf_siginfo.si_sc.sc_ps =
  426                             (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
  427                             (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
  428
  429                 /* See sendsig() for comments. */
  430                 tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
  431         }
  432
  433         /*
  434          * Copy the sigframe out to the user's stack.
  435          */
  436         if (copyout(&sf, fp, sizeof(*fp)) != 0) {
                      /*
                       * The user stack is unusable: take the process lock and
                       * terminate with SIGILL.  sigexit() is presumably
                       * non-returning - confirm.
                       */
  437                 PROC_LOCK(p);
  438                 sigexit(td, SIGILL);
  439         }
  440
              /* Point the thread at the new frame and at the old-style sigcode. */
  441         regs->tf_esp = (int)fp;
  442         if (p->p_sysent->sv_sigcode_base != 0) {
  443                 regs->tf_eip = p->p_sysent->sv_sigcode_base + szsigcode -
  444                     szosigcode;
  445         } else {
  446                 /* a.out sysentvec does not use shared page */
  447                 regs->tf_eip = p->p_sysent->sv_psstrings - szosigcode;
  448         }
              /* Enter the handler with the trace and direction flags cleared. */
  449         regs->tf_eflags &= ~(PSL_T | PSL_D);
  450         regs->tf_cs = _ucodesel;
  451         regs->tf_ds = _udatasel;
  452         regs->tf_es = _udatasel;
  453         regs->tf_fs = _udatasel;
  454         load_gs(_udatasel);
  455         regs->tf_ss = _udatasel;
              /* Restore the lock state the caller entered with. */
  456         PROC_LOCK(p);
  457         mtx_lock(&psp->ps_mtx);
  458 }
  459 #endif /* COMPAT_43 */
  460 
  461 #ifdef COMPAT_FREEBSD4
      /*
       * freebsd4_sendsig: deliver a signal using the FreeBSD 4 frame layout.
       *
       * Builds a struct sigframe4 on the kernel stack, copies it out to the
       * user stack (or to the alternate signal stack) and redirects the
       * trapframe to the freebsd4 sigcode.
       *
       * catcher: user handler to invoke.
       * ksi:     signal information (number, code, address).
       * mask:    signal mask stored in the saved ucontext.
       *
       * Locking: entered with the process lock and ps_mtx held (both are
       * asserted); both are dropped before the copyout and re-acquired
       * before returning.
       */
  462 static void
  463 freebsd4_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
  464 {
  465         struct sigframe4 sf, *sfp;
  466         struct proc *p;
  467         struct thread *td;
  468         struct sigacts *psp;
  469         struct trapframe *regs;
  470         int sig;
  471         int oonstack;
  472
  473         td = curthread;
  474         p = td->td_proc;
  475         PROC_LOCK_ASSERT(p, MA_OWNED);
  476         sig = ksi->ksi_signo;
  477         psp = p->p_sigacts;
  478         mtx_assert(&psp->ps_mtx, MA_OWNED);
  479         regs = td->td_frame;
              /* Remember whether the interrupted code was on the signal stack. */
  480         oonstack = sigonstack(regs->tf_esp);
  481
  482         /* Save user context. */
              /* Zero the whole frame first so unset fields are copied out as 0. */
  483         bzero(&sf, sizeof(sf));
  484         sf.sf_uc.uc_sigmask = *mask;
  485         sf.sf_uc.uc_stack = td->td_sigstk;
  486         sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
  487             ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
  488         sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
              /* %gs is read from the CPU with rgs(), not from the trapframe. */
  489         sf.sf_uc.uc_mcontext.mc_gs = rgs();
              /*
               * Bulk-copy the trapframe into the mcontext starting at mc_fs;
               * this relies on the two layouts matching from that member on.
               */
  490         bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs));
  491         bzero(sf.sf_uc.uc_mcontext.mc_fpregs,
  492             sizeof(sf.sf_uc.uc_mcontext.mc_fpregs));
  493         bzero(sf.sf_uc.uc_mcontext.__spare__,
  494             sizeof(sf.sf_uc.uc_mcontext.__spare__));
  495         bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__));
  496
  497         /* Allocate space for the signal handler context. */
              /*
               * Use the top of the alternate stack when one is configured, we
               * are not already on it, and this signal asked for it; otherwise
               * place the frame one sigframe4 below the current user %esp.
               */
  498         if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
  499             SIGISMEMBER(psp->ps_sigonstack, sig)) {
  500                 sfp = (struct sigframe4 *)((uintptr_t)td->td_sigstk.ss_sp +
  501                     td->td_sigstk.ss_size - sizeof(struct sigframe4));
  502 #if defined(COMPAT_43)
  503                 td->td_sigstk.ss_flags |= SS_ONSTACK;
  504 #endif
  505         } else
  506                 sfp = (struct sigframe4 *)regs->tf_esp - 1;
  507
  508         /* Build the argument list for the signal handler. */
  509         sf.sf_signum = sig;
              /* User-space address of the saved ucontext inside the frame. */
  510         sf.sf_ucontext = (register_t)&sfp->sf_uc;
  511         bzero(&sf.sf_si, sizeof(sf.sf_si));
  512         if (SIGISMEMBER(psp->ps_siginfo, sig)) {
  513                 /* Signal handler installed with SA_SIGINFO. */
  514                 sf.sf_siginfo = (register_t)&sfp->sf_si;
  515                 sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;
  516
  517                 /* Fill in POSIX parts */
  518                 sf.sf_si.si_signo = sig;
  519                 sf.sf_si.si_code = ksi->ksi_code;
  520                 sf.sf_si.si_addr = ksi->ksi_addr;
  521         } else {
  522                 /* Old FreeBSD-style arguments. */
  523                 sf.sf_siginfo = ksi->ksi_code;
  524                 sf.sf_addr = (register_t)ksi->ksi_addr;
  525                 sf.sf_ahu.sf_handler = catcher;
  526         }
              /* Done with sigacts; drop both locks before touching user memory. */
  527         mtx_unlock(&psp->ps_mtx);
  528         PROC_UNLOCK(p);
  529
  530         /*
  531          * If we're a vm86 process, we want to save the segment registers.
  532          * We also change eflags to be our emulated eflags, not the actual
  533          * eflags.
  534          */
  535         if (regs->tf_eflags & PSL_VM) {
  536                 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
  537                 struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86;
  538
                      /* Overwrite the segment values saved above with the vm86 ones. */
  539                 sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs;
  540                 sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs;
  541                 sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es;
  542                 sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds;
  543
                      /* Without VME, VIF/VIP are taken from the emulated eflags. */
  544                 if (vm86->vm86_has_vme == 0)
  545                         sf.sf_uc.uc_mcontext.mc_eflags =
  546                             (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
  547                             (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
  548
  549                 /*
  550                  * Clear PSL_NT to inhibit T_TSSFLT faults on return from
  551                  * syscalls made by the signal handler.  This just avoids
  552                  * wasting time for our lazy fixup of such faults.  PSL_NT
  553                  * does nothing in vm86 mode, but vm86 programs can set it
  554                  * almost legitimately in probes for old cpu types.
  555                  */
  556                 tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
  557         }
  558
  559         /*
  560          * Copy the sigframe out to the user's stack.
  561          */
  562         if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
                      /*
                       * The user stack is unusable: take the process lock and
                       * terminate with SIGILL.  sigexit() is presumably
                       * non-returning - confirm.
                       */
  563                 PROC_LOCK(p);
  564                 sigexit(td, SIGILL);
  565         }
  566
              /* Point the thread at the new frame and at the freebsd4 sigcode. */
  567         regs->tf_esp = (int)sfp;
  568         regs->tf_eip = p->p_sysent->sv_sigcode_base + szsigcode -
  569             szfreebsd4_sigcode;
              /* Enter the handler with the trace and direction flags cleared. */
  570         regs->tf_eflags &= ~(PSL_T | PSL_D);
  571         regs->tf_cs = _ucodesel;
  572         regs->tf_ds = _udatasel;
  573         regs->tf_es = _udatasel;
  574         regs->tf_fs = _udatasel;
  575         regs->tf_ss = _udatasel;
              /* Restore the lock state the caller entered with. */
  576         PROC_LOCK(p);
  577         mtx_lock(&psp->ps_mtx);
  578 }
  579 #endif  /* COMPAT_FREEBSD4 */
  580
      /*
       * sendsig: deliver a signal to the current thread using the native
       * frame layout.
       *
       * When the signal is marked in ps_freebsd4 or ps_osigset (and the
       * matching compat option is compiled in) the work is handed to
       * freebsd4_sendsig() or osendsig() instead.  Otherwise a struct
       * sigframe, plus the extended FPU state when xsave is in use, is
       * built on the kernel stack, copied out to the user stack (or to the
       * alternate signal stack), and the trapframe is redirected to the
       * sigcode.
       *
       * catcher: user handler to invoke.
       * ksi:     signal information; copied whole into the frame's siginfo
       *          for SA_SIGINFO handlers.
       * mask:    signal mask stored in the saved ucontext.
       *
       * Locking: entered with the process lock and ps_mtx held (both are
       * asserted); both are dropped before the copyout and re-acquired
       * before returning.
       */
  581 void
  582 sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
  583 {
  584         struct sigframe sf, *sfp;
  585         struct proc *p;
  586         struct thread *td;
  587         struct sigacts *psp;
  588         char *sp;
  589         struct trapframe *regs;
  590         struct segment_descriptor *sdp;
  591         char *xfpusave;
  592         size_t xfpusave_len;
  593         int sig;
  594         int oonstack;
  595
  596         td = curthread;
  597         p = td->td_proc;
  598         PROC_LOCK_ASSERT(p, MA_OWNED);
  599         sig = ksi->ksi_signo;
  600         psp = p->p_sigacts;
  601         mtx_assert(&psp->ps_mtx, MA_OWNED);
              /* Handlers registered through the older ABIs get the older frames. */
  602 #ifdef COMPAT_FREEBSD4
  603         if (SIGISMEMBER(psp->ps_freebsd4, sig)) {
  604                 freebsd4_sendsig(catcher, ksi, mask);
  605                 return;
  606         }
  607 #endif
  608 #ifdef COMPAT_43
  609         if (SIGISMEMBER(psp->ps_osigset, sig)) {
  610                 osendsig(catcher, ksi, mask);
  611                 return;
  612         }
  613 #endif
  614         regs = td->td_frame;
              /* Remember whether the interrupted code was on the signal stack. */
  615         oonstack = sigonstack(regs->tf_esp);
  616
              /*
               * With xsave in use and an extended state larger than union
               * savefpu, the part beyond union savefpu is staged in a
               * temporary buffer obtained with __builtin_alloca().
               */
  617         if (cpu_max_ext_state_size > sizeof(union savefpu) && use_xsave) {
  618                 xfpusave_len = cpu_max_ext_state_size - sizeof(union savefpu);
  619                 xfpusave = __builtin_alloca(xfpusave_len);
  620         } else {
  621                 xfpusave_len = 0;
  622                 xfpusave = NULL;
  623         }
  624
  625         /* Save user context. */
              /* Zero the whole frame first so unset fields are copied out as 0. */
  626         bzero(&sf, sizeof(sf));
  627         sf.sf_uc.uc_sigmask = *mask;
  628         sf.sf_uc.uc_stack = td->td_sigstk;
  629         sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
  630             ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
  631         sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
              /* %gs is read from the CPU with rgs(), not from the trapframe. */
  632         sf.sf_uc.uc_mcontext.mc_gs = rgs();
              /*
               * Bulk-copy the trapframe into the mcontext starting at mc_fs;
               * this relies on the two layouts matching from that member on.
               */
  633         bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs));
  634         sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
  635         get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len);
  636         fpstate_drop(td);
  637         /*
  638          * Unconditionally fill the fsbase and gsbase into the mcontext.
  639          */
              /*
               * NOTE(review): the base is rebuilt from the descriptor's
               * sd_hibase and sd_lobase pieces.  If sd_hibase is a narrow
               * unsigned field it promotes to int, and shifting a value with
               * its top bit set left by 24 overflows a signed int - confirm
               * against the definition of struct segment_descriptor.
               */
  640         sdp = &td->td_pcb->pcb_fsd;
  641         sf.sf_uc.uc_mcontext.mc_fsbase = sdp->sd_hibase << 24 |
  642             sdp->sd_lobase;
  643         sdp = &td->td_pcb->pcb_gsd;
  644         sf.sf_uc.uc_mcontext.mc_gsbase = sdp->sd_hibase << 24 |
  645             sdp->sd_lobase;
  646         bzero(sf.sf_uc.uc_mcontext.mc_spare2,
  647             sizeof(sf.sf_uc.uc_mcontext.mc_spare2));
  648         bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__));
  649
  650         /* Allocate space for the signal handler context. */
              /*
               * Start from the top of the alternate stack when one is
               * configured, we are not already on it, and this signal asked
               * for it; otherwise start 128 bytes below the current user %esp
               * (presumably to stay clear of data just below the stack
               * pointer - confirm).
               */
  651         if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
  652             SIGISMEMBER(psp->ps_sigonstack, sig)) {
  653                 sp = (char *)td->td_sigstk.ss_sp + td->td_sigstk.ss_size;
  654 #if defined(COMPAT_43)
  655                 td->td_sigstk.ss_flags |= SS_ONSTACK;
  656 #endif
  657         } else
  658                 sp = (char *)regs->tf_esp - 128;
  659         if (xfpusave != NULL) {
                      /* Reserve the extended FPU area, rounded down to 64 bytes. */
  660                 sp -= xfpusave_len;
  661                 sp = (char *)((unsigned int)sp & ~0x3F);
  662                 sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp;
  663         }
  664         sp -= sizeof(struct sigframe);
  665
  666         /* Align to 16 bytes. */
  667         sfp = (struct sigframe *)((unsigned int)sp & ~0xF);
  668
  669         /* Build the argument list for the signal handler. */
  670         sf.sf_signum = sig;
              /* User-space address of the saved ucontext inside the frame. */
  671         sf.sf_ucontext = (register_t)&sfp->sf_uc;
  672         bzero(&sf.sf_si, sizeof(sf.sf_si));
  673         if (SIGISMEMBER(psp->ps_siginfo, sig)) {
  674                 /* Signal handler installed with SA_SIGINFO. */
  675                 sf.sf_siginfo = (register_t)&sfp->sf_si;
  676                 sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;
  677
  678                 /* Fill in POSIX parts */
  679                 sf.sf_si = ksi->ksi_info;
  680                 sf.sf_si.si_signo = sig; /* maybe a translated signal */
  681         } else {
  682                 /* Old FreeBSD-style arguments. */
  683                 sf.sf_siginfo = ksi->ksi_code;
  684                 sf.sf_addr = (register_t)ksi->ksi_addr;
  685                 sf.sf_ahu.sf_handler = catcher;
  686         }
              /* Done with sigacts; drop both locks before touching user memory. */
  687         mtx_unlock(&psp->ps_mtx);
  688         PROC_UNLOCK(p);
  689
  690         /*
  691          * If we're a vm86 process, we want to save the segment registers.
  692          * We also change eflags to be our emulated eflags, not the actual
  693          * eflags.
  694          */
  695         if (regs->tf_eflags & PSL_VM) {
  696                 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
  697                 struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86;
  698
                      /* Overwrite the segment values saved above with the vm86 ones. */
  699                 sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs;
  700                 sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs;
  701                 sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es;
  702                 sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds;
  703
                      /* Without VME, VIF/VIP are taken from the emulated eflags. */
  704                 if (vm86->vm86_has_vme == 0)
  705                         sf.sf_uc.uc_mcontext.mc_eflags =
  706                             (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
  707                             (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
  708
  709                 /*
  710                  * Clear PSL_NT to inhibit T_TSSFLT faults on return from
  711                  * syscalls made by the signal handler.  This just avoids
  712                  * wasting time for our lazy fixup of such faults.  PSL_NT
  713                  * does nothing in vm86 mode, but vm86 programs can set it
  714                  * almost legitimately in probes for old cpu types.
  715                  */
  716                 tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
  717         }
  718
  719         /*
  720          * Copy the sigframe out to the user's stack.
  721          */
              /*
               * The frame goes out first, then (when present) the extended FPU
               * state to the address recorded in mc_xfpustate.  If either copy
               * fails, take the process lock and terminate with SIGILL;
               * sigexit() is presumably non-returning - confirm.
               */
  722         if (copyout(&sf, sfp, sizeof(*sfp)) != 0 ||
  723             (xfpusave != NULL && copyout(xfpusave,
  724             (void *)sf.sf_uc.uc_mcontext.mc_xfpustate, xfpusave_len)
  725             != 0)) {
  726                 PROC_LOCK(p);
  727                 sigexit(td, SIGILL);
  728         }
  729
              /* Point the thread at the new frame and at the sigcode. */
  730         regs->tf_esp = (int)sfp;
  731         regs->tf_eip = p->p_sysent->sv_sigcode_base;
              /* No sigcode base: the sigcode sits just below sv_psstrings. */
  732         if (regs->tf_eip == 0)
  733                 regs->tf_eip = p->p_sysent->sv_psstrings - szsigcode;
              /* Enter the handler with the trace and direction flags cleared. */
  734         regs->tf_eflags &= ~(PSL_T | PSL_D);
  735         regs->tf_cs = _ucodesel;
  736         regs->tf_ds = _udatasel;
  737         regs->tf_es = _udatasel;
  738         regs->tf_fs = _udatasel;
  739         regs->tf_ss = _udatasel;
              /* Restore the lock state the caller entered with. */
  740         PROC_LOCK(p);
  741         mtx_lock(&psp->ps_mtx);
  742 }
  743 
  744 /*
  745  * System call to cleanup state after a signal
  746  * has been taken.  Reset signal mask and
  747  * stack state from context left by sendsig (above).
  748  * Return to previous pc and psl as specified by
  749  * context left by sendsig. Check carefully to
  750  * make sure that the user has not modified the
  751  * state to gain improper privileges.
  752  *
       * osigreturn is the COMPAT_43 variant and reads a struct osigcontext
       * from uap->sigcntxp.
       *
       * Returns the copyin() error if the context cannot be read; EINVAL if
       * vm86 mode is requested without an initialized vm86 area, if the new
       * eflags fail EFL_SECURE(), or if the new %cs fails CS_SECURE() (a
       * SIGBUS is posted first in that case); EJUSTRETURN on success.
       *
  753  * MPSAFE
  754  */
  755 #ifdef COMPAT_43
  756 int
  757 osigreturn(td, uap)
  758         struct thread *td;
  759         struct osigreturn_args /* {
  760                 struct osigcontext *sigcntxp;
  761         } */ *uap;
  762 {
  763         struct osigcontext sc;
  764         struct trapframe *regs;
  765         struct osigcontext *scp;
  766         int eflags, error;
  767         ksiginfo_t ksi;
  768 
  769         regs = td->td_frame;
  770         error = copyin(uap->sigcntxp, &sc, sizeof(sc));
  771         if (error != 0)
  772                 return (error);
  773         scp = &sc;      /* all checks below run on the kernel copy */
  774         eflags = scp->sc_ps;
  775         if (eflags & PSL_VM) {
  776                 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
  777                 struct vm86_kernel *vm86;
  778 
  779                 /*
  780                  * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
  781                  * set up the vm86 area, and we can't enter vm86 mode.
  782                  */
  783                 if (td->td_pcb->pcb_ext == 0)
  784                         return (EINVAL);
  785                 vm86 = &td->td_pcb->pcb_ext->ext_vm86;
  786                 if (vm86->vm86_inited == 0)
  787                         return (EINVAL);
  788 
  789                 /* Go back to user mode if both flags are set. */
  790                 if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) {
  791                         ksiginfo_init_trap(&ksi);
  792                         ksi.ksi_signo = SIGBUS;
  793                         ksi.ksi_code = BUS_OBJERR;
  794                         ksi.ksi_addr = (void *)regs->tf_eip;
  795                         trapsignal(td, &ksi);
                              /* No return here: the frame is still restored below. */
  796                 }
  797 
                      /*
                       * Merge only the user-changeable flag bits (the mask
                       * depends on vm86_has_vme) into the current eflags and
                       * force PSL_VM on.
                       */
  798                 if (vm86->vm86_has_vme) {
  799                         eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
  800                             (eflags & VME_USERCHANGE) | PSL_VM;
  801                 } else {
  802                         vm86->vm86_eflags = eflags;     /* save VIF, VIP */
  803                         eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
  804                             (eflags & VM_USERCHANGE) | PSL_VM;
  805                 }
                      /*
                       * The saved segment values go into the tf_vm86_* slots;
                       * the regular %ds/%es/%fs slots get the user data selector.
                       */
  806                 tf->tf_vm86_ds = scp->sc_ds;
  807                 tf->tf_vm86_es = scp->sc_es;
  808                 tf->tf_vm86_fs = scp->sc_fs;
  809                 tf->tf_vm86_gs = scp->sc_gs;
  810                 tf->tf_ds = _udatasel;
  811                 tf->tf_es = _udatasel;
  812                 tf->tf_fs = _udatasel;
  813         } else {
  814                 /*
  815                  * Don't allow users to change privileged or reserved flags.
  816                  */
  817                 if (!EFL_SECURE(eflags, regs->tf_eflags)) {
  818                         return (EINVAL);
  819                 }
  820 
  821                 /*
  822                  * Don't allow users to load a valid privileged %cs.  Let the
  823                  * hardware check for invalid selectors, excess privilege in
  824                  * other selectors, invalid %eip's and invalid %esp's.
  825                  */
  826                 if (!CS_SECURE(scp->sc_cs)) {
  827                         ksiginfo_init_trap(&ksi);
  828                         ksi.ksi_signo = SIGBUS;
  829                         ksi.ksi_code = BUS_OBJERR;
  830                         ksi.ksi_trapno = T_PROTFLT;
  831                         ksi.ksi_addr = (void *)regs->tf_eip;
  832                         trapsignal(td, &ksi);
  833                         return (EINVAL);
  834                 }
  835                 regs->tf_ds = scp->sc_ds;
  836                 regs->tf_es = scp->sc_es;
  837                 regs->tf_fs = scp->sc_fs;
  838         }
  839 
  840         /* Restore remaining registers. */
              /* This part is shared by the vm86 and the protected-mode paths. */
  841         regs->tf_eax = scp->sc_eax;
  842         regs->tf_ebx = scp->sc_ebx;
  843         regs->tf_ecx = scp->sc_ecx;
  844         regs->tf_edx = scp->sc_edx;
  845         regs->tf_esi = scp->sc_esi;
  846         regs->tf_edi = scp->sc_edi;
  847         regs->tf_cs = scp->sc_cs;
  848         regs->tf_ss = scp->sc_ss;
  849         regs->tf_isp = scp->sc_isp;
  850         regs->tf_ebp = scp->sc_fp;
  851         regs->tf_esp = scp->sc_sp;
  852         regs->tf_eip = scp->sc_pc;
  853         regs->tf_eflags = eflags;       /* the filtered value, not sc_ps */
  854 
  855 #if defined(COMPAT_43)  /* always true inside the enclosing #ifdef COMPAT_43 */
  856         if (scp->sc_onstack & 1)        /* bit 0 selects SS_ONSTACK */
  857                 td->td_sigstk.ss_flags |= SS_ONSTACK;
  858         else
  859                 td->td_sigstk.ss_flags &= ~SS_ONSTACK;
  860 #endif
              /*
               * NOTE(review): sc_mask is passed through a sigset_t cast with
               * SIGPROCMASK_OLD; presumably that flag tells kern_sigprocmask()
               * the mask is in the old format -- confirm against its definition.
               */
  861         kern_sigprocmask(td, SIG_SETMASK, (sigset_t *)&scp->sc_mask, NULL,
  862             SIGPROCMASK_OLD);
  863         return (EJUSTRETURN);
  864 }
  865 #endif /* COMPAT_43 */
  866 
  867 #ifdef COMPAT_FREEBSD4
  868 /*
       * freebsd4_sigreturn: restore the register state and signal mask from
       * a struct ucontext4 copied in from uap->sigcntxp.
       *
       * vm86 mode requires an initialized vm86 area; otherwise the new eflags
       * must pass EFL_SECURE() and the new %cs must pass CS_SECURE().  The
       * registers are restored with a single bcopy() that starts at mc_fs and
       * covers sizeof(struct trapframe) bytes, so it relies on that part of
       * the mcontext having the same layout as struct trapframe.
       *
       * Returns the copyin() error, EINVAL when a check fails (a SIGBUS is
       * also posted for a bad %cs), or EJUSTRETURN on success.
       *
  869  * MPSAFE
  870  */
  871 int
  872 freebsd4_sigreturn(td, uap)
  873         struct thread *td;
  874         struct freebsd4_sigreturn_args /* {
  875                 const ucontext4 *sigcntxp;
  876         } */ *uap;
  877 {
  878         struct ucontext4 uc;
  879         struct trapframe *regs;
  880         struct ucontext4 *ucp;
  881         int cs, eflags, error;
  882         ksiginfo_t ksi;
  883 
  884         error = copyin(uap->sigcntxp, &uc, sizeof(uc));
  885         if (error != 0)
  886                 return (error);
  887         ucp = &uc;      /* all checks below run on the kernel copy */
  888         regs = td->td_frame;
  889         eflags = ucp->uc_mcontext.mc_eflags;
  890         if (eflags & PSL_VM) {
  891                 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
  892                 struct vm86_kernel *vm86;
  893 
  894                 /*
  895                  * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
  896                  * set up the vm86 area, and we can't enter vm86 mode.
  897                  */
  898                 if (td->td_pcb->pcb_ext == 0)
  899                         return (EINVAL);
  900                 vm86 = &td->td_pcb->pcb_ext->ext_vm86;
  901                 if (vm86->vm86_inited == 0)
  902                         return (EINVAL);
  903 
  904                 /* Go back to user mode if both flags are set. */
  905                 if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) {
  906                         ksiginfo_init_trap(&ksi);
  907                         ksi.ksi_signo = SIGBUS;
  908                         ksi.ksi_code = BUS_OBJERR;
  909                         ksi.ksi_addr = (void *)regs->tf_eip;
  910                         trapsignal(td, &ksi);
                              /* No return here: the frame is still restored below. */
  911                 }
                      /*
                       * Merge only the user-changeable flag bits into the
                       * current eflags and force PSL_VM on.
                       */
  912                 if (vm86->vm86_has_vme) {
  913                         eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
  914                             (eflags & VME_USERCHANGE) | PSL_VM;
  915                 } else {
  916                         vm86->vm86_eflags = eflags;     /* save VIF, VIP */
  917                         eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
  918                             (eflags & VM_USERCHANGE) | PSL_VM;
  919                 }
                      /*
                       * Copy the whole frame first, then overwrite eflags with
                       * the filtered value, move the just-copied %ds/%es/%fs
                       * into the tf_vm86_* slots and put the user data selector
                       * in the regular slots.
                       */
  920                 bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe));
  921                 tf->tf_eflags = eflags;
  922                 tf->tf_vm86_ds = tf->tf_ds;
  923                 tf->tf_vm86_es = tf->tf_es;
  924                 tf->tf_vm86_fs = tf->tf_fs;
  925                 tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs;
  926                 tf->tf_ds = _udatasel;
  927                 tf->tf_es = _udatasel;
  928                 tf->tf_fs = _udatasel;
  929         } else {
  930                 /*
  931                  * Don't allow users to change privileged or reserved flags.
  932                  */
  933                 if (!EFL_SECURE(eflags, regs->tf_eflags)) {
  934                         uprintf("pid %d (%s): freebsd4_sigreturn eflags = 0x%x\n",
  935                             td->td_proc->p_pid, td->td_name, eflags);
  936                         return (EINVAL);
  937                 }
  938 
  939                 /*
  940                  * Don't allow users to load a valid privileged %cs.  Let the
  941                  * hardware check for invalid selectors, excess privilege in
  942                  * other selectors, invalid %eip's and invalid %esp's.
  943                  */
  944                 cs = ucp->uc_mcontext.mc_cs;
  945                 if (!CS_SECURE(cs)) {
  946                         uprintf("pid %d (%s): freebsd4_sigreturn cs = 0x%x\n",
  947                             td->td_proc->p_pid, td->td_name, cs);
  948                         ksiginfo_init_trap(&ksi);
  949                         ksi.ksi_signo = SIGBUS;
  950                         ksi.ksi_code = BUS_OBJERR;
  951                         ksi.ksi_trapno = T_PROTFLT;
  952                         ksi.ksi_addr = (void *)regs->tf_eip;
  953                         trapsignal(td, &ksi);
  954                         return (EINVAL);
  955                 }
  956 
                      /* Checks passed: install the user-supplied frame as is. */
  957                 bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
  958         }
  959 
  960 #if defined(COMPAT_43)
  961         if (ucp->uc_mcontext.mc_onstack & 1)    /* bit 0 selects SS_ONSTACK */
  962                 td->td_sigstk.ss_flags |= SS_ONSTACK;
  963         else
  964                 td->td_sigstk.ss_flags &= ~SS_ONSTACK;
  965 #endif
  966         kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0);
  967         return (EJUSTRETURN);
  968 }
  969 #endif  /* COMPAT_FREEBSD4 */
  970 
  971 /*
       * sigreturn system call: restore the register state, FPU state and
       * signal mask from a ucontext_t copied in from uap->sigcntxp.
       *
       * Checks, in order: mc_flags may only contain bits in _MC_FLAG_MASK;
       * vm86 mode requires an initialized vm86 area; otherwise the new eflags
       * must pass EFL_SECURE(), the new %cs must pass CS_SECURE(), and any
       * extended FPU state must fit in cpu_max_ext_state_size -
       * sizeof(union savefpu) bytes.  FPU state is restored (set_fpcontext())
       * only on the non-vm86 path.
       *
       * Returns the copyin() error, EINVAL when a check fails, the
       * set_fpcontext() error, or EJUSTRETURN on success.
       *
  972  * MPSAFE
  973  */
  974 int
  975 sys_sigreturn(td, uap)
  976         struct thread *td;
  977         struct sigreturn_args /* {
  978                 const struct __ucontext *sigcntxp;
  979         } */ *uap;
  980 {
  981         ucontext_t uc;
  982         struct proc *p;
  983         struct trapframe *regs;
  984         ucontext_t *ucp;
  985         char *xfpustate;
  986         size_t xfpustate_len;
  987         int cs, eflags, error, ret;
  988         ksiginfo_t ksi;
  989 
  990         p = td->td_proc;
  991 
  992         error = copyin(uap->sigcntxp, &uc, sizeof(uc));
  993         if (error != 0)
  994                 return (error);
  995         ucp = &uc;      /* all checks below run on the kernel copy */
              /* Reject any mc_flags bit outside _MC_FLAG_MASK. */
  996         if ((ucp->uc_mcontext.mc_flags & ~_MC_FLAG_MASK) != 0) {
  997                 uprintf("pid %d (%s): sigreturn mc_flags %x\n", p->p_pid,
  998                     td->td_name, ucp->uc_mcontext.mc_flags);
  999                 return (EINVAL);
 1000         }
 1001         regs = td->td_frame;
 1002         eflags = ucp->uc_mcontext.mc_eflags;
 1003         if (eflags & PSL_VM) {
 1004                 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 1005                 struct vm86_kernel *vm86;
 1006 
 1007                 /*
 1008                  * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
 1009                  * set up the vm86 area, and we can't enter vm86 mode.
 1010                  */
 1011                 if (td->td_pcb->pcb_ext == 0)
 1012                         return (EINVAL);
 1013                 vm86 = &td->td_pcb->pcb_ext->ext_vm86;
 1014                 if (vm86->vm86_inited == 0)
 1015                         return (EINVAL);
 1016 
 1017                 /* Go back to user mode if both flags are set. */
 1018                 if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) {
 1019                         ksiginfo_init_trap(&ksi);
 1020                         ksi.ksi_signo = SIGBUS;
 1021                         ksi.ksi_code = BUS_OBJERR;
 1022                         ksi.ksi_addr = (void *)regs->tf_eip;
 1023                         trapsignal(td, &ksi);
                              /* No return here: the frame is still restored below. */
 1024                 }
 1025 
                      /*
                       * Merge only the user-changeable flag bits into the
                       * current eflags and force PSL_VM on.
                       */
 1026                 if (vm86->vm86_has_vme) {
 1027                         eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
 1028                             (eflags & VME_USERCHANGE) | PSL_VM;
 1029                 } else {
 1030                         vm86->vm86_eflags = eflags;     /* save VIF, VIP */
 1031                         eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
 1032                             (eflags & VM_USERCHANGE) | PSL_VM;
 1033                 }
                      /*
                       * Copy the whole frame first, then overwrite eflags with
                       * the filtered value, move the just-copied %ds/%es/%fs
                       * into the tf_vm86_* slots and put the user data selector
                       * in the regular slots.
                       */
 1034                 bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe));
 1035                 tf->tf_eflags = eflags;
 1036                 tf->tf_vm86_ds = tf->tf_ds;
 1037                 tf->tf_vm86_es = tf->tf_es;
 1038                 tf->tf_vm86_fs = tf->tf_fs;
 1039                 tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs;
 1040                 tf->tf_ds = _udatasel;
 1041                 tf->tf_es = _udatasel;
 1042                 tf->tf_fs = _udatasel;
 1043         } else {
 1044                 /*
 1045                  * Don't allow users to change privileged or reserved flags.
 1046                  */
 1047                 if (!EFL_SECURE(eflags, regs->tf_eflags)) {
 1048                         uprintf("pid %d (%s): sigreturn eflags = 0x%x\n",
 1049                             td->td_proc->p_pid, td->td_name, eflags);
 1050                         return (EINVAL);
 1051                 }
 1052 
 1053                 /*
 1054                  * Don't allow users to load a valid privileged %cs.  Let the
 1055                  * hardware check for invalid selectors, excess privilege in
 1056                  * other selectors, invalid %eip's and invalid %esp's.
 1057                  */
 1058                 cs = ucp->uc_mcontext.mc_cs;
 1059                 if (!CS_SECURE(cs)) {
 1060                         uprintf("pid %d (%s): sigreturn cs = 0x%x\n",
 1061                             td->td_proc->p_pid, td->td_name, cs);
 1062                         ksiginfo_init_trap(&ksi);
 1063                         ksi.ksi_signo = SIGBUS;
 1064                         ksi.ksi_code = BUS_OBJERR;
 1065                         ksi.ksi_trapno = T_PROTFLT;
 1066                         ksi.ksi_addr = (void *)regs->tf_eip;
 1067                         trapsignal(td, &ksi);
 1068                         return (EINVAL);
 1069                 }
 1070 
                      /*
                       * Optional extended FPU state: the user-supplied length
                       * is bounded before it is used to size a stack buffer,
                       * then the state is copied in from mc_xfpustate.
                       */
 1071                 if ((uc.uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) {
 1072                         xfpustate_len = uc.uc_mcontext.mc_xfpustate_len;
 1073                         if (xfpustate_len > cpu_max_ext_state_size -
 1074                             sizeof(union savefpu)) {
 1075                                 uprintf(
 1076                             "pid %d (%s): sigreturn xfpusave_len = 0x%zx\n",
 1077                                     p->p_pid, td->td_name, xfpustate_len);
 1078                                 return (EINVAL);
 1079                         }
 1080                         xfpustate = __builtin_alloca(xfpustate_len);
 1081                         error = copyin((const void *)uc.uc_mcontext.mc_xfpustate,
 1082                             xfpustate, xfpustate_len);
 1083                         if (error != 0) {
 1084                                 uprintf(
 1085         "pid %d (%s): sigreturn copying xfpustate failed\n",
 1086                                     p->p_pid, td->td_name);
 1087                                 return (error);
 1088                         }
 1089                 } else {
 1090                         xfpustate = NULL;
 1091                         xfpustate_len = 0;
 1092                 }
                      /*
                       * The FPU context is restored before the trapframe, so
                       * a set_fpcontext() failure returns with the registers
                       * untouched.
                       */
 1093                 ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate,
 1094                     xfpustate_len);
 1095                 if (ret != 0)
 1096                         return (ret);
 1097                 bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
 1098         }
 1099 
 1100 #if defined(COMPAT_43)
 1101         if (ucp->uc_mcontext.mc_onstack & 1)    /* bit 0 selects SS_ONSTACK */
 1102                 td->td_sigstk.ss_flags |= SS_ONSTACK;
 1103         else
 1104                 td->td_sigstk.ss_flags &= ~SS_ONSTACK;
 1105 #endif
 1106 
 1107         kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0);
 1108         return (EJUSTRETURN);
 1109 }
 1110 
 1111 #ifdef COMPAT_43
      /*
       * Install one descriptor into LDT slot 0 (uap.start = 0, uap.num = 1)
       * through i386_set_ldt().  The descriptor is a present, 32-bit,
       * page-granular SDT_MEMERA segment at privilege SEL_UPL with limit
       * 0xfffff, based at p->p_sysent->sv_psstrings - sz_lcall_tramp.
       *
       * Note that p is used only to find the sysentvec; the LDT update is
       * applied to curthread.  The i386_set_ldt() return value is ignored.
       */
 1112 static void
 1113 setup_priv_lcall_gate(struct proc *p)
 1114 {
 1115         struct i386_ldt_args uap;
 1116         union descriptor desc;
 1117         u_int lcall_addr;
 1118 
 1119         bzero(&uap, sizeof(uap));
 1120         uap.start = 0;
 1121         uap.num = 1;
 1122         lcall_addr = p->p_sysent->sv_psstrings - sz_lcall_tramp;
 1123         bzero(&desc, sizeof(desc));
 1124         desc.sd.sd_type = SDT_MEMERA;
 1125         desc.sd.sd_dpl = SEL_UPL;
 1126         desc.sd.sd_p = 1;
 1127         desc.sd.sd_def32 = 1;
 1128         desc.sd.sd_gran = 1;
 1129         desc.sd.sd_lolimit = 0xffff;
 1130         desc.sd.sd_hilimit = 0xf;
              /*
               * The base is split across two fields: sd_hibase gets bits 24-31;
               * sd_lobase presumably keeps only the low bits by bit-field
               * truncation -- confirm against the segment_descriptor layout.
               */
 1131         desc.sd.sd_lobase = lcall_addr;
 1132         desc.sd.sd_hibase = lcall_addr >> 24;
 1133         i386_set_ldt(curthread, &uap, &desc);
 1134 }
 1135 #endif
 1136 
 1137 /*
 1138  * Reset registers to default values on exec.
       *
       * td    - thread whose trapframe and pcb are reset
       * imgp  - image parameters; entry_addr becomes %eip and ps_strings is
       *         placed in %ebx
       * stack - initial user %esp
       *
       * Besides the trapframe, this resets %gs, frees any per-process LDT,
       * clears the fs/gs bases and the debug registers, sets the initial NPX
       * control word and drops the FP state.
 1139  */
 1140 void
 1141 exec_setregs(struct thread *td, struct image_params *imgp, u_long stack)
 1142 {
 1143         struct trapframe *regs;
 1144         struct pcb *pcb;
 1145         register_t saved_eflags;
 1146 
 1147         regs = td->td_frame;
 1148         pcb = td->td_pcb;
 1149 
 1150         /* Reset pcb->pcb_gs and %gs before possibly invalidating it. */
 1151         pcb->pcb_gs = _udatasel;
 1152         load_gs(_udatasel);
 1153 
              /*
               * dt_lock is dropped here only when there is no LDT to free.
               * NOTE(review): user_ldt_free() presumably releases dt_lock
               * itself -- confirm against its definition.
               */
 1154         mtx_lock_spin(&dt_lock);
 1155         if (td->td_proc->p_md.md_ldt != NULL)
 1156                 user_ldt_free(td);
 1157         else
 1158                 mtx_unlock_spin(&dt_lock);
 1159 
 1160 #ifdef COMPAT_43
              /*
               * Images whose sv_psstrings differs from the native ELF sysvec
               * get the private lcall gate installed.
               */
 1161         if (td->td_proc->p_sysent->sv_psstrings !=
 1162             elf32_freebsd_sysvec.sv_psstrings)
 1163                 setup_priv_lcall_gate(td->td_proc);
 1164 #endif
 1165   
 1166         /*
 1167          * Reset the fs and gs bases.  The values from the old address
 1168          * space do not make sense for the new program.  In particular,
 1169          * gsbase might be the TLS base for the old program but the new
 1170          * program has no TLS now.
 1171          */
 1172         set_fsbase(td, 0);
 1173         set_gsbase(td, 0);
 1174 
 1175         /* Make sure edx is 0x0 on entry. Linux binaries depend on it. */
              /*
               * The bzero() clears every register in the frame; only the
               * PSL_T bit of the old eflags is carried over.
               */
 1176         saved_eflags = regs->tf_eflags & PSL_T;
 1177         bzero((char *)regs, sizeof(struct trapframe));
 1178         regs->tf_eip = imgp->entry_addr;
 1179         regs->tf_esp = stack;
 1180         regs->tf_eflags = PSL_USER | saved_eflags;
 1181         regs->tf_ss = _udatasel;
 1182         regs->tf_ds = _udatasel;
 1183         regs->tf_es = _udatasel;
 1184         regs->tf_fs = _udatasel;
 1185         regs->tf_cs = _ucodesel;
 1186 
 1187         /* PS_STRINGS value for BSD/OS binaries.  It is 0 for non-BSD/OS. */
 1188         regs->tf_ebx = imgp->ps_strings;
 1189 
 1190         /*
 1191          * Reset the hardware debug registers if they were in use.
 1192          * They won't have any meaning for the newly exec'd process.  
 1193          */
 1194         if (pcb->pcb_flags & PCB_DBREGS) {
 1195                 pcb->pcb_dr0 = 0;
 1196                 pcb->pcb_dr1 = 0;
 1197                 pcb->pcb_dr2 = 0;
 1198                 pcb->pcb_dr3 = 0;
 1199                 pcb->pcb_dr6 = 0;
 1200                 pcb->pcb_dr7 = 0;
 1201                 if (pcb == curpcb) {
 1202                         /*
 1203                          * Clear the debug registers on the running
 1204                          * CPU, otherwise they will end up affecting
 1205                          * the next process we switch to.
 1206                          */
 1207                         reset_dbregs();
 1208                 }
 1209                 pcb->pcb_flags &= ~PCB_DBREGS;
 1210         }
 1211 
 1212         pcb->pcb_initial_npxcw = __INITIAL_NPXCW__;
 1213 
 1214         /*
 1215          * Drop the FP state if we hold it, so that the process gets a
 1216          * clean FP state if it uses the FPU again.
 1217          */
 1218         fpstate_drop(td);
 1219 }
 1220 
      /*
       * Set the CR0 control bits this kernel runs with and load %gs with the
       * user data selector.  The current CR0 value is read first, so bits
       * that are already set stay set.
       */
 1221 void
 1222 cpu_setregs(void)
 1223 {
 1224         unsigned int cr0;
 1225 
 1226         cr0 = rcr0();
 1227 
 1228         /*
 1229          * CR0_MP, CR0_NE and CR0_TS are set for NPX (FPU) support:
 1230          *
 1231          * Prepare to trap all ESC (i.e., NPX) instructions and all WAIT
 1232          * instructions.  We must set the CR0_MP bit and use the CR0_TS
 1233          * bit to control the trap, because setting the CR0_EM bit does
 1234          * not cause WAIT instructions to trap.  It's important to trap
 1235          * WAIT instructions - otherwise the "wait" variants of no-wait
 1236          * control instructions would degenerate to the "no-wait" variants
 1237          * after FP context switches but work correctly otherwise.  It's
 1238          * particularly important to trap WAITs when there is no NPX -
 1239          * otherwise the "wait" variants would always degenerate.
 1240          *
 1241          * Try setting CR0_NE to get correct error reporting on 486DX's.
 1242          * Setting it should fail or do nothing on lesser processors.
               *
               * CR0_WP and CR0_AM are also set below; they are unrelated to
               * the NPX handling described above.
 1243          */
 1244         cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM;
 1245         load_cr0(cr0);
 1246         load_gs(_udatasel);
 1247 }
 1248 
 1249 u_long bootdev;         /* not a struct cdev * - the encoding is different */
      /* bootdev is exported read-only as machdep.guessed_bootdev. */
 1250 SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev,
 1251         CTLFLAG_RD, &bootdev, 0, "Maybe the Boot device (not in struct cdev *format)");
 1252 
      /* Exported read-only as machdep.bootmethod; the initial value is "BIOS". */
 1253 static char bootmethod[16] = "BIOS";
 1254 SYSCTL_STRING(_machdep, OID_AUTO, bootmethod, CTLFLAG_RD, bootmethod, 0,
 1255     "System firmware boot method");
 1256 
 1257 /*
 1258  * Initialize 386 and configure to run kernel
 1259  */
 1260 
 1261 /*
 1262  * Initialize segments & interrupt table
 1263  */
 1264 
 1265 int _default_ldt;
 1266 
 1267 struct mtx dt_lock;                     /* lock for GDT and LDT */
 1268 
 1269 union descriptor gdt0[NGDT];    /* initial global descriptor table */
 1270 union descriptor *gdt = gdt0;   /* global descriptor table; starts at gdt0 */
 1271 
 1272 union descriptor *ldt;          /* local descriptor table */
 1273 
 1274 static struct gate_descriptor idt0[NIDT];       /* initial IDT storage */
 1275 struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */
 1276 
 1277 static struct i386tss *dblfault_tss;
 1278 static char *dblfault_stack;
 1279 
 1280 static struct i386tss common_tss0;
 1281 
 1282 vm_offset_t proc0kstack;
 1283 
 1284 /*
 1285  * software prototypes -- in more palatable form.
 1286  *
       * gdt_segs[] holds one struct soft_segment_descriptor initializer per
       * GDT entry.  The comment above each entry names its selector constant
       * and its array index.
       *
       * The memory segments all use limit 0xfffff with ssd_gran = 1 and base
       * 0, except GBIOSLOWMEM_SEL, whose base is 0x400.  The two TSS entries
       * and the two LDT entries have ssd_gran = 0 and a limit computed with
       * sizeof.  GNULL_SEL and GNDIS_SEL are not-present placeholders.
       *
 1287  * GCODE_SEL through GUDATA_SEL must be in this order for syscall/sysret
 1288  * GUFS_SEL and GUGS_SEL must be in this order (swtch.s knows it)
 1289  */
 1290 struct soft_segment_descriptor gdt_segs[] = {
 1291 /* GNULL_SEL    0 Null Descriptor */
 1292 {       .ssd_base = 0x0,
 1293         .ssd_limit = 0x0,
 1294         .ssd_type = 0,
 1295         .ssd_dpl = SEL_KPL,
 1296         .ssd_p = 0,
 1297         .ssd_xx = 0, .ssd_xx1 = 0,
 1298         .ssd_def32 = 0,
 1299         .ssd_gran = 0           },
 1300 /* GPRIV_SEL    1 SMP Per-Processor Private Data Descriptor */
 1301 {       .ssd_base = 0x0,
 1302         .ssd_limit = 0xfffff,
 1303         .ssd_type = SDT_MEMRWA,
 1304         .ssd_dpl = SEL_KPL,
 1305         .ssd_p = 1,
 1306         .ssd_xx = 0, .ssd_xx1 = 0,
 1307         .ssd_def32 = 1,
 1308         .ssd_gran = 1           },
 1309 /* GUFS_SEL     2 %fs Descriptor for user */
 1310 {       .ssd_base = 0x0,
 1311         .ssd_limit = 0xfffff,
 1312         .ssd_type = SDT_MEMRWA,
 1313         .ssd_dpl = SEL_UPL,
 1314         .ssd_p = 1,
 1315         .ssd_xx = 0, .ssd_xx1 = 0,
 1316         .ssd_def32 = 1,
 1317         .ssd_gran = 1           },
 1318 /* GUGS_SEL     3 %gs Descriptor for user */
 1319 {       .ssd_base = 0x0,
 1320         .ssd_limit = 0xfffff,
 1321         .ssd_type = SDT_MEMRWA,
 1322         .ssd_dpl = SEL_UPL,
 1323         .ssd_p = 1,
 1324         .ssd_xx = 0, .ssd_xx1 = 0,
 1325         .ssd_def32 = 1,
 1326         .ssd_gran = 1           },
 1327 /* GCODE_SEL    4 Code Descriptor for kernel */
 1328 {       .ssd_base = 0x0,
 1329         .ssd_limit = 0xfffff,
 1330         .ssd_type = SDT_MEMERA,
 1331         .ssd_dpl = SEL_KPL,
 1332         .ssd_p = 1,
 1333         .ssd_xx = 0, .ssd_xx1 = 0,
 1334         .ssd_def32 = 1,
 1335         .ssd_gran = 1           },
 1336 /* GDATA_SEL    5 Data Descriptor for kernel */
 1337 {       .ssd_base = 0x0,
 1338         .ssd_limit = 0xfffff,
 1339         .ssd_type = SDT_MEMRWA,
 1340         .ssd_dpl = SEL_KPL,
 1341         .ssd_p = 1,
 1342         .ssd_xx = 0, .ssd_xx1 = 0,
 1343         .ssd_def32 = 1,
 1344         .ssd_gran = 1           },
 1345 /* GUCODE_SEL   6 Code Descriptor for user */
 1346 {       .ssd_base = 0x0,
 1347         .ssd_limit = 0xfffff,
 1348         .ssd_type = SDT_MEMERA,
 1349         .ssd_dpl = SEL_UPL,
 1350         .ssd_p = 1,
 1351         .ssd_xx = 0, .ssd_xx1 = 0,
 1352         .ssd_def32 = 1,
 1353         .ssd_gran = 1           },
 1354 /* GUDATA_SEL   7 Data Descriptor for user */
 1355 {       .ssd_base = 0x0,
 1356         .ssd_limit = 0xfffff,
 1357         .ssd_type = SDT_MEMRWA,
 1358         .ssd_dpl = SEL_UPL,
 1359         .ssd_p = 1,
 1360         .ssd_xx = 0, .ssd_xx1 = 0,
 1361         .ssd_def32 = 1,
 1362         .ssd_gran = 1           },
 1363 /* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */
 1364 {       .ssd_base = 0x400,
 1365         .ssd_limit = 0xfffff,
 1366         .ssd_type = SDT_MEMRWA,
 1367         .ssd_dpl = SEL_KPL,
 1368         .ssd_p = 1,
 1369         .ssd_xx = 0, .ssd_xx1 = 0,
 1370         .ssd_def32 = 1,
 1371         .ssd_gran = 1           },
 1372 /* GPROC0_SEL   9 Proc 0 Tss Descriptor */
 1373 {
 1374         .ssd_base = 0x0,
 1375         .ssd_limit = sizeof(struct i386tss)-1,
 1376         .ssd_type = SDT_SYS386TSS,
 1377         .ssd_dpl = 0,
 1378         .ssd_p = 1,
 1379         .ssd_xx = 0, .ssd_xx1 = 0,
 1380         .ssd_def32 = 0,
 1381         .ssd_gran = 0           },
 1382 /* GLDT_SEL     10 LDT Descriptor */
 1383 {       .ssd_base = 0,
 1384         .ssd_limit = sizeof(union descriptor) * NLDT - 1,
 1385         .ssd_type = SDT_SYSLDT,
 1386         .ssd_dpl = SEL_UPL,
 1387         .ssd_p = 1,
 1388         .ssd_xx = 0, .ssd_xx1 = 0,
 1389         .ssd_def32 = 0,
 1390         .ssd_gran = 0           },
 1391 /* GUSERLDT_SEL 11 User LDT Descriptor per process */
 1392 {       .ssd_base = 0,
 1393         .ssd_limit = (512 * sizeof(union descriptor)-1),
 1394         .ssd_type = SDT_SYSLDT,
 1395         .ssd_dpl = 0,
 1396         .ssd_p = 1,
 1397         .ssd_xx = 0, .ssd_xx1 = 0,
 1398         .ssd_def32 = 0,
 1399         .ssd_gran = 0           },
 1400 /* GPANIC_SEL   12 Panic Tss Descriptor */
 1401 {       .ssd_base = 0,
 1402         .ssd_limit = sizeof(struct i386tss)-1,
 1403         .ssd_type = SDT_SYS386TSS,
 1404         .ssd_dpl = 0,
 1405         .ssd_p = 1,
 1406         .ssd_xx = 0, .ssd_xx1 = 0,
 1407         .ssd_def32 = 0,
 1408         .ssd_gran = 0           },
 1409 /* GBIOSCODE32_SEL 13 BIOS 32-bit interface (32bit Code) */
 1410 {       .ssd_base = 0,
 1411         .ssd_limit = 0xfffff,
 1412         .ssd_type = SDT_MEMERA,
 1413         .ssd_dpl = 0,
 1414         .ssd_p = 1,
 1415         .ssd_xx = 0, .ssd_xx1 = 0,
 1416         .ssd_def32 = 0,
 1417         .ssd_gran = 1           },
 1418 /* GBIOSCODE16_SEL 14 BIOS 32-bit interface (16bit Code) */
 1419 {       .ssd_base = 0,
 1420         .ssd_limit = 0xfffff,
 1421         .ssd_type = SDT_MEMERA,
 1422         .ssd_dpl = 0,
 1423         .ssd_p = 1,
 1424         .ssd_xx = 0, .ssd_xx1 = 0,
 1425         .ssd_def32 = 0,
 1426         .ssd_gran = 1           },
 1427 /* GBIOSDATA_SEL 15 BIOS 32-bit interface (Data) */
 1428 {       .ssd_base = 0,
 1429         .ssd_limit = 0xfffff,
 1430         .ssd_type = SDT_MEMRWA,
 1431         .ssd_dpl = 0,
 1432         .ssd_p = 1,
 1433         .ssd_xx = 0, .ssd_xx1 = 0,
 1434         .ssd_def32 = 1,
 1435         .ssd_gran = 1           },
 1436 /* GBIOSUTIL_SEL 16 BIOS 16-bit interface (Utility) */
 1437 {       .ssd_base = 0,
 1438         .ssd_limit = 0xfffff,
 1439         .ssd_type = SDT_MEMRWA,
 1440         .ssd_dpl = 0,
 1441         .ssd_p = 1,
 1442         .ssd_xx = 0, .ssd_xx1 = 0,
 1443         .ssd_def32 = 0,
 1444         .ssd_gran = 1           },
 1445 /* GBIOSARGS_SEL 17 BIOS 16-bit interface (Arguments) */
 1446 {       .ssd_base = 0,
 1447         .ssd_limit = 0xfffff,
 1448         .ssd_type = SDT_MEMRWA,
 1449         .ssd_dpl = 0,
 1450         .ssd_p = 1,
 1451         .ssd_xx = 0, .ssd_xx1 = 0,
 1452         .ssd_def32 = 0,
 1453         .ssd_gran = 1           },
 1454 /* GNDIS_SEL    18 NDIS Descriptor */
 1455 {       .ssd_base = 0x0,
 1456         .ssd_limit = 0x0,
 1457         .ssd_type = 0,
 1458         .ssd_dpl = 0,
 1459         .ssd_p = 0,
 1460         .ssd_xx = 0, .ssd_xx1 = 0,
 1461         .ssd_def32 = 0,
 1462         .ssd_gran = 0           },
 1463 };
 1464 
 1465 static struct soft_segment_descriptor ldt_segs[] = {
              /*
               * Six prototype LDT entries.  Slots 0, 1, 2 and 4 are
               * not-present placeholders; slot 3 is a user code segment and
               * slot 5 a user data segment, both with base 0, limit 0xfffff
               * and page granularity.
               */
 1466         /* 0: Null Descriptor - overwritten by call gate */
 1467 {       .ssd_base = 0x0,
 1468         .ssd_limit = 0x0,
 1469         .ssd_type = 0,
 1470         .ssd_dpl = 0,
 1471         .ssd_p = 0,
 1472         .ssd_xx = 0, .ssd_xx1 = 0,
 1473         .ssd_def32 = 0,
 1474         .ssd_gran = 0           },
 1475         /* 1: Null Descriptor - overwritten by call gate */
 1476 {       .ssd_base = 0x0,
 1477         .ssd_limit = 0x0,
 1478         .ssd_type = 0,
 1479         .ssd_dpl = 0,
 1480         .ssd_p = 0,
 1481         .ssd_xx = 0, .ssd_xx1 = 0,
 1482         .ssd_def32 = 0,
 1483         .ssd_gran = 0           },
 1484         /* 2: Null Descriptor - overwritten by call gate */
 1485 {       .ssd_base = 0x0,
 1486         .ssd_limit = 0x0,
 1487         .ssd_type = 0,
 1488         .ssd_dpl = 0,
 1489         .ssd_p = 0,
 1490         .ssd_xx = 0, .ssd_xx1 = 0,
 1491         .ssd_def32 = 0,
 1492         .ssd_gran = 0           },
 1493         /* 3: Code Descriptor for user */
 1494 {       .ssd_base = 0x0,
 1495         .ssd_limit = 0xfffff,
 1496         .ssd_type = SDT_MEMERA,
 1497         .ssd_dpl = SEL_UPL,
 1498         .ssd_p = 1,
 1499         .ssd_xx = 0, .ssd_xx1 = 0,
 1500         .ssd_def32 = 1,
 1501         .ssd_gran = 1           },
 1502         /* 4: Null Descriptor - overwritten by call gate */
 1503 {       .ssd_base = 0x0,
 1504         .ssd_limit = 0x0,
 1505         .ssd_type = 0,
 1506         .ssd_dpl = 0,
 1507         .ssd_p = 0,
 1508         .ssd_xx = 0, .ssd_xx1 = 0,
 1509         .ssd_def32 = 0,
 1510         .ssd_gran = 0           },
 1511         /* 5: Data Descriptor for user */
 1512 {       .ssd_base = 0x0,
 1513         .ssd_limit = 0xfffff,
 1514         .ssd_type = SDT_MEMRWA,
 1515         .ssd_dpl = SEL_UPL,
 1516         .ssd_p = 1,
 1517         .ssd_xx = 0, .ssd_xx1 = 0,
 1518         .ssd_def32 = 1,
 1519         .ssd_gran = 1           },
 1520 };
 1521 
 1522 uintptr_t setidt_disp;  /* displacement setidt() adds to handler addresses */
 1523 
      /*
       * Install handler func into IDT slot idx with gate type typ, privilege
       * level dpl and code selector selec.  A non-NULL func is relocated by
       * setidt_disp before it is stored; a NULL func is stored as offset 0.
       */
 1524 void
 1525 setidt(int idx, inthand_t *func, int typ, int dpl, int selec)
 1526 {
 1527         uintptr_t off;
 1528 
 1529         off = func != NULL ? (uintptr_t)func + setidt_disp : 0;
 1530         setidt_nodisp(idx, off, typ, dpl, selec);
 1531 }
 1532 
      /*
       * Fill in IDT slot idx directly: off is stored as given (no setidt_disp
       * adjustment), split into gd_looffset and gd_hioffset (off >> 16).  The
       * gate is marked present, with gd_stkcpy and gd_xx cleared.  idx is not
       * range-checked here.
       */
 1533 void
 1534 setidt_nodisp(int idx, uintptr_t off, int typ, int dpl, int selec)
 1535 {
 1536         struct gate_descriptor *ip;
 1537 
 1538         ip = idt + idx;
 1539         ip->gd_looffset = off;  /* presumably truncated to the field width */
 1540         ip->gd_selector = selec;
 1541         ip->gd_stkcpy = 0;
 1542         ip->gd_xx = 0;
 1543         ip->gd_type = typ;
 1544         ip->gd_dpl = dpl;
 1545         ip->gd_p = 1;
 1546         ip->gd_hioffset = ((u_int)off) >> 16 ;
 1547 }
 1548 
      /*
       * Interrupt and exception entry points, declared through IDTVEC() and
       * defined outside this file.  The dtrace_ret and xen_intr_upcall
       * entries exist only with KDTRACE_HOOKS and XENHVM respectively.
       */
 1549 extern inthand_t
 1550         IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
 1551         IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
 1552         IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
 1553         IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
 1554         IDTVEC(xmm),
 1555 #ifdef KDTRACE_HOOKS
 1556         IDTVEC(dtrace_ret),
 1557 #endif
 1558 #ifdef XENHVM
 1559         IDTVEC(xen_intr_upcall),
 1560 #endif
 1561         IDTVEC(int0x80_syscall);
 1562 
 1563 #ifdef DDB
 1564 /*
 1565  * Display the index and function name of any IDT entries that don't use
 1566  * the default 'rsvd' entry point.
 1567  */
 1568 DB_SHOW_COMMAND(idt, db_show_idt)
 1569 {
 1570         struct gate_descriptor *ip;
 1571         int idx;
 1572         uintptr_t func, func_trm;
 1573         bool trm;
 1574 
 1575         ip = idt;
 1576         for (idx = 0; idx < NIDT && !db_pager_quit; idx++) {
 1577                 if (ip->gd_type == SDT_SYSTASKGT) {
 1578                         db_printf("%3d\t<TASK>\n", idx);
 1579                 } else {
 1580                         func = (ip->gd_hioffset << 16 | ip->gd_looffset);
 1581                         if (func >= PMAP_TRM_MIN_ADDRESS) {
 1582                                 func_trm = func;
 1583                                 func -= setidt_disp;
 1584                                 trm = true;
 1585                         } else
 1586                                 trm = false;
 1587                         if (func != (uintptr_t)&IDTVEC(rsvd)) {
 1588                                 db_printf("%3d\t", idx);
 1589                                 db_printsym(func, DB_STGY_PROC);
 1590                                 if (trm)
 1591                                         db_printf(" (trampoline %#x)",
 1592                                             func_trm);
 1593                                 db_printf("\n");
 1594                         }
 1595                 }
 1596                 ip++;
 1597         }
 1598 }
 1599 
 1600 /* Show privileged registers. */
 1601 DB_SHOW_COMMAND(sysregs, db_show_sysregs)
 1602 {
 1603         uint64_t idtr, gdtr;
 1604 
 1605         idtr = ridt();
 1606         db_printf("idtr\t0x%08x/%04x\n",
 1607             (u_int)(idtr >> 16), (u_int)idtr & 0xffff);
 1608         gdtr = rgdt();
 1609         db_printf("gdtr\t0x%08x/%04x\n",
 1610             (u_int)(gdtr >> 16), (u_int)gdtr & 0xffff);
 1611         db_printf("ldtr\t0x%04x\n", rldt());
 1612         db_printf("tr\t0x%04x\n", rtr());
 1613         db_printf("cr0\t0x%08x\n", rcr0());
 1614         db_printf("cr2\t0x%08x\n", rcr2());
 1615         db_printf("cr3\t0x%08x\n", rcr3());
 1616         db_printf("cr4\t0x%08x\n", rcr4());
 1617         if (rcr4() & CR4_XSAVE)
 1618                 db_printf("xcr0\t0x%016llx\n", rxcr(0));
 1619         if (amd_feature & (AMDID_NX | AMDID_LM))
 1620                 db_printf("EFER\t0x%016llx\n", rdmsr(MSR_EFER));
 1621         if (cpu_feature2 & (CPUID2_VMX | CPUID2_SMX))
 1622                 db_printf("FEATURES_CTL\t0x%016llx\n",
 1623                     rdmsr(MSR_IA32_FEATURE_CONTROL));
 1624         if ((cpu_vendor_id == CPU_VENDOR_INTEL ||
 1625             cpu_vendor_id == CPU_VENDOR_AMD) && CPUID_TO_FAMILY(cpu_id) >= 6)
 1626                 db_printf("DEBUG_CTL\t0x%016llx\n", rdmsr(MSR_DEBUGCTLMSR));
 1627         if (cpu_feature & CPUID_PAT)
 1628                 db_printf("PAT\t0x%016llx\n", rdmsr(MSR_PAT));
 1629 }
 1630 
 /*
  * DDB "show dbregs": print debug registers dr0-dr3, dr6 and dr7, one per
  * line, each read through its rdrN() accessor.
  */
 #define DB_SHOW_DR(n)   db_printf("dr" #n "\t0x%08x\n", rdr##n())
 DB_SHOW_COMMAND(dbregs, db_show_dbregs)
 {
         DB_SHOW_DR(0);
         DB_SHOW_DR(1);
         DB_SHOW_DR(2);
         DB_SHOW_DR(3);
         DB_SHOW_DR(6);
         DB_SHOW_DR(7);
 }
 #undef DB_SHOW_DR
 1641 
 1642 DB_SHOW_COMMAND(frame, db_show_frame)
 1643 {
 1644         struct trapframe *frame;
 1645 
 1646         frame = have_addr ? (struct trapframe *)addr : curthread->td_frame;
 1647         printf("ss %#x esp %#x efl %#x cs %#x eip %#x\n",
 1648             frame->tf_ss, frame->tf_esp, frame->tf_eflags, frame->tf_cs,
 1649             frame->tf_eip);
 1650         printf("err %#x trapno %d\n", frame->tf_err, frame->tf_trapno);
 1651         printf("ds %#x es %#x fs %#x\n",
 1652             frame->tf_ds, frame->tf_es, frame->tf_fs);
 1653         printf("eax %#x ecx %#x edx %#x ebx %#x\n",
 1654             frame->tf_eax, frame->tf_ecx, frame->tf_edx, frame->tf_ebx);
 1655         printf("ebp %#x esi %#x edi %#x\n",
 1656             frame->tf_ebp, frame->tf_esi, frame->tf_edi);
 1657 
 1658 }
 1659 #endif
 1660 
 1661 void
 1662 sdtossd(sd, ssd)
 1663         struct segment_descriptor *sd;
 1664         struct soft_segment_descriptor *ssd;
 1665 {
 1666         ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
 1667         ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
 1668         ssd->ssd_type  = sd->sd_type;
 1669         ssd->ssd_dpl   = sd->sd_dpl;
 1670         ssd->ssd_p     = sd->sd_p;
 1671         ssd->ssd_def32 = sd->sd_def32;
 1672         ssd->ssd_gran  = sd->sd_gran;
 1673 }
 1674 
 1675 static int
 1676 add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap,
 1677     int *physmap_idxp)
 1678 {
 1679         int i, insert_idx, physmap_idx;
 1680 
 1681         physmap_idx = *physmap_idxp;
 1682         
 1683         if (length == 0)
 1684                 return (1);
 1685 
 1686 #ifndef PAE
 1687         if (base > 0xffffffff) {
 1688                 printf("%uK of memory above 4GB ignored\n",
 1689                     (u_int)(length / 1024));
 1690                 return (1);
 1691         }
 1692 #endif
 1693 
 1694         /*
 1695          * Find insertion point while checking for overlap.  Start off by
 1696          * assuming the new entry will be added to the end.
 1697          */
 1698         insert_idx = physmap_idx + 2;
 1699         for (i = 0; i <= physmap_idx; i += 2) {
 1700                 if (base < physmap[i + 1]) {
 1701                         if (base + length <= physmap[i]) {
 1702                                 insert_idx = i;
 1703                                 break;
 1704                         }
 1705                         if (boothowto & RB_VERBOSE)
 1706                                 printf(
 1707                     "Overlapping memory regions, ignoring second region\n");
 1708                         return (1);
 1709                 }
 1710         }
 1711 
 1712         /* See if we can prepend to the next entry. */
 1713         if (insert_idx <= physmap_idx && base + length == physmap[insert_idx]) {
 1714                 physmap[insert_idx] = base;
 1715                 return (1);
 1716         }
 1717 
 1718         /* See if we can append to the previous entry. */
 1719         if (insert_idx > 0 && base == physmap[insert_idx - 1]) {
 1720                 physmap[insert_idx - 1] += length;
 1721                 return (1);
 1722         }
 1723 
 1724         physmap_idx += 2;
 1725         *physmap_idxp = physmap_idx;
 1726         if (physmap_idx == PHYSMAP_SIZE) {
 1727                 printf(
 1728                 "Too many segments in the physical address map, giving up\n");
 1729                 return (0);
 1730         }
 1731 
 1732         /*
 1733          * Move the last 'N' entries down to make room for the new
 1734          * entry if needed.
 1735          */
 1736         for (i = physmap_idx; i > insert_idx; i -= 2) {
 1737                 physmap[i] = physmap[i - 2];
 1738                 physmap[i + 1] = physmap[i - 1];
 1739         }
 1740 
 1741         /* Insert the new entry. */
 1742         physmap[insert_idx] = base;
 1743         physmap[insert_idx + 1] = base + length;
 1744         return (1);
 1745 }
 1746 
 1747 static int
 1748 add_smap_entry(struct bios_smap *smap, vm_paddr_t *physmap, int *physmap_idxp)
 1749 {
 1750         if (boothowto & RB_VERBOSE)
 1751                 printf("SMAP type=%02x base=%016llx len=%016llx\n",
 1752                     smap->type, smap->base, smap->length);
 1753 
 1754         if (smap->type != SMAP_TYPE_MEMORY)
 1755                 return (1);
 1756 
 1757         return (add_physmap_entry(smap->base, smap->length, physmap,
 1758             physmap_idxp));
 1759 }
 1760 
 1761 static void
 1762 add_smap_entries(struct bios_smap *smapbase, vm_paddr_t *physmap,
 1763     int *physmap_idxp)
 1764 {
 1765         struct bios_smap *smap, *smapend;
 1766         u_int32_t smapsize;
 1767         /*
 1768          * Memory map from INT 15:E820.
 1769          *
 1770          * subr_module.c says:
 1771          * "Consumer may safely assume that size value precedes data."
 1772          * ie: an int32_t immediately precedes SMAP.
 1773          */
 1774         smapsize = *((u_int32_t *)smapbase - 1);
 1775         smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize);
 1776 
 1777         for (smap = smapbase; smap < smapend; smap++)
 1778                 if (!add_smap_entry(smap, physmap, physmap_idxp))
 1779                         break;
 1780 }
 1781 
 1782 static void
 1783 basemem_setup(void)
 1784 {
 1785         pt_entry_t *pte;
 1786         int i;
 1787 
 1788         if (basemem > 640) {
 1789                 printf("Preposterous BIOS basemem of %uK, truncating to 640K\n",
 1790                         basemem);
 1791                 basemem = 640;
 1792         }
 1793 
 1794         /*
 1795          * Map pages between basemem and ISA_HOLE_START, if any, r/w into
 1796          * the vm86 page table so that vm86 can scribble on them using
 1797          * the vm86 map too.  XXX: why 2 ways for this and only 1 way for
 1798          * page 0, at least as initialized here?
 1799          */
 1800         pte = (pt_entry_t *)vm86paddr;
 1801         for (i = basemem / 4; i < 160; i++)
 1802                 pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U;
 1803 }
 1804 
 1805 /*
 1806  * Populate the (physmap) array with base/bound pairs describing the
 1807  * available physical memory in the system, then test this memory and
 1808  * build the phys_avail array describing the actually-available memory.
 1809  *
 1810  * If we cannot accurately determine the physical memory map, then use
 1811  * value from the 0xE801 call, and failing that, the RTC.
 1812  *
 1813  * Total memory size may be set by the kernel environment variable
 1814  * hw.physmem or the compile-time define MAXMEM.
 1815  *
 1816  * XXX first should be vm_paddr_t.
 1817  */
      /*
       * Parameter:
       *   first - physical address bounding the kernel and preloaded data:
       *           [KERNLOAD, trunc_page(first)) is reported to
       *           vm_phys_add_seg(), pages in [KERNLOAD, first) are kept
       *           out of phys_avail[], and the value is handed on to
       *           pmap_bootstrap().
       *
       * Effects visible in this function: basemem, Maxmem and physmem are
       * updated, phys_avail[] and dump_avail[] are filled in, and the
       * message buffer is carved off the end of the last phys_avail chunk
       * and mapped at msgbufp.
       */
 1818 static void
 1819 getmemsize(int first)
 1820 {
 1821         int has_smap, off, physmap_idx, pa_indx, da_indx;
 1822         u_long memtest;
 1823         vm_paddr_t physmap[PHYSMAP_SIZE];
 1824         pt_entry_t *pte;
 1825         quad_t dcons_addr, dcons_size, physmem_tunable;
 1826         int hasbrokenint12, i, res;
 1827         u_int extmem;
 1828         struct vm86frame vmf;
 1829         struct vm86context vmc;
 1830         vm_paddr_t pa;
 1831         struct bios_smap *smap, *smapbase;
 1832         caddr_t kmdp;
 1833 
 1834         has_smap = 0;
 1835         bzero(&vmf, sizeof(vmf));
 1836         bzero(physmap, sizeof(physmap));
 1837         basemem = 0;
 1838 
 1839         /*
 1840          * Tell the physical memory allocator about pages used to store
 1841          * the kernel and preloaded data.  See kmem_bootstrap_free().
 1842          */
 1843         vm_phys_add_seg((vm_paddr_t)KERNLOAD, trunc_page(first));
 1844 
 1845         /*
 1846          * Check if the loader supplied an SMAP memory map.  If so,
 1847          * use that and do not make any VM86 calls.
 1848          */
              /*
               * physmap_idx is the index of the start slot of the last
               * (start, end) pair in physmap[]; the loops below step by 2
               * up to and including it.
               */
 1849         physmap_idx = 0;
 1850         kmdp = preload_search_by_type("elf kernel");
 1851         if (kmdp == NULL)
 1852                 kmdp = preload_search_by_type("elf32 kernel");
 1853         smapbase = (struct bios_smap *)preload_search_info(kmdp,
 1854             MODINFO_METADATA | MODINFOMD_SMAP);
 1855         if (smapbase != NULL) {
 1856                 add_smap_entries(smapbase, physmap, &physmap_idx);
 1857                 has_smap = 1;
 1858                 goto have_smap;
 1859         }
 1860 
 1861         /*
 1862          * Some newer BIOSes have a broken INT 12H implementation
 1863          * which causes a kernel panic immediately.  In this case, we
 1864          * need to use the SMAP to determine the base memory size.
 1865          */
 1866         hasbrokenint12 = 0;
 1867         TUNABLE_INT_FETCH("hw.hasbrokenint12", &hasbrokenint12);
 1868         if (hasbrokenint12 == 0) {
 1869                 /* Use INT12 to determine base memory size. */
 1870                 vm86_intcall(0x12, &vmf);
 1871                 basemem = vmf.vmf_ax;
 1872                 basemem_setup();
 1873         }
 1874 
 1875         /*
 1876          * Fetch the memory map with INT 15:E820.  Map page 1 R/W into
 1877          * the kernel page table so we can use it as a buffer.  The
 1878          * kernel will unmap this page later.
 1879          */
 1880         vmc.npages = 0;
 1881         smap = (void *)vm86_addpage(&vmc, 1, PMAP_MAP_LOW + ptoa(1));
 1882         res = vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di);
 1883         KASSERT(res != 0, ("vm86_getptr() failed: address not found"));
 1884 
              /*
               * vmf_ebx starts at 0 and is carried from one call to the
               * next.  The loop ends when the call fails, the returned
               * eax is not SMAP_SIG, add_smap_entry() reports failure, or
               * ebx comes back as 0.
               */
 1885         vmf.vmf_ebx = 0;
 1886         do {
 1887                 vmf.vmf_eax = 0xE820;
 1888                 vmf.vmf_edx = SMAP_SIG;
 1889                 vmf.vmf_ecx = sizeof(struct bios_smap);
 1890                 i = vm86_datacall(0x15, &vmf, &vmc);
 1891                 if (i || vmf.vmf_eax != SMAP_SIG)
 1892                         break;
 1893                 has_smap = 1;
 1894                 if (!add_smap_entry(smap, physmap, &physmap_idx))
 1895                         break;
 1896         } while (vmf.vmf_ebx != 0);
 1897 
 1898 have_smap:
 1899         /*
 1900          * If we didn't fetch the "base memory" size from INT12,
 1901          * figure it out from the SMAP (or just guess).
 1902          */
 1903         if (basemem == 0) {
 1904                 for (i = 0; i <= physmap_idx; i += 2) {
 1905                         if (physmap[i] == 0x00000000) {
 1906                                 basemem = physmap[i + 1] / 1024;
 1907                                 break;
 1908                         }
 1909                 }
 1910 
 1911                 /* XXX: If we couldn't find basemem from SMAP, just guess. */
 1912                 if (basemem == 0)
 1913                         basemem = 640;
 1914                 basemem_setup();
 1915         }
 1916 
              /*
               * A non-zero end in the first pair means slot 0 was filled in
               * above, so the E801/RTC fallback below is skipped.
               */
 1917         if (physmap[1] != 0)
 1918                 goto physmap_done;
 1919 
 1920         /*
 1921          * If we failed to find an SMAP, figure out the extended
 1922          * memory size.  We will then build a simple memory map with
 1923          * two segments, one for "base memory" and the second for
 1924          * "extended memory".  Note that "extended memory" starts at a
 1925          * physical address of 1MB and that both basemem and extmem
 1926          * are in units of 1KB.
 1927          *
 1928          * First, try to fetch the extended memory size via INT 15:E801.
 1929          */
 1930         vmf.vmf_ax = 0xE801;
 1931         if (vm86_intcall(0x15, &vmf) == 0) {
                      /* cx is added as KB; dx is scaled by 64 to get KB. */
 1932                 extmem = vmf.vmf_cx + vmf.vmf_dx * 64;
 1933         } else {
 1934                 /*
 1935                  * If INT15:E801 fails, this is our last ditch effort
 1936                  * to determine the extended memory size.  Currently
 1937                  * we prefer the RTC value over INT15:88.
 1938                  */
 1939 #if 0
 1940                 vmf.vmf_ah = 0x88;
 1941                 vm86_intcall(0x15, &vmf);
 1942                 extmem = vmf.vmf_ax;
 1943 #else
 1944                 extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8);
 1945 #endif
 1946         }
 1947 
 1948         /*
 1949          * Special hack for chipsets that still remap the 384k hole when
 1950          * there's 16MB of memory - this really confuses people that
 1951          * are trying to use bus mastering ISA controllers with the
 1952          * "16MB limit"; they only have 16MB, but the remapping puts
 1953          * them beyond the limit.
 1954          *
 1955          * If extended memory is between 15-16MB (16-17MB phys address range),
 1956          *      chop it to 15MB.
 1957          */
 1958         if ((extmem > 15 * 1024) && (extmem < 16 * 1024))
 1959                 extmem = 15 * 1024;
 1960 
 1961         physmap[0] = 0;
 1962         physmap[1] = basemem * 1024;
 1963         physmap_idx = 2;
 1964         physmap[physmap_idx] = 0x100000;
 1965         physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024;
 1966 
 1967 physmap_done:
 1968         /*
 1969          * Now, physmap contains a map of physical memory.
 1970          */
 1971 
 1972 #ifdef SMP
 1973         /* make hole for AP bootstrap code */
 1974         alloc_ap_trampoline(physmap, &physmap_idx);
 1975 #endif
 1976 
 1977         /*
 1978          * Maxmem isn't the "maximum memory", it's one larger than the
 1979          * highest page of the physical address space.  It should be
 1980          * called something like "Maxphyspage".  We may adjust this 
 1981          * based on ``hw.physmem'' and the results of the memory test.
 1982          *
 1983          * This is especially confusing when it is much larger than the
 1984          * memory size and is displayed as "realmem".
 1985          */
 1986         Maxmem = atop(physmap[physmap_idx + 1]);
 1987 
 1988 #ifdef MAXMEM
 1989         Maxmem = MAXMEM / 4;
 1990 #endif
 1991 
 1992         if (TUNABLE_QUAD_FETCH("hw.physmem", &physmem_tunable))
 1993                 Maxmem = atop(physmem_tunable);
 1994 
 1995         /*
 1996          * If we have an SMAP, don't allow MAXMEM or hw.physmem to extend
 1997          * the amount of memory in the system.
 1998          */
 1999         if (has_smap && Maxmem > atop(physmap[physmap_idx + 1]))
 2000                 Maxmem = atop(physmap[physmap_idx + 1]);
 2001 
 2002         /*
 2003          * By default enable the memory test on real hardware, and disable
 2004          * it if we appear to be running in a VM.  This avoids touching all
 2005          * pages unnecessarily, which doesn't matter on real hardware but is
 2006          * bad for shared VM hosts.  Use a general name so that
 2007          * one could eventually do more with the code than just disable it.
 2008          */
 2009         memtest = (vm_guest > VM_GUEST_NO) ? 0 : 1;
 2010         TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest);
 2011 
 2012         if (atop(physmap[physmap_idx + 1]) != Maxmem &&
 2013             (boothowto & RB_VERBOSE))
 2014                 printf("Physical memory use set to %ldK\n", Maxmem * 4);
 2015 
 2016         /*
 2017          * If Maxmem has been increased beyond what the system has detected,
 2018          * extend the last memory segment to the new limit.
 2019          */ 
 2020         if (atop(physmap[physmap_idx + 1]) < Maxmem)
 2021                 physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem);
 2022 
 2023         /* call pmap initialization to make new kernel address space */
 2024         pmap_bootstrap(first);
 2025 
 2026         /*
 2027          * Size up each available chunk of physical memory.
 2028          */
 2029         physmap[0] = PAGE_SIZE;         /* mask off page 0 */
              /*
               * phys_avail[0]/[1] both start at PAGE_SIZE (an empty first
               * chunk that grows as pages are accepted).  Only the end slot
               * dump_avail[1] is seeded; dump_avail[0] is not written here.
               */
 2030         pa_indx = 0;
 2031         da_indx = 1;
 2032         phys_avail[pa_indx++] = physmap[0];
 2033         phys_avail[pa_indx] = physmap[0];
 2034         dump_avail[da_indx] = physmap[0];
 2035         pte = CMAP3;
 2036 
 2037         /*
 2038          * Get dcons buffer address
 2039          */
 2040         if (getenv_quad("dcons.addr", &dcons_addr) == 0 ||
 2041             getenv_quad("dcons.size", &dcons_size) == 0)
 2042                 dcons_addr = 0;
 2043 
 2044         /*
 2045          * physmap is in bytes, so when converting to page boundaries,
 2046          * round up the start address and round down the end address.
 2047          */
 2048         for (i = 0; i <= physmap_idx; i += 2) {
 2049                 vm_paddr_t end;
 2050 
                      /* Each segment is walked up to min(Maxmem, segment end). */
 2051                 end = ptoa((vm_paddr_t)Maxmem);
 2052                 if (physmap[i + 1] < end)
 2053                         end = trunc_page(physmap[i + 1]);
 2054                 for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
 2055                         int tmp, page_bad, full;
 2056                         int *ptr = (int *)CADDR3;
 2057 
 2058                         full = FALSE;
 2059                         /*
 2060                          * block out kernel memory as not available.
 2061                          */
 2062                         if (pa >= KERNLOAD && pa < first)
 2063                                 goto do_dump_avail;
 2064 
 2065                         /*
 2066                          * block out dcons buffer
 2067                          */
 2068                         if (dcons_addr > 0
 2069                             && pa >= trunc_page(dcons_addr)
 2070                             && pa < dcons_addr + dcons_size)
 2071                                 goto do_dump_avail;
 2072 
 2073                         page_bad = FALSE;
 2074                         if (memtest == 0)
 2075                                 goto skip_memtest;
 2076 
 2077                         /*
 2078                          * map page into kernel: valid, read/write,non-cacheable
 2079                          */
 2080                         *pte = pa | PG_V | PG_RW | PG_N;
 2081                         invltlb();
 2082 
                              /*
                               * Only the first int of the page is exercised,
                               * through ptr (CADDR3; presumably the address
                               * mapped by the CMAP3 entry written above).
                               * Its contents are saved here and put back
                               * after the four patterns.
                               */
 2083                         tmp = *(int *)ptr;
 2084                         /*
 2085                          * Test for alternating 1's and 0's
 2086                          */
 2087                         *(volatile int *)ptr = 0xaaaaaaaa;
 2088                         if (*(volatile int *)ptr != 0xaaaaaaaa)
 2089                                 page_bad = TRUE;
 2090                         /*
 2091                          * Test for alternating 0's and 1's
 2092                          */
 2093                         *(volatile int *)ptr = 0x55555555;
 2094                         if (*(volatile int *)ptr != 0x55555555)
 2095                                 page_bad = TRUE;
 2096                         /*
 2097                          * Test for all 1's
 2098                          */
 2099                         *(volatile int *)ptr = 0xffffffff;
 2100                         if (*(volatile int *)ptr != 0xffffffff)
 2101                                 page_bad = TRUE;
 2102                         /*
 2103                          * Test for all 0's
 2104                          */
 2105                         *(volatile int *)ptr = 0x0;
 2106                         if (*(volatile int *)ptr != 0x0)
 2107                                 page_bad = TRUE;
 2108                         /*
 2109                          * Restore original value.
 2110                          */
 2111                         *(int *)ptr = tmp;
 2112 
 2113 skip_memtest:
 2114                         /*
 2115                          * Adjust array of valid/good pages.
 2116                          */
                              /*
                               * A bad page is entered in neither
                               * phys_avail[] nor dump_avail[].
                               */
 2117                         if (page_bad == TRUE)
 2118                                 continue;
 2119                         /*
 2120                          * If this good page is a continuation of the
 2121                          * previous set of good pages, then just increase
 2122                          * the end pointer. Otherwise start a new chunk.
 2123                          * Note that "end" points one higher than end,
 2124                          * making the range >= start and < end.
 2125                          * If we're also doing a speculative memory
 2126                          * test and we are at or past the end, bump up Maxmem
 2127                          * so that we keep going. The first bad page
 2128                          * will terminate the loop.
 2129                          */
 2130                         if (phys_avail[pa_indx] == pa) {
 2131                                 phys_avail[pa_indx] += PAGE_SIZE;
 2132                         } else {
 2133                                 pa_indx++;
 2134                                 if (pa_indx == PHYS_AVAIL_ARRAY_END) {
 2135                                         printf(
 2136                 "Too many holes in the physical address space, giving up\n");
 2137                                         pa_indx--;
 2138                                         full = TRUE;
 2139                                         goto do_dump_avail;
 2140                                 }
 2141                                 phys_avail[pa_indx++] = pa;     /* start */
 2142                                 phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */
 2143                         }
 2144                         physmem++;
 2145 do_dump_avail:
                              /*
                               * Same grow-or-start-a-chunk bookkeeping for
                               * dump_avail[]; when that array is full the
                               * page is silently left out.
                               */
 2146                         if (dump_avail[da_indx] == pa) {
 2147                                 dump_avail[da_indx] += PAGE_SIZE;
 2148                         } else {
 2149                                 da_indx++;
 2150                                 if (da_indx == DUMP_AVAIL_ARRAY_END) {
 2151                                         da_indx--;
 2152                                         goto do_next;
 2153                                 }
 2154                                 dump_avail[da_indx++] = pa;     /* start */
 2155                                 dump_avail[da_indx] = pa + PAGE_SIZE; /* end */
 2156                         }
 2157 do_next:
                              /*
                               * 'full' is only set when phys_avail[]
                               * overflowed.  The break leaves just this
                               * per-page loop; the outer loop over physmap
                               * segments still moves on to the next pair.
                               */
 2158                         if (full)
 2159                                 break;
 2160                 }
 2161         }
              /* Remove the temporary mapping used by the memory test. */
 2162         *pte = 0;
 2163         invltlb();
 2164         
 2165         /*
 2166          * XXX
 2167          * The last chunk must contain at least one page plus the message
 2168          * buffer to avoid complicating other code (message buffer address
 2169          * calculation, etc.).
 2170          */
              /*
               * Trailing chunks that are too small are dropped entirely:
               * their pages are subtracted from physmem and both of their
               * slots are zeroed.
               */
 2171         while (phys_avail[pa_indx - 1] + PAGE_SIZE +
 2172             round_page(msgbufsize) >= phys_avail[pa_indx]) {
 2173                 physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
 2174                 phys_avail[pa_indx--] = 0;
 2175                 phys_avail[pa_indx--] = 0;
 2176         }
 2177 
              /* Maxmem becomes the page just past the last usable chunk. */
 2178         Maxmem = atop(phys_avail[pa_indx]);
 2179 
 2180         /* Trim off space for the message buffer. */
 2181         phys_avail[pa_indx] -= round_page(msgbufsize);
 2182 
 2183         /* Map the message buffer. */
 2184         for (off = 0; off < round_page(msgbufsize); off += PAGE_SIZE)
 2185                 pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] +
 2186                     off);
 2187 }
 2188 
 /*
  * Bring up the kernel debugger framework.
  *
  * With DDB compiled in, the symbol table bounds recorded in bootinfo are
  * handed to DDB first.  kdb_init() is always called.  With KDB compiled
  * in, the debugger is entered right away when the RB_KDB boot flag is set.
  */
 static void
 i386_kdb_init(void)
 {
 #ifdef DDB
         db_fetch_ksymtab(bootinfo.bi_symtab, bootinfo.bi_esymtab);
 #endif

         kdb_init();

 #ifdef KDB
         if ((boothowto & RB_KDB) != 0) {
                 kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
         }
 #endif
 }
 2201 
 2202 static void
 2203 fixup_idt(void)
 2204 {
 2205         struct gate_descriptor *ip;
 2206         uintptr_t off;
 2207         int x;
 2208 
 2209         for (x = 0; x < NIDT; x++) {
 2210                 ip = &idt[x];
 2211                 if (ip->gd_type != SDT_SYS386IGT &&
 2212                     ip->gd_type != SDT_SYS386TGT)
 2213                         continue;
 2214                 off = ip->gd_looffset + (((u_int)ip->gd_hioffset) << 16);
 2215                 KASSERT(off >= (uintptr_t)start_exceptions &&
 2216                     off < (uintptr_t)end_exceptions,
 2217                     ("IDT[%d] type %d off %#x", x, ip->gd_type, off));
 2218                 off += setidt_disp;
 2219                 MPASS(off >= PMAP_TRM_MIN_ADDRESS &&
 2220                     off < PMAP_TRM_MAX_ADDRESS);
 2221                 ip->gd_looffset = off;
 2222                 ip->gd_hioffset = off >> 16;
 2223         }
 2224 }
 2225 
 2226 static void
 2227 i386_setidt1(void)
 2228 {
 2229         int x;
 2230 
 2231         /* exceptions */
 2232         for (x = 0; x < NIDT; x++)
 2233                 setidt(x, &IDTVEC(rsvd), SDT_SYS386IGT, SEL_KPL,
 2234                     GSEL(GCODE_SEL, SEL_KPL));
 2235         setidt(IDT_DE, &IDTVEC(div), SDT_SYS386IGT, SEL_KPL,
 2236             GSEL(GCODE_SEL, SEL_KPL));
 2237         setidt(IDT_DB, &IDTVEC(dbg), SDT_SYS386IGT, SEL_KPL,
 2238             GSEL(GCODE_SEL, SEL_KPL));
 2239         setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYS386IGT, SEL_KPL,
 2240             GSEL(GCODE_SEL, SEL_KPL));
 2241         setidt(IDT_BP, &IDTVEC(bpt), SDT_SYS386IGT, SEL_UPL,
 2242             GSEL(GCODE_SEL, SEL_KPL));
 2243         setidt(IDT_OF, &IDTVEC(ofl), SDT_SYS386IGT, SEL_UPL,
 2244             GSEL(GCODE_SEL, SEL_KPL));
 2245         setidt(IDT_BR, &IDTVEC(bnd), SDT_SYS386IGT, SEL_KPL,
 2246             GSEL(GCODE_SEL, SEL_KPL));
 2247         setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386IGT, SEL_KPL,
 2248             GSEL(GCODE_SEL, SEL_KPL));
 2249         setidt(IDT_NM, &IDTVEC(dna), SDT_SYS386IGT, SEL_KPL,
 2250             GSEL(GCODE_SEL, SEL_KPL));
 2251         setidt(IDT_DF, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL,
 2252             SEL_KPL));
 2253         setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYS386IGT,
 2254             SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 2255         setidt(IDT_TS, &IDTVEC(tss), SDT_SYS386IGT, SEL_KPL,
 2256             GSEL(GCODE_SEL, SEL_KPL));
 2257         setidt(IDT_NP, &IDTVEC(missing), SDT_SYS386IGT, SEL_KPL,
 2258             GSEL(GCODE_SEL, SEL_KPL));
 2259         setidt(IDT_SS, &IDTVEC(stk), SDT_SYS386IGT, SEL_KPL,
 2260             GSEL(GCODE_SEL, SEL_KPL));
 2261         setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386IGT, SEL_KPL,
 2262             GSEL(GCODE_SEL, SEL_KPL));
 2263         setidt(IDT_PF, &IDTVEC(page), SDT_SYS386IGT, SEL_KPL,
 2264             GSEL(GCODE_SEL, SEL_KPL));
 2265         setidt(IDT_MF, &IDTVEC(fpu), SDT_SYS386IGT, SEL_KPL,
 2266             GSEL(GCODE_SEL, SEL_KPL));
 2267         setidt(IDT_AC, &IDTVEC(align), SDT_SYS386IGT, SEL_KPL,
 2268             GSEL(GCODE_SEL, SEL_KPL));
 2269         setidt(IDT_MC, &IDTVEC(mchk), SDT_SYS386IGT, SEL_KPL,
 2270             GSEL(GCODE_SEL, SEL_KPL));
 2271         setidt(IDT_XF, &IDTVEC(xmm), SDT_SYS386IGT, SEL_KPL,
 2272             GSEL(GCODE_SEL, SEL_KPL));
 2273         setidt(IDT_SYSCALL, &IDTVEC(int0x80_syscall),
 2274             SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
 2275 #ifdef KDTRACE_HOOKS
 2276         setidt(IDT_DTRACE_RET, &IDTVEC(dtrace_ret),
 2277             SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
 2278 #endif
 2279 #ifdef XENHVM
 2280         setidt(IDT_EVTCHN, &IDTVEC(xen_intr_upcall),
 2281             SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 2282 #endif
 2283 }
 2284 
 2285 static void
 2286 i386_setidt2(void)
 2287 {
 2288 
 2289         setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386IGT, SEL_KPL,
 2290             GSEL(GCODE_SEL, SEL_KPL));
 2291         setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386IGT, SEL_KPL,
 2292             GSEL(GCODE_SEL, SEL_KPL));
 2293 }
 2294 
 #if defined(DEV_ISA) && !defined(DEV_ATPIC)
 /*
  * Only built with DEV_ISA and without DEV_ATPIC: point vectors
  * IDT_IO_INTS + 7 and IDT_IO_INTS + 15 at the 'spuriousint' entry point,
  * as kernel-only interrupt gates on the kernel code selector.
  */
 static void
 i386_setidt3(void)
 {
         const int kcode_sel = GSEL(GCODE_SEL, SEL_KPL);

         setidt(IDT_IO_INTS + 7, &IDTVEC(spuriousint), SDT_SYS386IGT,
             SEL_KPL, kcode_sel);
         setidt(IDT_IO_INTS + 15, &IDTVEC(spuriousint), SDT_SYS386IGT,
             SEL_KPL, kcode_sel);
 }
 #endif
 2306 
/*
 * Early machine-dependent initialization of the boot processor.
 *
 * In order, this routine: wires thread0/proc0 together, picks up the
 * loader metadata and static environment from bootinfo, optionally loads
 * a microcode update, builds and loads the boot GDT (gdt0), initializes
 * the per-CPU and dynamic per-CPU areas, installs the initial IDT,
 * starts the clock, finishes CPU identification/initialization, loads
 * the task register, resets or starts the PIC, sizes memory via
 * getmemsize(), brings up the console/kdb, the message buffer and the
 * FPU, and finally sets up thread0's PCB.
 *
 * first:  used as a bump pointer for early allocations; it is advanced
 *         past the microcode image (if one was loaded) and past the
 *         DPCPU area before being handed to getmemsize().
 *         NOTE(review): presumably the first free address after the
 *         loaded kernel -- confirm against the caller in locore.
 *
 * Returns the address of thread0's PCB, which locore uses as the
 * location of the kernel stack.
 */
register_t
init386(int first)
{
        struct region_descriptor r_gdt, r_idt;  /* table descriptors */
        int gsel_tss, metadata_missing, x, pa;
        struct pcpu *pc;
        struct xstate_hdr *xhdr;
        caddr_t kmdp;
        vm_offset_t addend;
        size_t ucode_len;
        int late_console;

        thread0.td_kstack = proc0kstack;
        thread0.td_kstack_pages = TD0_KSTACK_PAGES;

        /*
         * This may be done better later if it gets more high level
         * components in it. If so just link td->td_proc here.
         */
        proc_linkup0(&proc0, &thread0);

        /*
         * Locate the loader-provided module metadata.  Pointers below
         * KERNBASE are adjusted by PMAP_MAP_LOW before use.  The absence
         * of metadata is only reported later, once printf() works.
         */
        if (bootinfo.bi_modulep) {
                metadata_missing = 0;
                addend = (vm_paddr_t)bootinfo.bi_modulep < KERNBASE ?
                    PMAP_MAP_LOW : 0;
                preload_metadata = (caddr_t)bootinfo.bi_modulep + addend;
                preload_bootstrap_relocate(addend);
        } else {
                metadata_missing = 1;
        }

        /* Same address adjustment for the static kernel environment. */
        if (bootinfo.bi_envp != 0) {
                addend = (vm_paddr_t)bootinfo.bi_envp < KERNBASE ?
                    PMAP_MAP_LOW : 0;
                init_static_kenv((char *)bootinfo.bi_envp + addend, 0);
        } else {
                init_static_kenv(NULL, 0);
        }

        /*
         * Re-evaluate CPU features if we loaded a microcode update.
         * "first" is moved past the update, rounded up to a page.
         */
        ucode_len = ucode_load_bsp(first);
        if (ucode_len != 0) {
                identify_cpu();
                first = roundup2(first + ucode_len, PAGE_SIZE);
        }

        identify_hypervisor();

        /* Init basic tunables, hz etc */
        init_param1();

        /*
         * Make gdt memory segments.  All segments cover the full 4GB
         * of address space and permissions are enforced at page level.
         */
        gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1);
        gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1);
        gdt_segs[GUCODE_SEL].ssd_limit = atop(0 - 1);
        gdt_segs[GUDATA_SEL].ssd_limit = atop(0 - 1);
        gdt_segs[GUFS_SEL].ssd_limit = atop(0 - 1);
        gdt_segs[GUGS_SEL].ssd_limit = atop(0 - 1);

        /*
         * The GPRIV_SEL segment is based at CPU 0's pcpu structure and
         * the GPROC0_SEL segment at the boot TSS.
         */
        pc = &__pcpu[0];
        gdt_segs[GPRIV_SEL].ssd_limit = atop(0 - 1);
        gdt_segs[GPRIV_SEL].ssd_base = (int)pc;
        gdt_segs[GPROC0_SEL].ssd_base = (int)&common_tss0;

        /* Convert the soft descriptors into hardware format in gdt0. */
        for (x = 0; x < NGDT; x++)
                ssdtosd(&gdt_segs[x], &gdt0[x].sd);

        r_gdt.rd_limit = NGDT * sizeof(gdt0[0]) - 1;
        r_gdt.rd_base =  (int)gdt0;
        mtx_init(&dt_lock, "descriptor tables", NULL, MTX_SPIN);
        lgdt(&r_gdt);

        /*
         * Set up the per-CPU data.  The DPCPU area is carved out at
         * "first", entered into the kernel pmap page by page with
         * identical pa and va arguments, and "first" is advanced past it.
         */
        pcpu_init(pc, 0, sizeof(struct pcpu));
        for (pa = first; pa < first + DPCPU_SIZE; pa += PAGE_SIZE)
                pmap_kenter(pa, pa);
        dpcpu_init((void *)first, 0);
        first += DPCPU_SIZE;
        PCPU_SET(prvspace, pc);
        PCPU_SET(curthread, &thread0);
        /* Non-late cninit() and printf() can be moved up to here. */

        /*
         * Initialize mutexes.
         *
         * icu_lock: in order to allow an interrupt to occur in a critical
         *           section, to set pcpu->ipending (etc...) properly, we
         *           must be able to get the icu lock, so it can't be
         *           under witness.
         */
        mutex_init();
        mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS | MTX_NOPROFILE);

        /* Fill in and load the initial IDT. */
        i386_setidt1();

        r_idt.rd_limit = sizeof(idt0) - 1;
        r_idt.rd_base = (int) idt;
        lidt(&r_idt);

        /*
         * Initialize the clock before the console so that console
         * initialization can use DELAY().
         */
        clock_init();

        finishidentcpu();       /* Final stage of CPU initialization */
        i386_setidt2();
        initializecpu();        /* Initialize CPU registers */
        initializecpucache();

        /* pointer to selector slot for %fs/%gs */
        PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);

        /* Initialize the tss (except for the final esp0) early for vm86. */
        common_tss0.tss_esp0 = thread0.td_kstack + thread0.td_kstack_pages *
            PAGE_SIZE - VM86_STACK_SPACE;
        common_tss0.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
        common_tss0.tss_ioopt = sizeof(struct i386tss) << 16;
        gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
        PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd);
        PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
        ltr(gsel_tss);

        /* Initialize the PIC early for vm86 calls. */
#ifdef DEV_ISA
#ifdef DEV_ATPIC
        elcr_probe();
        atpic_startup();
#else
        /* Reset and mask the atpics and leave them shut down. */
        atpic_reset();

        /*
         * Point the ICU spurious interrupt vectors at the APIC spurious
         * interrupt handler.
         */
        i386_setidt3();
#endif
#endif

        /*
         * The console and kdb should be initialized even earlier than here,
         * but some console drivers don't work until after getmemsize().
         * Default to late console initialization to support these drivers.
         * This loses mainly printf()s in getmemsize() and early debugging.
         */
        late_console = 1;
        TUNABLE_INT_FETCH("debug.late_console", &late_console);
        if (!late_console) {
                cninit();
                i386_kdb_init();
        }

        kmdp = preload_search_by_type("elf kernel");
        link_elf_ireloc(kmdp);

        vm86_initialize();
        getmemsize(first);
        init_param2(physmem);

        /*
         * Now running on new page tables, configured, and u/iom is
         * accessible.
         */

        if (late_console)
                cninit();

        /* Deferred until here so that the warning reaches the console. */
        if (metadata_missing)
                printf("WARNING: loader(8) metadata is missing!\n");

        if (late_console)
                i386_kdb_init();

        msgbufinit(msgbufp, msgbufsize);
        npxinit(true);
        /*
         * Set up thread0 pcb after npxinit calculated pcb + fpu save
         * area size.  Zero out the extended state header in fpu save
         * area.
         */
        thread0.td_pcb = get_pcb_td(&thread0);
        thread0.td_pcb->pcb_save = get_pcb_user_save_td(&thread0);
        bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size);
        if (use_xsave) {
                /* The xstate header directly follows the legacy save area. */
                xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) +
                    1);
                xhdr->xstate_bv = xsave_mask;
        }
        PCPU_SET(curpcb, thread0.td_pcb);
        /* Move esp0 in the tss to its final place. */
        /*
         * Note: the VM86_STACK_SPACE bytes subtracted here are so we can
         * grow the trapframe if we came from vm86.
         */
        common_tss0.tss_esp0 = (vm_offset_t)thread0.td_pcb - VM86_STACK_SPACE;
        PCPU_SET(kesp0, common_tss0.tss_esp0);
        /* The descriptor type must be reset before the TSS is loaded again. */
        gdt[GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;     /* clear busy bit */
        ltr(gsel_tss);

        /* transfer to user mode */

        _ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
        _udatasel = GSEL(GUDATA_SEL, SEL_UPL);

        /* setup proc 0's pcb */
        thread0.td_pcb->pcb_flags = 0;
#if defined(PAE) || defined(PAE_TABLES)
        thread0.td_pcb->pcb_cr3 = (int)IdlePDPT;
#else
        thread0.td_pcb->pcb_cr3 = (int)IdlePTD;
#endif
        thread0.td_pcb->pcb_ext = 0;
        thread0.td_frame = &proc0_tf;

        cpu_probe_amdc1e();

#ifdef FDT
        x86_init_fdt();
#endif

        /* Location of kernel stack for locore */
        return ((register_t)thread0.td_pcb);
}
 2529 
/*
 * Move the descriptor tables and the exception entry code into memory
 * obtained from pmap_trm_alloc(), and switch the CPU over to the copies.
 *
 * In order: the GDT (seeded from gdt0), the TSS (seeded from
 * common_tss0), a copy of the code between start_exceptions and
 * end_exceptions together with a trampoline stack, the IDT (seeded from
 * idt0 and then fixed up for the relocated handlers), the double-fault
 * TSS and its stack, the default LDT, and the per-CPU copyout buffer.
 *
 * Runs as a SYSINIT at SI_SUB_VM / SI_ORDER_SECOND.
 *
 * NOTE(review): every allocation uses M_NOWAIT and none of the results
 * is checked before use -- confirm that pmap_trm_alloc() cannot fail at
 * this point of the boot.
 */
static void
machdep_init_trampoline(void)
{
        struct region_descriptor r_gdt, r_idt;
        struct i386tss *tss;
        char *copyout_buf, *trampoline, *tramp_stack_base;
        int x;

        /*
         * New GDT: room for NGDT descriptors per CPU (mp_ncpus), with
         * the first NGDT entries copied from the boot-time gdt0.
         */
        gdt = pmap_trm_alloc(sizeof(union descriptor) * NGDT * mp_ncpus,
            M_NOWAIT | M_ZERO);
        bcopy(gdt0, gdt, sizeof(union descriptor) * NGDT);
        r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
        r_gdt.rd_base = (int)gdt;
        lgdt(&r_gdt);

        /*
         * New TSS array, one entry per CPU; entry 0 starts out as a copy
         * of common_tss0.  The GPROC0_SEL descriptor is re-based onto it
         * (the base is split across sd_lobase and sd_hibase) and its type
         * is reset to SDT_SYS386TSS before the task register is reloaded.
         */
        tss = pmap_trm_alloc(sizeof(struct i386tss) * mp_ncpus,
            M_NOWAIT | M_ZERO);
        bcopy(&common_tss0, tss, sizeof(struct i386tss));
        gdt[GPROC0_SEL].sd.sd_lobase = (int)tss;
        gdt[GPROC0_SEL].sd.sd_hibase = (u_int)tss >> 24;
        gdt[GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;

        /* Refresh the per-CPU pointers that referred to the old tables. */
        PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);
        PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd);
        PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
        PCPU_SET(common_tssp, tss);
        ltr(GSEL(GPROC0_SEL, SEL_KPL));

        /*
         * Copy the code between start_exceptions and end_exceptions into
         * the trampoline area and set up the trampoline stack; esp0 of
         * the first TSS points VM86_STACK_SPACE below the top of that
         * stack.
         */
        trampoline = pmap_trm_alloc(end_exceptions - start_exceptions,
            M_NOWAIT);
        bcopy(start_exceptions, trampoline, end_exceptions - start_exceptions);
        tramp_stack_base = pmap_trm_alloc(TRAMP_STACK_SZ, M_NOWAIT);
        PCPU_SET(trampstk, (uintptr_t)tramp_stack_base + TRAMP_STACK_SZ -
            VM86_STACK_SPACE);
        tss[0].tss_esp0 = PCPU_GET(trampstk);

        idt = pmap_trm_alloc(sizeof(idt0), M_NOWAIT | M_ZERO);
        bcopy(idt0, idt, sizeof(idt0));

        /* Re-initialize new IDT since the handlers were relocated */
        setidt_disp = trampoline - start_exceptions;
        fixup_idt();

        r_idt.rd_limit = sizeof(struct gate_descriptor) * NIDT - 1;
        r_idt.rd_base = (int)idt;
        lidt(&r_idt);

        /* dblfault TSS */
        dblfault_tss = pmap_trm_alloc(sizeof(struct i386tss), M_NOWAIT | M_ZERO);
        dblfault_stack = pmap_trm_alloc(PAGE_SIZE, M_NOWAIT);
        /* All stack pointers start at the top of the one-page stack. */
        dblfault_tss->tss_esp = dblfault_tss->tss_esp0 =
            dblfault_tss->tss_esp1 = dblfault_tss->tss_esp2 =
            (int)dblfault_stack + PAGE_SIZE;
        dblfault_tss->tss_ss = dblfault_tss->tss_ss0 = dblfault_tss->tss_ss1 =
            dblfault_tss->tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
#if defined(PAE) || defined(PAE_TABLES)
        dblfault_tss->tss_cr3 = (int)IdlePDPT;
#else
        dblfault_tss->tss_cr3 = (int)IdlePTD;
#endif
        dblfault_tss->tss_eip = (int)dblfault_handler;
        dblfault_tss->tss_eflags = PSL_KERNEL;
        dblfault_tss->tss_ds = dblfault_tss->tss_es =
            dblfault_tss->tss_gs = GSEL(GDATA_SEL, SEL_KPL);
        dblfault_tss->tss_fs = GSEL(GPRIV_SEL, SEL_KPL);
        dblfault_tss->tss_cs = GSEL(GCODE_SEL, SEL_KPL);
        dblfault_tss->tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
        /* Re-base the GPANIC_SEL descriptor onto the double-fault TSS. */
        gdt[GPANIC_SEL].sd.sd_lobase = (int)dblfault_tss;
        gdt[GPANIC_SEL].sd.sd_hibase = (u_int)dblfault_tss >> 24;

        /* make ldt memory segments */
        ldt = pmap_trm_alloc(sizeof(union descriptor) * NLDT,
            M_NOWAIT | M_ZERO);
        gdt[GLDT_SEL].sd.sd_lobase = (int)ldt;
        gdt[GLDT_SEL].sd.sd_hibase = (u_int)ldt >> 24;
        ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1);
        ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1);
        for (x = 0; x < nitems(ldt_segs); x++)
                ssdtosd(&ldt_segs[x], &ldt[x].sd);

        _default_ldt = GSEL(GLDT_SEL, SEL_KPL);
        lldt(_default_ldt);
        PCPU_SET(currentldt, _default_ldt);

        /* Per-CPU bounce buffer, then let the copyout code finish setup. */
        copyout_buf = pmap_trm_alloc(TRAMP_COPYOUT_SZ, M_NOWAIT);
        PCPU_SET(copyout_buf, copyout_buf);
        copyout_init_tramp();
}
SYSINIT(vm_mem, SI_SUB_VM, SI_ORDER_SECOND, machdep_init_trampoline, NULL);
 2619 
 2620 #ifdef COMPAT_43
 2621 static void
 2622 i386_setup_lcall_gate(void)
 2623 {
 2624         struct sysentvec *sv;
 2625         struct user_segment_descriptor desc;
 2626         u_int lcall_addr;
 2627 
 2628         sv = &elf32_freebsd_sysvec;
 2629         lcall_addr = (uintptr_t)sv->sv_psstrings - sz_lcall_tramp;
 2630 
 2631         bzero(&desc, sizeof(desc));
 2632         desc.sd_type = SDT_MEMERA;
 2633         desc.sd_dpl = SEL_UPL;
 2634         desc.sd_p = 1;
 2635         desc.sd_def32 = 1;
 2636         desc.sd_gran = 1;
 2637         desc.sd_lolimit = 0xffff;
 2638         desc.sd_hilimit = 0xf;
 2639         desc.sd_lobase = lcall_addr;
 2640         desc.sd_hibase = lcall_addr >> 24;
 2641         bcopy(&desc, &ldt[LSYS5CALLS_SEL], sizeof(desc));
 2642 }
 2643 SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_ANY, i386_setup_lcall_gate, NULL);
 2644 #endif
 2645 
 2646 void
 2647 cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
 2648 {
 2649 
 2650         pcpu->pc_acpi_id = 0xffffffff;
 2651 }
 2652 
 2653 static int
 2654 smap_sysctl_handler(SYSCTL_HANDLER_ARGS)
 2655 {
 2656         struct bios_smap *smapbase;
 2657         struct bios_smap_xattr smap;
 2658         caddr_t kmdp;
 2659         uint32_t *smapattr;
 2660         int count, error, i;
 2661 
 2662         /* Retrieve the system memory map from the loader. */
 2663         kmdp = preload_search_by_type("elf kernel");
 2664         if (kmdp == NULL)
 2665                 kmdp = preload_search_by_type("elf32 kernel");
 2666         smapbase = (struct bios_smap *)preload_search_info(kmdp,
 2667             MODINFO_METADATA | MODINFOMD_SMAP);
 2668         if (smapbase == NULL)
 2669                 return (0);
 2670         smapattr = (uint32_t *)preload_search_info(kmdp,
 2671             MODINFO_METADATA | MODINFOMD_SMAP_XATTR);
 2672         count = *((u_int32_t *)smapbase - 1) / sizeof(*smapbase);
 2673         error = 0;
 2674         for (i = 0; i < count; i++) {
 2675                 smap.base = smapbase[i].base;
 2676                 smap.length = smapbase[i].length;
 2677                 smap.type = smapbase[i].type;
 2678                 if (smapattr != NULL)
 2679                         smap.xattr = smapattr[i];
 2680                 else
 2681                         smap.xattr = 0;
 2682                 error = SYSCTL_OUT(req, &smap, sizeof(smap));
 2683         }
 2684         return (error);
 2685 }
 2686 SYSCTL_PROC(_machdep, OID_AUTO, smap, CTLTYPE_OPAQUE|CTLFLAG_RD, NULL, 0,
 2687     smap_sysctl_handler, "S,bios_smap_xattr", "Raw BIOS SMAP data");
 2688 
 2689 void
 2690 spinlock_enter(void)
 2691 {
 2692         struct thread *td;
 2693         register_t flags;
 2694 
 2695         td = curthread;
 2696         if (td->td_md.md_spinlock_count == 0) {
 2697                 flags = intr_disable();
 2698                 td->td_md.md_spinlock_count = 1;
 2699                 td->td_md.md_saved_flags = flags;
 2700         } else
 2701                 td->td_md.md_spinlock_count++;
 2702         critical_enter();
 2703 }
 2704 
 2705 void
 2706 spinlock_exit(void)
 2707 {
 2708         struct thread *td;
 2709         register_t flags;
 2710 
 2711         td = curthread;
 2712         critical_exit();
 2713         flags = td->td_md.md_saved_flags;
 2714         td->td_md.md_spinlock_count--;
 2715         if (td->td_md.md_spinlock_count == 0)
 2716                 intr_restore(flags);
 2717 }
 2718 
 2719 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
 2720 static void f00f_hack(void *unused);
 2721 SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL);
 2722 
 2723 static void
 2724 f00f_hack(void *unused)
 2725 {
 2726         struct region_descriptor r_idt;
 2727         struct gate_descriptor *new_idt;
 2728         vm_offset_t tmp;
 2729 
 2730         if (!has_f00f_bug)
 2731                 return;
 2732 
 2733         GIANT_REQUIRED;
 2734 
 2735         printf("Intel Pentium detected, installing workaround for F00F bug\n");
 2736 
 2737         tmp = (vm_offset_t)pmap_trm_alloc(PAGE_SIZE * 3, M_NOWAIT | M_ZERO);
 2738         if (tmp == 0)
 2739                 panic("kmem_malloc returned 0");
 2740         tmp = round_page(tmp);
 2741 
 2742         /* Put the problematic entry (#6) at the end of the lower page. */
 2743         new_idt = (struct gate_descriptor *)
 2744             (tmp + PAGE_SIZE - 7 * sizeof(struct gate_descriptor));
 2745         bcopy(idt, new_idt, sizeof(idt0));
 2746         r_idt.rd_base = (u_int)new_idt;
 2747         r_idt.rd_limit = sizeof(idt0) - 1;
 2748         lidt(&r_idt);
 2749         /* SMP machines do not need the F00F hack. */
 2750         idt = new_idt;
 2751         pmap_protect(kernel_pmap, tmp, tmp + PAGE_SIZE, VM_PROT_READ);
 2752 }
 2753 #endif /* defined(I586_CPU) && !NO_F00F_HACK */
 2754 
 2755 /*
 2756  * Construct a PCB from a trapframe. This is called from kdb_trap() where
 2757  * we want to start a backtrace from the function that caused us to enter
 2758  * the debugger. We have the context in the trapframe, but base the trace
 2759  * on the PCB. The PCB doesn't have to be perfect, as long as it contains
 2760  * enough for a backtrace.
 2761  */
 2762 void
 2763 makectx(struct trapframe *tf, struct pcb *pcb)
 2764 {
 2765 
 2766         pcb->pcb_edi = tf->tf_edi;
 2767         pcb->pcb_esi = tf->tf_esi;
 2768         pcb->pcb_ebp = tf->tf_ebp;
 2769         pcb->pcb_ebx = tf->tf_ebx;
 2770         pcb->pcb_eip = tf->tf_eip;
 2771         pcb->pcb_esp = (ISPL(tf->tf_cs)) ? tf->tf_esp : (int)(tf + 1) - 8;
 2772         pcb->pcb_gs = rgs();
 2773 }
 2774 
 2775 int
 2776 ptrace_set_pc(struct thread *td, u_long addr)
 2777 {
 2778 
 2779         td->td_frame->tf_eip = addr;
 2780         return (0);
 2781 }
 2782 
 2783 int
 2784 ptrace_single_step(struct thread *td)
 2785 {
 2786 
 2787         PROC_LOCK_ASSERT(td->td_proc, MA_OWNED);
 2788         if ((td->td_frame->tf_eflags & PSL_T) == 0) {
 2789                 td->td_frame->tf_eflags |= PSL_T;
 2790                 td->td_dbgflags |= TDB_STEP;
 2791         }
 2792         return (0);
 2793 }
 2794 
 2795 int
 2796 ptrace_clear_single_step(struct thread *td)
 2797 {
 2798 
 2799         PROC_LOCK_ASSERT(td->td_proc, MA_OWNED);
 2800         td->td_frame->tf_eflags &= ~PSL_T;
 2801         td->td_dbgflags &= ~TDB_STEP;
 2802         return (0);
 2803 }
 2804 
 2805 int
 2806 fill_regs(struct thread *td, struct reg *regs)
 2807 {
 2808         struct pcb *pcb;
 2809         struct trapframe *tp;
 2810 
 2811         tp = td->td_frame;
 2812         pcb = td->td_pcb;
 2813         regs->r_gs = pcb->pcb_gs;
 2814         return (fill_frame_regs(tp, regs));
 2815 }
 2816 
 2817 int
 2818 fill_frame_regs(struct trapframe *tp, struct reg *regs)
 2819 {
 2820 
 2821         regs->r_fs = tp->tf_fs;
 2822         regs->r_es = tp->tf_es;
 2823         regs->r_ds = tp->tf_ds;
 2824         regs->r_edi = tp->tf_edi;
 2825         regs->r_esi = tp->tf_esi;
 2826         regs->r_ebp = tp->tf_ebp;
 2827         regs->r_ebx = tp->tf_ebx;
 2828         regs->r_edx = tp->tf_edx;
 2829         regs->r_ecx = tp->tf_ecx;
 2830         regs->r_eax = tp->tf_eax;
 2831         regs->r_eip = tp->tf_eip;
 2832         regs->r_cs = tp->tf_cs;
 2833         regs->r_eflags = tp->tf_eflags;
 2834         regs->r_esp = tp->tf_esp;
 2835         regs->r_ss = tp->tf_ss;
 2836         regs->r_err = 0;
 2837         regs->r_trapno = 0;
 2838         return (0);
 2839 }
 2840 
 2841 int
 2842 set_regs(struct thread *td, struct reg *regs)
 2843 {
 2844         struct pcb *pcb;
 2845         struct trapframe *tp;
 2846 
 2847         tp = td->td_frame;
 2848         if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) ||
 2849             !CS_SECURE(regs->r_cs))
 2850                 return (EINVAL);
 2851         pcb = td->td_pcb;
 2852         tp->tf_fs = regs->r_fs;
 2853         tp->tf_es = regs->r_es;
 2854         tp->tf_ds = regs->r_ds;
 2855         tp->tf_edi = regs->r_edi;
 2856         tp->tf_esi = regs->r_esi;
 2857         tp->tf_ebp = regs->r_ebp;
 2858         tp->tf_ebx = regs->r_ebx;
 2859         tp->tf_edx = regs->r_edx;
 2860         tp->tf_ecx = regs->r_ecx;
 2861         tp->tf_eax = regs->r_eax;
 2862         tp->tf_eip = regs->r_eip;
 2863         tp->tf_cs = regs->r_cs;
 2864         tp->tf_eflags = regs->r_eflags;
 2865         tp->tf_esp = regs->r_esp;
 2866         tp->tf_ss = regs->r_ss;
 2867         pcb->pcb_gs = regs->r_gs;
 2868         return (0);
 2869 }
 2870 
 2871 int
 2872 fill_fpregs(struct thread *td, struct fpreg *fpregs)
 2873 {
 2874 
 2875         KASSERT(td == curthread || TD_IS_SUSPENDED(td) ||
 2876             P_SHOULDSTOP(td->td_proc),
 2877             ("not suspended thread %p", td));
 2878         npxgetregs(td);
 2879         if (cpu_fxsr)
 2880                 npx_fill_fpregs_xmm(&get_pcb_user_save_td(td)->sv_xmm,
 2881                     (struct save87 *)fpregs);
 2882         else
 2883                 bcopy(&get_pcb_user_save_td(td)->sv_87, fpregs,
 2884                     sizeof(*fpregs));
 2885         return (0);
 2886 }
 2887 
 2888 int
 2889 set_fpregs(struct thread *td, struct fpreg *fpregs)
 2890 {
 2891 
 2892         critical_enter();
 2893         if (cpu_fxsr)
 2894                 npx_set_fpregs_xmm((struct save87 *)fpregs,
 2895                     &get_pcb_user_save_td(td)->sv_xmm);
 2896         else
 2897                 bcopy(fpregs, &get_pcb_user_save_td(td)->sv_87,
 2898                     sizeof(*fpregs));
 2899         npxuserinited(td);
 2900         critical_exit();
 2901         return (0);
 2902 }
 2903 
 2904 /*
 2905  * Get machine context.
 2906  */
 2907 int
 2908 get_mcontext(struct thread *td, mcontext_t *mcp, int flags)
 2909 {
 2910         struct trapframe *tp;
 2911         struct segment_descriptor *sdp;
 2912 
 2913         tp = td->td_frame;
 2914 
 2915         PROC_LOCK(curthread->td_proc);
 2916         mcp->mc_onstack = sigonstack(tp->tf_esp);
 2917         PROC_UNLOCK(curthread->td_proc);
 2918         mcp->mc_gs = td->td_pcb->pcb_gs;
 2919         mcp->mc_fs = tp->tf_fs;
 2920         mcp->mc_es = tp->tf_es;
 2921         mcp->mc_ds = tp->tf_ds;
 2922         mcp->mc_edi = tp->tf_edi;
 2923         mcp->mc_esi = tp->tf_esi;
 2924         mcp->mc_ebp = tp->tf_ebp;
 2925         mcp->mc_isp = tp->tf_isp;
 2926         mcp->mc_eflags = tp->tf_eflags;
 2927         if (flags & GET_MC_CLEAR_RET) {
 2928                 mcp->mc_eax = 0;
 2929                 mcp->mc_edx = 0;
 2930                 mcp->mc_eflags &= ~PSL_C;
 2931         } else {
 2932                 mcp->mc_eax = tp->tf_eax;
 2933                 mcp->mc_edx = tp->tf_edx;
 2934         }
 2935         mcp->mc_ebx = tp->tf_ebx;
 2936         mcp->mc_ecx = tp->tf_ecx;
 2937         mcp->mc_eip = tp->tf_eip;
 2938         mcp->mc_cs = tp->tf_cs;
 2939         mcp->mc_esp = tp->tf_esp;
 2940         mcp->mc_ss = tp->tf_ss;
 2941         mcp->mc_len = sizeof(*mcp);
 2942         get_fpcontext(td, mcp, NULL, 0);
 2943         sdp = &td->td_pcb->pcb_fsd;
 2944         mcp->mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase;
 2945         sdp = &td->td_pcb->pcb_gsd;
 2946         mcp->mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase;
 2947         mcp->mc_flags = 0;
 2948         mcp->mc_xfpustate = 0;
 2949         mcp->mc_xfpustate_len = 0;
 2950         bzero(mcp->mc_spare2, sizeof(mcp->mc_spare2));
 2951         return (0);
 2952 }
 2953 
 2954 /*
 2955  * Set machine context.
 2956  *
 2957  * However, we don't set any but the user modifiable flags, and we won't
 2958  * touch the cs selector.
 2959  */
 2960 int
 2961 set_mcontext(struct thread *td, mcontext_t *mcp)
 2962 {
 2963         struct trapframe *tp;
 2964         char *xfpustate;
 2965         int eflags, ret;
 2966 
 2967         tp = td->td_frame;
 2968         if (mcp->mc_len != sizeof(*mcp) ||
 2969             (mcp->mc_flags & ~_MC_FLAG_MASK) != 0)
 2970                 return (EINVAL);
 2971         eflags = (mcp->mc_eflags & PSL_USERCHANGE) |
 2972             (tp->tf_eflags & ~PSL_USERCHANGE);
 2973         if (mcp->mc_flags & _MC_HASFPXSTATE) {
 2974                 if (mcp->mc_xfpustate_len > cpu_max_ext_state_size -
 2975                     sizeof(union savefpu))
 2976                         return (EINVAL);
 2977                 xfpustate = __builtin_alloca(mcp->mc_xfpustate_len);
 2978                 ret = copyin((void *)mcp->mc_xfpustate, xfpustate,
 2979                     mcp->mc_xfpustate_len);
 2980                 if (ret != 0)
 2981                         return (ret);
 2982         } else
 2983                 xfpustate = NULL;
 2984         ret = set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len);
 2985         if (ret != 0)
 2986                 return (ret);
 2987         tp->tf_fs = mcp->mc_fs;
 2988         tp->tf_es = mcp->mc_es;
 2989         tp->tf_ds = mcp->mc_ds;
 2990         tp->tf_edi = mcp->mc_edi;
 2991         tp->tf_esi = mcp->mc_esi;
 2992         tp->tf_ebp = mcp->mc_ebp;
 2993         tp->tf_ebx = mcp->mc_ebx;
 2994         tp->tf_edx = mcp->mc_edx;
 2995         tp->tf_ecx = mcp->mc_ecx;
 2996         tp->tf_eax = mcp->mc_eax;
 2997         tp->tf_eip = mcp->mc_eip;
 2998         tp->tf_eflags = eflags;
 2999         tp->tf_esp = mcp->mc_esp;
 3000         tp->tf_ss = mcp->mc_ss;
 3001         td->td_pcb->pcb_gs = mcp->mc_gs;
 3002         return (0);
 3003 }
 3004 
 3005 static void
 3006 get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave,
 3007     size_t xfpusave_len)
 3008 {
 3009         size_t max_len, len;
 3010 
 3011         mcp->mc_ownedfp = npxgetregs(td);
 3012         bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0],
 3013             sizeof(mcp->mc_fpstate));
 3014         mcp->mc_fpformat = npxformat();
 3015         if (!use_xsave || xfpusave_len == 0)
 3016                 return;
 3017         max_len = cpu_max_ext_state_size - sizeof(union savefpu);
 3018         len = xfpusave_len;
 3019         if (len > max_len) {
 3020                 len = max_len;
 3021                 bzero(xfpusave + max_len, len - max_len);
 3022         }
 3023         mcp->mc_flags |= _MC_HASFPXSTATE;
 3024         mcp->mc_xfpustate_len = len;
 3025         bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len);
 3026 }
 3027 
 3028 static int
 3029 set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate,
 3030     size_t xfpustate_len)
 3031 {
 3032         int error;
 3033 
 3034         if (mcp->mc_fpformat == _MC_FPFMT_NODEV)
 3035                 return (0);
 3036         else if (mcp->mc_fpformat != _MC_FPFMT_387 &&
 3037             mcp->mc_fpformat != _MC_FPFMT_XMM)
 3038                 return (EINVAL);
 3039         else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) {
 3040                 /* We don't care what state is left in the FPU or PCB. */
 3041                 fpstate_drop(td);
 3042                 error = 0;
 3043         } else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU ||
 3044             mcp->mc_ownedfp == _MC_FPOWNED_PCB) {
 3045                 error = npxsetregs(td, (union savefpu *)&mcp->mc_fpstate,
 3046                     xfpustate, xfpustate_len);
 3047         } else
 3048                 return (EINVAL);
 3049         return (error);
 3050 }
 3051 
 3052 static void
 3053 fpstate_drop(struct thread *td)
 3054 {
 3055 
 3056         KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu"));
 3057         critical_enter();
 3058         if (PCPU_GET(fpcurthread) == td)
 3059                 npxdrop();
 3060         /*
 3061          * XXX force a full drop of the npx.  The above only drops it if we
 3062          * owned it.  npxgetregs() has the same bug in the !cpu_fxsr case.
 3063          *
 3064          * XXX I don't much like npxgetregs()'s semantics of doing a full
 3065          * drop.  Dropping only to the pcb matches fnsave's behaviour.
 3066          * We only need to drop to !PCB_INITDONE in sendsig().  But
 3067          * sendsig() is the only caller of npxgetregs()... perhaps we just
 3068          * have too many layers.
 3069          */
 3070         curthread->td_pcb->pcb_flags &= ~(PCB_NPXINITDONE |
 3071             PCB_NPXUSERINITDONE);
 3072         critical_exit();
 3073 }
 3074 
 3075 int
 3076 fill_dbregs(struct thread *td, struct dbreg *dbregs)
 3077 {
 3078         struct pcb *pcb;
 3079 
 3080         if (td == NULL) {
 3081                 dbregs->dr[0] = rdr0();
 3082                 dbregs->dr[1] = rdr1();
 3083                 dbregs->dr[2] = rdr2();
 3084                 dbregs->dr[3] = rdr3();
 3085                 dbregs->dr[6] = rdr6();
 3086                 dbregs->dr[7] = rdr7();
 3087         } else {
 3088                 pcb = td->td_pcb;
 3089                 dbregs->dr[0] = pcb->pcb_dr0;
 3090                 dbregs->dr[1] = pcb->pcb_dr1;
 3091                 dbregs->dr[2] = pcb->pcb_dr2;
 3092                 dbregs->dr[3] = pcb->pcb_dr3;
 3093                 dbregs->dr[6] = pcb->pcb_dr6;
 3094                 dbregs->dr[7] = pcb->pcb_dr7;
 3095         }
 3096         dbregs->dr[4] = 0;
 3097         dbregs->dr[5] = 0;
 3098         return (0);
 3099 }
 3100 
 3101 int
 3102 set_dbregs(struct thread *td, struct dbreg *dbregs)
 3103 {
 3104         struct pcb *pcb;
 3105         int i;
 3106 
 3107         if (td == NULL) {
 3108                 load_dr0(dbregs->dr[0]);
 3109                 load_dr1(dbregs->dr[1]);
 3110                 load_dr2(dbregs->dr[2]);
 3111                 load_dr3(dbregs->dr[3]);
 3112                 load_dr6(dbregs->dr[6]);
 3113                 load_dr7(dbregs->dr[7]);
 3114         } else {
 3115                 /*
 3116                  * Don't let an illegal value for dr7 get set.  Specifically,
 3117                  * check for undefined settings.  Setting these bit patterns
 3118                  * result in undefined behaviour and can lead to an unexpected
 3119                  * TRCTRAP.
 3120                  */
 3121                 for (i = 0; i < 4; i++) {
 3122                         if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02)
 3123                                 return (EINVAL);
 3124                         if (DBREG_DR7_LEN(dbregs->dr[7], i) == 0x02)
 3125                                 return (EINVAL);
 3126                 }
 3127                 
 3128                 pcb = td->td_pcb;
 3129                 
 3130                 /*
 3131                  * Don't let a process set a breakpoint that is not within the
 3132                  * process's address space.  If a process could do this, it
 3133                  * could halt the system by setting a breakpoint in the kernel
 3134                  * (if ddb was enabled).  Thus, we need to check to make sure
 3135                  * that no breakpoints are being enabled for addresses outside
 3136                  * process's address space.
 3137                  *
 3138                  * XXX - what about when the watched area of the user's
 3139                  * address space is written into from within the kernel
 3140                  * ... wouldn't that still cause a breakpoint to be generated
 3141                  * from within kernel mode?
 3142                  */
 3143 
 3144                 if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) {
 3145                         /* dr0 is enabled */
 3146                         if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS)
 3147                                 return (EINVAL);
 3148                 }
 3149                         
 3150                 if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) {
 3151                         /* dr1 is enabled */
 3152                         if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS)
 3153                                 return (EINVAL);
 3154                 }
 3155                         
 3156                 if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) {
 3157                         /* dr2 is enabled */
 3158                         if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS)
 3159                                 return (EINVAL);
 3160                 }
 3161                         
 3162                 if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) {
 3163                         /* dr3 is enabled */
 3164                         if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS)
 3165                                 return (EINVAL);
 3166                 }
 3167 
 3168                 pcb->pcb_dr0 = dbregs->dr[0];
 3169                 pcb->pcb_dr1 = dbregs->dr[1];
 3170                 pcb->pcb_dr2 = dbregs->dr[2];
 3171                 pcb->pcb_dr3 = dbregs->dr[3];
 3172                 pcb->pcb_dr6 = dbregs->dr[6];
 3173                 pcb->pcb_dr7 = dbregs->dr[7];
 3174 
 3175                 pcb->pcb_flags |= PCB_DBREGS;
 3176         }
 3177 
 3178         return (0);
 3179 }
 3180 
 3181 /*
 3182  * Return > 0 if a hardware breakpoint has been hit, and the
 3183  * breakpoint was in user space.  Return 0, otherwise.
 3184  */
 3185 int
 3186 user_dbreg_trap(register_t dr6)
 3187 {
 3188         u_int32_t dr7;
 3189         u_int32_t bp;       /* breakpoint bits extracted from dr6 */
 3190         int nbp;            /* number of breakpoints that triggered */
 3191         caddr_t addr[4];    /* breakpoint addresses */
 3192         int i;
 3193 
 3194         bp = dr6 & DBREG_DR6_BMASK;
 3195         if (bp == 0) {
 3196                 /*
 3197                  * None of the breakpoint bits are set meaning this
 3198                  * trap was not caused by any of the debug registers
 3199                  */
 3200                 return 0;
 3201         }
 3202 
 3203         dr7 = rdr7();
 3204         if ((dr7 & 0x000000ff) == 0) {
 3205                 /*
 3206                  * all GE and LE bits in the dr7 register are zero,
 3207                  * thus the trap couldn't have been caused by the
 3208                  * hardware debug registers
 3209                  */
 3210                 return 0;
 3211         }
 3212 
 3213         nbp = 0;
 3214 
 3215         /*
 3216          * at least one of the breakpoints were hit, check to see
 3217          * which ones and if any of them are user space addresses
 3218          */
 3219 
 3220         if (bp & 0x01) {
 3221                 addr[nbp++] = (caddr_t)rdr0();
 3222         }
 3223         if (bp & 0x02) {
 3224                 addr[nbp++] = (caddr_t)rdr1();
 3225         }
 3226         if (bp & 0x04) {
 3227                 addr[nbp++] = (caddr_t)rdr2();
 3228         }
 3229         if (bp & 0x08) {
 3230                 addr[nbp++] = (caddr_t)rdr3();
 3231         }
 3232 
 3233         for (i = 0; i < nbp; i++) {
 3234                 if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) {
 3235                         /*
 3236                          * addr[i] is in user space
 3237                          */
 3238                         return nbp;
 3239                 }
 3240         }
 3241 
 3242         /*
 3243          * None of the breakpoints are in user space.
 3244          */
 3245         return 0;
 3246 }
 3247 
 3248 #ifdef KDB
 3249 
 3250 /*
 3251  * Provide inb() and outb() as functions.  They are normally only available as
 3252  * inline functions, thus cannot be called from the debugger.
 3253  */
 3254 
 3255 /* silence compiler warnings */
 3256 u_char inb_(u_short);
 3257 void outb_(u_short, u_char);
 3258 
 3259 u_char
 3260 inb_(u_short port)
 3261 {
 3262         return inb(port);
 3263 }
 3264 
 3265 void
 3266 outb_(u_short port, u_char data)
 3267 {
 3268         outb(port, data);
 3269 }
 3270 
 3271 #endif /* KDB */

Cache object: f930a2bab6592a5741dbad0d2ef15ff8


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.