The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/i386/i386/machdep.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1992 Terrence R. Lambert.
    3  * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
    4  * All rights reserved.
    5  *
    6  * This code is derived from software contributed to Berkeley by
    7  * William Jolitz.
    8  *
    9  * Redistribution and use in source and binary forms, with or without
   10  * modification, are permitted provided that the following conditions
   11  * are met:
   12  * 1. Redistributions of source code must retain the above copyright
   13  *    notice, this list of conditions and the following disclaimer.
   14  * 2. Redistributions in binary form must reproduce the above copyright
   15  *    notice, this list of conditions and the following disclaimer in the
   16  *    documentation and/or other materials provided with the distribution.
   17  * 3. All advertising materials mentioning features or use of this software
   18  *    must display the following acknowledgement:
   19  *      This product includes software developed by the University of
   20  *      California, Berkeley and its contributors.
   21  * 4. Neither the name of the University nor the names of its contributors
   22  *    may be used to endorse or promote products derived from this software
   23  *    without specific prior written permission.
   24  *
   25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   35  * SUCH DAMAGE.
   36  *
   37  *      from: @(#)machdep.c     7.4 (Berkeley) 6/3/91
   38  */
   39 
   40 #include <sys/cdefs.h>
   41 __FBSDID("$FreeBSD: src/sys/i386/i386/machdep.c,v 1.597.2.8 2006/02/07 03:10:38 davidxu Exp $");
   42 
   43 #include "opt_apic.h"
   44 #include "opt_atalk.h"
   45 #include "opt_compat.h"
   46 #include "opt_cpu.h"
   47 #include "opt_ddb.h"
   48 #include "opt_inet.h"
   49 #include "opt_ipx.h"
   50 #include "opt_isa.h"
   51 #include "opt_kstack_pages.h"
   52 #include "opt_maxmem.h"
   53 #include "opt_msgbuf.h"
   54 #include "opt_npx.h"
   55 #include "opt_perfmon.h"
   56 
   57 #include <sys/param.h>
   58 #include <sys/proc.h>
   59 #include <sys/systm.h>
   60 #include <sys/bio.h>
   61 #include <sys/buf.h>
   62 #include <sys/bus.h>
   63 #include <sys/callout.h>
   64 #include <sys/cons.h>
   65 #include <sys/cpu.h>
   66 #include <sys/eventhandler.h>
   67 #include <sys/exec.h>
   68 #include <sys/imgact.h>
   69 #include <sys/kdb.h>
   70 #include <sys/kernel.h>
   71 #include <sys/ktr.h>
   72 #include <sys/linker.h>
   73 #include <sys/lock.h>
   74 #include <sys/malloc.h>
   75 #include <sys/memrange.h>
   76 #include <sys/msgbuf.h>
   77 #include <sys/mutex.h>
   78 #include <sys/pcpu.h>
   79 #include <sys/ptrace.h>
   80 #include <sys/reboot.h>
   81 #include <sys/sched.h>
   82 #include <sys/signalvar.h>
   83 #include <sys/sysctl.h>
   84 #include <sys/sysent.h>
   85 #include <sys/sysproto.h>
   86 #include <sys/ucontext.h>
   87 #include <sys/vmmeter.h>
   88 
   89 #include <vm/vm.h>
   90 #include <vm/vm_extern.h>
   91 #include <vm/vm_kern.h>
   92 #include <vm/vm_page.h>
   93 #include <vm/vm_map.h>
   94 #include <vm/vm_object.h>
   95 #include <vm/vm_pager.h>
   96 #include <vm/vm_param.h>
   97 
   98 #ifdef DDB
   99 #ifndef KDB
  100 #error KDB must be enabled in order for DDB to work!
  101 #endif
  102 #include <ddb/ddb.h>
  103 #include <ddb/db_sym.h>
  104 #endif
  105 
  106 #include <isa/rtc.h>
  107 
  108 #include <net/netisr.h>
  109 
  110 #include <machine/bootinfo.h>
  111 #include <machine/clock.h>
  112 #include <machine/cpu.h>
  113 #include <machine/cputypes.h>
  114 #include <machine/intr_machdep.h>
  115 #include <machine/md_var.h>
  116 #include <machine/pc/bios.h>
  117 #include <machine/pcb.h>
  118 #include <machine/pcb_ext.h>
  119 #include <machine/proc.h>
  120 #include <machine/reg.h>
  121 #include <machine/sigframe.h>
  122 #include <machine/specialreg.h>
  123 #include <machine/vm86.h>
  124 #ifdef PERFMON
  125 #include <machine/perfmon.h>
  126 #endif
  127 #ifdef SMP
  128 #include <machine/privatespace.h>
  129 #include <machine/smp.h>
  130 #endif
  131 
  132 #ifdef DEV_ISA
  133 #include <i386/isa/icu.h>
  134 #endif
  135 
/* Sanity check for __curthread() */
CTASSERT(offsetof(struct pcpu, pc_curthread) == 0);

/* Machine-dependent entry points implemented outside this file. */
extern void init386(int first);
extern void dblfault_handler(void);

/* CPU identification/initialization helpers. */
extern void printcpuinfo(void); /* XXX header file */
extern void finishidentcpu(void);
extern void panicifcpuunsupported(void);
extern void initializecpu(void);

/*
 * CS_SECURE(cs): true iff code selector `cs' has user privilege
 * (ring 3), so it is safe to load on return to user mode.
 * EFL_SECURE(ef, oef): true iff `ef' differs from the old eflags
 * `oef' only in bits user mode may change (PSL_USERCHANGE).
 */
#define CS_SECURE(cs)		(ISPL(cs) == SEL_UPL)
#define EFL_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)

/* SSE defaults to enabled on I686 builds unless explicitly disabled. */
#if !defined(CPU_ENABLE_SSE) && defined(I686_CPU)
#define CPU_ENABLE_SSE
#endif
#if defined(CPU_DISABLE_SSE)
#undef CPU_ENABLE_SSE
#endif

static void cpu_startup(void *);
static void fpstate_drop(struct thread *td);
static void get_fpcontext(struct thread *td, mcontext_t *mcp);
static int  set_fpcontext(struct thread *td, const mcontext_t *mcp);
#ifdef CPU_ENABLE_SSE
static void set_fpregs_xmm(struct save87 *, struct savexmm *);
static void fill_fpregs_xmm(struct savexmm *, struct save87 *);
#endif /* CPU_ENABLE_SSE */
/* Run cpu_startup() first in the SI_SUB_CPU boot phase. */
SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)

#ifdef DDB
extern vm_offset_t ksym_start, ksym_end;
#endif

/* User-mode segment selectors; loaded into trapframes by sendsig() et al. */
int	_udatasel, _ucodesel;
u_int	basemem;

/* Nonzero during early boot; presumably cleared when boot completes -- see MI code. */
int cold = 1;

#ifdef COMPAT_43
static void osendsig(sig_t catcher, int sig, sigset_t *mask, u_long code);
#endif
#ifdef COMPAT_FREEBSD4
static void freebsd4_sendsig(sig_t catcher, int sig, sigset_t *mask,
    u_long code);
#endif

/* Memory sizes in pages (printed via ptoa() in cpu_startup()). */
long Maxmem = 0;
long realmem = 0;

/* Pairs of (start, end) physical addresses; a zero pair ends the list. */
vm_paddr_t phys_avail[10];

/* must be 2 less so 0 0 can signal end of chunks */
#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2)

struct kva_md_info kmi;

/* Initial trapframe for proc0. */
static struct trapframe proc0_tf;
#ifndef SMP
static struct pcpu __pcpu;
#endif

/* Protects interrupt controller accesses. */
struct mtx icu_lock;

struct mem_range_softc mem_range_softc;
  202 
  203 static void
  204 cpu_startup(dummy)
  205         void *dummy;
  206 {
  207         /*
  208          * Good {morning,afternoon,evening,night}.
  209          */
  210         startrtclock();
  211         printcpuinfo();
  212         panicifcpuunsupported();
  213 #ifdef PERFMON
  214         perfmon_init();
  215 #endif
  216         printf("real memory  = %ju (%ju MB)\n", ptoa((uintmax_t)Maxmem),
  217             ptoa((uintmax_t)Maxmem) / 1048576);
  218         realmem = Maxmem;
  219         /*
  220          * Display any holes after the first chunk of extended memory.
  221          */
  222         if (bootverbose) {
  223                 int indx;
  224 
  225                 printf("Physical memory chunk(s):\n");
  226                 for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
  227                         vm_paddr_t size;
  228 
  229                         size = phys_avail[indx + 1] - phys_avail[indx];
  230                         printf(
  231                             "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n",
  232                             (uintmax_t)phys_avail[indx],
  233                             (uintmax_t)phys_avail[indx + 1] - 1,
  234                             (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
  235                 }
  236         }
  237 
  238         vm_ksubmap_init(&kmi);
  239 
  240         printf("avail memory = %ju (%ju MB)\n",
  241             ptoa((uintmax_t)cnt.v_free_count),
  242             ptoa((uintmax_t)cnt.v_free_count) / 1048576);
  243 
  244         /*
  245          * Set up buffers, so they can be used to read disk labels.
  246          */
  247         bufinit();
  248         vm_pager_bufferinit();
  249 
  250         cpu_setregs();
  251 }
  252 
/*
 * Send an interrupt to process.
 *
 * Stack is set up to allow sigcode stored
 * at top to call routine, followed by kcall
 * to sigreturn routine below.  After sigreturn
 * resets the signal mask, the stack, and the
 * frame pointer, it returns to the user
 * specified pc, psl.
 */
#ifdef COMPAT_43
/*
 * Old (4.3BSD-compatible) signal delivery: build a struct osigframe on
 * the user stack and rewrite the trapframe so the thread resumes in the
 * osigcode trampoline, which invokes `catcher' and then osigreturn().
 * Entered from sendsig() with the proc lock and the sigacts mutex held;
 * both are dropped around copyout() and reacquired before returning.
 */
static void
osendsig(catcher, sig, mask, code)
	sig_t catcher;
	int sig;
	sigset_t *mask;
	u_long code;
{
	struct osigframe sf, *fp;
	struct proc *p;
	struct thread *td;
	struct sigacts *psp;
	struct trapframe *regs;
	int oonstack;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	regs = td->td_frame;
	oonstack = sigonstack(regs->tf_esp);

	/* Allocate space for the signal handler context. */
	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		/* Place the frame at the top of the alternate stack. */
		fp = (struct osigframe *)(td->td_sigstk.ss_sp +
		    td->td_sigstk.ss_size - sizeof(struct osigframe));
#if defined(COMPAT_43)
		td->td_sigstk.ss_flags |= SS_ONSTACK;
#endif
	} else
		fp = (struct osigframe *)regs->tf_esp - 1;

	/* Translate the signal if appropriate. */
	if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
		sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	/* Build the argument list for the signal handler. */
	sf.sf_signum = sig;
	sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc;
	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
		/* Signal handler installed with SA_SIGINFO. */
		sf.sf_arg2 = (register_t)&fp->sf_siginfo;
		sf.sf_siginfo.si_signo = sig;
		sf.sf_siginfo.si_code = code;
		sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher;
	} else {
		/* Old FreeBSD-style arguments. */
		sf.sf_arg2 = code;
		sf.sf_addr = regs->tf_err;
		sf.sf_ahu.sf_handler = catcher;
	}
	/* Drop the locks before touching user memory in copyout() below. */
	mtx_unlock(&psp->ps_mtx);
	PROC_UNLOCK(p);

	/* Save most if not all of trap frame. */
	sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax;
	sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx;
	sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx;
	sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx;
	sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi;
	sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi;
	sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs;
	sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds;
	sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss;
	sf.sf_siginfo.si_sc.sc_es = regs->tf_es;
	sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs;
	/* %gs is not in the trapframe; read it directly from the CPU. */
	sf.sf_siginfo.si_sc.sc_gs = rgs();
	sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp;

	/* Build the signal context to be used by osigreturn(). */
	sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0;
	SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask);
	sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp;
	sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp;
	sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip;
	sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags;
	sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno;
	sf.sf_siginfo.si_sc.sc_err = regs->tf_err;

	/*
	 * If we're a vm86 process, we want to save the segment registers.
	 * We also change eflags to be our emulated eflags, not the actual
	 * eflags.
	 */
	if (regs->tf_eflags & PSL_VM) {
		/* XXX confusing names: `tf' isn't a trapframe; `regs' is. */
		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
		struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86;

		/* vm86 keeps its own segment registers in the extended frame. */
		sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs;
		sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs;
		sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es;
		sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds;

		/* Without VME, VIF/VIP live in the software-emulated eflags. */
		if (vm86->vm86_has_vme == 0)
			sf.sf_siginfo.si_sc.sc_ps =
			    (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
			    (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));

		/* See sendsig() for comments. */
		tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
	}

	/*
	 * Copy the sigframe out to the user's stack.
	 */
	if (copyout(&sf, fp, sizeof(*fp)) != 0) {
#ifdef DEBUG
		printf("process %ld has trashed its stack\n", (long)p->p_pid);
#endif
		/* The process can no longer run a handler; kill it. */
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	/* Resume the thread in the signal trampoline with a user-mode frame. */
	regs->tf_esp = (int)fp;
	regs->tf_eip = PS_STRINGS - szosigcode;
	regs->tf_eflags &= ~PSL_T;
	regs->tf_cs = _ucodesel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_fs = _udatasel;
	load_gs(_udatasel);
	regs->tf_ss = _udatasel;
	/* Reacquire the locks the caller expects to still hold. */
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
#endif /* COMPAT_43 */
  392 
#ifdef COMPAT_FREEBSD4
/*
 * FreeBSD 4.x-compatible signal delivery: build a struct sigframe4
 * (with a ucontext4) on the user stack and rewrite the trapframe so
 * the thread resumes in the freebsd4 sigcode trampoline, which invokes
 * `catcher' and then freebsd4_sigreturn().  Entered from sendsig()
 * with the proc lock and the sigacts mutex held; both are dropped
 * around copyout() and reacquired before returning.
 */
static void
freebsd4_sendsig(catcher, sig, mask, code)
	sig_t catcher;
	int sig;
	sigset_t *mask;
	u_long code;
{
	struct sigframe4 sf, *sfp;
	struct proc *p;
	struct thread *td;
	struct sigacts *psp;
	struct trapframe *regs;
	int oonstack;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	regs = td->td_frame;
	oonstack = sigonstack(regs->tf_esp);

	/* Save user context. */
	bzero(&sf, sizeof(sf));
	sf.sf_uc.uc_sigmask = *mask;
	sf.sf_uc.uc_stack = td->td_sigstk;
	sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
	    ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
	sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
	/* %gs is not in the trapframe; read it directly from the CPU. */
	sf.sf_uc.uc_mcontext.mc_gs = rgs();
	/* The trapframe layout matches mcontext starting at mc_fs. */
	bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs));

	/* Allocate space for the signal handler context. */
	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		/* Place the frame at the top of the alternate stack. */
		sfp = (struct sigframe4 *)(td->td_sigstk.ss_sp +
		    td->td_sigstk.ss_size - sizeof(struct sigframe4));
#if defined(COMPAT_43)
		td->td_sigstk.ss_flags |= SS_ONSTACK;
#endif
	} else
		sfp = (struct sigframe4 *)regs->tf_esp - 1;

	/* Translate the signal if appropriate. */
	if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
		sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	/* Build the argument list for the signal handler. */
	sf.sf_signum = sig;
	sf.sf_ucontext = (register_t)&sfp->sf_uc;
	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
		/* Signal handler installed with SA_SIGINFO. */
		sf.sf_siginfo = (register_t)&sfp->sf_si;
		sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;

		/* Fill in POSIX parts */
		sf.sf_si.si_signo = sig;
		sf.sf_si.si_code = code;
		sf.sf_si.si_addr = (void *)regs->tf_err;
	} else {
		/* Old FreeBSD-style arguments. */
		sf.sf_siginfo = code;
		sf.sf_addr = regs->tf_err;
		sf.sf_ahu.sf_handler = catcher;
	}
	/* Drop the locks before touching user memory in copyout() below. */
	mtx_unlock(&psp->ps_mtx);
	PROC_UNLOCK(p);

	/*
	 * If we're a vm86 process, we want to save the segment registers.
	 * We also change eflags to be our emulated eflags, not the actual
	 * eflags.
	 */
	if (regs->tf_eflags & PSL_VM) {
		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
		struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86;

		/* vm86 keeps its own segment registers in the extended frame. */
		sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs;
		sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs;
		sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es;
		sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds;

		/* Without VME, VIF/VIP live in the software-emulated eflags. */
		if (vm86->vm86_has_vme == 0)
			sf.sf_uc.uc_mcontext.mc_eflags =
			    (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
			    (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));

		/*
		 * Clear PSL_NT to inhibit T_TSSFLT faults on return from
		 * syscalls made by the signal handler.  This just avoids
		 * wasting time for our lazy fixup of such faults.  PSL_NT
		 * does nothing in vm86 mode, but vm86 programs can set it
		 * almost legitimately in probes for old cpu types.
		 */
		tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
	}

	/*
	 * Copy the sigframe out to the user's stack.
	 */
	if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
#ifdef DEBUG
		printf("process %ld has trashed its stack\n", (long)p->p_pid);
#endif
		/* The process can no longer run a handler; kill it. */
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	/* Resume the thread in the signal trampoline with a user-mode frame. */
	regs->tf_esp = (int)sfp;
	regs->tf_eip = PS_STRINGS - szfreebsd4_sigcode;
	regs->tf_eflags &= ~PSL_T;
	regs->tf_cs = _ucodesel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_fs = _udatasel;
	regs->tf_ss = _udatasel;
	/* Reacquire the locks the caller expects to still hold. */
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
#endif	/* COMPAT_FREEBSD4 */
  514 
/*
 * sendsig - deliver signal `sig' to the current thread: save the user
 * context (including FPU state) in a struct sigframe on the user stack
 * and rewrite the trapframe so the thread resumes in the signal
 * trampoline, which runs `catcher' and then sigreturn(2).
 *
 * Called with the proc lock and the sigacts mutex held; both are
 * dropped around copyout() and reacquired before returning.  Signals
 * whose handlers were installed through a compat interface are handed
 * off to freebsd4_sendsig()/osendsig().
 */
void
sendsig(catcher, sig, mask, code)
	sig_t catcher;
	int sig;
	sigset_t *mask;
	u_long code;
{
	struct sigframe sf, *sfp;
	struct proc *p;
	struct thread *td;
	struct sigacts *psp;
	char *sp;
	struct trapframe *regs;
	int oonstack;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
#ifdef COMPAT_FREEBSD4
	if (SIGISMEMBER(psp->ps_freebsd4, sig)) {
		freebsd4_sendsig(catcher, sig, mask, code);
		return;
	}
#endif
#ifdef COMPAT_43
	if (SIGISMEMBER(psp->ps_osigset, sig)) {
		osendsig(catcher, sig, mask, code);
		return;
	}
#endif
	regs = td->td_frame;
	oonstack = sigonstack(regs->tf_esp);

	/* Save user context. */
	bzero(&sf, sizeof(sf));
	sf.sf_uc.uc_sigmask = *mask;
	sf.sf_uc.uc_stack = td->td_sigstk;
	sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
	    ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
	sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
	/* %gs is not in the trapframe; read it directly from the CPU. */
	sf.sf_uc.uc_mcontext.mc_gs = rgs();
	/* The trapframe layout matches mcontext starting at mc_fs. */
	bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs));
	sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
	/* Capture the FPU state, then mark it clean for the handler. */
	get_fpcontext(td, &sf.sf_uc.uc_mcontext);
	fpstate_drop(td);

	/* Allocate space for the signal handler context. */
	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		/* Place the frame at the top of the alternate stack. */
		sp = td->td_sigstk.ss_sp +
		    td->td_sigstk.ss_size - sizeof(struct sigframe);
#if defined(COMPAT_43)
		td->td_sigstk.ss_flags |= SS_ONSTACK;
#endif
	} else
		sp = (char *)regs->tf_esp - sizeof(struct sigframe);
	/* Align to 16 bytes. */
	sfp = (struct sigframe *)((unsigned int)sp & ~0xF);

	/* Translate the signal if appropriate. */
	if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
		sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	/* Build the argument list for the signal handler. */
	sf.sf_signum = sig;
	sf.sf_ucontext = (register_t)&sfp->sf_uc;
	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
		/* Signal handler installed with SA_SIGINFO. */
		sf.sf_siginfo = (register_t)&sfp->sf_si;
		sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;

		/* Fill in POSIX parts */
		sf.sf_si.si_signo = sig;
		sf.sf_si.si_code = code;
		sf.sf_si.si_addr = (void *)regs->tf_err;
	} else {
		/* Old FreeBSD-style arguments. */
		sf.sf_siginfo = code;
		sf.sf_addr = regs->tf_err;
		sf.sf_ahu.sf_handler = catcher;
	}
	/* Drop the locks before touching user memory in copyout() below. */
	mtx_unlock(&psp->ps_mtx);
	PROC_UNLOCK(p);

	/*
	 * If we're a vm86 process, we want to save the segment registers.
	 * We also change eflags to be our emulated eflags, not the actual
	 * eflags.
	 */
	if (regs->tf_eflags & PSL_VM) {
		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
		struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86;

		/* vm86 keeps its own segment registers in the extended frame. */
		sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs;
		sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs;
		sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es;
		sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds;

		/* Without VME, VIF/VIP live in the software-emulated eflags. */
		if (vm86->vm86_has_vme == 0)
			sf.sf_uc.uc_mcontext.mc_eflags =
			    (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
			    (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));

		/*
		 * Clear PSL_NT to inhibit T_TSSFLT faults on return from
		 * syscalls made by the signal handler.  This just avoids
		 * wasting time for our lazy fixup of such faults.  PSL_NT
		 * does nothing in vm86 mode, but vm86 programs can set it
		 * almost legitimately in probes for old cpu types.
		 */
		tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
	}

	/*
	 * Copy the sigframe out to the user's stack.
	 */
	if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
#ifdef DEBUG
		printf("process %ld has trashed its stack\n", (long)p->p_pid);
#endif
		/* The process can no longer run a handler; kill it. */
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	/* Resume the thread in the signal trampoline with a user-mode frame. */
	regs->tf_esp = (int)sfp;
	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
	regs->tf_eflags &= ~PSL_T;
	regs->tf_cs = _ucodesel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_fs = _udatasel;
	regs->tf_ss = _udatasel;
	/* Reacquire the locks the caller expects to still hold. */
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
  652 
  653 /*
  654  * Build siginfo_t for SA thread
  655  */
  656 void
  657 cpu_thread_siginfo(int sig, u_long code, siginfo_t *si)
  658 {
  659         struct proc *p;
  660         struct thread *td;
  661 
  662         td = curthread;
  663         p = td->td_proc;
  664         PROC_LOCK_ASSERT(p, MA_OWNED);
  665 
  666         bzero(si, sizeof(*si));
  667         si->si_signo = sig;
  668         si->si_code = code;
  669         si->si_addr = (void *)td->td_frame->tf_err;
  670         /* XXXKSE fill other fields */
  671 }
  672 
/*
 * System call to cleanup state after a signal
 * has been taken.  Reset signal mask and
 * stack state from context left by sendsig (above).
 * Return to previous pc and psl as specified by
 * context left by sendsig. Check carefully to
 * make sure that the user has not modified the
 * state to gain improper privileges.
 *
 * MPSAFE
 */
#ifdef COMPAT_43
/*
 * osigreturn - restore the 4.3BSD-style osigcontext saved by osendsig().
 * Validates user-supplied %cs and eflags before installing them in the
 * trapframe; returns EJUSTRETURN on success so the trapframe registers
 * are not clobbered by the normal syscall return path.
 */
int
osigreturn(td, uap)
	struct thread *td;
	struct osigreturn_args /* {
		struct osigcontext *sigcntxp;
	} */ *uap;
{
	struct osigcontext sc;
	struct trapframe *regs;
	struct osigcontext *scp;
	struct proc *p = td->td_proc;
	int eflags, error;

	regs = td->td_frame;
	/* Copy the user's saved context into a kernel buffer first. */
	error = copyin(uap->sigcntxp, &sc, sizeof(sc));
	if (error != 0)
		return (error);
	scp = &sc;
	eflags = scp->sc_ps;
	if (eflags & PSL_VM) {
		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
		struct vm86_kernel *vm86;

		/*
		 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
		 * set up the vm86 area, and we can't enter vm86 mode.
		 */
		if (td->td_pcb->pcb_ext == 0)
			return (EINVAL);
		vm86 = &td->td_pcb->pcb_ext->ext_vm86;
		if (vm86->vm86_inited == 0)
			return (EINVAL);

		/* Go back to user mode if both flags are set. */
		if ((eflags & PSL_VIP) && (eflags & PSL_VIF))
			trapsignal(td, SIGBUS, 0);
		/* NOTE(review): execution falls through after trapsignal();
		 * the context is still restored below -- confirm intended. */

		if (vm86->vm86_has_vme) {
			/* VME hardware: keep only user-changeable bits. */
			eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
			    (eflags & VME_USERCHANGE) | PSL_VM;
		} else {
			/* No VME: VIF/VIP are emulated in software. */
			vm86->vm86_eflags = eflags;	/* save VIF, VIP */
			eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
			    (eflags & VM_USERCHANGE) | PSL_VM;
		}
		/* vm86 segment registers go in the extended frame... */
		tf->tf_vm86_ds = scp->sc_ds;
		tf->tf_vm86_es = scp->sc_es;
		tf->tf_vm86_fs = scp->sc_fs;
		tf->tf_vm86_gs = scp->sc_gs;
		/* ...while the kernel-visible selectors get safe values. */
		tf->tf_ds = _udatasel;
		tf->tf_es = _udatasel;
		tf->tf_fs = _udatasel;
	} else {
		/*
		 * Don't allow users to change privileged or reserved flags.
		 */
		/*
		 * XXX do allow users to change the privileged flag PSL_RF.
		 * The cpu sets PSL_RF in tf_eflags for faults.  Debuggers
		 * should sometimes set it there too.  tf_eflags is kept in
		 * the signal context during signal handling and there is no
		 * other place to remember it, so the PSL_RF bit may be
		 * corrupted by the signal handler without us knowing.
		 * Corruption of the PSL_RF bit at worst causes one more or
		 * one less debugger trap, so allowing it is fairly harmless.
		 */
		if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
			return (EINVAL);
		}

		/*
		 * Don't allow users to load a valid privileged %cs.  Let the
		 * hardware check for invalid selectors, excess privilege in
		 * other selectors, invalid %eip's and invalid %esp's.
		 */
		if (!CS_SECURE(scp->sc_cs)) {
			trapsignal(td, SIGBUS, T_PROTFLT);
			return (EINVAL);
		}
		regs->tf_ds = scp->sc_ds;
		regs->tf_es = scp->sc_es;
		regs->tf_fs = scp->sc_fs;
	}

	/* Restore remaining registers. */
	regs->tf_eax = scp->sc_eax;
	regs->tf_ebx = scp->sc_ebx;
	regs->tf_ecx = scp->sc_ecx;
	regs->tf_edx = scp->sc_edx;
	regs->tf_esi = scp->sc_esi;
	regs->tf_edi = scp->sc_edi;
	regs->tf_cs = scp->sc_cs;
	regs->tf_ss = scp->sc_ss;
	regs->tf_isp = scp->sc_isp;
	regs->tf_ebp = scp->sc_fp;
	regs->tf_esp = scp->sc_sp;
	regs->tf_eip = scp->sc_pc;
	regs->tf_eflags = eflags;

	/* Restore the signal stack state and the old-style signal mask. */
	PROC_LOCK(p);
#if defined(COMPAT_43)
	if (scp->sc_onstack & 1)
		td->td_sigstk.ss_flags |= SS_ONSTACK;
	else
		td->td_sigstk.ss_flags &= ~SS_ONSTACK;
#endif
	SIGSETOLD(td->td_sigmask, scp->sc_mask);
	SIG_CANTMASK(td->td_sigmask);
	signotify(td);
	PROC_UNLOCK(p);
	return (EJUSTRETURN);
}
#endif /* COMPAT_43 */
  798 
  799 #ifdef COMPAT_FREEBSD4
  800 /*
  801  * MPSAFE
  802  */
/*
 * The FreeBSD 4.x-compatible sigreturn(2): restore the register state
 * saved in a "struct ucontext4" by the 4.x signal trampoline.  Returns
 * EJUSTRETURN on success so the syscall return path leaves the freshly
 * restored trapframe untouched.
 */
int
freebsd4_sigreturn(td, uap)
	struct thread *td;
	struct freebsd4_sigreturn_args /* {
		const ucontext4 *sigcntxp;
	} */ *uap;
{
	struct ucontext4 uc;
	struct proc *p = td->td_proc;
	struct trapframe *regs;
	const struct ucontext4 *ucp;
	int cs, eflags, error;

	/* Snapshot the user-supplied context before validating it. */
	error = copyin(uap->sigcntxp, &uc, sizeof(uc));
	if (error != 0)
		return (error);
	ucp = &uc;
	regs = td->td_frame;
	eflags = ucp->uc_mcontext.mc_eflags;
	if (eflags & PSL_VM) {
		/* Returning to vm86 mode. */
		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
		struct vm86_kernel *vm86;

		/*
		 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
		 * set up the vm86 area, and we can't enter vm86 mode.
		 */
		if (td->td_pcb->pcb_ext == 0)
			return (EINVAL);
		vm86 = &td->td_pcb->pcb_ext->ext_vm86;
		if (vm86->vm86_inited == 0)
			return (EINVAL);

		/* Go back to user mode if both flags are set. */
		if ((eflags & PSL_VIP) && (eflags & PSL_VIF))
			trapsignal(td, SIGBUS, 0);

		/*
		 * Merge only the user-changeable eflags bits into the saved
		 * kernel view; which bits are changeable depends on whether
		 * the CPU has the VME extension.
		 */
		if (vm86->vm86_has_vme) {
			eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
			    (eflags & VME_USERCHANGE) | PSL_VM;
		} else {
			vm86->vm86_eflags = eflags;	/* save VIF, VIP */
			eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
			    (eflags & VM_USERCHANGE) | PSL_VM;
		}
		bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe));
		tf->tf_eflags = eflags;
		/* The vm86 segment registers live in the extended frame. */
		tf->tf_vm86_ds = tf->tf_ds;
		tf->tf_vm86_es = tf->tf_es;
		tf->tf_vm86_fs = tf->tf_fs;
		tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs;
		tf->tf_ds = _udatasel;
		tf->tf_es = _udatasel;
		tf->tf_fs = _udatasel;
	} else {
		/*
		 * Don't allow users to change privileged or reserved flags.
		 */
		/*
		 * XXX do allow users to change the privileged flag PSL_RF.
		 * The cpu sets PSL_RF in tf_eflags for faults.  Debuggers
		 * should sometimes set it there too.  tf_eflags is kept in
		 * the signal context during signal handling and there is no
		 * other place to remember it, so the PSL_RF bit may be
		 * corrupted by the signal handler without us knowing.
		 * Corruption of the PSL_RF bit at worst causes one more or
		 * one less debugger trap, so allowing it is fairly harmless.
		 */
		if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
			printf("freebsd4_sigreturn: eflags = 0x%x\n", eflags);
			return (EINVAL);
		}

		/*
		 * Don't allow users to load a valid privileged %cs.  Let the
		 * hardware check for invalid selectors, excess privilege in
		 * other selectors, invalid %eip's and invalid %esp's.
		 */
		cs = ucp->uc_mcontext.mc_cs;
		if (!CS_SECURE(cs)) {
			printf("freebsd4_sigreturn: cs = 0x%x\n", cs);
			trapsignal(td, SIGBUS, T_PROTFLT);
			return (EINVAL);
		}

		/* Bulk-restore the general registers from the context. */
		bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
	}

	PROC_LOCK(p);
#if defined(COMPAT_43)
	/* Keep the old-style on-signal-stack flag in sync. */
	if (ucp->uc_mcontext.mc_onstack & 1)
		td->td_sigstk.ss_flags |= SS_ONSTACK;
	else
		td->td_sigstk.ss_flags &= ~SS_ONSTACK;
#endif

	/* Restore the signal mask, never letting unmaskable signals block. */
	td->td_sigmask = ucp->uc_sigmask;
	SIG_CANTMASK(td->td_sigmask);
	signotify(td);
	PROC_UNLOCK(p);
	return (EJUSTRETURN);
}
  905 #endif  /* COMPAT_FREEBSD4 */
  906 
  907 /*
  908  * MPSAFE
  909  */
/*
 * sigreturn(2): restore the machine context saved by the signal
 * trampoline and return to the interrupted user code.  Returns
 * EJUSTRETURN on success so the syscall return path does not clobber
 * the freshly restored registers.
 */
int
sigreturn(td, uap)
	struct thread *td;
	struct sigreturn_args /* {
		const __ucontext *sigcntxp;
	} */ *uap;
{
	ucontext_t uc;
	struct proc *p = td->td_proc;
	struct trapframe *regs;
	const ucontext_t *ucp;
	int cs, eflags, error, ret;

	/* Snapshot the user-supplied context before validating it. */
	error = copyin(uap->sigcntxp, &uc, sizeof(uc));
	if (error != 0)
		return (error);
	ucp = &uc;
	regs = td->td_frame;
	eflags = ucp->uc_mcontext.mc_eflags;
	if (eflags & PSL_VM) {
		/* Returning to vm86 mode. */
		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
		struct vm86_kernel *vm86;

		/*
		 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
		 * set up the vm86 area, and we can't enter vm86 mode.
		 */
		if (td->td_pcb->pcb_ext == 0)
			return (EINVAL);
		vm86 = &td->td_pcb->pcb_ext->ext_vm86;
		if (vm86->vm86_inited == 0)
			return (EINVAL);

		/* Go back to user mode if both flags are set. */
		if ((eflags & PSL_VIP) && (eflags & PSL_VIF))
			trapsignal(td, SIGBUS, 0);

		/*
		 * Merge only the user-changeable eflags bits into the saved
		 * kernel view; which bits are changeable depends on whether
		 * the CPU has the VME extension.
		 */
		if (vm86->vm86_has_vme) {
			eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
			    (eflags & VME_USERCHANGE) | PSL_VM;
		} else {
			vm86->vm86_eflags = eflags;	/* save VIF, VIP */
			eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
			    (eflags & VM_USERCHANGE) | PSL_VM;
		}
		bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe));
		tf->tf_eflags = eflags;
		/* The vm86 segment registers live in the extended frame. */
		tf->tf_vm86_ds = tf->tf_ds;
		tf->tf_vm86_es = tf->tf_es;
		tf->tf_vm86_fs = tf->tf_fs;
		tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs;
		tf->tf_ds = _udatasel;
		tf->tf_es = _udatasel;
		tf->tf_fs = _udatasel;
	} else {
		/*
		 * Don't allow users to change privileged or reserved flags.
		 */
		/*
		 * XXX do allow users to change the privileged flag PSL_RF.
		 * The cpu sets PSL_RF in tf_eflags for faults.  Debuggers
		 * should sometimes set it there too.  tf_eflags is kept in
		 * the signal context during signal handling and there is no
		 * other place to remember it, so the PSL_RF bit may be
		 * corrupted by the signal handler without us knowing.
		 * Corruption of the PSL_RF bit at worst causes one more or
		 * one less debugger trap, so allowing it is fairly harmless.
		 */
		if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
			printf("sigreturn: eflags = 0x%x\n", eflags);
			return (EINVAL);
		}

		/*
		 * Don't allow users to load a valid privileged %cs.  Let the
		 * hardware check for invalid selectors, excess privilege in
		 * other selectors, invalid %eip's and invalid %esp's.
		 */
		cs = ucp->uc_mcontext.mc_cs;
		if (!CS_SECURE(cs)) {
			printf("sigreturn: cs = 0x%x\n", cs);
			trapsignal(td, SIGBUS, T_PROTFLT);
			return (EINVAL);
		}

		/* Restore the FPU state before touching the trapframe. */
		ret = set_fpcontext(td, &ucp->uc_mcontext);
		if (ret != 0)
			return (ret);
		bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
	}

	PROC_LOCK(p);
#if defined(COMPAT_43)
	/* Keep the old-style on-signal-stack flag in sync. */
	if (ucp->uc_mcontext.mc_onstack & 1)
		td->td_sigstk.ss_flags |= SS_ONSTACK;
	else
		td->td_sigstk.ss_flags &= ~SS_ONSTACK;
#endif

	/* Restore the signal mask, never letting unmaskable signals block. */
	td->td_sigmask = ucp->uc_sigmask;
	SIG_CANTMASK(td->td_sigmask);
	signotify(td);
	PROC_UNLOCK(p);
	return (EJUSTRETURN);
}
 1015 
 1016 /*
 1017  * Machine dependent boot() routine
 1018  *
 1019  * I haven't seen anything to put here yet
 1020  * Possibly some stuff might be grafted back here from boot()
 1021  */
 1022 void
 1023 cpu_boot(int howto)
 1024 {
 1025 }
 1026 
/*
 * Get current clock frequency (in Hz) for the given cpu id.
 * Returns 0 on success with *rate filled in, EINVAL for a bad cpu id
 * or NULL rate pointer, or EOPNOTSUPP when the CPU has no TSC.
 */
int
cpu_est_clockrate(int cpu_id, uint64_t *rate)
{
	register_t reg;
	uint64_t tsc1, tsc2;

	if (pcpu_find(cpu_id) == NULL || rate == NULL)
		return (EINVAL);
	if (!tsc_present)
		return (EOPNOTSUPP);

	/* If we're booting, trust the rate calibrated moments ago. */
	if (cold) {
		*rate = tsc_freq;
		return (0);
	}

#ifdef SMP
	/* Schedule ourselves on the indicated cpu. */
	mtx_lock_spin(&sched_lock);
	sched_bind(curthread, cpu_id);
	mtx_unlock_spin(&sched_lock);
#endif

	/*
	 * Calibrate by measuring a short delay.  Interrupts stay off so
	 * nothing runs between the two TSC reads.
	 */
	reg = intr_disable();
	tsc1 = rdtsc();
	DELAY(1000);
	tsc2 = rdtsc();
	intr_restore(reg);

#ifdef SMP
	/* Release the binding established above. */
	mtx_lock_spin(&sched_lock);
	sched_unbind(curthread);
	mtx_unlock_spin(&sched_lock);
#endif

	/*
	 * Calculate the difference in readings, convert to Mhz, and
	 * subtract 0.5% of the total.  Empirical testing has shown that
	 * overhead in DELAY() works out to approximately this value.
	 * (ticks per 1000us * 1000 = ticks/sec; 0.5% of that is tsc2 * 5.)
	 */
	tsc2 -= tsc1;
	*rate = tsc2 * 1000 - tsc2 * 5;
	return (0);
}
 1074 
 1075 /*
 1076  * Shutdown the CPU as much as possible
 1077  */
 1078 void
 1079 cpu_halt(void)
 1080 {
 1081         for (;;)
 1082                 __asm__ ("hlt");
 1083 }
 1084 
 1085 /*
 1086  * Hook to idle the CPU when possible.  In the SMP case we default to
 1087  * off because a halted cpu will not currently pick up a new thread in the
 1088  * run queue until the next timer tick.  If turned on this will result in
 1089  * approximately a 4.2% loss in real time performance in buildworld tests
 1090  * (but improves user and sys times oddly enough), and saves approximately
 1091  * 5% in power consumption on an idle machine (tests w/2xCPU 1.1GHz P3).
 1092  *
 1093  * XXX we need to have a cpu mask of idle cpus and generate an IPI or
 1094  * otherwise generate some sort of interrupt to wake up cpus sitting in HLT.
 1095  * Then we can have our cake and eat it too.
 1096  *
 1097  * XXX I'm turning it on for SMP as well by default for now.  It seems to
 1098  * help lock contention somewhat, and this is critical for HTT. -Peter
 1099  */
/* Runtime knob: machdep.cpu_idle_hlt (see the discussion above). */
static int	cpu_idle_hlt = 1;
SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW,
    &cpu_idle_hlt, 0, "Idle loop HLT enable");
 1103 
/*
 * Default idle hook: atomically re-enable interrupts and halt.
 */
static void
cpu_idle_default(void)
{
	/*
	 * We must absolutely guarantee that hlt is the absolute next
	 * instruction after sti or we introduce a timing window: sti
	 * keeps interrupts masked for exactly one more instruction, so
	 * any instruction in between would let a wakeup interrupt be
	 * serviced before we halt, and we would sleep through it.
	 */
	__asm __volatile("sti; hlt");
}
 1114 
 1115 /*
 1116  * Note that we have to be careful here to avoid a race between checking
 1117  * sched_runnable() and actually halting.  If we don't do this, we may waste
 1118  * the time between calling hlt and the next interrupt even though there
 1119  * is a runnable process.
 1120  */
 1121 void
 1122 cpu_idle(void)
 1123 {
 1124 
 1125 #ifdef SMP
 1126         if (mp_grab_cpu_hlt())
 1127                 return;
 1128 #endif
 1129 
 1130         if (cpu_idle_hlt) {
 1131                 disable_intr();
 1132                 if (sched_runnable())
 1133                         enable_intr();
 1134                 else
 1135                         (*cpu_idle_hook)();
 1136         }
 1137 }
 1138 
/*
 * Pluggable idle handler, invoked by cpu_idle() with interrupts
 * disabled.  Other subsystems (e.g., ACPI) can hook this later.
 */
void (*cpu_idle_hook)(void) = cpu_idle_default;
 1141 
 1142 /*
 1143  * Clear registers on exec
 1144  */
 1145 void
 1146 exec_setregs(td, entry, stack, ps_strings)
 1147         struct thread *td;
 1148         u_long entry;
 1149         u_long stack;
 1150         u_long ps_strings;
 1151 {
 1152         struct trapframe *regs = td->td_frame;
 1153         struct pcb *pcb = td->td_pcb;
 1154 
 1155         /* Reset pc->pcb_gs and %gs before possibly invalidating it. */
 1156         pcb->pcb_gs = _udatasel;
 1157         load_gs(_udatasel);
 1158 
 1159         if (td->td_proc->p_md.md_ldt)
 1160                 user_ldt_free(td);
 1161   
 1162         bzero((char *)regs, sizeof(struct trapframe));
 1163         regs->tf_eip = entry;
 1164         regs->tf_esp = stack;
 1165         regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T);
 1166         regs->tf_ss = _udatasel;
 1167         regs->tf_ds = _udatasel;
 1168         regs->tf_es = _udatasel;
 1169         regs->tf_fs = _udatasel;
 1170         regs->tf_cs = _ucodesel;
 1171 
 1172         /* PS_STRINGS value for BSD/OS binaries.  It is 0 for non-BSD/OS. */
 1173         regs->tf_ebx = ps_strings;
 1174 
 1175         /*
 1176          * Reset the hardware debug registers if they were in use.
 1177          * They won't have any meaning for the newly exec'd process.  
 1178          */
 1179         if (pcb->pcb_flags & PCB_DBREGS) {
 1180                 pcb->pcb_dr0 = 0;
 1181                 pcb->pcb_dr1 = 0;
 1182                 pcb->pcb_dr2 = 0;
 1183                 pcb->pcb_dr3 = 0;
 1184                 pcb->pcb_dr6 = 0;
 1185                 pcb->pcb_dr7 = 0;
 1186                 if (pcb == PCPU_GET(curpcb)) {
 1187                         /*
 1188                          * Clear the debug registers on the running
 1189                          * CPU, otherwise they will end up affecting
 1190                          * the next process we switch to.
 1191                          */
 1192                         reset_dbregs();
 1193                 }
 1194                 pcb->pcb_flags &= ~PCB_DBREGS;
 1195         }
 1196 
 1197         /*
 1198          * Initialize the math emulator (if any) for the current process.
 1199          * Actually, just clear the bit that says that the emulator has
 1200          * been initialized.  Initialization is delayed until the process
 1201          * traps to the emulator (if it is done at all) mainly because
 1202          * emulators don't provide an entry point for initialization.
 1203          */
 1204         td->td_pcb->pcb_flags &= ~FP_SOFTFP;
 1205 
 1206         /*
 1207          * Drop the FP state if we hold it, so that the process gets a
 1208          * clean FP state if it uses the FPU again.
 1209          */
 1210         fpstate_drop(td);
 1211 
 1212         /*
 1213          * XXX - Linux emulator
 1214          * Make sure sure edx is 0x0 on entry. Linux binaries depend
 1215          * on it.
 1216          */
 1217         td->td_retval[1] = 0;
 1218 }
 1219 
 1220 void
 1221 cpu_setregs(void)
 1222 {
 1223         unsigned int cr0;
 1224 
 1225         cr0 = rcr0();
 1226         /*
 1227          * CR0_MP, CR0_NE and CR0_TS are also set by npx_probe() for the
 1228          * BSP.  See the comments there about why we set them.
 1229          */
 1230         cr0 |= CR0_MP | CR0_NE | CR0_TS;
 1231 #ifndef I386_CPU
 1232         cr0 |= CR0_WP | CR0_AM;
 1233 #endif
 1234         load_cr0(cr0);
 1235         load_gs(_udatasel);
 1236 }
 1237 
 1238 static int
 1239 sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS)
 1240 {
 1241         int error;
 1242         error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
 1243                 req);
 1244         if (!error && req->newptr)
 1245                 resettodr();
 1246         return (error);
 1247 }
 1248 
SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW,
	&adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "");

SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
	CTLFLAG_RW, &disable_rtc_set, 0, "");

/* Boot-time information, exported read-only. */
SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo, 
	CTLFLAG_RD, &bootinfo, bootinfo, "");

SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock,
	CTLFLAG_RW, &wall_cmos_clock, 0, "");

u_long bootdev;		/* not a struct cdev *- encoding is different */
SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev,
	CTLFLAG_RD, &bootdev, 0, "Maybe the Boot device (not in struct cdev *format)");
 1264 
 1265 /*
 1266  * Initialize 386 and configure to run kernel
 1267  */
 1268 
 1269 /*
 1270  * Initialize segments & interrupt table
 1271  */
 1272 
int _default_ldt;
union descriptor gdt[NGDT * MAXCPU];	/* global descriptor table */
static struct gate_descriptor idt0[NIDT];
struct gate_descriptor *idt = &idt0[0];	/* interrupt descriptor table */
union descriptor ldt[NLDT];		/* local descriptor table */
struct region_descriptor r_gdt, r_idt;	/* table descriptors */

int private_tss;			/* flag indicating private tss */

#if defined(I586_CPU) && !defined(NO_F00F_HACK)
extern int has_f00f_bug;		/* set when the Pentium f00f workaround is needed */
#endif

/* TSS and stack for the panic/double-fault handler (see GPANIC_SEL). */
static struct i386tss dblfault_tss;
static char dblfault_stack[PAGE_SIZE];

extern  vm_offset_t     proc0kstack;	/* proc0's kernel stack (set up elsewhere) */
 1291 
/* software prototypes -- in more palatable form */
struct soft_segment_descriptor gdt_segs[] = {
/* GNULL_SEL	0 Null Descriptor */
{	0x0,			/* segment base address  */
	0x0,			/* length */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0			/* limit granularity (byte/page units)*/ },
/* GCODE_SEL	1 Code Descriptor for kernel */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMERA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
/* GDATA_SEL	2 Data Descriptor for kernel */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMRWA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
/* GPRIV_SEL	3 SMP Per-Processor Private Data Descriptor */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMRWA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
/* GPROC0_SEL	4 Proc 0 Tss Descriptor */
{
	0x0,			/* segment base address */
	sizeof(struct i386tss)-1,/* length  */
	SDT_SYS386TSS,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0			/* limit granularity (byte/page units)*/ },
/* GLDT_SEL	5 LDT Descriptor */
{	(int) ldt,		/* segment base address  */
	sizeof(ldt)-1,		/* length - all address space */
	SDT_SYSLDT,		/* segment type */
	SEL_UPL,		/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0			/* limit granularity (byte/page units)*/ },
/* GUSERLDT_SEL	6 User LDT Descriptor per process */
{	(int) ldt,		/* segment base address  */
	(512 * sizeof(union descriptor)-1),		/* length */
	SDT_SYSLDT,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0			/* limit granularity (byte/page units)*/ },
/* GNDIS_SEL	7 NDIS Descriptor */
{	0x0,			/* segment base address  */
	0x0,			/* length */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0			/* limit granularity (byte/page units)*/ },
/* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */
{	0x400,			/* segment base address */
	0xfffff,		/* length */
	SDT_MEMRWA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
/* GPANIC_SEL	9 Panic Tss Descriptor */
{	(int) &dblfault_tss,	/* segment base address  */
	sizeof(struct i386tss)-1,/* length - all address space */
	SDT_SYS386TSS,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0			/* limit granularity (byte/page units)*/ },
/* GBIOSCODE32_SEL 10 BIOS 32-bit interface (32bit Code) */
{	0,			/* segment base address (overwritten)  */
	0xfffff,		/* length */
	SDT_MEMERA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
/* GBIOSCODE16_SEL 11 BIOS 32-bit interface (16bit Code) */
{	0,			/* segment base address (overwritten)  */
	0xfffff,		/* length */
	SDT_MEMERA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
/* GBIOSDATA_SEL 12 BIOS 32-bit interface (Data) */
{	0,			/* segment base address (overwritten) */
	0xfffff,		/* length */
	SDT_MEMRWA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
/* GBIOSUTIL_SEL 13 BIOS 16-bit interface (Utility) */
{	0,			/* segment base address (overwritten) */
	0xfffff,		/* length */
	SDT_MEMRWA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
/* GBIOSARGS_SEL 14 BIOS 16-bit interface (Arguments) */
{	0,			/* segment base address (overwritten) */
	0xfffff,		/* length */
	SDT_MEMRWA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
/* GUFS_SEL	15 %fs Descriptor for user (slot 14 is GBIOSARGS_SEL above) */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMRWA,		/* segment type */
	SEL_UPL,		/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
/* GUGS_SEL	16 %gs Descriptor for user */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMRWA,		/* segment type */
	SEL_UPL,		/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
};
 1449 
/*
 * Software prototypes for the process LDT slots.  The null descriptors
 * are placeholders: init386() later installs a system call gate in the
 * LSYS5CALLS_SEL slot and copies it to the LBSDICALLS_SEL and
 * LSOL26CALLS_SEL slots.  The user code/data descriptors get their
 * limits extended to cover the whole address space in init386() as well.
 */
static struct soft_segment_descriptor ldt_segs[] = {
	/* Null Descriptor - overwritten by call gate */
{	0x0,			/* segment base address  */
	0x0,			/* length - all address space */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0			/* limit granularity (byte/page units)*/ },
	/* Null Descriptor - overwritten by call gate */
{	0x0,			/* segment base address  */
	0x0,			/* length - all address space */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0			/* limit granularity (byte/page units)*/ },
	/* Null Descriptor - overwritten by call gate */
{	0x0,			/* segment base address  */
	0x0,			/* length - all address space */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0			/* limit granularity (byte/page units)*/ },
	/* Code Descriptor for user */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMERA,		/* segment type */
	SEL_UPL,		/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
	/* Null Descriptor - overwritten by call gate */
{	0x0,			/* segment base address  */
	0x0,			/* length - all address space */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0			/* limit granularity (byte/page units)*/ },
	/* Data Descriptor for user */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMRWA,		/* segment type */
	SEL_UPL,		/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
};
 1506 
 1507 void
 1508 setidt(idx, func, typ, dpl, selec)
 1509         int idx;
 1510         inthand_t *func;
 1511         int typ;
 1512         int dpl;
 1513         int selec;
 1514 {
 1515         struct gate_descriptor *ip;
 1516 
 1517         ip = idt + idx;
 1518         ip->gd_looffset = (int)func;
 1519         ip->gd_selector = selec;
 1520         ip->gd_stkcpy = 0;
 1521         ip->gd_xx = 0;
 1522         ip->gd_type = typ;
 1523         ip->gd_dpl = dpl;
 1524         ip->gd_p = 1;
 1525         ip->gd_hioffset = ((int)func)>>16 ;
 1526 }
 1527 
 1528 #define IDTVEC(name)    __CONCAT(X,name)
 1529 
 1530 extern inthand_t
 1531         IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
 1532         IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
 1533         IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
 1534         IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
 1535         IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);
 1536 
#ifdef DDB
/*
 * Display the index and function name of any IDT entries that don't use
 * the default 'rsvd' entry point.
 */
DB_SHOW_COMMAND(idt, db_show_idt)
{
	struct gate_descriptor *ip;
	int idx, quit;
	uintptr_t func;

	ip = idt;
	/*
	 * NOTE(review): 'quit' is handed to the pager and cleared below but
	 * never tested in this loop; presumably the pager flags it when the
	 * user aborts -- confirm against db_setup_paging()'s contract.
	 */
	db_setup_paging(db_simple_pager, &quit, db_lines_per_page);
	for (idx = 0, quit = 0; idx < NIDT; idx++) {
		/* Reassemble the handler address from the split gate fields. */
		func = (ip->gd_hioffset << 16 | ip->gd_looffset);
		if (func != (uintptr_t)&IDTVEC(rsvd)) {
			db_printf("%3d\t", idx);
			db_printsym(func, DB_STGY_PROC);
			db_printf("\n");
		}
		ip++;
	}
}
#endif
 1561 
 1562 void
 1563 sdtossd(sd, ssd)
 1564         struct segment_descriptor *sd;
 1565         struct soft_segment_descriptor *ssd;
 1566 {
 1567         ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
 1568         ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
 1569         ssd->ssd_type  = sd->sd_type;
 1570         ssd->ssd_dpl   = sd->sd_dpl;
 1571         ssd->ssd_p     = sd->sd_p;
 1572         ssd->ssd_def32 = sd->sd_def32;
 1573         ssd->ssd_gran  = sd->sd_gran;
 1574 }
 1575 
#define PHYSMAP_SIZE	(2 * 8)		/* 8 base/bound pairs */

/*
 * Populate the (physmap) array with base/bound pairs describing the
 * available physical memory in the system, then test this memory and
 * build the phys_avail array describing the actually-available memory.
 *
 * If we cannot accurately determine the physical memory map, then use
 * value from the 0xE801 call, and failing that, the RTC.
 *
 * Total memory size may be set by the kernel environment variable
 * hw.physmem or the compile-time define MAXMEM.
 *
 * XXX first should be vm_paddr_t.
 */
static void
getmemsize(int first)
{
	int i, physmap_idx, pa_indx;
	int hasbrokenint12;
	u_int extmem;
	struct vm86frame vmf;
	struct vm86context vmc;
	vm_paddr_t pa, physmap[PHYSMAP_SIZE];
	pt_entry_t *pte;
	char *cp;
	struct bios_smap *smap;

	hasbrokenint12 = 0;
	TUNABLE_INT_FETCH("hw.hasbrokenint12", &hasbrokenint12);
	bzero(&vmf, sizeof(vmf));
	bzero(physmap, sizeof(physmap));
	basemem = 0;

	/*
	 * Some newer BIOSes has broken INT 12H implementation which cause
	 * kernel panic immediately. In this case, we need to scan SMAP
	 * with INT 15:E820 first, then determine base memory size.
	 */
	if (hasbrokenint12) {
		goto int15e820;
	}

	/*
	 * Perform "base memory" related probes & setup
	 */
	vm86_intcall(0x12, &vmf);
	basemem = vmf.vmf_ax;		/* INT 12h returns base memory in KB */
	if (basemem > 640) {
		printf("Preposterous BIOS basemem of %uK, truncating to 640K\n",
			basemem);
		basemem = 640;
	}

	/*
	 * XXX if biosbasemem is now < 640, there is a `hole'
	 * between the end of base memory and the start of
	 * ISA memory.  The hole may be empty or it may
	 * contain BIOS code or data.  Map it read/write so
	 * that the BIOS can write to it.  (Memory from 0 to
	 * the physical end of the kernel is mapped read-only
	 * to begin with and then parts of it are remapped.
	 * The parts that aren't remapped form holes that
	 * remain read-only and are unused by the kernel.
	 * The base memory area is below the physical end of
	 * the kernel and right now forms a read-only hole.
	 * The part of it from PAGE_SIZE to
	 * (trunc_page(biosbasemem * 1024) - 1) will be
	 * remapped and used by the kernel later.)
	 *
	 * This code is similar to the code used in
	 * pmap_mapdev, but since no memory needs to be
	 * allocated we simply change the mapping.
	 */
	for (pa = trunc_page(basemem * 1024);
	     pa < ISA_HOLE_START; pa += PAGE_SIZE)
		pmap_kenter(KERNBASE + pa, pa);

	/*
	 * Map pages between basemem and ISA_HOLE_START, if any, r/w into
	 * the vm86 page table so that vm86 can scribble on them using
	 * the vm86 map too.  XXX: why 2 ways for this and only 1 way for
	 * page 0, at least as initialized here?
	 */
	pte = (pt_entry_t *)vm86paddr;
	for (i = basemem / 4; i < 160; i++)	/* 160 pages == 640K / 4K */
		pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U;

int15e820:
	/*
	 * map page 1 R/W into the kernel page table so we can use it
	 * as a buffer.  The kernel will unmap this page later.
	 */
	pmap_kenter(KERNBASE + (1 << PAGE_SHIFT), 1 << PAGE_SHIFT);

	/*
	 * get memory map with INT 15:E820
	 */
	vmc.npages = 0;
	smap = (void *)vm86_addpage(&vmc, 1, KERNBASE + (1 << PAGE_SHIFT));
	vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di);

	physmap_idx = 0;
	vmf.vmf_ebx = 0;		/* continuation value; 0 starts the scan */
	do {
		vmf.vmf_eax = 0xE820;
		vmf.vmf_edx = SMAP_SIG;
		vmf.vmf_ecx = sizeof(struct bios_smap);
		i = vm86_datacall(0x15, &vmf, &vmc);
		if (i || vmf.vmf_eax != SMAP_SIG)
			break;
		if (boothowto & RB_VERBOSE)
			printf("SMAP type=%02x base=%016llx len=%016llx\n",
			    smap->type, smap->base, smap->length);

		/* Only type 1 entries are usable RAM; skip everything else. */
		if (smap->type != 0x01)
			goto next_run;

		if (smap->length == 0)
			goto next_run;

#ifndef PAE
		/* Without PAE, physical addresses above 4GB are unreachable. */
		if (smap->base >= 0xffffffff) {
			printf("%uK of memory above 4GB ignored\n",
			    (u_int)(smap->length / 1024));
			goto next_run;
		}
#endif

		/* Reject regions that overlap or go backwards. */
		for (i = 0; i <= physmap_idx; i += 2) {
			if (smap->base < physmap[i + 1]) {
				if (boothowto & RB_VERBOSE)
					printf(
	"Overlapping or non-montonic memory region, ignoring second region\n");
				goto next_run;
			}
		}

		/* Coalesce a region adjacent to the previous one. */
		if (smap->base == physmap[physmap_idx + 1]) {
			physmap[physmap_idx + 1] += smap->length;
			goto next_run;
		}

		physmap_idx += 2;
		if (physmap_idx == PHYSMAP_SIZE) {
			printf(
		"Too many segments in the physical address map, giving up\n");
			break;
		}
		physmap[physmap_idx] = smap->base;
		physmap[physmap_idx + 1] = smap->base + smap->length;
next_run: ;
	} while (vmf.vmf_ebx != 0);

	/*
	 * Perform "base memory" related probes & setup based on SMAP
	 */
	if (basemem == 0) {
		for (i = 0; i <= physmap_idx; i += 2) {
			if (physmap[i] == 0x00000000) {
				basemem = physmap[i + 1] / 1024;
				break;
			}
		}

		/*
		 * XXX this function is horribly organized and has to do some
		 * of the same things that it does above here.
		 */
		if (basemem == 0)
			basemem = 640;
		if (basemem > 640) {
			printf(
		    "Preposterous BIOS basemem of %uK, truncating to 640K\n",
			    basemem);
			basemem = 640;
		}

		/*
		 * Let vm86 scribble on pages between basemem and
		 * ISA_HOLE_START, as above.
		 */
		for (pa = trunc_page(basemem * 1024);
		     pa < ISA_HOLE_START; pa += PAGE_SIZE)
			pmap_kenter(KERNBASE + pa, pa);
		pte = (pt_entry_t *)vm86paddr;
		for (i = basemem / 4; i < 160; i++)
			pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U;
	}

	/* A non-zero first bound means the SMAP scan produced a map. */
	if (physmap[1] != 0)
		goto physmap_done;

	/*
	 * If we failed above, try memory map with INT 15:E801
	 */
	vmf.vmf_ax = 0xE801;
	if (vm86_intcall(0x15, &vmf) == 0) {
		/* CX = KB between 1-16MB, DX = 64KB blocks above 16MB */
		extmem = vmf.vmf_cx + vmf.vmf_dx * 64;
	} else {
#if 0
		vmf.vmf_ah = 0x88;
		vm86_intcall(0x15, &vmf);
		extmem = vmf.vmf_ax;
#else
		/*
		 * Prefer the RTC value for extended memory.
		 */
		extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8);
#endif
	}

	/*
	 * Special hack for chipsets that still remap the 384k hole when
	 * there's 16MB of memory - this really confuses people that
	 * are trying to use bus mastering ISA controllers with the
	 * "16MB limit"; they only have 16MB, but the remapping puts
	 * them beyond the limit.
	 *
	 * If extended memory is between 15-16MB (16-17MB phys address range),
	 *	chop it to 15MB.
	 */
	if ((extmem > 15 * 1024) && (extmem < 16 * 1024))
		extmem = 15 * 1024;

	physmap[0] = 0;
	physmap[1] = basemem * 1024;
	physmap_idx = 2;
	physmap[physmap_idx] = 0x100000;
	physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024;

physmap_done:
	/*
	 * Now, physmap contains a map of physical memory.
	 */

#ifdef SMP
	/* make hole for AP bootstrap code */
	physmap[1] = mp_bootaddress(physmap[1]);
#endif

	/*
	 * Maxmem isn't the "maximum memory", it's one larger than the
	 * highest page of the physical address space.  It should be
	 * called something like "Maxphyspage".  We may adjust this 
	 * based on ``hw.physmem'' and the results of the memory test.
	 */
	Maxmem = atop(physmap[physmap_idx + 1]);

#ifdef MAXMEM
	Maxmem = MAXMEM / 4;		/* MAXMEM is in KB; Maxmem in pages */
#endif

	/*
	 * hw.physmem is a size in bytes; we also allow k, m, and g suffixes
	 * for the appropriate modifiers.  This overrides MAXMEM.
	 */
	if ((cp = getenv("hw.physmem")) != NULL) {
		u_int64_t AllowMem, sanity;
		char *ep;

		sanity = AllowMem = strtouq(cp, &ep, 0);
		if ((ep != cp) && (*ep != 0)) {
			switch(*ep) {
			case 'g':
			case 'G':
				AllowMem <<= 10;
				/* FALLTHROUGH */
			case 'm':
			case 'M':
				AllowMem <<= 10;
				/* FALLTHROUGH */
			case 'k':
			case 'K':
				AllowMem <<= 10;
				break;
			default:
				AllowMem = sanity = 0;
			}
			/* The shifts wrapped: the requested size overflowed. */
			if (AllowMem < sanity)
				AllowMem = 0;
		}
		if (AllowMem == 0)
			printf("Ignoring invalid memory size of '%s'\n", cp);
		else
			Maxmem = atop(AllowMem);
		freeenv(cp);
	}

	if (atop(physmap[physmap_idx + 1]) != Maxmem &&
	    (boothowto & RB_VERBOSE))
		printf("Physical memory use set to %ldK\n", Maxmem * 4);

	/*
	 * If Maxmem has been increased beyond what the system has detected,
	 * extend the last memory segment to the new limit.
	 */ 
	if (atop(physmap[physmap_idx + 1]) < Maxmem)
		physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem);

	/* call pmap initialization to make new kernel address space */
	pmap_bootstrap(first, 0);

	/*
	 * Size up each available chunk of physical memory.
	 */
	physmap[0] = PAGE_SIZE;		/* mask off page 0 */
	pa_indx = 0;
	phys_avail[pa_indx++] = physmap[0];
	phys_avail[pa_indx] = physmap[0];
	pte = CMAP1;

	/*
	 * physmap is in bytes, so when converting to page boundaries,
	 * round up the start address and round down the end address.
	 */
	for (i = 0; i <= physmap_idx; i += 2) {
		vm_paddr_t end;

		end = ptoa((vm_paddr_t)Maxmem);
		if (physmap[i + 1] < end)
			end = trunc_page(physmap[i + 1]);
		for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
			int tmp, page_bad;
			int *ptr = (int *)CADDR1;

			/*
			 * block out kernel memory as not available.
			 */
			if (pa >= KERNLOAD && pa < first)
				continue;
	
			page_bad = FALSE;

			/*
			 * map page into kernel: valid, read/write,non-cacheable
			 */
			*pte = pa | PG_V | PG_RW | PG_N;
			invltlb();

			tmp = *(int *)ptr;	/* save word we are testing */
			/*
			 * Test for alternating 1's and 0's
			 */
			*(volatile int *)ptr = 0xaaaaaaaa;
			if (*(volatile int *)ptr != 0xaaaaaaaa) {
				page_bad = TRUE;
			}
			/*
			 * Test for alternating 0's and 1's
			 */
			*(volatile int *)ptr = 0x55555555;
			if (*(volatile int *)ptr != 0x55555555) {
				page_bad = TRUE;
			}
			/*
			 * Test for all 1's
			 */
			*(volatile int *)ptr = 0xffffffff;
			if (*(volatile int *)ptr != 0xffffffff) {
				page_bad = TRUE;
			}
			/*
			 * Test for all 0's
			 */
			*(volatile int *)ptr = 0x0;
			if (*(volatile int *)ptr != 0x0) {
				page_bad = TRUE;
			}
			/*
			 * Restore original value.
			 */
			*(int *)ptr = tmp;

			/*
			 * Adjust array of valid/good pages.
			 */
			if (page_bad == TRUE) {
				continue;
			}
			/*
			 * If this good page is a continuation of the
			 * previous set of good pages, then just increase
			 * the end pointer. Otherwise start a new chunk.
			 * Note that "end" points one higher than end,
			 * making the range >= start and < end.
			 * If we're also doing a speculative memory
			 * test and we at or past the end, bump up Maxmem
			 * so that we keep going. The first bad page
			 * will terminate the loop.
			 */
			if (phys_avail[pa_indx] == pa) {
				phys_avail[pa_indx] += PAGE_SIZE;
			} else {
				pa_indx++;
				if (pa_indx == PHYS_AVAIL_ARRAY_END) {
					printf(
		"Too many holes in the physical address space, giving up\n");
					pa_indx--;
					break;
				}
				phys_avail[pa_indx++] = pa;	/* start */
				phys_avail[pa_indx] = pa + PAGE_SIZE;	/* end */
			}
			physmem++;
		}
	}
	*pte = 0;
	invltlb();

	/*
	 * XXX
	 * The last chunk must contain at least one page plus the message
	 * buffer to avoid complicating other code (message buffer address
	 * calculation, etc.).
	 */
	while (phys_avail[pa_indx - 1] + PAGE_SIZE +
	    round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) {
		physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
		phys_avail[pa_indx--] = 0;
		phys_avail[pa_indx--] = 0;
	}

	Maxmem = atop(phys_avail[pa_indx]);

	/* Trim off space for the message buffer. */
	phys_avail[pa_indx] -= round_page(MSGBUF_SIZE);

	avail_end = phys_avail[pa_indx];
}
 2004 
/*
 * i386 machine-dependent boot-time initialization: sets up thread0's
 * stack/pcb, loads the GDT/LDT/IDT, initializes per-CPU data, early
 * mutexes, the console, the debugger, the TSS and double-fault TSS,
 * sizes memory via getmemsize(), and builds the syscall call gate.
 * 'first' is the first free physical address after the kernel, passed
 * through to getmemsize()/pmap_bootstrap().
 */
void
init386(first)
	int first;
{
	struct gate_descriptor *gdp;
	int gsel_tss, metadata_missing, off, x;
	struct pcpu *pc;

	thread0.td_kstack = proc0kstack;
	thread0.td_pcb = (struct pcb *)
	   (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1;

	/*
	 * This may be done better later if it gets more high level
	 * components in it. If so just link td->td_proc here.
	 */
	proc_linkup(&proc0, &ksegrp0, &thread0);

	metadata_missing = 0;
	if (bootinfo.bi_modulep) {
		preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE;
		preload_bootstrap_relocate(KERNBASE);
	} else {
		metadata_missing = 1;
	}
	if (envmode == 1)
		kern_envp = static_env;
	else if (bootinfo.bi_envp)
		kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE;

	/* Init basic tunables, hz etc */
	init_param1();

	/*
	 * Make gdt memory segments.  All segments cover the full 4GB
	 * of address space and permissions are enforced at page level.
	 */
	gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1);
	gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1);
#ifdef SMP
	pc = &SMP_prvspace[0].pcpu;
#else
	pc = &__pcpu;
#endif
	gdt_segs[GPRIV_SEL].ssd_limit = atop(0 - 1);
	gdt_segs[GPRIV_SEL].ssd_base = (int) pc;
	gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;

	/* Pack the software prototypes into hardware descriptors. */
	for (x = 0; x < NGDT; x++)
		ssdtosd(&gdt_segs[x], &gdt[x].sd);

	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
	r_gdt.rd_base =  (int) gdt;
	lgdt(&r_gdt);

	pcpu_init(pc, 0, sizeof(struct pcpu));
	PCPU_SET(prvspace, pc);
	PCPU_SET(curthread, &thread0);
	PCPU_SET(curpcb, thread0.td_pcb);

	/*
	 * Initialize mutexes.
	 *
	 * icu_lock: in order to allow an interrupt to occur in a critical
	 *           section, to set pcpu->ipending (etc...) properly, we
	 *           must be able to get the icu lock, so it can't be
	 *           under witness.
	 */
	mutex_init();
	mtx_init(&clock_lock, "clk", NULL, MTX_SPIN);
	mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS);

	/* make ldt memory segments */
	ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1);
	ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1);
	for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++)
		ssdtosd(&ldt_segs[x], &ldt[x].sd);

	_default_ldt = GSEL(GLDT_SEL, SEL_KPL);
	lldt(_default_ldt);
	PCPU_SET(currentldt, _default_ldt);

	/* exceptions */
	/* Point every vector at 'rsvd' first, then fill in the known ones. */
	for (x = 0; x < NIDT; x++)
		setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL,
		    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_DE, &IDTVEC(div),  SDT_SYS386TGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_DB, &IDTVEC(dbg),  SDT_SYS386IGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_NMI, &IDTVEC(nmi),  SDT_SYS386TGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_BP, &IDTVEC(bpt),  SDT_SYS386IGT, SEL_UPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_OF, &IDTVEC(ofl),  SDT_SYS386TGT, SEL_UPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_BR, &IDTVEC(bnd),  SDT_SYS386TGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_UD, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_NM, &IDTVEC(dna),  SDT_SYS386TGT, SEL_KPL
	    , GSEL(GCODE_SEL, SEL_KPL));
	/* Double faults go through a task gate onto their own stack. */
	setidt(IDT_DF, 0,  SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL));
	setidt(IDT_FPUGP, &IDTVEC(fpusegm),  SDT_SYS386TGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_TS, &IDTVEC(tss),  SDT_SYS386TGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_NP, &IDTVEC(missing),  SDT_SYS386TGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_SS, &IDTVEC(stk),  SDT_SYS386TGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_GP, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_PF, &IDTVEC(page),  SDT_SYS386IGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_MF, &IDTVEC(fpu),  SDT_SYS386TGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_AC, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_MC, &IDTVEC(mchk),  SDT_SYS386TGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_XF, &IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_SYSCALL, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL,
	    GSEL(GCODE_SEL, SEL_KPL));

	r_idt.rd_limit = sizeof(idt0) - 1;
	r_idt.rd_base = (int) idt;
	lidt(&r_idt);

	/*
	 * Initialize the console before we print anything out.
	 */
	cninit();

	if (metadata_missing)
		printf("WARNING: loader(8) metadata is missing!\n");

#ifdef DEV_ISA
	elcr_probe();
	atpic_startup();
#endif

#ifdef DDB
	ksym_start = bootinfo.bi_symtab;
	ksym_end = bootinfo.bi_esymtab;
#endif

	kdb_init();

#ifdef KDB
	if (boothowto & RB_KDB)
		kdb_enter("Boot flags requested debugger");
#endif

	finishidentcpu();	/* Final stage of CPU initialization */
	/*
	 * NOTE(review): IDT_UD and IDT_GP are set again here, identically to
	 * above; presumably finishidentcpu() may have redirected them during
	 * probing -- confirm before removing the apparent duplication.
	 */
	setidt(IDT_UD, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	setidt(IDT_GP, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	initializecpu();	/* Initialize CPU registers */

	/* make an initial tss so cpu can get interrupt stack on syscall! */
	/* Note: -16 is so we can grow the trapframe if we came from vm86 */
	PCPU_SET(common_tss.tss_esp0, thread0.td_kstack +
	    KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb) - 16);
	PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
	private_tss = 0;
	PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd);
	PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
	PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
	ltr(gsel_tss);

	/* pointer to selector slot for %fs/%gs */
	PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);

	/* Set up the double-fault TSS with its private stack. */
	dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 =
	    dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)];
	dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
	    dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
#ifdef PAE
	dblfault_tss.tss_cr3 = (int)IdlePDPT;
#else
	dblfault_tss.tss_cr3 = (int)IdlePTD;
#endif
	dblfault_tss.tss_eip = (int)dblfault_handler;
	dblfault_tss.tss_eflags = PSL_KERNEL;
	dblfault_tss.tss_ds = dblfault_tss.tss_es =
	    dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL);
	dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL);
	dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
	dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);

	vm86_initialize();
	getmemsize(first);
	init_param2(physmem);

	/* now running on new page tables, configured,and u/iom is accessible */

	/* Map the message buffer. */
	for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE)
		pmap_kenter((vm_offset_t)msgbufp + off, avail_end + off);

	msgbufinit(msgbufp, MSGBUF_SIZE);

	/* make a call gate to reenter kernel with */
	gdp = &ldt[LSYS5CALLS_SEL].gd;

	x = (int) &IDTVEC(lcall_syscall);
	gdp->gd_looffset = x;
	gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL);
	gdp->gd_stkcpy = 1;
	gdp->gd_type = SDT_SYS386CGT;
	gdp->gd_dpl = SEL_UPL;
	gdp->gd_p = 1;
	gdp->gd_hioffset = x >> 16;

	/* XXX does this work? */
	ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL];
	ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL];

	/* transfer to user mode */

	_ucodesel = LSEL(LUCODE_SEL, SEL_UPL);
	_udatasel = LSEL(LUDATA_SEL, SEL_UPL);

	/* setup proc 0's pcb */
	thread0.td_pcb->pcb_flags = 0; /* XXXKSE */
#ifdef PAE
	thread0.td_pcb->pcb_cr3 = (int)IdlePDPT;
#else
	thread0.td_pcb->pcb_cr3 = (int)IdlePTD;
#endif
	thread0.td_pcb->pcb_ext = 0;
	thread0.td_frame = &proc0_tf;
}
 2242 
void
cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
{

        /*
         * Machine-dependent per-CPU area initialization.  The only i386
         * MD field is the ACPI processor id; initialize it to an
         * all-ones sentinel — presumably filled in later by the ACPI
         * CPU probe code (verify against the acpi_cpu driver).
         */
        pcpu->pc_acpi_id = 0xffffffff;
}
 2249 
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
static void f00f_hack(void *unused);
SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL)

/*
 * Workaround for the Intel Pentium "F00F" erratum (a locked cmpxchg8b
 * with an invalid encoding can hang the CPU).  Relocate the IDT into
 * freshly allocated memory, positioned so that the problematic invalid
 * opcode entry (#6) is the last descriptor on the first page, then make
 * that page read-only.  The resulting fault on descriptor access lets
 * the kernel recover instead of hanging.
 */
static void
f00f_hack(void *unused)
{
        struct gate_descriptor *new_idt;
        vm_offset_t tmp;

        /* has_f00f_bug is set during CPU identification. */
        if (!has_f00f_bug)
                return;

        GIANT_REQUIRED;

        printf("Intel Pentium detected, installing workaround for F00F bug\n");

        /* Two pages: entries 0-6 end the first page, the rest spill over. */
        tmp = kmem_alloc(kernel_map, PAGE_SIZE * 2);
        if (tmp == 0)
                panic("kmem_alloc returned 0");

        /* Put the problematic entry (#6) at the end of the lower page. */
        new_idt = (struct gate_descriptor*)
            (tmp + PAGE_SIZE - 7 * sizeof(struct gate_descriptor));
        bcopy(idt, new_idt, sizeof(idt0));
        r_idt.rd_base = (u_int)new_idt;
        lidt(&r_idt);           /* switch the CPU to the relocated IDT */
        idt = new_idt;
        /* Write-protect the page holding entries 0-6. */
        if (vm_map_protect(kernel_map, tmp, tmp + PAGE_SIZE,
                           VM_PROT_READ, FALSE) != KERN_SUCCESS)
                panic("vm_map_protect failed");
}
#endif /* defined(I586_CPU) && !NO_F00F_HACK */
 2283 
 2284 /*
 2285  * Construct a PCB from a trapframe. This is called from kdb_trap() where
 2286  * we want to start a backtrace from the function that caused us to enter
 2287  * the debugger. We have the context in the trapframe, but base the trace
 2288  * on the PCB. The PCB doesn't have to be perfect, as long as it contains
 2289  * enough for a backtrace.
 2290  */
 2291 void
 2292 makectx(struct trapframe *tf, struct pcb *pcb)
 2293 {
 2294 
 2295         pcb->pcb_edi = tf->tf_edi;
 2296         pcb->pcb_esi = tf->tf_esi;
 2297         pcb->pcb_ebp = tf->tf_ebp;
 2298         pcb->pcb_ebx = tf->tf_ebx;
 2299         pcb->pcb_eip = tf->tf_eip;
 2300         pcb->pcb_esp = (ISPL(tf->tf_cs)) ? tf->tf_esp : (int)(tf + 1) - 8;
 2301 }
 2302 
int
ptrace_set_pc(struct thread *td, u_long addr)
{

        /*
         * Redirect the thread's saved user instruction pointer so it
         * resumes execution at addr.  Always succeeds.
         */
        td->td_frame->tf_eip = addr;
        return (0);
}
 2310 
int
ptrace_single_step(struct thread *td)
{
        /* Set the trap flag so the thread faults after one instruction. */
        td->td_frame->tf_eflags |= PSL_T;
        return (0);
}
 2317 
int
ptrace_clear_single_step(struct thread *td)
{
        /* Clear the trap flag to stop single-stepping. */
        td->td_frame->tf_eflags &= ~PSL_T;
        return (0);
}
 2324 
 2325 int
 2326 fill_regs(struct thread *td, struct reg *regs)
 2327 {
 2328         struct pcb *pcb;
 2329         struct trapframe *tp;
 2330 
 2331         tp = td->td_frame;
 2332         regs->r_fs = tp->tf_fs;
 2333         regs->r_es = tp->tf_es;
 2334         regs->r_ds = tp->tf_ds;
 2335         regs->r_edi = tp->tf_edi;
 2336         regs->r_esi = tp->tf_esi;
 2337         regs->r_ebp = tp->tf_ebp;
 2338         regs->r_ebx = tp->tf_ebx;
 2339         regs->r_edx = tp->tf_edx;
 2340         regs->r_ecx = tp->tf_ecx;
 2341         regs->r_eax = tp->tf_eax;
 2342         regs->r_eip = tp->tf_eip;
 2343         regs->r_cs = tp->tf_cs;
 2344         regs->r_eflags = tp->tf_eflags;
 2345         regs->r_esp = tp->tf_esp;
 2346         regs->r_ss = tp->tf_ss;
 2347         pcb = td->td_pcb;
 2348         regs->r_gs = pcb->pcb_gs;
 2349         return (0);
 2350 }
 2351 
 2352 int
 2353 set_regs(struct thread *td, struct reg *regs)
 2354 {
 2355         struct pcb *pcb;
 2356         struct trapframe *tp;
 2357 
 2358         tp = td->td_frame;
 2359         if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) ||
 2360             !CS_SECURE(regs->r_cs))
 2361                 return (EINVAL);
 2362         tp->tf_fs = regs->r_fs;
 2363         tp->tf_es = regs->r_es;
 2364         tp->tf_ds = regs->r_ds;
 2365         tp->tf_edi = regs->r_edi;
 2366         tp->tf_esi = regs->r_esi;
 2367         tp->tf_ebp = regs->r_ebp;
 2368         tp->tf_ebx = regs->r_ebx;
 2369         tp->tf_edx = regs->r_edx;
 2370         tp->tf_ecx = regs->r_ecx;
 2371         tp->tf_eax = regs->r_eax;
 2372         tp->tf_eip = regs->r_eip;
 2373         tp->tf_cs = regs->r_cs;
 2374         tp->tf_eflags = regs->r_eflags;
 2375         tp->tf_esp = regs->r_esp;
 2376         tp->tf_ss = regs->r_ss;
 2377         pcb = td->td_pcb;
 2378         pcb->pcb_gs = regs->r_gs;
 2379         return (0);
 2380 }
 2381 
 2382 #ifdef CPU_ENABLE_SSE
 2383 static void
 2384 fill_fpregs_xmm(sv_xmm, sv_87)
 2385         struct savexmm *sv_xmm;
 2386         struct save87 *sv_87;
 2387 {
 2388         register struct env87 *penv_87 = &sv_87->sv_env;
 2389         register struct envxmm *penv_xmm = &sv_xmm->sv_env;
 2390         int i;
 2391 
 2392         bzero(sv_87, sizeof(*sv_87));
 2393 
 2394         /* FPU control/status */
 2395         penv_87->en_cw = penv_xmm->en_cw;
 2396         penv_87->en_sw = penv_xmm->en_sw;
 2397         penv_87->en_tw = penv_xmm->en_tw;
 2398         penv_87->en_fip = penv_xmm->en_fip;
 2399         penv_87->en_fcs = penv_xmm->en_fcs;
 2400         penv_87->en_opcode = penv_xmm->en_opcode;
 2401         penv_87->en_foo = penv_xmm->en_foo;
 2402         penv_87->en_fos = penv_xmm->en_fos;
 2403 
 2404         /* FPU registers */
 2405         for (i = 0; i < 8; ++i)
 2406                 sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc;
 2407 }
 2408 
 2409 static void
 2410 set_fpregs_xmm(sv_87, sv_xmm)
 2411         struct save87 *sv_87;
 2412         struct savexmm *sv_xmm;
 2413 {
 2414         register struct env87 *penv_87 = &sv_87->sv_env;
 2415         register struct envxmm *penv_xmm = &sv_xmm->sv_env;
 2416         int i;
 2417 
 2418         /* FPU control/status */
 2419         penv_xmm->en_cw = penv_87->en_cw;
 2420         penv_xmm->en_sw = penv_87->en_sw;
 2421         penv_xmm->en_tw = penv_87->en_tw;
 2422         penv_xmm->en_fip = penv_87->en_fip;
 2423         penv_xmm->en_fcs = penv_87->en_fcs;
 2424         penv_xmm->en_opcode = penv_87->en_opcode;
 2425         penv_xmm->en_foo = penv_87->en_foo;
 2426         penv_xmm->en_fos = penv_87->en_fos;
 2427 
 2428         /* FPU registers */
 2429         for (i = 0; i < 8; ++i)
 2430                 sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i];
 2431 }
 2432 #endif /* CPU_ENABLE_SSE */
 2433 
/*
 * Copy the thread's saved FPU state into *fpregs (legacy 387 layout).
 * Always returns 0.
 */
int
fill_fpregs(struct thread *td, struct fpreg *fpregs)
{
#ifdef CPU_ENABLE_SSE
        /* With FXSR the pcb holds an fxsave image; convert to 387 layout. */
        if (cpu_fxsr) {
                fill_fpregs_xmm(&td->td_pcb->pcb_save.sv_xmm,
                                                (struct save87 *)fpregs);
                return (0);
        }
#endif /* CPU_ENABLE_SSE */
        /* Plain 387 state: a straight copy suffices. */
        bcopy(&td->td_pcb->pcb_save.sv_87, fpregs, sizeof *fpregs);
        return (0);
}
 2447 
/*
 * Install new FPU state from *fpregs (legacy 387 layout) into the
 * thread's pcb.  Always returns 0.
 */
int
set_fpregs(struct thread *td, struct fpreg *fpregs)
{
#ifdef CPU_ENABLE_SSE
        /* With FXSR the pcb expects an fxsave image; convert from 387. */
        if (cpu_fxsr) {
                set_fpregs_xmm((struct save87 *)fpregs,
                                           &td->td_pcb->pcb_save.sv_xmm);
                return (0);
        }
#endif /* CPU_ENABLE_SSE */
        /* Plain 387 state: a straight copy suffices. */
        bcopy(fpregs, &td->td_pcb->pcb_save.sv_87, sizeof *fpregs);
        return (0);
}
 2461 
 2462 /*
 2463  * Get machine context.
 2464  */
 2465 int
 2466 get_mcontext(struct thread *td, mcontext_t *mcp, int flags)
 2467 {
 2468         struct trapframe *tp;
 2469 
 2470         tp = td->td_frame;
 2471 
 2472         PROC_LOCK(curthread->td_proc);
 2473         mcp->mc_onstack = sigonstack(tp->tf_esp);
 2474         PROC_UNLOCK(curthread->td_proc);
 2475         mcp->mc_gs = td->td_pcb->pcb_gs;
 2476         mcp->mc_fs = tp->tf_fs;
 2477         mcp->mc_es = tp->tf_es;
 2478         mcp->mc_ds = tp->tf_ds;
 2479         mcp->mc_edi = tp->tf_edi;
 2480         mcp->mc_esi = tp->tf_esi;
 2481         mcp->mc_ebp = tp->tf_ebp;
 2482         mcp->mc_isp = tp->tf_isp;
 2483         mcp->mc_eflags = tp->tf_eflags;
 2484         if (flags & GET_MC_CLEAR_RET) {
 2485                 mcp->mc_eax = 0;
 2486                 mcp->mc_edx = 0;
 2487                 mcp->mc_eflags &= ~PSL_C;
 2488         } else {
 2489                 mcp->mc_eax = tp->tf_eax;
 2490                 mcp->mc_edx = tp->tf_edx;
 2491         }
 2492         mcp->mc_ebx = tp->tf_ebx;
 2493         mcp->mc_ecx = tp->tf_ecx;
 2494         mcp->mc_eip = tp->tf_eip;
 2495         mcp->mc_cs = tp->tf_cs;
 2496         mcp->mc_esp = tp->tf_esp;
 2497         mcp->mc_ss = tp->tf_ss;
 2498         mcp->mc_len = sizeof(*mcp);
 2499         get_fpcontext(td, mcp);
 2500         return (0);
 2501 }
 2502 
 2503 /*
 2504  * Set machine context.
 2505  *
 2506  * However, we don't set any but the user modifiable flags, and we won't
 2507  * touch the cs selector.
 2508  */
 2509 int
 2510 set_mcontext(struct thread *td, const mcontext_t *mcp)
 2511 {
 2512         struct trapframe *tp;
 2513         int eflags, ret;
 2514 
 2515         tp = td->td_frame;
 2516         if (mcp->mc_len != sizeof(*mcp))
 2517                 return (EINVAL);
 2518         eflags = (mcp->mc_eflags & PSL_USERCHANGE) |
 2519             (tp->tf_eflags & ~PSL_USERCHANGE);
 2520         if ((ret = set_fpcontext(td, mcp)) == 0) {
 2521                 tp->tf_fs = mcp->mc_fs;
 2522                 tp->tf_es = mcp->mc_es;
 2523                 tp->tf_ds = mcp->mc_ds;
 2524                 tp->tf_edi = mcp->mc_edi;
 2525                 tp->tf_esi = mcp->mc_esi;
 2526                 tp->tf_ebp = mcp->mc_ebp;
 2527                 tp->tf_ebx = mcp->mc_ebx;
 2528                 tp->tf_edx = mcp->mc_edx;
 2529                 tp->tf_ecx = mcp->mc_ecx;
 2530                 tp->tf_eax = mcp->mc_eax;
 2531                 tp->tf_eip = mcp->mc_eip;
 2532                 tp->tf_eflags = eflags;
 2533                 tp->tf_esp = mcp->mc_esp;
 2534                 tp->tf_ss = mcp->mc_ss;
 2535                 td->td_pcb->pcb_gs = mcp->mc_gs;
 2536                 ret = 0;
 2537         }
 2538         return (ret);
 2539 }
 2540 
/*
 * Save the thread's FPU state into mcp->mc_fpstate and record who owned
 * it (mc_ownedfp) and in what format (mc_fpformat).  Without DEV_NPX
 * there is no state to save and that is reported instead.
 */
static void
get_fpcontext(struct thread *td, mcontext_t *mcp)
{
#ifndef DEV_NPX
        mcp->mc_fpformat = _MC_FPFMT_NODEV;
        mcp->mc_ownedfp = _MC_FPOWNED_NONE;
#else
        union savefpu *addr;

        /*
         * XXX mc_fpstate might be misaligned, since its declaration is not
         * unportabilized using __attribute__((aligned(16))) like the
         * declaration of struct savemm, and anyway, alignment doesn't work
         * for auto variables since we don't use gcc's pessimal stack
         * alignment.  Work around this by abusing the spare fields after
         * mcp->mc_fpstate.
         *
         * XXX unpessimize most cases by only aligning when fxsave might be
         * called, although this requires knowing too much about
         * npxgetregs()'s internals.
         */
        addr = (union savefpu *)&mcp->mc_fpstate;
        /* fxsave requires 16-byte alignment; only live state needs it. */
        if (td == PCPU_GET(fpcurthread) &&
#ifdef CPU_ENABLE_SSE
            cpu_fxsr &&
#endif
            ((uintptr_t)(void *)addr & 0xF)) {
                /* Bump addr in 4-byte steps to the next 16-byte boundary. */
                do
                        addr = (void *)((char *)addr + 4);
                while ((uintptr_t)(void *)addr & 0xF);
        }
        mcp->mc_ownedfp = npxgetregs(td, addr);
        if (addr != (union savefpu *)&mcp->mc_fpstate) {
                /* Slide the aligned copy back into mc_fpstate proper. */
                bcopy(addr, &mcp->mc_fpstate, sizeof(mcp->mc_fpstate));
                bzero(&mcp->mc_spare2, sizeof(mcp->mc_spare2));
        }
        mcp->mc_fpformat = npxformat();
#endif
}
 2580 
/*
 * Restore the thread's FPU state from mcp->mc_fpstate according to
 * mc_fpformat/mc_ownedfp.  Returns EINVAL for an unrecognized format
 * or owner, 0 otherwise.
 */
static int
set_fpcontext(struct thread *td, const mcontext_t *mcp)
{
        union savefpu *addr;

        if (mcp->mc_fpformat == _MC_FPFMT_NODEV)
                /* No saved state; leave whatever the thread has. */
                return (0);
        else if (mcp->mc_fpformat != _MC_FPFMT_387 &&
            mcp->mc_fpformat != _MC_FPFMT_XMM)
                return (EINVAL);
        else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE)
                /* We don't care what state is left in the FPU or PCB. */
                fpstate_drop(td);
        else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU ||
            mcp->mc_ownedfp == _MC_FPOWNED_PCB) {
                /* XXX align as above. */
                addr = (union savefpu *)&mcp->mc_fpstate;
                if (td == PCPU_GET(fpcurthread) &&
#ifdef CPU_ENABLE_SSE
                    cpu_fxsr &&
#endif
                    ((uintptr_t)(void *)addr & 0xF)) {
                        /* Copy into a 16-byte-aligned spot for fxrstor. */
                        do
                                addr = (void *)((char *)addr + 4);
                        while ((uintptr_t)(void *)addr & 0xF);
                        bcopy(&mcp->mc_fpstate, addr, sizeof(mcp->mc_fpstate));
                }
#ifdef DEV_NPX
                /*
                 * XXX we violate the dubious requirement that npxsetregs()
                 * be called with interrupts disabled.
                 */
                npxsetregs(td, addr);
#endif
                /*
                 * Don't bother putting things back where they were in the
                 * misaligned case, since we know that the caller won't use
                 * them again.
                 */
        } else
                return (EINVAL);
        return (0);
}
 2624 
 2625 static void
 2626 fpstate_drop(struct thread *td)
 2627 {
 2628         register_t s;
 2629 
 2630         s = intr_disable();
 2631 #ifdef DEV_NPX
 2632         if (PCPU_GET(fpcurthread) == td)
 2633                 npxdrop();
 2634 #endif
 2635         /*
 2636          * XXX force a full drop of the npx.  The above only drops it if we
 2637          * owned it.  npxgetregs() has the same bug in the !cpu_fxsr case.
 2638          *
 2639          * XXX I don't much like npxgetregs()'s semantics of doing a full
 2640          * drop.  Dropping only to the pcb matches fnsave's behaviour.
 2641          * We only need to drop to !PCB_INITDONE in sendsig().  But
 2642          * sendsig() is the only caller of npxgetregs()... perhaps we just
 2643          * have too many layers.
 2644          */
 2645         curthread->td_pcb->pcb_flags &= ~PCB_NPXINITDONE;
 2646         intr_restore(s);
 2647 }
 2648 
 2649 int
 2650 fill_dbregs(struct thread *td, struct dbreg *dbregs)
 2651 {
 2652         struct pcb *pcb;
 2653 
 2654         if (td == NULL) {
 2655                 dbregs->dr[0] = rdr0();
 2656                 dbregs->dr[1] = rdr1();
 2657                 dbregs->dr[2] = rdr2();
 2658                 dbregs->dr[3] = rdr3();
 2659                 dbregs->dr[4] = rdr4();
 2660                 dbregs->dr[5] = rdr5();
 2661                 dbregs->dr[6] = rdr6();
 2662                 dbregs->dr[7] = rdr7();
 2663         } else {
 2664                 pcb = td->td_pcb;
 2665                 dbregs->dr[0] = pcb->pcb_dr0;
 2666                 dbregs->dr[1] = pcb->pcb_dr1;
 2667                 dbregs->dr[2] = pcb->pcb_dr2;
 2668                 dbregs->dr[3] = pcb->pcb_dr3;
 2669                 dbregs->dr[4] = 0;
 2670                 dbregs->dr[5] = 0;
 2671                 dbregs->dr[6] = pcb->pcb_dr6;
 2672                 dbregs->dr[7] = pcb->pcb_dr7;
 2673         }
 2674         return (0);
 2675 }
 2676 
/*
 * Install new debug register values.  For a specific thread the values
 * are validated and stored in its pcb (loaded on context switch); with
 * td == NULL the hardware registers are loaded directly, unchecked.
 * Returns EINVAL on a rejected dr7 pattern or out-of-range breakpoint
 * address, 0 on success.
 */
int
set_dbregs(struct thread *td, struct dbreg *dbregs)
{
        struct pcb *pcb;
        int i;
        u_int32_t mask1, mask2;

        if (td == NULL) {
                load_dr0(dbregs->dr[0]);
                load_dr1(dbregs->dr[1]);
                load_dr2(dbregs->dr[2]);
                load_dr3(dbregs->dr[3]);
                load_dr4(dbregs->dr[4]);
                load_dr5(dbregs->dr[5]);
                load_dr6(dbregs->dr[6]);
                load_dr7(dbregs->dr[7]);
        } else {
                /*
                 * Don't let an illegal value for dr7 get set.  Specifically,
                 * check for undefined settings.  Setting these bit patterns
                 * result in undefined behaviour and can lead to an unexpected
                 * TRCTRAP.
                 */
                /*
                 * Scan the eight 2-bit fields in dr7 bits 16-31 and
                 * reject any field equal to binary 10 (the mask2
                 * pattern), which is an undefined encoding.
                 */
                for (i = 0, mask1 = 0x3<<16, mask2 = 0x2<<16; i < 8; 
                     i++, mask1 <<= 2, mask2 <<= 2)
                        if ((dbregs->dr[7] & mask1) == mask2)
                                return (EINVAL);
                
                pcb = td->td_pcb;
                
                /*
                 * Don't let a process set a breakpoint that is not within the
                 * process's address space.  If a process could do this, it
                 * could halt the system by setting a breakpoint in the kernel
                 * (if ddb was enabled).  Thus, we need to check to make sure
                 * that no breakpoints are being enabled for addresses outside
                 * process's address space, unless, perhaps, we were called by
                 * uid 0.
                 *
                 * XXX - what about when the watched area of the user's
                 * address space is written into from within the kernel
                 * ... wouldn't that still cause a breakpoint to be generated
                 * from within kernel mode?
                 */

                /* suser() returns non-zero when NOT the superuser. */
                if (suser(td) != 0) {
                        if (dbregs->dr[7] & 0x3) {
                                /* dr0 is enabled */
                                if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS)
                                        return (EINVAL);
                        }
                        
                        if (dbregs->dr[7] & (0x3<<2)) {
                                /* dr1 is enabled */
                                if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS)
                                        return (EINVAL);
                        }
                        
                        if (dbregs->dr[7] & (0x3<<4)) {
                                /* dr2 is enabled */
                                if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS)
                                        return (EINVAL);
                        }
                        
                        if (dbregs->dr[7] & (0x3<<6)) {
                                /* dr3 is enabled */
                                if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS)
                                        return (EINVAL);
                        }
                }

                /* Validated: stash in the pcb for the context switcher. */
                pcb->pcb_dr0 = dbregs->dr[0];
                pcb->pcb_dr1 = dbregs->dr[1];
                pcb->pcb_dr2 = dbregs->dr[2];
                pcb->pcb_dr3 = dbregs->dr[3];
                pcb->pcb_dr6 = dbregs->dr[6];
                pcb->pcb_dr7 = dbregs->dr[7];

                /* Tell the context switcher this thread uses debug regs. */
                pcb->pcb_flags |= PCB_DBREGS;
        }

        return (0);
}
 2760 
 2761 /*
 2762  * Return > 0 if a hardware breakpoint has been hit, and the
 2763  * breakpoint was in user space.  Return 0, otherwise.
 2764  */
 2765 int
 2766 user_dbreg_trap(void)
 2767 {
 2768         u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */
 2769         u_int32_t bp;       /* breakpoint bits extracted from dr6 */
 2770         int nbp;            /* number of breakpoints that triggered */
 2771         caddr_t addr[4];    /* breakpoint addresses */
 2772         int i;
 2773         
 2774         dr7 = rdr7();
 2775         if ((dr7 & 0x000000ff) == 0) {
 2776                 /*
 2777                  * all GE and LE bits in the dr7 register are zero,
 2778                  * thus the trap couldn't have been caused by the
 2779                  * hardware debug registers
 2780                  */
 2781                 return 0;
 2782         }
 2783 
 2784         nbp = 0;
 2785         dr6 = rdr6();
 2786         bp = dr6 & 0x0000000f;
 2787 
 2788         if (!bp) {
 2789                 /*
 2790                  * None of the breakpoint bits are set meaning this
 2791                  * trap was not caused by any of the debug registers
 2792                  */
 2793                 return 0;
 2794         }
 2795 
 2796         /*
 2797          * at least one of the breakpoints were hit, check to see
 2798          * which ones and if any of them are user space addresses
 2799          */
 2800 
 2801         if (bp & 0x01) {
 2802                 addr[nbp++] = (caddr_t)rdr0();
 2803         }
 2804         if (bp & 0x02) {
 2805                 addr[nbp++] = (caddr_t)rdr1();
 2806         }
 2807         if (bp & 0x04) {
 2808                 addr[nbp++] = (caddr_t)rdr2();
 2809         }
 2810         if (bp & 0x08) {
 2811                 addr[nbp++] = (caddr_t)rdr3();
 2812         }
 2813 
 2814         for (i=0; i<nbp; i++) {
 2815                 if (addr[i] <
 2816                     (caddr_t)VM_MAXUSER_ADDRESS) {
 2817                         /*
 2818                          * addr[i] is in user space
 2819                          */
 2820                         return nbp;
 2821                 }
 2822         }
 2823 
 2824         /*
 2825          * None of the breakpoints are in user space.
 2826          */
 2827         return 0;
 2828 }
 2829 
#ifndef DEV_APIC
#include <machine/apicvar.h>

/*
 * Provide stub functions so that the MADT APIC enumerator in the acpi
 * kernel module will link against a kernel without 'device apic'.
 * Registration and creation stubs are silent no-ops; operations that
 * report status return ENXIO (or -1 for ioapic_get_vector).
 *
 * XXX - This is a gross hack.
 */
void
apic_register_enumerator(struct apic_enumerator *enumerator)
{
}

/* I/O APIC stubs. */
void *
ioapic_create(uintptr_t addr, int32_t id, int intbase)
{
        return (NULL);
}

int
ioapic_disable_pin(void *cookie, u_int pin)
{
        return (ENXIO);
}

void
ioapic_enable_mixed_mode(void)
{
}

int
ioapic_get_vector(void *cookie, u_int pin)
{
        return (-1);
}

void
ioapic_register(void *cookie)
{
}

int
ioapic_remap_vector(void *cookie, u_int pin, int vector)
{
        return (ENXIO);
}

int
ioapic_set_extint(void *cookie, u_int pin)
{
        return (ENXIO);
}

int
ioapic_set_nmi(void *cookie, u_int pin)
{
        return (ENXIO);
}

int
ioapic_set_polarity(void *cookie, u_int pin, enum intr_polarity pol)
{
        return (ENXIO);
}

int
ioapic_set_triggermode(void *cookie, u_int pin, enum intr_trigger trigger)
{
        return (ENXIO);
}

/* Local APIC stubs. */
void
lapic_create(u_int apic_id, int boot_cpu)
{
}

void
lapic_init(uintptr_t addr)
{
}

int
lapic_set_lvt_mode(u_int apic_id, u_int lvt, u_int32_t mode)
{
        return (ENXIO);
}

int
lapic_set_lvt_polarity(u_int apic_id, u_int lvt, enum intr_polarity pol)
{
        return (ENXIO);
}

int
lapic_set_lvt_triggermode(u_int apic_id, u_int lvt, enum intr_trigger trigger)
{
        return (ENXIO);
}
#endif
 2930 
#ifdef KDB

/*
 * Provide inb() and outb() as functions.  They are normally only
 * available as macros calling inlined functions, thus cannot be
 * called from the debugger.
 *
 * The actual code is stolen from <machine/cpufunc.h>, and de-inlined.
 */

#undef inb
#undef outb

/* silence compiler warnings */
u_char inb(u_int);
void outb(u_int, u_char);

/* Read one byte from I/O port `port'. */
u_char
inb(u_int port)
{
        u_char  data;
        /*
         * We use %%dx and not %1 here because i/o is done at %dx and not at
         * %edx, while gcc generates inferior code (movw instead of movl)
         * if we tell it to load (u_short) port.
         */
        __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
        return (data);
}

/* Write the byte `data' to I/O port `port'. */
void
outb(u_int port, u_char data)
{
        u_char  al;
        /*
         * Use an unnecessary assignment to help gcc's register allocator.
         * This make a large difference for gcc-1.40 and a tiny difference
         * for gcc-2.6.0.  For gcc-1.40, al had to be ``asm("ax")'' for
         * best results.  gcc-2.6.0 can't handle this.
         */
        al = data;
        __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
}

#endif /* KDB */

Cache object: 6c46db90c89c58e145f0f6704cd22536


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.