The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/i386/i386/machdep.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1992 Terrence R. Lambert.
    3  * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
    4  * All rights reserved.
    5  *
    6  * This code is derived from software contributed to Berkeley by
    7  * William Jolitz.
    8  *
    9  * Redistribution and use in source and binary forms, with or without
   10  * modification, are permitted provided that the following conditions
   11  * are met:
   12  * 1. Redistributions of source code must retain the above copyright
   13  *    notice, this list of conditions and the following disclaimer.
   14  * 2. Redistributions in binary form must reproduce the above copyright
   15  *    notice, this list of conditions and the following disclaimer in the
   16  *    documentation and/or other materials provided with the distribution.
   17  * 3. All advertising materials mentioning features or use of this software
   18  *    must display the following acknowledgement:
   19  *      This product includes software developed by the University of
   20  *      California, Berkeley and its contributors.
   21  * 4. Neither the name of the University nor the names of its contributors
   22  *    may be used to endorse or promote products derived from this software
   23  *    without specific prior written permission.
   24  *
   25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   35  * SUCH DAMAGE.
   36  *
   37  *      from: @(#)machdep.c     7.4 (Berkeley) 6/3/91
   38  */
   39 
   40 #include <sys/cdefs.h>
   41 __FBSDID("$FreeBSD: releng/5.4/sys/i386/i386/machdep.c 145335 2005-04-20 19:11:07Z cvs2svn $");
   42 
   43 #include "opt_apic.h"
   44 #include "opt_atalk.h"
   45 #include "opt_compat.h"
   46 #include "opt_cpu.h"
   47 #include "opt_ddb.h"
   48 #include "opt_inet.h"
   49 #include "opt_ipx.h"
   50 #include "opt_isa.h"
   51 #include "opt_kstack_pages.h"
   52 #include "opt_maxmem.h"
   53 #include "opt_msgbuf.h"
   54 #include "opt_npx.h"
   55 #include "opt_perfmon.h"
   56 
   57 #include <sys/param.h>
   58 #include <sys/proc.h>
   59 #include <sys/systm.h>
   60 #include <sys/bio.h>
   61 #include <sys/buf.h>
   62 #include <sys/bus.h>
   63 #include <sys/callout.h>
   64 #include <sys/cons.h>
   65 #include <sys/cpu.h>
   66 #include <sys/eventhandler.h>
   67 #include <sys/exec.h>
   68 #include <sys/imgact.h>
   69 #include <sys/kdb.h>
   70 #include <sys/kernel.h>
   71 #include <sys/ktr.h>
   72 #include <sys/linker.h>
   73 #include <sys/lock.h>
   74 #include <sys/malloc.h>
   75 #include <sys/memrange.h>
   76 #include <sys/msgbuf.h>
   77 #include <sys/mutex.h>
   78 #include <sys/pcpu.h>
   79 #include <sys/ptrace.h>
   80 #include <sys/reboot.h>
   81 #include <sys/sched.h>
   82 #include <sys/signalvar.h>
   83 #include <sys/sysctl.h>
   84 #include <sys/sysent.h>
   85 #include <sys/sysproto.h>
   86 #include <sys/ucontext.h>
   87 #include <sys/vmmeter.h>
   88 
   89 #include <vm/vm.h>
   90 #include <vm/vm_extern.h>
   91 #include <vm/vm_kern.h>
   92 #include <vm/vm_page.h>
   93 #include <vm/vm_map.h>
   94 #include <vm/vm_object.h>
   95 #include <vm/vm_pager.h>
   96 #include <vm/vm_param.h>
   97 
   98 #ifdef DDB
   99 #ifndef KDB
  100 #error KDB must be enabled in order for DDB to work!
  101 #endif
  102 #include <ddb/ddb.h>
  103 #include <ddb/db_sym.h>
  104 #endif
  105 
  106 #include <isa/rtc.h>
  107 
  108 #include <net/netisr.h>
  109 
  110 #include <machine/bootinfo.h>
  111 #include <machine/clock.h>
  112 #include <machine/cpu.h>
  113 #include <machine/cputypes.h>
  114 #include <machine/intr_machdep.h>
  115 #include <machine/md_var.h>
  116 #include <machine/pc/bios.h>
  117 #include <machine/pcb.h>
  118 #include <machine/pcb_ext.h>
  119 #include <machine/proc.h>
  120 #include <machine/reg.h>
  121 #include <machine/sigframe.h>
  122 #include <machine/specialreg.h>
  123 #include <machine/vm86.h>
  124 #ifdef PERFMON
  125 #include <machine/perfmon.h>
  126 #endif
  127 #ifdef SMP
  128 #include <machine/privatespace.h>
  129 #include <machine/smp.h>
  130 #endif
  131 
  132 #ifdef DEV_ISA
  133 #include <i386/isa/icu.h>
  134 #endif
  135 
  136 /* Sanity check for __curthread() */
  137 CTASSERT(offsetof(struct pcpu, pc_curthread) == 0);
  138 
  139 extern void init386(int first);
  140 extern void dblfault_handler(void);
  141 
  142 extern void printcpuinfo(void); /* XXX header file */
  143 extern void finishidentcpu(void);
  144 extern void panicifcpuunsupported(void);
  145 extern void initializecpu(void);
  146 
  147 #define CS_SECURE(cs)           (ISPL(cs) == SEL_UPL)
  148 #define EFL_SECURE(ef, oef)     ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
  149 
  150 #if !defined(CPU_ENABLE_SSE) && defined(I686_CPU)
  151 #define CPU_ENABLE_SSE
  152 #endif
  153 #if defined(CPU_DISABLE_SSE)
  154 #undef CPU_ENABLE_SSE
  155 #endif
  156 
  157 static void cpu_startup(void *);
  158 static void fpstate_drop(struct thread *td);
  159 static void get_fpcontext(struct thread *td, mcontext_t *mcp);
  160 static int  set_fpcontext(struct thread *td, const mcontext_t *mcp);
  161 #ifdef CPU_ENABLE_SSE
  162 static void set_fpregs_xmm(struct save87 *, struct savexmm *);
  163 static void fill_fpregs_xmm(struct savexmm *, struct save87 *);
  164 #endif /* CPU_ENABLE_SSE */
  165 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)
  166 
  167 #ifdef DDB
  168 extern vm_offset_t ksym_start, ksym_end;
  169 #endif
  170 
  171 int     _udatasel, _ucodesel;
  172 u_int   basemem;
  173 
  174 int cold = 1;
  175 
  176 #ifdef COMPAT_43
  177 static void osendsig(sig_t catcher, int sig, sigset_t *mask, u_long code);
  178 #endif
  179 #ifdef COMPAT_FREEBSD4
  180 static void freebsd4_sendsig(sig_t catcher, int sig, sigset_t *mask,
  181     u_long code);
  182 #endif
  183 
  184 long Maxmem = 0;
  185 long realmem = 0;
  186 
  187 vm_paddr_t phys_avail[10];
  188 
  189 /* must be 2 less so 0 0 can signal end of chunks */
  190 #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2)
  191 
  192 struct kva_md_info kmi;
  193 
  194 static struct trapframe proc0_tf;
  195 #ifndef SMP
  196 static struct pcpu __pcpu;
  197 #endif
  198 
  199 struct mtx icu_lock;
  200 
  201 struct mem_range_softc mem_range_softc;
  202 
  203 static void
  204 cpu_startup(dummy)
  205         void *dummy;
  206 {
  207         /*
  208          * Good {morning,afternoon,evening,night}.
  209          */
  210         startrtclock();
  211         printcpuinfo();
  212         panicifcpuunsupported();
  213 #ifdef PERFMON
  214         perfmon_init();
  215 #endif
  216         printf("real memory  = %ju (%ju MB)\n", ptoa((uintmax_t)Maxmem),
  217             ptoa((uintmax_t)Maxmem) / 1048576);
  218         realmem = Maxmem;
  219         /*
  220          * Display any holes after the first chunk of extended memory.
  221          */
  222         if (bootverbose) {
  223                 int indx;
  224 
  225                 printf("Physical memory chunk(s):\n");
  226                 for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
  227                         vm_paddr_t size;
  228 
  229                         size = phys_avail[indx + 1] - phys_avail[indx];
  230                         printf(
  231                             "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n",
  232                             (uintmax_t)phys_avail[indx],
  233                             (uintmax_t)phys_avail[indx + 1] - 1,
  234                             (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
  235                 }
  236         }
  237 
  238         vm_ksubmap_init(&kmi);
  239 
  240         printf("avail memory = %ju (%ju MB)\n",
  241             ptoa((uintmax_t)cnt.v_free_count),
  242             ptoa((uintmax_t)cnt.v_free_count) / 1048576);
  243 
  244         /*
  245          * Set up buffers, so they can be used to read disk labels.
  246          */
  247         bufinit();
  248         vm_pager_bufferinit();
  249 
  250         cpu_setregs();
  251 }
  252 
  253 /*
  254  * Send an interrupt to process.
  255  *
  256  * Stack is set up to allow sigcode stored
  257  * at top to call routine, followed by kcall
  258  * to sigreturn routine below.  After sigreturn
  259  * resets the signal mask, the stack, and the
  260  * frame pointer, it returns to the user
  261  * specified pc, psl.
  262  */
  263 #ifdef COMPAT_43
  264 static void
  265 osendsig(catcher, sig, mask, code)
  266         sig_t catcher;
  267         int sig;
  268         sigset_t *mask;
  269         u_long code;
  270 {
  271         struct osigframe sf, *fp;
  272         struct proc *p;
  273         struct thread *td;
  274         struct sigacts *psp;
  275         struct trapframe *regs;
  276         int oonstack;
  277 
  278         td = curthread;
  279         p = td->td_proc;
  280         PROC_LOCK_ASSERT(p, MA_OWNED);
  281         psp = p->p_sigacts;
  282         mtx_assert(&psp->ps_mtx, MA_OWNED);
  283         regs = td->td_frame;
  284         oonstack = sigonstack(regs->tf_esp);
  285 
  286         /* Allocate space for the signal handler context. */
  287         if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
  288             SIGISMEMBER(psp->ps_sigonstack, sig)) {
  289                 fp = (struct osigframe *)(td->td_sigstk.ss_sp +
  290                     td->td_sigstk.ss_size - sizeof(struct osigframe));
  291 #if defined(COMPAT_43)
  292                 td->td_sigstk.ss_flags |= SS_ONSTACK;
  293 #endif
  294         } else
  295                 fp = (struct osigframe *)regs->tf_esp - 1;
  296 
  297         /* Translate the signal if appropriate. */
  298         if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
  299                 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
  300 
  301         /* Build the argument list for the signal handler. */
  302         sf.sf_signum = sig;
  303         sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc;
  304         if (SIGISMEMBER(psp->ps_siginfo, sig)) {
  305                 /* Signal handler installed with SA_SIGINFO. */
  306                 sf.sf_arg2 = (register_t)&fp->sf_siginfo;
  307                 sf.sf_siginfo.si_signo = sig;
  308                 sf.sf_siginfo.si_code = code;
  309                 sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher;
  310         } else {
  311                 /* Old FreeBSD-style arguments. */
  312                 sf.sf_arg2 = code;
  313                 sf.sf_addr = regs->tf_err;
  314                 sf.sf_ahu.sf_handler = catcher;
  315         }
  316         mtx_unlock(&psp->ps_mtx);
  317         PROC_UNLOCK(p);
  318 
  319         /* Save most if not all of trap frame. */
  320         sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax;
  321         sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx;
  322         sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx;
  323         sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx;
  324         sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi;
  325         sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi;
  326         sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs;
  327         sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds;
  328         sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss;
  329         sf.sf_siginfo.si_sc.sc_es = regs->tf_es;
  330         sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs;
  331         sf.sf_siginfo.si_sc.sc_gs = rgs();
  332         sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp;
  333 
  334         /* Build the signal context to be used by osigreturn(). */
  335         sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0;
  336         SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask);
  337         sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp;
  338         sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp;
  339         sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip;
  340         sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags;
  341         sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno;
  342         sf.sf_siginfo.si_sc.sc_err = regs->tf_err;
  343 
  344         /*
  345          * If we're a vm86 process, we want to save the segment registers.
  346          * We also change eflags to be our emulated eflags, not the actual
  347          * eflags.
  348          */
  349         if (regs->tf_eflags & PSL_VM) {
  350                 /* XXX confusing names: `tf' isn't a trapframe; `regs' is. */
  351                 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
  352                 struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86;
  353 
  354                 sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs;
  355                 sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs;
  356                 sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es;
  357                 sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds;
  358 
  359                 if (vm86->vm86_has_vme == 0)
  360                         sf.sf_siginfo.si_sc.sc_ps =
  361                             (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
  362                             (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
  363 
  364                 /* See sendsig() for comments. */
  365                 tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
  366         }
  367 
  368         /*
  369          * Copy the sigframe out to the user's stack.
  370          */
  371         if (copyout(&sf, fp, sizeof(*fp)) != 0) {
  372 #ifdef DEBUG
  373                 printf("process %ld has trashed its stack\n", (long)p->p_pid);
  374 #endif
  375                 PROC_LOCK(p);
  376                 sigexit(td, SIGILL);
  377         }
  378 
  379         regs->tf_esp = (int)fp;
  380         regs->tf_eip = PS_STRINGS - szosigcode;
  381         regs->tf_eflags &= ~PSL_T;
  382         regs->tf_cs = _ucodesel;
  383         regs->tf_ds = _udatasel;
  384         regs->tf_es = _udatasel;
  385         regs->tf_fs = _udatasel;
  386         load_gs(_udatasel);
  387         regs->tf_ss = _udatasel;
  388         PROC_LOCK(p);
  389         mtx_lock(&psp->ps_mtx);
  390 }
  391 #endif /* COMPAT_43 */
  392 
  393 #ifdef COMPAT_FREEBSD4
  394 static void
  395 freebsd4_sendsig(catcher, sig, mask, code)
  396         sig_t catcher;
  397         int sig;
  398         sigset_t *mask;
  399         u_long code;
  400 {
  401         struct sigframe4 sf, *sfp;
  402         struct proc *p;
  403         struct thread *td;
  404         struct sigacts *psp;
  405         struct trapframe *regs;
  406         int oonstack;
  407 
  408         td = curthread;
  409         p = td->td_proc;
  410         PROC_LOCK_ASSERT(p, MA_OWNED);
  411         psp = p->p_sigacts;
  412         mtx_assert(&psp->ps_mtx, MA_OWNED);
  413         regs = td->td_frame;
  414         oonstack = sigonstack(regs->tf_esp);
  415 
  416         /* Save user context. */
  417         bzero(&sf, sizeof(sf));
  418         sf.sf_uc.uc_sigmask = *mask;
  419         sf.sf_uc.uc_stack = td->td_sigstk;
  420         sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
  421             ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
  422         sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
  423         sf.sf_uc.uc_mcontext.mc_gs = rgs();
  424         bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs));
  425 
  426         /* Allocate space for the signal handler context. */
  427         if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
  428             SIGISMEMBER(psp->ps_sigonstack, sig)) {
  429                 sfp = (struct sigframe4 *)(td->td_sigstk.ss_sp +
  430                     td->td_sigstk.ss_size - sizeof(struct sigframe4));
  431 #if defined(COMPAT_43)
  432                 td->td_sigstk.ss_flags |= SS_ONSTACK;
  433 #endif
  434         } else
  435                 sfp = (struct sigframe4 *)regs->tf_esp - 1;
  436 
  437         /* Translate the signal if appropriate. */
  438         if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
  439                 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
  440 
  441         /* Build the argument list for the signal handler. */
  442         sf.sf_signum = sig;
  443         sf.sf_ucontext = (register_t)&sfp->sf_uc;
  444         if (SIGISMEMBER(psp->ps_siginfo, sig)) {
  445                 /* Signal handler installed with SA_SIGINFO. */
  446                 sf.sf_siginfo = (register_t)&sfp->sf_si;
  447                 sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;
  448 
  449                 /* Fill in POSIX parts */
  450                 sf.sf_si.si_signo = sig;
  451                 sf.sf_si.si_code = code;
  452                 sf.sf_si.si_addr = (void *)regs->tf_err;
  453         } else {
  454                 /* Old FreeBSD-style arguments. */
  455                 sf.sf_siginfo = code;
  456                 sf.sf_addr = regs->tf_err;
  457                 sf.sf_ahu.sf_handler = catcher;
  458         }
  459         mtx_unlock(&psp->ps_mtx);
  460         PROC_UNLOCK(p);
  461 
  462         /*
  463          * If we're a vm86 process, we want to save the segment registers.
  464          * We also change eflags to be our emulated eflags, not the actual
  465          * eflags.
  466          */
  467         if (regs->tf_eflags & PSL_VM) {
  468                 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
  469                 struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86;
  470 
  471                 sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs;
  472                 sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs;
  473                 sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es;
  474                 sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds;
  475 
  476                 if (vm86->vm86_has_vme == 0)
  477                         sf.sf_uc.uc_mcontext.mc_eflags =
  478                             (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
  479                             (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
  480 
  481                 /*
  482                  * Clear PSL_NT to inhibit T_TSSFLT faults on return from
  483                  * syscalls made by the signal handler.  This just avoids
  484                  * wasting time for our lazy fixup of such faults.  PSL_NT
  485                  * does nothing in vm86 mode, but vm86 programs can set it
  486                  * almost legitimately in probes for old cpu types.
  487                  */
  488                 tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
  489         }
  490 
  491         /*
  492          * Copy the sigframe out to the user's stack.
  493          */
  494         if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
  495 #ifdef DEBUG
  496                 printf("process %ld has trashed its stack\n", (long)p->p_pid);
  497 #endif
  498                 PROC_LOCK(p);
  499                 sigexit(td, SIGILL);
  500         }
  501 
  502         regs->tf_esp = (int)sfp;
  503         regs->tf_eip = PS_STRINGS - szfreebsd4_sigcode;
  504         regs->tf_eflags &= ~PSL_T;
  505         regs->tf_cs = _ucodesel;
  506         regs->tf_ds = _udatasel;
  507         regs->tf_es = _udatasel;
  508         regs->tf_fs = _udatasel;
  509         regs->tf_ss = _udatasel;
  510         PROC_LOCK(p);
  511         mtx_lock(&psp->ps_mtx);
  512 }
  513 #endif  /* COMPAT_FREEBSD4 */
  514 
  515 void
  516 sendsig(catcher, sig, mask, code)
  517         sig_t catcher;
  518         int sig;
  519         sigset_t *mask;
  520         u_long code;
  521 {
  522         struct sigframe sf, *sfp;
  523         struct proc *p;
  524         struct thread *td;
  525         struct sigacts *psp;
  526         char *sp;
  527         struct trapframe *regs;
  528         int oonstack;
  529 
  530         td = curthread;
  531         p = td->td_proc;
  532         PROC_LOCK_ASSERT(p, MA_OWNED);
  533         psp = p->p_sigacts;
  534         mtx_assert(&psp->ps_mtx, MA_OWNED);
  535 #ifdef COMPAT_FREEBSD4
  536         if (SIGISMEMBER(psp->ps_freebsd4, sig)) {
  537                 freebsd4_sendsig(catcher, sig, mask, code);
  538                 return;
  539         }
  540 #endif
  541 #ifdef COMPAT_43
  542         if (SIGISMEMBER(psp->ps_osigset, sig)) {
  543                 osendsig(catcher, sig, mask, code);
  544                 return;
  545         }
  546 #endif
  547         regs = td->td_frame;
  548         oonstack = sigonstack(regs->tf_esp);
  549 
  550         /* Save user context. */
  551         bzero(&sf, sizeof(sf));
  552         sf.sf_uc.uc_sigmask = *mask;
  553         sf.sf_uc.uc_stack = td->td_sigstk;
  554         sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
  555             ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
  556         sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
  557         sf.sf_uc.uc_mcontext.mc_gs = rgs();
  558         bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs));
  559         sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
  560         get_fpcontext(td, &sf.sf_uc.uc_mcontext);
  561         fpstate_drop(td);
  562 
  563         /* Allocate space for the signal handler context. */
  564         if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
  565             SIGISMEMBER(psp->ps_sigonstack, sig)) {
  566                 sp = td->td_sigstk.ss_sp +
  567                     td->td_sigstk.ss_size - sizeof(struct sigframe);
  568 #if defined(COMPAT_43)
  569                 td->td_sigstk.ss_flags |= SS_ONSTACK;
  570 #endif
  571         } else
  572                 sp = (char *)regs->tf_esp - sizeof(struct sigframe);
  573         /* Align to 16 bytes. */
  574         sfp = (struct sigframe *)((unsigned int)sp & ~0xF);
  575 
  576         /* Translate the signal if appropriate. */
  577         if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
  578                 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
  579 
  580         /* Build the argument list for the signal handler. */
  581         sf.sf_signum = sig;
  582         sf.sf_ucontext = (register_t)&sfp->sf_uc;
  583         if (SIGISMEMBER(psp->ps_siginfo, sig)) {
  584                 /* Signal handler installed with SA_SIGINFO. */
  585                 sf.sf_siginfo = (register_t)&sfp->sf_si;
  586                 sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;
  587 
  588                 /* Fill in POSIX parts */
  589                 sf.sf_si.si_signo = sig;
  590                 sf.sf_si.si_code = code;
  591                 sf.sf_si.si_addr = (void *)regs->tf_err;
  592         } else {
  593                 /* Old FreeBSD-style arguments. */
  594                 sf.sf_siginfo = code;
  595                 sf.sf_addr = regs->tf_err;
  596                 sf.sf_ahu.sf_handler = catcher;
  597         }
  598         mtx_unlock(&psp->ps_mtx);
  599         PROC_UNLOCK(p);
  600 
  601         /*
  602          * If we're a vm86 process, we want to save the segment registers.
  603          * We also change eflags to be our emulated eflags, not the actual
  604          * eflags.
  605          */
  606         if (regs->tf_eflags & PSL_VM) {
  607                 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
  608                 struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86;
  609 
  610                 sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs;
  611                 sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs;
  612                 sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es;
  613                 sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds;
  614 
  615                 if (vm86->vm86_has_vme == 0)
  616                         sf.sf_uc.uc_mcontext.mc_eflags =
  617                             (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
  618                             (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
  619 
  620                 /*
  621                  * Clear PSL_NT to inhibit T_TSSFLT faults on return from
  622                  * syscalls made by the signal handler.  This just avoids
  623                  * wasting time for our lazy fixup of such faults.  PSL_NT
  624                  * does nothing in vm86 mode, but vm86 programs can set it
  625                  * almost legitimately in probes for old cpu types.
  626                  */
  627                 tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
  628         }
  629 
  630         /*
  631          * Copy the sigframe out to the user's stack.
  632          */
  633         if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
  634 #ifdef DEBUG
  635                 printf("process %ld has trashed its stack\n", (long)p->p_pid);
  636 #endif
  637                 PROC_LOCK(p);
  638                 sigexit(td, SIGILL);
  639         }
  640 
  641         regs->tf_esp = (int)sfp;
  642         regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
  643         regs->tf_eflags &= ~PSL_T;
  644         regs->tf_cs = _ucodesel;
  645         regs->tf_ds = _udatasel;
  646         regs->tf_es = _udatasel;
  647         regs->tf_fs = _udatasel;
  648         regs->tf_ss = _udatasel;
  649         PROC_LOCK(p);
  650         mtx_lock(&psp->ps_mtx);
  651 }
  652 
  653 /*
  654  * Build siginfo_t for SA thread
  655  */
  656 void
  657 cpu_thread_siginfo(int sig, u_long code, siginfo_t *si)
  658 {
  659         struct proc *p;
  660         struct thread *td;
  661 
  662         td = curthread;
  663         p = td->td_proc;
  664         PROC_LOCK_ASSERT(p, MA_OWNED);
  665 
  666         bzero(si, sizeof(*si));
  667         si->si_signo = sig;
  668         si->si_code = code;
  669         si->si_addr = (void *)td->td_frame->tf_err;
  670         /* XXXKSE fill other fields */
  671 }
  672 
  673 /*
  674  * System call to cleanup state after a signal
  675  * has been taken.  Reset signal mask and
  676  * stack state from context left by sendsig (above).
  677  * Return to previous pc and psl as specified by
  678  * context left by sendsig. Check carefully to
  679  * make sure that the user has not modified the
  680  * state to gain improper privileges.
  681  *
  682  * MPSAFE
  683  */
  684 #ifdef COMPAT_43
  685 int
  686 osigreturn(td, uap)
  687         struct thread *td;
  688         struct osigreturn_args /* {
  689                 struct osigcontext *sigcntxp;
  690         } */ *uap;
  691 {
  692         struct osigcontext sc;
  693         struct trapframe *regs;
  694         struct osigcontext *scp;
  695         struct proc *p = td->td_proc;
  696         int eflags, error;
  697 
  698         regs = td->td_frame;
  699         error = copyin(uap->sigcntxp, &sc, sizeof(sc));
  700         if (error != 0)
  701                 return (error);
  702         scp = &sc;
  703         eflags = scp->sc_ps;
  704         if (eflags & PSL_VM) {
  705                 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
  706                 struct vm86_kernel *vm86;
  707 
  708                 /*
  709                  * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
  710                  * set up the vm86 area, and we can't enter vm86 mode.
  711                  */
  712                 if (td->td_pcb->pcb_ext == 0)
  713                         return (EINVAL);
  714                 vm86 = &td->td_pcb->pcb_ext->ext_vm86;
  715                 if (vm86->vm86_inited == 0)
  716                         return (EINVAL);
  717 
  718                 /* Go back to user mode if both flags are set. */
  719                 if ((eflags & PSL_VIP) && (eflags & PSL_VIF))
  720                         trapsignal(td, SIGBUS, 0);
  721 
  722                 if (vm86->vm86_has_vme) {
  723                         eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
  724                             (eflags & VME_USERCHANGE) | PSL_VM;
  725                 } else {
  726                         vm86->vm86_eflags = eflags;     /* save VIF, VIP */
  727                         eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
  728                             (eflags & VM_USERCHANGE) | PSL_VM;
  729                 }
  730                 tf->tf_vm86_ds = scp->sc_ds;
  731                 tf->tf_vm86_es = scp->sc_es;
  732                 tf->tf_vm86_fs = scp->sc_fs;
  733                 tf->tf_vm86_gs = scp->sc_gs;
  734                 tf->tf_ds = _udatasel;
  735                 tf->tf_es = _udatasel;
  736                 tf->tf_fs = _udatasel;
  737         } else {
  738                 /*
  739                  * Don't allow users to change privileged or reserved flags.
  740                  */
  741                 /*
  742                  * XXX do allow users to change the privileged flag PSL_RF.
  743                  * The cpu sets PSL_RF in tf_eflags for faults.  Debuggers
  744                  * should sometimes set it there too.  tf_eflags is kept in
  745                  * the signal context during signal handling and there is no
  746                  * other place to remember it, so the PSL_RF bit may be
  747                  * corrupted by the signal handler without us knowing.
  748                  * Corruption of the PSL_RF bit at worst causes one more or
  749                  * one less debugger trap, so allowing it is fairly harmless.
  750                  */
  751                 if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
  752                         return (EINVAL);
  753                 }
  754 
  755                 /*
  756                  * Don't allow users to load a valid privileged %cs.  Let the
  757                  * hardware check for invalid selectors, excess privilege in
  758                  * other selectors, invalid %eip's and invalid %esp's.
  759                  */
  760                 if (!CS_SECURE(scp->sc_cs)) {
  761                         trapsignal(td, SIGBUS, T_PROTFLT);
  762                         return (EINVAL);
  763                 }
  764                 regs->tf_ds = scp->sc_ds;
  765                 regs->tf_es = scp->sc_es;
  766                 regs->tf_fs = scp->sc_fs;
  767         }
  768 
  769         /* Restore remaining registers. */
  770         regs->tf_eax = scp->sc_eax;
  771         regs->tf_ebx = scp->sc_ebx;
  772         regs->tf_ecx = scp->sc_ecx;
  773         regs->tf_edx = scp->sc_edx;
  774         regs->tf_esi = scp->sc_esi;
  775         regs->tf_edi = scp->sc_edi;
  776         regs->tf_cs = scp->sc_cs;
  777         regs->tf_ss = scp->sc_ss;
  778         regs->tf_isp = scp->sc_isp;
  779         regs->tf_ebp = scp->sc_fp;
  780         regs->tf_esp = scp->sc_sp;
  781         regs->tf_eip = scp->sc_pc;
  782         regs->tf_eflags = eflags;
  783 
  784         PROC_LOCK(p);
  785 #if defined(COMPAT_43)
  786         if (scp->sc_onstack & 1)
  787                 td->td_sigstk.ss_flags |= SS_ONSTACK;
  788         else
  789                 td->td_sigstk.ss_flags &= ~SS_ONSTACK;
  790 #endif
  791         SIGSETOLD(td->td_sigmask, scp->sc_mask);
  792         SIG_CANTMASK(td->td_sigmask);
  793         signotify(td);
  794         PROC_UNLOCK(p);
  795         return (EJUSTRETURN);
  796 }
  797 #endif /* COMPAT_43 */
  798 
  799 #ifdef COMPAT_FREEBSD4
/*
 * freebsd4_sigreturn: return from a signal handler using the FreeBSD 4
 * signal context layout (struct ucontext4).
 *
 * The context is copied in from uap->sigcntxp, the requested eflags and %cs
 * are validated, the saved registers are copied into the thread's trap
 * frame, and the thread's signal mask is replaced with the one stored in
 * the context.  No floating point context is restored by this function.
 *
 * Returns the copyin() error if the context cannot be read, EINVAL if the
 * context is rejected, and EJUSTRETURN once the trap frame has been
 * replaced.
 *
 * MPSAFE
 */
int
freebsd4_sigreturn(td, uap)
	struct thread *td;
	struct freebsd4_sigreturn_args /* {
		const ucontext4 *sigcntxp;
	} */ *uap;
{
	struct ucontext4 uc;
	struct proc *p = td->td_proc;
	struct trapframe *regs;
	const struct ucontext4 *ucp;
	int cs, eflags, error;

	/* Work on a kernel copy so later checks cannot race with userland. */
	error = copyin(uap->sigcntxp, &uc, sizeof(uc));
	if (error != 0)
		return (error);
	ucp = &uc;
	regs = td->td_frame;
	eflags = ucp->uc_mcontext.mc_eflags;
	if (eflags & PSL_VM) {
		/* The context asks to resume in vm86 mode. */
		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
		struct vm86_kernel *vm86;

		/*
		 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
		 * set up the vm86 area, and we can't enter vm86 mode.
		 */
		if (td->td_pcb->pcb_ext == 0)
			return (EINVAL);
		vm86 = &td->td_pcb->pcb_ext->ext_vm86;
		if (vm86->vm86_inited == 0)
			return (EINVAL);

		/*
		 * Go back to user mode if both flags are set.  Note that
		 * only a SIGBUS is posted here; the restore below still
		 * runs.
		 */
		if ((eflags & PSL_VIP) && (eflags & PSL_VIF))
			trapsignal(td, SIGBUS, 0);

		/*
		 * Merge only the user-changeable flag bits from the context
		 * into the current frame's eflags, and force PSL_VM on.
		 */
		if (vm86->vm86_has_vme) {
			eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
			    (eflags & VME_USERCHANGE) | PSL_VM;
		} else {
			vm86->vm86_eflags = eflags;	/* save VIF, VIP */
			eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
			    (eflags & VM_USERCHANGE) | PSL_VM;
		}
		/*
		 * Copy sizeof(struct trapframe) bytes starting at mc_fs over
		 * the frame (assumes the mcontext layout from mc_fs onward
		 * matches struct trapframe -- TODO confirm), then move the
		 * copied segment values into the vm86 slots and replace the
		 * frame's own %ds/%es/%fs with the user data selector.
		 */
		bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe));
		tf->tf_eflags = eflags;
		tf->tf_vm86_ds = tf->tf_ds;
		tf->tf_vm86_es = tf->tf_es;
		tf->tf_vm86_fs = tf->tf_fs;
		tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs;
		tf->tf_ds = _udatasel;
		tf->tf_es = _udatasel;
		tf->tf_fs = _udatasel;
	} else {
		/*
		 * Don't allow users to change privileged or reserved flags.
		 */
		/*
		 * XXX do allow users to change the privileged flag PSL_RF.
		 * The cpu sets PSL_RF in tf_eflags for faults.  Debuggers
		 * should sometimes set it there too.  tf_eflags is kept in
		 * the signal context during signal handling and there is no
		 * other place to remember it, so the PSL_RF bit may be
		 * corrupted by the signal handler without us knowing.
		 * Corruption of the PSL_RF bit at worst causes one more or
		 * one less debugger trap, so allowing it is fairly harmless.
		 */
		if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
			printf("freebsd4_sigreturn: eflags = 0x%x\n", eflags);
			return (EINVAL);
		}

		/*
		 * Don't allow users to load a valid privileged %cs.  Let the
		 * hardware check for invalid selectors, excess privilege in
		 * other selectors, invalid %eip's and invalid %esp's.
		 */
		cs = ucp->uc_mcontext.mc_cs;
		if (!CS_SECURE(cs)) {
			printf("freebsd4_sigreturn: cs = 0x%x\n", cs);
			trapsignal(td, SIGBUS, T_PROTFLT);
			return (EINVAL);
		}

		/* All checks passed: overwrite the whole trap frame. */
		bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
	}

	/* The signal stack flag and signal mask are updated under the proc lock. */
	PROC_LOCK(p);
#if defined(COMPAT_43)
	if (ucp->uc_mcontext.mc_onstack & 1)
		td->td_sigstk.ss_flags |= SS_ONSTACK;
	else
		td->td_sigstk.ss_flags &= ~SS_ONSTACK;
#endif

	td->td_sigmask = ucp->uc_sigmask;
	/* Strip any signals that may not be blocked from the new mask. */
	SIG_CANTMASK(td->td_sigmask);
	signotify(td);
	PROC_UNLOCK(p);
	return (EJUSTRETURN);
}
  905 #endif  /* COMPAT_FREEBSD4 */
  906 
/*
 * sigreturn: return from a signal handler.
 *
 * The ucontext_t at uap->sigcntxp is copied in, the requested eflags and
 * %cs are validated, the floating point context is restored (non-vm86 path
 * only), the saved registers are copied into the thread's trap frame, and
 * the thread's signal mask is replaced with the one stored in the context.
 *
 * Returns the copyin() error if the context cannot be read, EINVAL if the
 * context is rejected, the set_fpcontext() error if that fails, and
 * EJUSTRETURN once the trap frame has been replaced.
 *
 * MPSAFE
 */
int
sigreturn(td, uap)
	struct thread *td;
	struct sigreturn_args /* {
		const __ucontext *sigcntxp;
	} */ *uap;
{
	ucontext_t uc;
	struct proc *p = td->td_proc;
	struct trapframe *regs;
	const ucontext_t *ucp;
	int cs, eflags, error, ret;

	/* Work on a kernel copy so later checks cannot race with userland. */
	error = copyin(uap->sigcntxp, &uc, sizeof(uc));
	if (error != 0)
		return (error);
	ucp = &uc;
	regs = td->td_frame;
	eflags = ucp->uc_mcontext.mc_eflags;
	if (eflags & PSL_VM) {
		/* The context asks to resume in vm86 mode. */
		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
		struct vm86_kernel *vm86;

		/*
		 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
		 * set up the vm86 area, and we can't enter vm86 mode.
		 */
		if (td->td_pcb->pcb_ext == 0)
			return (EINVAL);
		vm86 = &td->td_pcb->pcb_ext->ext_vm86;
		if (vm86->vm86_inited == 0)
			return (EINVAL);

		/*
		 * Go back to user mode if both flags are set.  Note that
		 * only a SIGBUS is posted here; the restore below still
		 * runs.
		 */
		if ((eflags & PSL_VIP) && (eflags & PSL_VIF))
			trapsignal(td, SIGBUS, 0);

		/*
		 * Merge only the user-changeable flag bits from the context
		 * into the current frame's eflags, and force PSL_VM on.
		 */
		if (vm86->vm86_has_vme) {
			eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
			    (eflags & VME_USERCHANGE) | PSL_VM;
		} else {
			vm86->vm86_eflags = eflags;	/* save VIF, VIP */
			eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
			    (eflags & VM_USERCHANGE) | PSL_VM;
		}
		/*
		 * Copy sizeof(struct trapframe) bytes starting at mc_fs over
		 * the frame (assumes the mcontext layout from mc_fs onward
		 * matches struct trapframe -- TODO confirm), then move the
		 * copied segment values into the vm86 slots and replace the
		 * frame's own %ds/%es/%fs with the user data selector.
		 */
		bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe));
		tf->tf_eflags = eflags;
		tf->tf_vm86_ds = tf->tf_ds;
		tf->tf_vm86_es = tf->tf_es;
		tf->tf_vm86_fs = tf->tf_fs;
		tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs;
		tf->tf_ds = _udatasel;
		tf->tf_es = _udatasel;
		tf->tf_fs = _udatasel;
	} else {
		/*
		 * Don't allow users to change privileged or reserved flags.
		 */
		/*
		 * XXX do allow users to change the privileged flag PSL_RF.
		 * The cpu sets PSL_RF in tf_eflags for faults.  Debuggers
		 * should sometimes set it there too.  tf_eflags is kept in
		 * the signal context during signal handling and there is no
		 * other place to remember it, so the PSL_RF bit may be
		 * corrupted by the signal handler without us knowing.
		 * Corruption of the PSL_RF bit at worst causes one more or
		 * one less debugger trap, so allowing it is fairly harmless.
		 */
		if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
			printf("sigreturn: eflags = 0x%x\n", eflags);
			return (EINVAL);
		}

		/*
		 * Don't allow users to load a valid privileged %cs.  Let the
		 * hardware check for invalid selectors, excess privilege in
		 * other selectors, invalid %eip's and invalid %esp's.
		 */
		cs = ucp->uc_mcontext.mc_cs;
		if (!CS_SECURE(cs)) {
			printf("sigreturn: cs = 0x%x\n", cs);
			trapsignal(td, SIGBUS, T_PROTFLT);
			return (EINVAL);
		}

		/*
		 * Restore the FP context first: if it is rejected we return
		 * before the trap frame has been touched.
		 */
		ret = set_fpcontext(td, &ucp->uc_mcontext);
		if (ret != 0)
			return (ret);
		/* All checks passed: overwrite the whole trap frame. */
		bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
	}

	/* The signal stack flag and signal mask are updated under the proc lock. */
	PROC_LOCK(p);
#if defined(COMPAT_43)
	if (ucp->uc_mcontext.mc_onstack & 1)
		td->td_sigstk.ss_flags |= SS_ONSTACK;
	else
		td->td_sigstk.ss_flags &= ~SS_ONSTACK;
#endif

	td->td_sigmask = ucp->uc_sigmask;
	/* Strip any signals that may not be blocked from the new mask. */
	SIG_CANTMASK(td->td_sigmask);
	signotify(td);
	PROC_UNLOCK(p);
	return (EJUSTRETURN);
}
 1015 
 1016 /*
 1017  * Machine dependent boot() routine
 1018  *
 1019  * I haven't seen anything to put here yet
 1020  * Possibly some stuff might be grafted back here from boot()
 1021  */
 1022 void
 1023 cpu_boot(int howto)
 1024 {
 1025 }
 1026 
 1027 /* Get current clock frequency for the given cpu id. */
 1028 int
 1029 cpu_est_clockrate(int cpu_id, uint64_t *rate)
 1030 {
 1031         register_t reg;
 1032         uint64_t tsc1, tsc2;
 1033 
 1034         if (pcpu_find(cpu_id) == NULL || rate == NULL)
 1035                 return (EINVAL);
 1036         if (!tsc_present)
 1037                 return (EOPNOTSUPP);
 1038 
 1039         /* If we're booting, trust the rate calibrated moments ago. */
 1040         if (cold) {
 1041                 *rate = tsc_freq;
 1042                 return (0);
 1043         }
 1044 
 1045 #ifdef SMP
 1046         /* Schedule ourselves on the indicated cpu. */
 1047         mtx_lock_spin(&sched_lock);
 1048         sched_bind(curthread, cpu_id);
 1049         mtx_unlock_spin(&sched_lock);
 1050 #endif
 1051 
 1052         /* Calibrate by measuring a short delay. */
 1053         reg = intr_disable();
 1054         tsc1 = rdtsc();
 1055         DELAY(1000);
 1056         tsc2 = rdtsc();
 1057         intr_restore(reg);
 1058 
 1059 #ifdef SMP
 1060         mtx_lock_spin(&sched_lock);
 1061         sched_unbind(curthread);
 1062         mtx_unlock_spin(&sched_lock);
 1063 #endif
 1064 
 1065         /*
 1066          * Calculate the difference in readings, convert to Mhz, and
 1067          * subtract 0.5% of the total.  Empirical testing has shown that
 1068          * overhead in DELAY() works out to approximately this value.
 1069          */
 1070         tsc2 -= tsc1;
 1071         *rate = tsc2 * 1000 - tsc2 * 5;
 1072         return (0);
 1073 }
 1074 
 1075 /*
 1076  * Shutdown the CPU as much as possible
 1077  */
 1078 void
 1079 cpu_halt(void)
 1080 {
 1081         for (;;)
 1082                 __asm__ ("hlt");
 1083 }
 1084 
/*
 * Hook to idle the CPU when possible.  In the SMP case we default to
 * off because a halted cpu will not currently pick up a new thread in the
 * run queue until the next timer tick.  If turned on this will result in
 * approximately a 4.2% loss in real time performance in buildworld tests
 * (but improves user and sys times oddly enough), and saves approximately
 * 5% in power consumption on an idle machine (tests w/2xCPU 1.1GHz P3).
 *
 * XXX we need to have a cpu mask of idle cpus and generate an IPI or
 * otherwise generate some sort of interrupt to wake up cpus sitting in HLT.
 * Then we can have our cake and eat it too.
 *
 * XXX I'm turning it on for SMP as well by default for now.  It seems to
 * help lock contention somewhat, and this is critical for HTT. -Peter
 *
 * The knob below is what cpu_idle() tests; it is exported read-write as
 * the machdep.cpu_idle_hlt sysctl and defaults to 1 (enabled), so the
 * "default to off" remark above describes earlier behaviour.
 */
static int	cpu_idle_hlt = 1;
SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW,
    &cpu_idle_hlt, 0, "Idle loop HLT enable");
 1103 
/*
 * Default idle routine installed in cpu_idle_hook: enable interrupts and
 * halt.  cpu_idle() calls the hook after disable_intr(), so this is where
 * interrupts get turned back on.
 */
static void
cpu_idle_default(void)
{
	/*
	 * We must absolutely guarantee that hlt is the very next
	 * instruction after sti, or we introduce a timing window.
	 * Both instructions are therefore issued from a single asm
	 * statement.
	 */
	__asm __volatile("sti; hlt");
}
 1114 
 1115 /*
 1116  * Note that we have to be careful here to avoid a race between checking
 1117  * sched_runnable() and actually halting.  If we don't do this, we may waste
 1118  * the time between calling hlt and the next interrupt even though there
 1119  * is a runnable process.
 1120  */
 1121 void
 1122 cpu_idle(void)
 1123 {
 1124 
 1125 #ifdef SMP
 1126         if (mp_grab_cpu_hlt())
 1127                 return;
 1128 #endif
 1129 
 1130         if (cpu_idle_hlt) {
 1131                 disable_intr();
 1132                 if (sched_runnable())
 1133                         enable_intr();
 1134                 else
 1135                         (*cpu_idle_hook)();
 1136         }
 1137 }
 1138 
/*
 * Routine invoked by cpu_idle() to actually idle the cpu.  It is entered
 * with interrupts disabled and starts out as cpu_idle_default().
 * Other subsystems (e.g., ACPI) can hook this later.
 */
void (*cpu_idle_hook)(void) = cpu_idle_default;
 1141 
 1142 /*
 1143  * Clear registers on exec
 1144  */
 1145 void
 1146 exec_setregs(td, entry, stack, ps_strings)
 1147         struct thread *td;
 1148         u_long entry;
 1149         u_long stack;
 1150         u_long ps_strings;
 1151 {
 1152         struct trapframe *regs = td->td_frame;
 1153         struct pcb *pcb = td->td_pcb;
 1154 
 1155         /* Reset pc->pcb_gs and %gs before possibly invalidating it. */
 1156         pcb->pcb_gs = _udatasel;
 1157         load_gs(_udatasel);
 1158 
 1159         if (td->td_proc->p_md.md_ldt)
 1160                 user_ldt_free(td);
 1161   
 1162         bzero((char *)regs, sizeof(struct trapframe));
 1163         regs->tf_eip = entry;
 1164         regs->tf_esp = stack;
 1165         regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T);
 1166         regs->tf_ss = _udatasel;
 1167         regs->tf_ds = _udatasel;
 1168         regs->tf_es = _udatasel;
 1169         regs->tf_fs = _udatasel;
 1170         regs->tf_cs = _ucodesel;
 1171 
 1172         /* PS_STRINGS value for BSD/OS binaries.  It is 0 for non-BSD/OS. */
 1173         regs->tf_ebx = ps_strings;
 1174 
 1175         /*
 1176          * Reset the hardware debug registers if they were in use.
 1177          * They won't have any meaning for the newly exec'd process.  
 1178          */
 1179         if (pcb->pcb_flags & PCB_DBREGS) {
 1180                 pcb->pcb_dr0 = 0;
 1181                 pcb->pcb_dr1 = 0;
 1182                 pcb->pcb_dr2 = 0;
 1183                 pcb->pcb_dr3 = 0;
 1184                 pcb->pcb_dr6 = 0;
 1185                 pcb->pcb_dr7 = 0;
 1186                 if (pcb == PCPU_GET(curpcb)) {
 1187                         /*
 1188                          * Clear the debug registers on the running
 1189                          * CPU, otherwise they will end up affecting
 1190                          * the next process we switch to.
 1191                          */
 1192                         reset_dbregs();
 1193                 }
 1194                 pcb->pcb_flags &= ~PCB_DBREGS;
 1195         }
 1196 
 1197         /*
 1198          * Initialize the math emulator (if any) for the current process.
 1199          * Actually, just clear the bit that says that the emulator has
 1200          * been initialized.  Initialization is delayed until the process
 1201          * traps to the emulator (if it is done at all) mainly because
 1202          * emulators don't provide an entry point for initialization.
 1203          */
 1204         td->td_pcb->pcb_flags &= ~FP_SOFTFP;
 1205 
 1206         /*
 1207          * Drop the FP state if we hold it, so that the process gets a
 1208          * clean FP state if it uses the FPU again.
 1209          */
 1210         fpstate_drop(td);
 1211 
 1212         /*
 1213          * XXX - Linux emulator
 1214          * Make sure sure edx is 0x0 on entry. Linux binaries depend
 1215          * on it.
 1216          */
 1217         td->td_retval[1] = 0;
 1218 }
 1219 
 1220 void
 1221 cpu_setregs(void)
 1222 {
 1223         unsigned int cr0;
 1224 
 1225         cr0 = rcr0();
 1226         /*
 1227          * CR0_MP, CR0_NE and CR0_TS are also set by npx_probe() for the
 1228          * BSP.  See the comments there about why we set them.
 1229          */
 1230         cr0 |= CR0_MP | CR0_NE | CR0_TS;
 1231 #ifndef I386_CPU
 1232         cr0 |= CR0_WP | CR0_AM;
 1233 #endif
 1234         load_cr0(cr0);
 1235         load_gs(_udatasel);
 1236 }
 1237 
 1238 static int
 1239 sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS)
 1240 {
 1241         int error;
 1242         error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
 1243                 req);
 1244         if (!error && req->newptr)
 1245                 resettodr();
 1246         return (error);
 1247 }
 1248 
/*
 * machdep.adjkerntz: read-write int backed by adjkerntz.  Requests go
 * through sysctl_machdep_adjkerntz() rather than the generic int handler.
 */
SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW,
	&adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "");

/* machdep.disable_rtc_set: read-write int backed by disable_rtc_set. */
SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
	CTLFLAG_RW, &disable_rtc_set, 0, "");

/* machdep.bootinfo: read-only export of the bootinfo structure. */
SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo, 
	CTLFLAG_RD, &bootinfo, bootinfo, "");

/* machdep.wall_cmos_clock: read-write int backed by wall_cmos_clock. */
SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock,
	CTLFLAG_RW, &wall_cmos_clock, 0, "");

/* machdep.guessed_bootdev: read-only export of bootdev. */
u_long bootdev;		/* not a struct cdev *- encoding is different */
SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev,
	CTLFLAG_RD, &bootdev, 0, "Maybe the Boot device (not in struct cdev *format)");
 1264 
 1265 /*
 1266  * Initialize 386 and configure to run kernel
 1267  */
 1268 
 1269 /*
 1270  * Initialize segments & interrupt table
 1271  */
 1272 
/*
 * Storage for the descriptor tables and related state.
 */
int _default_ldt;
/* Sized for NGDT entries times MAXCPU (presumably one set per cpu -- confirm). */
union descriptor gdt[NGDT * MAXCPU];	/* global descriptor table */
static struct gate_descriptor idt0[NIDT];	/* initial backing store for idt */
struct gate_descriptor *idt = &idt0[0];	/* interrupt descriptor table */
union descriptor ldt[NLDT];		/* local descriptor table */
struct region_descriptor r_gdt, r_idt;	/* table descriptors */

int private_tss;			/* flag indicating private tss */

#if defined(I586_CPU) && !defined(NO_F00F_HACK)
extern int has_f00f_bug;
#endif

/* TSS used as the base address of the GPANIC_SEL entry in gdt_segs[]. */
static struct i386tss dblfault_tss;
/* One page of stack; presumably used together with dblfault_tss -- confirm. */
static char dblfault_stack[PAGE_SIZE];

extern	vm_offset_t	proc0kstack;
 1290 
 1291 
/*
 * Software prototypes for the GDT entries -- in more palatable form.
 *
 * Each initializer lists, in order: segment base address, segment limit
 * ("length"), segment type, descriptor privilege level, present bit, two
 * fields that are zero in every entry (unlabeled here; presumably
 * unused/reserved -- confirm against struct soft_segment_descriptor),
 * default operand size (32 vs 16 bit) and limit granularity (byte/page
 * units).  The array index of an entry is the number given in its heading
 * comment.
 */
struct soft_segment_descriptor gdt_segs[] = {
/* GNULL_SEL	0 Null Descriptor */
{	0x0,			/* segment base address  */
	0x0,			/* length */
	0,			/* segment type */
	0,			/* segment descriptor privilege level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
/* GCODE_SEL	1 Code Descriptor for kernel */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMERA,		/* segment type */
	0,			/* segment descriptor privilege level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
/* GDATA_SEL	2 Data Descriptor for kernel */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMRWA,		/* segment type */
	0,			/* segment descriptor privilege level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
/* GPRIV_SEL	3 SMP Per-Processor Private Data Descriptor */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMRWA,		/* segment type */
	0,			/* segment descriptor privilege level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
/* GPROC0_SEL	4 Proc 0 Tss Descriptor */
{
	0x0,			/* segment base address */
	sizeof(struct i386tss)-1,/* length - exactly one TSS */
	SDT_SYS386TSS,		/* segment type */
	0,			/* segment descriptor privilege level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
/* GLDT_SEL	5 LDT Descriptor */
{	(int) ldt,		/* segment base address  */
	sizeof(ldt)-1,		/* length - the whole ldt[] array */
	SDT_SYSLDT,		/* segment type */
	SEL_UPL,		/* segment descriptor privilege level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
/* GUSERLDT_SEL	6 User LDT Descriptor per process */
{	(int) ldt,		/* segment base address  */
	(512 * sizeof(union descriptor)-1),		/* length - 512 descriptors */
	SDT_SYSLDT,		/* segment type */
	0,			/* segment descriptor privilege level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
/* GTGATE_SEL	7 Null Descriptor - Placeholder */
{	0x0,			/* segment base address  */
	0x0,			/* length */
	0,			/* segment type */
	0,			/* segment descriptor privilege level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
/* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */
{	0x400,			/* segment base address */
	0xfffff,		/* length */
	SDT_MEMRWA,		/* segment type */
	0,			/* segment descriptor privilege level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
/* GPANIC_SEL	9 Panic Tss Descriptor */
{	(int) &dblfault_tss,	/* segment base address  */
	sizeof(struct i386tss)-1,/* length - exactly one TSS */
	SDT_SYS386TSS,		/* segment type */
	0,			/* segment descriptor privilege level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
/* GBIOSCODE32_SEL 10 BIOS 32-bit interface (32bit Code) */
{	0,			/* segment base address (overwritten)  */
	0xfffff,		/* length */
	SDT_MEMERA,		/* segment type */
	0,			/* segment descriptor privilege level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
/* GBIOSCODE16_SEL 11 BIOS 32-bit interface (16bit Code) */
{	0,			/* segment base address (overwritten)  */
	0xfffff,		/* length */
	SDT_MEMERA,		/* segment type */
	0,			/* segment descriptor privilege level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
/* GBIOSDATA_SEL 12 BIOS 32-bit interface (Data) */
{	0,			/* segment base address (overwritten) */
	0xfffff,		/* length */
	SDT_MEMRWA,		/* segment type */
	0,			/* segment descriptor privilege level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
/* GBIOSUTIL_SEL 13 BIOS 16-bit interface (Utility) */
{	0,			/* segment base address (overwritten) */
	0xfffff,		/* length */
	SDT_MEMRWA,		/* segment type */
	0,			/* segment descriptor privilege level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
/* GBIOSARGS_SEL 14 BIOS 16-bit interface (Arguments) */
{	0,			/* segment base address (overwritten) */
	0xfffff,		/* length */
	SDT_MEMRWA,		/* segment type */
	0,			/* segment descriptor privilege level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
};
 1431 
/*
 * Software prototypes for the default LDT entries.  Each initializer uses
 * the same field order as gdt_segs[]: base, limit ("length"), type,
 * descriptor privilege level, present bit, two fields that are zero in
 * every entry, default operand size and limit granularity.  Entries 3 and
 * 5 are the user code and data segments; the others start out as null
 * descriptors.
 */
static struct soft_segment_descriptor ldt_segs[] = {
	/* 0: Null Descriptor - overwritten by call gate */
{	0x0,			/* segment base address  */
	0x0,			/* length */
	0,			/* segment type */
	0,			/* segment descriptor privilege level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
	/* 1: Null Descriptor - overwritten by call gate */
{	0x0,			/* segment base address  */
	0x0,			/* length */
	0,			/* segment type */
	0,			/* segment descriptor privilege level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
	/* 2: Null Descriptor - overwritten by call gate */
{	0x0,			/* segment base address  */
	0x0,			/* length */
	0,			/* segment type */
	0,			/* segment descriptor privilege level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
	/* 3: Code Descriptor for user */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMERA,		/* segment type */
	SEL_UPL,		/* segment descriptor privilege level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
	/* 4: Null Descriptor - overwritten by call gate */
{	0x0,			/* segment base address  */
	0x0,			/* length */
	0,			/* segment type */
	0,			/* segment descriptor privilege level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
	/* 5: Data Descriptor for user */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMRWA,		/* segment type */
	SEL_UPL,		/* segment descriptor privilege level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
};
 1488 
 1489 void
 1490 setidt(idx, func, typ, dpl, selec)
 1491         int idx;
 1492         inthand_t *func;
 1493         int typ;
 1494         int dpl;
 1495         int selec;
 1496 {
 1497         struct gate_descriptor *ip;
 1498 
 1499         ip = idt + idx;
 1500         ip->gd_looffset = (int)func;
 1501         ip->gd_selector = selec;
 1502         ip->gd_stkcpy = 0;
 1503         ip->gd_xx = 0;
 1504         ip->gd_type = typ;
 1505         ip->gd_dpl = dpl;
 1506         ip->gd_p = 1;
 1507         ip->gd_hioffset = ((int)func)>>16 ;
 1508 }
 1509 
 1510 #define IDTVEC(name)    __CONCAT(X,name)
 1511 
 1512 extern inthand_t
 1513         IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
 1514         IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
 1515         IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
 1516         IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
 1517         IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);
 1518 
 1519 #ifdef DDB
 1520 /*
 1521  * Display the index and function name of any IDT entries that don't use
 1522  * the default 'rsvd' entry point.
 1523  */
 1524 DB_SHOW_COMMAND(idt, db_show_idt)
 1525 {
 1526         struct gate_descriptor *ip;
 1527         int idx, quit;
 1528         uintptr_t func;
 1529 
 1530         ip = idt;
 1531         db_setup_paging(db_simple_pager, &quit, db_lines_per_page);
 1532         for (idx = 0, quit = 0; idx < NIDT; idx++) {
 1533                 func = (ip->gd_hioffset << 16 | ip->gd_looffset);
 1534                 if (func != (uintptr_t)&IDTVEC(rsvd)) {
 1535                         db_printf("%3d\t", idx);
 1536                         db_printsym(func, DB_STGY_PROC);
 1537                         db_printf("\n");
 1538                 }
 1539                 ip++;
 1540         }
 1541 }
 1542 #endif
 1543 
 1544 void
 1545 sdtossd(sd, ssd)
 1546         struct segment_descriptor *sd;
 1547         struct soft_segment_descriptor *ssd;
 1548 {
 1549         ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
 1550         ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
 1551         ssd->ssd_type  = sd->sd_type;
 1552         ssd->ssd_dpl   = sd->sd_dpl;
 1553         ssd->ssd_p     = sd->sd_p;
 1554         ssd->ssd_def32 = sd->sd_def32;
 1555         ssd->ssd_gran  = sd->sd_gran;
 1556 }
 1557 
 1558 #define PHYSMAP_SIZE    (2 * 8)
 1559 
 1560 /*
 1561  * Populate the (physmap) array with base/bound pairs describing the
 1562  * available physical memory in the system, then test this memory and
 1563  * build the phys_avail array describing the actually-available memory.
 1564  *
 1565  * If we cannot accurately determine the physical memory map, then use
 1566  * value from the 0xE801 call, and failing that, the RTC.
 1567  *
 1568  * Total memory size may be set by the kernel environment variable
 1569  * hw.physmem or the compile-time define MAXMEM.
 1570  *
 1571  * XXX first should be vm_paddr_t.
 1572  */
 1573 static void
 1574 getmemsize(int first)
 1575 {
 1576         int i, physmap_idx, pa_indx;
 1577         int hasbrokenint12;
 1578         u_int extmem;
 1579         struct vm86frame vmf;
 1580         struct vm86context vmc;
 1581         vm_paddr_t pa, physmap[PHYSMAP_SIZE];
 1582         pt_entry_t *pte;
 1583         char *cp;
 1584         struct bios_smap *smap;
 1585 
 1586         hasbrokenint12 = 0;
 1587         TUNABLE_INT_FETCH("hw.hasbrokenint12", &hasbrokenint12);
 1588         bzero(&vmf, sizeof(vmf));
 1589         bzero(physmap, sizeof(physmap));
 1590         basemem = 0;
 1591 
 1592         /*
 1593          * Some newer BIOSes has broken INT 12H implementation which cause
 1594          * kernel panic immediately. In this case, we need to scan SMAP
 1595          * with INT 15:E820 first, then determine base memory size.
 1596          */
 1597         if (hasbrokenint12) {
 1598                 goto int15e820;
 1599         }
 1600 
 1601         /*
 1602          * Perform "base memory" related probes & setup
 1603          */
 1604         vm86_intcall(0x12, &vmf);
 1605         basemem = vmf.vmf_ax;
 1606         if (basemem > 640) {
 1607                 printf("Preposterous BIOS basemem of %uK, truncating to 640K\n",
 1608                         basemem);
 1609                 basemem = 640;
 1610         }
 1611 
 1612         /*
 1613          * XXX if biosbasemem is now < 640, there is a `hole'
 1614          * between the end of base memory and the start of
 1615          * ISA memory.  The hole may be empty or it may
 1616          * contain BIOS code or data.  Map it read/write so
 1617          * that the BIOS can write to it.  (Memory from 0 to
 1618          * the physical end of the kernel is mapped read-only
 1619          * to begin with and then parts of it are remapped.
 1620          * The parts that aren't remapped form holes that
 1621          * remain read-only and are unused by the kernel.
 1622          * The base memory area is below the physical end of
 1623          * the kernel and right now forms a read-only hole.
 1624          * The part of it from PAGE_SIZE to
 1625          * (trunc_page(biosbasemem * 1024) - 1) will be
 1626          * remapped and used by the kernel later.)
 1627          *
 1628          * This code is similar to the code used in
 1629          * pmap_mapdev, but since no memory needs to be
 1630          * allocated we simply change the mapping.
 1631          */
 1632         for (pa = trunc_page(basemem * 1024);
 1633              pa < ISA_HOLE_START; pa += PAGE_SIZE)
 1634                 pmap_kenter(KERNBASE + pa, pa);
 1635 
 1636         /*
 1637          * Map pages between basemem and ISA_HOLE_START, if any, r/w into
 1638          * the vm86 page table so that vm86 can scribble on them using
 1639          * the vm86 map too.  XXX: why 2 ways for this and only 1 way for
 1640          * page 0, at least as initialized here?
 1641          */
 1642         pte = (pt_entry_t *)vm86paddr;
 1643         for (i = basemem / 4; i < 160; i++)
 1644                 pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U;
 1645 
 1646 int15e820:
 1647         /*
 1648          * map page 1 R/W into the kernel page table so we can use it
 1649          * as a buffer.  The kernel will unmap this page later.
 1650          */
 1651         pmap_kenter(KERNBASE + (1 << PAGE_SHIFT), 1 << PAGE_SHIFT);
 1652 
 1653         /*
 1654          * get memory map with INT 15:E820
 1655          */
 1656         vmc.npages = 0;
 1657         smap = (void *)vm86_addpage(&vmc, 1, KERNBASE + (1 << PAGE_SHIFT));
 1658         vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di);
 1659 
 1660         physmap_idx = 0;
 1661         vmf.vmf_ebx = 0;
 1662         do {
 1663                 vmf.vmf_eax = 0xE820;
 1664                 vmf.vmf_edx = SMAP_SIG;
 1665                 vmf.vmf_ecx = sizeof(struct bios_smap);
 1666                 i = vm86_datacall(0x15, &vmf, &vmc);
 1667                 if (i || vmf.vmf_eax != SMAP_SIG)
 1668                         break;
 1669                 if (boothowto & RB_VERBOSE)
 1670                         printf("SMAP type=%02x base=%016llx len=%016llx\n",
 1671                             smap->type, smap->base, smap->length);
 1672 
 1673                 if (smap->type != 0x01)
 1674                         goto next_run;
 1675 
 1676                 if (smap->length == 0)
 1677                         goto next_run;
 1678 
 1679 #ifndef PAE
 1680                 if (smap->base >= 0xffffffff) {
 1681                         printf("%uK of memory above 4GB ignored\n",
 1682                             (u_int)(smap->length / 1024));
 1683                         goto next_run;
 1684                 }
 1685 #endif
 1686 
 1687                 for (i = 0; i <= physmap_idx; i += 2) {
 1688                         if (smap->base < physmap[i + 1]) {
 1689                                 if (boothowto & RB_VERBOSE)
 1690                                         printf(
 1691         "Overlapping or non-montonic memory region, ignoring second region\n");
 1692                                 goto next_run;
 1693                         }
 1694                 }
 1695 
 1696                 if (smap->base == physmap[physmap_idx + 1]) {
 1697                         physmap[physmap_idx + 1] += smap->length;
 1698                         goto next_run;
 1699                 }
 1700 
 1701                 physmap_idx += 2;
 1702                 if (physmap_idx == PHYSMAP_SIZE) {
 1703                         printf(
 1704                 "Too many segments in the physical address map, giving up\n");
 1705                         break;
 1706                 }
 1707                 physmap[physmap_idx] = smap->base;
 1708                 physmap[physmap_idx + 1] = smap->base + smap->length;
 1709 next_run: ;
 1710         } while (vmf.vmf_ebx != 0);
 1711 
 1712         /*
 1713          * Perform "base memory" related probes & setup based on SMAP
 1714          */
 1715         if (basemem == 0) {
 1716                 for (i = 0; i <= physmap_idx; i += 2) {
 1717                         if (physmap[i] == 0x00000000) {
 1718                                 basemem = physmap[i + 1] / 1024;
 1719                                 break;
 1720                         }
 1721                 }
 1722 
 1723                 /*
 1724                  * XXX this function is horribly organized and has to the same
 1725                  * things that it does above here.
 1726                  */
 1727                 if (basemem == 0)
 1728                         basemem = 640;
 1729                 if (basemem > 640) {
 1730                         printf(
 1731                     "Preposterous BIOS basemem of %uK, truncating to 640K\n",
 1732                             basemem);
 1733                         basemem = 640;
 1734                 }
 1735 
 1736                 /*
 1737                  * Let vm86 scribble on pages between basemem and
 1738                  * ISA_HOLE_START, as above.
 1739                  */
 1740                 for (pa = trunc_page(basemem * 1024);
 1741                      pa < ISA_HOLE_START; pa += PAGE_SIZE)
 1742                         pmap_kenter(KERNBASE + pa, pa);
 1743                 pte = (pt_entry_t *)vm86paddr;
 1744                 for (i = basemem / 4; i < 160; i++)
 1745                         pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U;
 1746         }
 1747 
 1748         if (physmap[1] != 0)
 1749                 goto physmap_done;
 1750 
 1751         /*
 1752          * If we failed above, try memory map with INT 15:E801
 1753          */
 1754         vmf.vmf_ax = 0xE801;
 1755         if (vm86_intcall(0x15, &vmf) == 0) {
 1756                 extmem = vmf.vmf_cx + vmf.vmf_dx * 64;
 1757         } else {
 1758 #if 0
 1759                 vmf.vmf_ah = 0x88;
 1760                 vm86_intcall(0x15, &vmf);
 1761                 extmem = vmf.vmf_ax;
 1762 #else
 1763                 /*
 1764                  * Prefer the RTC value for extended memory.
 1765                  */
 1766                 extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8);
 1767 #endif
 1768         }
 1769 
 1770         /*
 1771          * Special hack for chipsets that still remap the 384k hole when
 1772          * there's 16MB of memory - this really confuses people that
 1773          * are trying to use bus mastering ISA controllers with the
 1774          * "16MB limit"; they only have 16MB, but the remapping puts
 1775          * them beyond the limit.
 1776          *
 1777          * If extended memory is between 15-16MB (16-17MB phys address range),
 1778          *      chop it to 15MB.
 1779          */
 1780         if ((extmem > 15 * 1024) && (extmem < 16 * 1024))
 1781                 extmem = 15 * 1024;
 1782 
 1783         physmap[0] = 0;
 1784         physmap[1] = basemem * 1024;
 1785         physmap_idx = 2;
 1786         physmap[physmap_idx] = 0x100000;
 1787         physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024;
 1788 
 1789 physmap_done:
 1790         /*
 1791          * Now, physmap contains a map of physical memory.
 1792          */
 1793 
 1794 #ifdef SMP
 1795         /* make hole for AP bootstrap code */
 1796         physmap[1] = mp_bootaddress(physmap[1]);
 1797 #endif
 1798 
 1799         /*
 1800          * Maxmem isn't the "maximum memory", it's one larger than the
 1801          * highest page of the physical address space.  It should be
 1802          * called something like "Maxphyspage".  We may adjust this 
 1803          * based on ``hw.physmem'' and the results of the memory test.
 1804          */
 1805         Maxmem = atop(physmap[physmap_idx + 1]);
 1806 
 1807 #ifdef MAXMEM
 1808         Maxmem = MAXMEM / 4;
 1809 #endif
 1810 
 1811         /*
 1812          * hw.physmem is a size in bytes; we also allow k, m, and g suffixes
 1813          * for the appropriate modifiers.  This overrides MAXMEM.
 1814          */
 1815         if ((cp = getenv("hw.physmem")) != NULL) {
 1816                 u_int64_t AllowMem, sanity;
 1817                 char *ep;
 1818 
 1819                 sanity = AllowMem = strtouq(cp, &ep, 0);
 1820                 if ((ep != cp) && (*ep != 0)) {
 1821                         switch(*ep) {
 1822                         case 'g':
 1823                         case 'G':
 1824                                 AllowMem <<= 10;
 1825                         case 'm':
 1826                         case 'M':
 1827                                 AllowMem <<= 10;
 1828                         case 'k':
 1829                         case 'K':
 1830                                 AllowMem <<= 10;
 1831                                 break;
 1832                         default:
 1833                                 AllowMem = sanity = 0;
 1834                         }
 1835                         if (AllowMem < sanity)
 1836                                 AllowMem = 0;
 1837                 }
 1838                 if (AllowMem == 0)
 1839                         printf("Ignoring invalid memory size of '%s'\n", cp);
 1840                 else
 1841                         Maxmem = atop(AllowMem);
 1842                 freeenv(cp);
 1843         }
 1844 
 1845         if (atop(physmap[physmap_idx + 1]) != Maxmem &&
 1846             (boothowto & RB_VERBOSE))
 1847                 printf("Physical memory use set to %ldK\n", Maxmem * 4);
 1848 
 1849         /*
 1850          * If Maxmem has been increased beyond what the system has detected,
 1851          * extend the last memory segment to the new limit.
 1852          */ 
 1853         if (atop(physmap[physmap_idx + 1]) < Maxmem)
 1854                 physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem);
 1855 
 1856         /* call pmap initialization to make new kernel address space */
 1857         pmap_bootstrap(first, 0);
 1858 
 1859         /*
 1860          * Size up each available chunk of physical memory.
 1861          */
 1862         physmap[0] = PAGE_SIZE;         /* mask off page 0 */
 1863         pa_indx = 0;
 1864         phys_avail[pa_indx++] = physmap[0];
 1865         phys_avail[pa_indx] = physmap[0];
 1866         pte = CMAP1;
 1867 
 1868         /*
 1869          * physmap is in bytes, so when converting to page boundaries,
 1870          * round up the start address and round down the end address.
 1871          */
 1872         for (i = 0; i <= physmap_idx; i += 2) {
 1873                 vm_paddr_t end;
 1874 
 1875                 end = ptoa((vm_paddr_t)Maxmem);
 1876                 if (physmap[i + 1] < end)
 1877                         end = trunc_page(physmap[i + 1]);
 1878                 for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
 1879                         int tmp, page_bad;
 1880                         int *ptr = (int *)CADDR1;
 1881 
 1882                         /*
 1883                          * block out kernel memory as not available.
 1884                          */
 1885                         if (pa >= KERNLOAD && pa < first)
 1886                                 continue;
 1887         
 1888                         page_bad = FALSE;
 1889 
 1890                         /*
 1891                          * map page into kernel: valid, read/write,non-cacheable
 1892                          */
 1893                         *pte = pa | PG_V | PG_RW | PG_N;
 1894                         invltlb();
 1895 
 1896                         tmp = *(int *)ptr;
 1897                         /*
 1898                          * Test for alternating 1's and 0's
 1899                          */
 1900                         *(volatile int *)ptr = 0xaaaaaaaa;
 1901                         if (*(volatile int *)ptr != 0xaaaaaaaa) {
 1902                                 page_bad = TRUE;
 1903                         }
 1904                         /*
 1905                          * Test for alternating 0's and 1's
 1906                          */
 1907                         *(volatile int *)ptr = 0x55555555;
 1908                         if (*(volatile int *)ptr != 0x55555555) {
 1909                         page_bad = TRUE;
 1910                         }
 1911                         /*
 1912                          * Test for all 1's
 1913                          */
 1914                         *(volatile int *)ptr = 0xffffffff;
 1915                         if (*(volatile int *)ptr != 0xffffffff) {
 1916                                 page_bad = TRUE;
 1917                         }
 1918                         /*
 1919                          * Test for all 0's
 1920                          */
 1921                         *(volatile int *)ptr = 0x0;
 1922                         if (*(volatile int *)ptr != 0x0) {
 1923                                 page_bad = TRUE;
 1924                         }
 1925                         /*
 1926                          * Restore original value.
 1927                          */
 1928                         *(int *)ptr = tmp;
 1929 
 1930                         /*
 1931                          * Adjust array of valid/good pages.
 1932                          */
 1933                         if (page_bad == TRUE) {
 1934                                 continue;
 1935                         }
 1936                         /*
 1937                          * If this good page is a continuation of the
 1938                          * previous set of good pages, then just increase
 1939                          * the end pointer. Otherwise start a new chunk.
 1940                          * Note that "end" points one higher than end,
 1941                          * making the range >= start and < end.
 1942                          * If we're also doing a speculative memory
 1943                          * test and we at or past the end, bump up Maxmem
 1944                          * so that we keep going. The first bad page
 1945                          * will terminate the loop.
 1946                          */
 1947                         if (phys_avail[pa_indx] == pa) {
 1948                                 phys_avail[pa_indx] += PAGE_SIZE;
 1949                         } else {
 1950                                 pa_indx++;
 1951                                 if (pa_indx == PHYS_AVAIL_ARRAY_END) {
 1952                                         printf(
 1953                 "Too many holes in the physical address space, giving up\n");
 1954                                         pa_indx--;
 1955                                         break;
 1956                                 }
 1957                                 phys_avail[pa_indx++] = pa;     /* start */
 1958                                 phys_avail[pa_indx] = pa + PAGE_SIZE;   /* end */
 1959                         }
 1960                         physmem++;
 1961                 }
 1962         }
 1963         *pte = 0;
 1964         invltlb();
 1965 
 1966         /*
 1967          * XXX
 1968          * The last chunk must contain at least one page plus the message
 1969          * buffer to avoid complicating other code (message buffer address
 1970          * calculation, etc.).
 1971          */
 1972         while (phys_avail[pa_indx - 1] + PAGE_SIZE +
 1973             round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) {
 1974                 physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
 1975                 phys_avail[pa_indx--] = 0;
 1976                 phys_avail[pa_indx--] = 0;
 1977         }
 1978 
 1979         Maxmem = atop(phys_avail[pa_indx]);
 1980 
 1981         /* Trim off space for the message buffer. */
 1982         phys_avail[pa_indx] -= round_page(MSGBUF_SIZE);
 1983 
 1984         avail_end = phys_avail[pa_indx];
 1985 }
 1986 
 1987 void
 1988 init386(first)
 1989         int first;
 1990 {
 1991         struct gate_descriptor *gdp;
 1992         int gsel_tss, metadata_missing, off, x;
 1993         struct pcpu *pc;
 1994 
 1995         thread0.td_kstack = proc0kstack;
 1996         thread0.td_pcb = (struct pcb *)
 1997            (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1;
 1998 
 1999         /*
 2000          * This may be done better later if it gets more high level
 2001          * components in it. If so just link td->td_proc here.
 2002          */
 2003         proc_linkup(&proc0, &ksegrp0, &thread0);
 2004 
 2005         metadata_missing = 0;
 2006         if (bootinfo.bi_modulep) {
 2007                 preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE;
 2008                 preload_bootstrap_relocate(KERNBASE);
 2009         } else {
 2010                 metadata_missing = 1;
 2011         }
 2012         if (envmode == 1)
 2013                 kern_envp = static_env;
 2014         else if (bootinfo.bi_envp)
 2015                 kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE;
 2016 
 2017         /* Init basic tunables, hz etc */
 2018         init_param1();
 2019 
 2020         /*
 2021          * make gdt memory segments, the code segment goes up to end of the
 2022          * page with etext in it, the data segment goes to the end of
 2023          * the address space
 2024          */
 2025         /*
 2026          * XXX text protection is temporarily (?) disabled.  The limit was
 2027          * i386_btop(round_page(etext)) - 1.
 2028          */
 2029         gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1);
 2030         gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1);
 2031 #ifdef SMP
 2032         pc = &SMP_prvspace[0].pcpu;
 2033         gdt_segs[GPRIV_SEL].ssd_limit =
 2034                 atop(sizeof(struct privatespace) - 1);
 2035 #else
 2036         pc = &__pcpu;
 2037         gdt_segs[GPRIV_SEL].ssd_limit =
 2038                 atop(sizeof(struct pcpu) - 1);
 2039 #endif
 2040         gdt_segs[GPRIV_SEL].ssd_base = (int) pc;
 2041         gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;
 2042 
 2043         for (x = 0; x < NGDT; x++)
 2044                 ssdtosd(&gdt_segs[x], &gdt[x].sd);
 2045 
 2046         r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
 2047         r_gdt.rd_base =  (int) gdt;
 2048         lgdt(&r_gdt);
 2049 
 2050         pcpu_init(pc, 0, sizeof(struct pcpu));
 2051         PCPU_SET(prvspace, pc);
 2052         PCPU_SET(curthread, &thread0);
 2053         PCPU_SET(curpcb, thread0.td_pcb);
 2054 
 2055         /*
 2056          * Initialize mutexes.
 2057          *
 2058          * icu_lock: in order to allow an interrupt to occur in a critical
 2059          *           section, to set pcpu->ipending (etc...) properly, we
 2060          *           must be able to get the icu lock, so it can't be
 2061          *           under witness.
 2062          */
 2063         mutex_init();
 2064         mtx_init(&clock_lock, "clk", NULL, MTX_SPIN);
 2065         mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS);
 2066 
 2067         /* make ldt memory segments */
 2068         /*
 2069          * XXX - VM_MAXUSER_ADDRESS is an end address, not a max.  And it
 2070          * should be spelled ...MAX_USER...
 2071          */
 2072         ldt_segs[LUCODE_SEL].ssd_limit = atop(VM_MAXUSER_ADDRESS - 1);
 2073         ldt_segs[LUDATA_SEL].ssd_limit = atop(VM_MAXUSER_ADDRESS - 1);
 2074         for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++)
 2075                 ssdtosd(&ldt_segs[x], &ldt[x].sd);
 2076 
 2077         _default_ldt = GSEL(GLDT_SEL, SEL_KPL);
 2078         lldt(_default_ldt);
 2079         PCPU_SET(currentldt, _default_ldt);
 2080 
 2081         /* exceptions */
 2082         for (x = 0; x < NIDT; x++)
 2083                 setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL,
 2084                     GSEL(GCODE_SEL, SEL_KPL));
 2085         setidt(IDT_DE, &IDTVEC(div),  SDT_SYS386TGT, SEL_KPL,
 2086             GSEL(GCODE_SEL, SEL_KPL));
 2087         setidt(IDT_DB, &IDTVEC(dbg),  SDT_SYS386IGT, SEL_KPL,
 2088             GSEL(GCODE_SEL, SEL_KPL));
 2089         setidt(IDT_NMI, &IDTVEC(nmi),  SDT_SYS386TGT, SEL_KPL,
 2090             GSEL(GCODE_SEL, SEL_KPL));
 2091         setidt(IDT_BP, &IDTVEC(bpt),  SDT_SYS386IGT, SEL_UPL,
 2092             GSEL(GCODE_SEL, SEL_KPL));
 2093         setidt(IDT_OF, &IDTVEC(ofl),  SDT_SYS386TGT, SEL_UPL,
 2094             GSEL(GCODE_SEL, SEL_KPL));
 2095         setidt(IDT_BR, &IDTVEC(bnd),  SDT_SYS386TGT, SEL_KPL,
 2096             GSEL(GCODE_SEL, SEL_KPL));
 2097         setidt(IDT_UD, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL,
 2098             GSEL(GCODE_SEL, SEL_KPL));
 2099         setidt(IDT_NM, &IDTVEC(dna),  SDT_SYS386TGT, SEL_KPL
 2100             , GSEL(GCODE_SEL, SEL_KPL));
 2101         setidt(IDT_DF, 0,  SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL));
 2102         setidt(IDT_FPUGP, &IDTVEC(fpusegm),  SDT_SYS386TGT, SEL_KPL,
 2103             GSEL(GCODE_SEL, SEL_KPL));
 2104         setidt(IDT_TS, &IDTVEC(tss),  SDT_SYS386TGT, SEL_KPL,
 2105             GSEL(GCODE_SEL, SEL_KPL));
 2106         setidt(IDT_NP, &IDTVEC(missing),  SDT_SYS386TGT, SEL_KPL,
 2107             GSEL(GCODE_SEL, SEL_KPL));
 2108         setidt(IDT_SS, &IDTVEC(stk),  SDT_SYS386TGT, SEL_KPL,
 2109             GSEL(GCODE_SEL, SEL_KPL));
 2110         setidt(IDT_GP, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL,
 2111             GSEL(GCODE_SEL, SEL_KPL));
 2112         setidt(IDT_PF, &IDTVEC(page),  SDT_SYS386IGT, SEL_KPL,
 2113             GSEL(GCODE_SEL, SEL_KPL));
 2114         setidt(IDT_MF, &IDTVEC(fpu),  SDT_SYS386TGT, SEL_KPL,
 2115             GSEL(GCODE_SEL, SEL_KPL));
 2116         setidt(IDT_AC, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL,
 2117             GSEL(GCODE_SEL, SEL_KPL));
 2118         setidt(IDT_MC, &IDTVEC(mchk),  SDT_SYS386TGT, SEL_KPL,
 2119             GSEL(GCODE_SEL, SEL_KPL));
 2120         setidt(IDT_XF, &IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL,
 2121             GSEL(GCODE_SEL, SEL_KPL));
 2122         setidt(IDT_SYSCALL, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL,
 2123             GSEL(GCODE_SEL, SEL_KPL));
 2124 
 2125         r_idt.rd_limit = sizeof(idt0) - 1;
 2126         r_idt.rd_base = (int) idt;
 2127         lidt(&r_idt);
 2128 
 2129         /*
 2130          * Initialize the console before we print anything out.
 2131          */
 2132         cninit();
 2133 
 2134         if (metadata_missing)
 2135                 printf("WARNING: loader(8) metadata is missing!\n");
 2136 
 2137 #ifdef DEV_ISA
 2138         elcr_probe();
 2139         atpic_startup();
 2140 #endif
 2141 
 2142 #ifdef DDB
 2143         ksym_start = bootinfo.bi_symtab;
 2144         ksym_end = bootinfo.bi_esymtab;
 2145 #endif
 2146 
 2147         kdb_init();
 2148 
 2149 #ifdef KDB
 2150         if (boothowto & RB_KDB)
 2151                 kdb_enter("Boot flags requested debugger");
 2152 #endif
 2153 
 2154         finishidentcpu();       /* Final stage of CPU initialization */
 2155         setidt(IDT_UD, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL,
 2156             GSEL(GCODE_SEL, SEL_KPL));
 2157         setidt(IDT_GP, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL,
 2158             GSEL(GCODE_SEL, SEL_KPL));
 2159         initializecpu();        /* Initialize CPU registers */
 2160 
 2161         /* make an initial tss so cpu can get interrupt stack on syscall! */
 2162         /* Note: -16 is so we can grow the trapframe if we came from vm86 */
 2163         PCPU_SET(common_tss.tss_esp0, thread0.td_kstack +
 2164             KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb) - 16);
 2165         PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
 2166         gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
 2167         private_tss = 0;
 2168         PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd);
 2169         PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
 2170         PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
 2171         ltr(gsel_tss);
 2172 
 2173         dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 =
 2174             dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)];
 2175         dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
 2176             dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
 2177 #ifdef PAE
 2178         dblfault_tss.tss_cr3 = (int)IdlePDPT;
 2179 #else
 2180         dblfault_tss.tss_cr3 = (int)IdlePTD;
 2181 #endif
 2182         dblfault_tss.tss_eip = (int)dblfault_handler;
 2183         dblfault_tss.tss_eflags = PSL_KERNEL;
 2184         dblfault_tss.tss_ds = dblfault_tss.tss_es =
 2185             dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL);
 2186         dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL);
 2187         dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
 2188         dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
 2189 
 2190         vm86_initialize();
 2191         getmemsize(first);
 2192         init_param2(physmem);
 2193 
 2194         /* now running on new page tables, configured,and u/iom is accessible */
 2195 
 2196         /* Map the message buffer. */
 2197         for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE)
 2198                 pmap_kenter((vm_offset_t)msgbufp + off, avail_end + off);
 2199 
 2200         msgbufinit(msgbufp, MSGBUF_SIZE);
 2201 
 2202         /* make a call gate to reenter kernel with */
 2203         gdp = &ldt[LSYS5CALLS_SEL].gd;
 2204 
 2205         x = (int) &IDTVEC(lcall_syscall);
 2206         gdp->gd_looffset = x;
 2207         gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL);
 2208         gdp->gd_stkcpy = 1;
 2209         gdp->gd_type = SDT_SYS386CGT;
 2210         gdp->gd_dpl = SEL_UPL;
 2211         gdp->gd_p = 1;
 2212         gdp->gd_hioffset = x >> 16;
 2213 
 2214         /* XXX does this work? */
 2215         ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL];
 2216         ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL];
 2217 
 2218         /* transfer to user mode */
 2219 
 2220         _ucodesel = LSEL(LUCODE_SEL, SEL_UPL);
 2221         _udatasel = LSEL(LUDATA_SEL, SEL_UPL);
 2222 
 2223         /* setup proc 0's pcb */
 2224         thread0.td_pcb->pcb_flags = 0; /* XXXKSE */
 2225 #ifdef PAE
 2226         thread0.td_pcb->pcb_cr3 = (int)IdlePDPT;
 2227 #else
 2228         thread0.td_pcb->pcb_cr3 = (int)IdlePTD;
 2229 #endif
 2230         thread0.td_pcb->pcb_ext = 0;
 2231         thread0.td_frame = &proc0_tf;
 2232 }
 2233 
 2234 void
 2235 cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
 2236 {
 2237 
 2238         pcpu->pc_acpi_id = 0xffffffff;
 2239 }
 2240 
 2241 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
 2242 static void f00f_hack(void *unused);
 2243 SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL)
 2244 
 2245 static void
 2246 f00f_hack(void *unused)
 2247 {
 2248         struct gate_descriptor *new_idt;
 2249         vm_offset_t tmp;
 2250 
 2251         if (!has_f00f_bug)
 2252                 return;
 2253 
 2254         GIANT_REQUIRED;
 2255 
 2256         printf("Intel Pentium detected, installing workaround for F00F bug\n");
 2257 
 2258         tmp = kmem_alloc(kernel_map, PAGE_SIZE * 2);
 2259         if (tmp == 0)
 2260                 panic("kmem_alloc returned 0");
 2261 
 2262         /* Put the problematic entry (#6) at the end of the lower page. */
 2263         new_idt = (struct gate_descriptor*)
 2264             (tmp + PAGE_SIZE - 7 * sizeof(struct gate_descriptor));
 2265         bcopy(idt, new_idt, sizeof(idt0));
 2266         r_idt.rd_base = (u_int)new_idt;
 2267         lidt(&r_idt);
 2268         idt = new_idt;
 2269         if (vm_map_protect(kernel_map, tmp, tmp + PAGE_SIZE,
 2270                            VM_PROT_READ, FALSE) != KERN_SUCCESS)
 2271                 panic("vm_map_protect failed");
 2272 }
 2273 #endif /* defined(I586_CPU) && !NO_F00F_HACK */
 2274 
 2275 /*
 2276  * Construct a PCB from a trapframe. This is called from kdb_trap() where
 2277  * we want to start a backtrace from the function that caused us to enter
 2278  * the debugger. We have the context in the trapframe, but base the trace
 2279  * on the PCB. The PCB doesn't have to be perfect, as long as it contains
 2280  * enough for a backtrace.
 2281  */
 2282 void
 2283 makectx(struct trapframe *tf, struct pcb *pcb)
 2284 {
 2285 
 2286         pcb->pcb_edi = tf->tf_edi;
 2287         pcb->pcb_esi = tf->tf_esi;
 2288         pcb->pcb_ebp = tf->tf_ebp;
 2289         pcb->pcb_ebx = tf->tf_ebx;
 2290         pcb->pcb_eip = tf->tf_eip;
 2291         pcb->pcb_esp = (ISPL(tf->tf_cs)) ? tf->tf_esp : (int)(tf + 1) - 8;
 2292 }
 2293 
 2294 int
 2295 ptrace_set_pc(struct thread *td, u_long addr)
 2296 {
 2297 
 2298         td->td_frame->tf_eip = addr;
 2299         return (0);
 2300 }
 2301 
 2302 int
 2303 ptrace_single_step(struct thread *td)
 2304 {
 2305         td->td_frame->tf_eflags |= PSL_T;
 2306         return (0);
 2307 }
 2308 
 2309 int
 2310 ptrace_clear_single_step(struct thread *td)
 2311 {
 2312         td->td_frame->tf_eflags &= ~PSL_T;
 2313         return (0);
 2314 }
 2315 
 2316 int
 2317 fill_regs(struct thread *td, struct reg *regs)
 2318 {
 2319         struct pcb *pcb;
 2320         struct trapframe *tp;
 2321 
 2322         tp = td->td_frame;
 2323         regs->r_fs = tp->tf_fs;
 2324         regs->r_es = tp->tf_es;
 2325         regs->r_ds = tp->tf_ds;
 2326         regs->r_edi = tp->tf_edi;
 2327         regs->r_esi = tp->tf_esi;
 2328         regs->r_ebp = tp->tf_ebp;
 2329         regs->r_ebx = tp->tf_ebx;
 2330         regs->r_edx = tp->tf_edx;
 2331         regs->r_ecx = tp->tf_ecx;
 2332         regs->r_eax = tp->tf_eax;
 2333         regs->r_eip = tp->tf_eip;
 2334         regs->r_cs = tp->tf_cs;
 2335         regs->r_eflags = tp->tf_eflags;
 2336         regs->r_esp = tp->tf_esp;
 2337         regs->r_ss = tp->tf_ss;
 2338         pcb = td->td_pcb;
 2339         regs->r_gs = pcb->pcb_gs;
 2340         return (0);
 2341 }
 2342 
 2343 int
 2344 set_regs(struct thread *td, struct reg *regs)
 2345 {
 2346         struct pcb *pcb;
 2347         struct trapframe *tp;
 2348 
 2349         tp = td->td_frame;
 2350         if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) ||
 2351             !CS_SECURE(regs->r_cs))
 2352                 return (EINVAL);
 2353         tp->tf_fs = regs->r_fs;
 2354         tp->tf_es = regs->r_es;
 2355         tp->tf_ds = regs->r_ds;
 2356         tp->tf_edi = regs->r_edi;
 2357         tp->tf_esi = regs->r_esi;
 2358         tp->tf_ebp = regs->r_ebp;
 2359         tp->tf_ebx = regs->r_ebx;
 2360         tp->tf_edx = regs->r_edx;
 2361         tp->tf_ecx = regs->r_ecx;
 2362         tp->tf_eax = regs->r_eax;
 2363         tp->tf_eip = regs->r_eip;
 2364         tp->tf_cs = regs->r_cs;
 2365         tp->tf_eflags = regs->r_eflags;
 2366         tp->tf_esp = regs->r_esp;
 2367         tp->tf_ss = regs->r_ss;
 2368         pcb = td->td_pcb;
 2369         pcb->pcb_gs = regs->r_gs;
 2370         return (0);
 2371 }
 2372 
 2373 #ifdef CPU_ENABLE_SSE
 2374 static void
 2375 fill_fpregs_xmm(sv_xmm, sv_87)
 2376         struct savexmm *sv_xmm;
 2377         struct save87 *sv_87;
 2378 {
 2379         register struct env87 *penv_87 = &sv_87->sv_env;
 2380         register struct envxmm *penv_xmm = &sv_xmm->sv_env;
 2381         int i;
 2382 
 2383         bzero(sv_87, sizeof(*sv_87));
 2384 
 2385         /* FPU control/status */
 2386         penv_87->en_cw = penv_xmm->en_cw;
 2387         penv_87->en_sw = penv_xmm->en_sw;
 2388         penv_87->en_tw = penv_xmm->en_tw;
 2389         penv_87->en_fip = penv_xmm->en_fip;
 2390         penv_87->en_fcs = penv_xmm->en_fcs;
 2391         penv_87->en_opcode = penv_xmm->en_opcode;
 2392         penv_87->en_foo = penv_xmm->en_foo;
 2393         penv_87->en_fos = penv_xmm->en_fos;
 2394 
 2395         /* FPU registers */
 2396         for (i = 0; i < 8; ++i)
 2397                 sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc;
 2398 }
 2399 
 2400 static void
 2401 set_fpregs_xmm(sv_87, sv_xmm)
 2402         struct save87 *sv_87;
 2403         struct savexmm *sv_xmm;
 2404 {
 2405         register struct env87 *penv_87 = &sv_87->sv_env;
 2406         register struct envxmm *penv_xmm = &sv_xmm->sv_env;
 2407         int i;
 2408 
 2409         /* FPU control/status */
 2410         penv_xmm->en_cw = penv_87->en_cw;
 2411         penv_xmm->en_sw = penv_87->en_sw;
 2412         penv_xmm->en_tw = penv_87->en_tw;
 2413         penv_xmm->en_fip = penv_87->en_fip;
 2414         penv_xmm->en_fcs = penv_87->en_fcs;
 2415         penv_xmm->en_opcode = penv_87->en_opcode;
 2416         penv_xmm->en_foo = penv_87->en_foo;
 2417         penv_xmm->en_fos = penv_87->en_fos;
 2418 
 2419         /* FPU registers */
 2420         for (i = 0; i < 8; ++i)
 2421                 sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i];
 2422 }
 2423 #endif /* CPU_ENABLE_SSE */
 2424 
 2425 int
 2426 fill_fpregs(struct thread *td, struct fpreg *fpregs)
 2427 {
 2428 #ifdef CPU_ENABLE_SSE
 2429         if (cpu_fxsr) {
 2430                 fill_fpregs_xmm(&td->td_pcb->pcb_save.sv_xmm,
 2431                                                 (struct save87 *)fpregs);
 2432                 return (0);
 2433         }
 2434 #endif /* CPU_ENABLE_SSE */
 2435         bcopy(&td->td_pcb->pcb_save.sv_87, fpregs, sizeof *fpregs);
 2436         return (0);
 2437 }
 2438 
 2439 int
 2440 set_fpregs(struct thread *td, struct fpreg *fpregs)
 2441 {
 2442 #ifdef CPU_ENABLE_SSE
 2443         if (cpu_fxsr) {
 2444                 set_fpregs_xmm((struct save87 *)fpregs,
 2445                                            &td->td_pcb->pcb_save.sv_xmm);
 2446                 return (0);
 2447         }
 2448 #endif /* CPU_ENABLE_SSE */
 2449         bcopy(fpregs, &td->td_pcb->pcb_save.sv_87, sizeof *fpregs);
 2450         return (0);
 2451 }
 2452 
 2453 /*
 2454  * Get machine context.
 2455  */
 2456 int
 2457 get_mcontext(struct thread *td, mcontext_t *mcp, int flags)
 2458 {
 2459         struct trapframe *tp;
 2460 
 2461         tp = td->td_frame;
 2462 
 2463         PROC_LOCK(curthread->td_proc);
 2464         mcp->mc_onstack = sigonstack(tp->tf_esp);
 2465         PROC_UNLOCK(curthread->td_proc);
 2466         mcp->mc_gs = td->td_pcb->pcb_gs;
 2467         mcp->mc_fs = tp->tf_fs;
 2468         mcp->mc_es = tp->tf_es;
 2469         mcp->mc_ds = tp->tf_ds;
 2470         mcp->mc_edi = tp->tf_edi;
 2471         mcp->mc_esi = tp->tf_esi;
 2472         mcp->mc_ebp = tp->tf_ebp;
 2473         mcp->mc_isp = tp->tf_isp;
 2474         if (flags & GET_MC_CLEAR_RET) {
 2475                 mcp->mc_eax = 0;
 2476                 mcp->mc_edx = 0;
 2477         } else {
 2478                 mcp->mc_eax = tp->tf_eax;
 2479                 mcp->mc_edx = tp->tf_edx;
 2480         }
 2481         mcp->mc_ebx = tp->tf_ebx;
 2482         mcp->mc_ecx = tp->tf_ecx;
 2483         mcp->mc_eip = tp->tf_eip;
 2484         mcp->mc_cs = tp->tf_cs;
 2485         mcp->mc_eflags = tp->tf_eflags;
 2486         mcp->mc_esp = tp->tf_esp;
 2487         mcp->mc_ss = tp->tf_ss;
 2488         mcp->mc_len = sizeof(*mcp);
 2489         get_fpcontext(td, mcp);
 2490         return (0);
 2491 }
 2492 
 2493 /*
 2494  * Set machine context.
 2495  *
 2496  * However, we don't set any but the user modifiable flags, and we won't
 2497  * touch the cs selector.
 2498  */
 2499 int
 2500 set_mcontext(struct thread *td, const mcontext_t *mcp)
 2501 {
 2502         struct trapframe *tp;
 2503         int eflags, ret;
 2504 
 2505         tp = td->td_frame;
 2506         if (mcp->mc_len != sizeof(*mcp))
 2507                 return (EINVAL);
 2508         eflags = (mcp->mc_eflags & PSL_USERCHANGE) |
 2509             (tp->tf_eflags & ~PSL_USERCHANGE);
 2510         if ((ret = set_fpcontext(td, mcp)) == 0) {
 2511                 tp->tf_fs = mcp->mc_fs;
 2512                 tp->tf_es = mcp->mc_es;
 2513                 tp->tf_ds = mcp->mc_ds;
 2514                 tp->tf_edi = mcp->mc_edi;
 2515                 tp->tf_esi = mcp->mc_esi;
 2516                 tp->tf_ebp = mcp->mc_ebp;
 2517                 tp->tf_ebx = mcp->mc_ebx;
 2518                 tp->tf_edx = mcp->mc_edx;
 2519                 tp->tf_ecx = mcp->mc_ecx;
 2520                 tp->tf_eax = mcp->mc_eax;
 2521                 tp->tf_eip = mcp->mc_eip;
 2522                 tp->tf_eflags = eflags;
 2523                 tp->tf_esp = mcp->mc_esp;
 2524                 tp->tf_ss = mcp->mc_ss;
 2525                 td->td_pcb->pcb_gs = mcp->mc_gs;
 2526                 ret = 0;
 2527         }
 2528         return (ret);
 2529 }
 2530 
 2531 static void
 2532 get_fpcontext(struct thread *td, mcontext_t *mcp)
 2533 {
 2534 #ifndef DEV_NPX
 2535         mcp->mc_fpformat = _MC_FPFMT_NODEV;
 2536         mcp->mc_ownedfp = _MC_FPOWNED_NONE;
 2537 #else
 2538         union savefpu *addr;
 2539 
 2540         /*
 2541          * XXX mc_fpstate might be misaligned, since its declaration is not
 2542          * unportabilized using __attribute__((aligned(16))) like the
 2543          * declaration of struct savemm, and anyway, alignment doesn't work
 2544          * for auto variables since we don't use gcc's pessimal stack
 2545          * alignment.  Work around this by abusing the spare fields after
 2546          * mcp->mc_fpstate.
 2547          *
 2548          * XXX unpessimize most cases by only aligning when fxsave might be
 2549          * called, although this requires knowing too much about
 2550          * npxgetregs()'s internals.
 2551          */
 2552         addr = (union savefpu *)&mcp->mc_fpstate;
 2553         if (td == PCPU_GET(fpcurthread) &&
 2554 #ifdef CPU_ENABLE_SSE
 2555             cpu_fxsr &&
 2556 #endif
 2557             ((uintptr_t)(void *)addr & 0xF)) {
 2558                 do
 2559                         addr = (void *)((char *)addr + 4);
 2560                 while ((uintptr_t)(void *)addr & 0xF);
 2561         }
 2562         mcp->mc_ownedfp = npxgetregs(td, addr);
 2563         if (addr != (union savefpu *)&mcp->mc_fpstate) {
 2564                 bcopy(addr, &mcp->mc_fpstate, sizeof(mcp->mc_fpstate));
 2565                 bzero(&mcp->mc_spare2, sizeof(mcp->mc_spare2));
 2566         }
 2567         mcp->mc_fpformat = npxformat();
 2568 #endif
 2569 }
 2570 
 2571 static int
 2572 set_fpcontext(struct thread *td, const mcontext_t *mcp)
 2573 {
 2574         union savefpu *addr;
 2575 
 2576         if (mcp->mc_fpformat == _MC_FPFMT_NODEV)
 2577                 return (0);
 2578         else if (mcp->mc_fpformat != _MC_FPFMT_387 &&
 2579             mcp->mc_fpformat != _MC_FPFMT_XMM)
 2580                 return (EINVAL);
 2581         else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE)
 2582                 /* We don't care what state is left in the FPU or PCB. */
 2583                 fpstate_drop(td);
 2584         else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU ||
 2585             mcp->mc_ownedfp == _MC_FPOWNED_PCB) {
 2586                 /* XXX align as above. */
 2587                 addr = (union savefpu *)&mcp->mc_fpstate;
 2588                 if (td == PCPU_GET(fpcurthread) &&
 2589 #ifdef CPU_ENABLE_SSE
 2590                     cpu_fxsr &&
 2591 #endif
 2592                     ((uintptr_t)(void *)addr & 0xF)) {
 2593                         do
 2594                                 addr = (void *)((char *)addr + 4);
 2595                         while ((uintptr_t)(void *)addr & 0xF);
 2596                         bcopy(&mcp->mc_fpstate, addr, sizeof(mcp->mc_fpstate));
 2597                 }
 2598 #ifdef DEV_NPX
 2599                 /*
 2600                  * XXX we violate the dubious requirement that npxsetregs()
 2601                  * be called with interrupts disabled.
 2602                  */
 2603                 npxsetregs(td, addr);
 2604 #endif
 2605                 /*
 2606                  * Don't bother putting things back where they were in the
 2607                  * misaligned case, since we know that the caller won't use
 2608                  * them again.
 2609                  */
 2610         } else
 2611                 return (EINVAL);
 2612         return (0);
 2613 }
 2614 
 2615 static void
 2616 fpstate_drop(struct thread *td)
 2617 {
 2618         register_t s;
 2619 
 2620         s = intr_disable();
 2621 #ifdef DEV_NPX
 2622         if (PCPU_GET(fpcurthread) == td)
 2623                 npxdrop();
 2624 #endif
 2625         /*
 2626          * XXX force a full drop of the npx.  The above only drops it if we
 2627          * owned it.  npxgetregs() has the same bug in the !cpu_fxsr case.
 2628          *
 2629          * XXX I don't much like npxgetregs()'s semantics of doing a full
 2630          * drop.  Dropping only to the pcb matches fnsave's behaviour.
 2631          * We only need to drop to !PCB_INITDONE in sendsig().  But
 2632          * sendsig() is the only caller of npxgetregs()... perhaps we just
 2633          * have too many layers.
 2634          */
 2635         curthread->td_pcb->pcb_flags &= ~PCB_NPXINITDONE;
 2636         intr_restore(s);
 2637 }
 2638 
 2639 int
 2640 fill_dbregs(struct thread *td, struct dbreg *dbregs)
 2641 {
 2642         struct pcb *pcb;
 2643 
 2644         if (td == NULL) {
 2645                 dbregs->dr[0] = rdr0();
 2646                 dbregs->dr[1] = rdr1();
 2647                 dbregs->dr[2] = rdr2();
 2648                 dbregs->dr[3] = rdr3();
 2649                 dbregs->dr[4] = rdr4();
 2650                 dbregs->dr[5] = rdr5();
 2651                 dbregs->dr[6] = rdr6();
 2652                 dbregs->dr[7] = rdr7();
 2653         } else {
 2654                 pcb = td->td_pcb;
 2655                 dbregs->dr[0] = pcb->pcb_dr0;
 2656                 dbregs->dr[1] = pcb->pcb_dr1;
 2657                 dbregs->dr[2] = pcb->pcb_dr2;
 2658                 dbregs->dr[3] = pcb->pcb_dr3;
 2659                 dbregs->dr[4] = 0;
 2660                 dbregs->dr[5] = 0;
 2661                 dbregs->dr[6] = pcb->pcb_dr6;
 2662                 dbregs->dr[7] = pcb->pcb_dr7;
 2663         }
 2664         return (0);
 2665 }
 2666 
 2667 int
 2668 set_dbregs(struct thread *td, struct dbreg *dbregs)
 2669 {
 2670         struct pcb *pcb;
 2671         int i;
 2672         u_int32_t mask1, mask2;
 2673 
 2674         if (td == NULL) {
 2675                 load_dr0(dbregs->dr[0]);
 2676                 load_dr1(dbregs->dr[1]);
 2677                 load_dr2(dbregs->dr[2]);
 2678                 load_dr3(dbregs->dr[3]);
 2679                 load_dr4(dbregs->dr[4]);
 2680                 load_dr5(dbregs->dr[5]);
 2681                 load_dr6(dbregs->dr[6]);
 2682                 load_dr7(dbregs->dr[7]);
 2683         } else {
 2684                 /*
 2685                  * Don't let an illegal value for dr7 get set.  Specifically,
 2686                  * check for undefined settings.  Setting these bit patterns
 2687                  * result in undefined behaviour and can lead to an unexpected
 2688                  * TRCTRAP.
 2689                  */
 2690                 for (i = 0, mask1 = 0x3<<16, mask2 = 0x2<<16; i < 8; 
 2691                      i++, mask1 <<= 2, mask2 <<= 2)
 2692                         if ((dbregs->dr[7] & mask1) == mask2)
 2693                                 return (EINVAL);
 2694                 
 2695                 pcb = td->td_pcb;
 2696                 
 2697                 /*
 2698                  * Don't let a process set a breakpoint that is not within the
 2699                  * process's address space.  If a process could do this, it
 2700                  * could halt the system by setting a breakpoint in the kernel
 2701                  * (if ddb was enabled).  Thus, we need to check to make sure
 2702                  * that no breakpoints are being enabled for addresses outside
 2703                  * process's address space, unless, perhaps, we were called by
 2704                  * uid 0.
 2705                  *
 2706                  * XXX - what about when the watched area of the user's
 2707                  * address space is written into from within the kernel
 2708                  * ... wouldn't that still cause a breakpoint to be generated
 2709                  * from within kernel mode?
 2710                  */
 2711 
 2712                 if (suser(td) != 0) {
 2713                         if (dbregs->dr[7] & 0x3) {
 2714                                 /* dr0 is enabled */
 2715                                 if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS)
 2716                                         return (EINVAL);
 2717                         }
 2718                         
 2719                         if (dbregs->dr[7] & (0x3<<2)) {
 2720                                 /* dr1 is enabled */
 2721                                 if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS)
 2722                                         return (EINVAL);
 2723                         }
 2724                         
 2725                         if (dbregs->dr[7] & (0x3<<4)) {
 2726                                 /* dr2 is enabled */
 2727                                 if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS)
 2728                                         return (EINVAL);
 2729                         }
 2730                         
 2731                         if (dbregs->dr[7] & (0x3<<6)) {
 2732                                 /* dr3 is enabled */
 2733                                 if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS)
 2734                                         return (EINVAL);
 2735                         }
 2736                 }
 2737 
 2738                 pcb->pcb_dr0 = dbregs->dr[0];
 2739                 pcb->pcb_dr1 = dbregs->dr[1];
 2740                 pcb->pcb_dr2 = dbregs->dr[2];
 2741                 pcb->pcb_dr3 = dbregs->dr[3];
 2742                 pcb->pcb_dr6 = dbregs->dr[6];
 2743                 pcb->pcb_dr7 = dbregs->dr[7];
 2744 
 2745                 pcb->pcb_flags |= PCB_DBREGS;
 2746         }
 2747 
 2748         return (0);
 2749 }
 2750 
 2751 /*
 2752  * Return > 0 if a hardware breakpoint has been hit, and the
 2753  * breakpoint was in user space.  Return 0, otherwise.
 2754  */
 2755 int
 2756 user_dbreg_trap(void)
 2757 {
 2758         u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */
 2759         u_int32_t bp;       /* breakpoint bits extracted from dr6 */
 2760         int nbp;            /* number of breakpoints that triggered */
 2761         caddr_t addr[4];    /* breakpoint addresses */
 2762         int i;
 2763         
 2764         dr7 = rdr7();
 2765         if ((dr7 & 0x000000ff) == 0) {
 2766                 /*
 2767                  * all GE and LE bits in the dr7 register are zero,
 2768                  * thus the trap couldn't have been caused by the
 2769                  * hardware debug registers
 2770                  */
 2771                 return 0;
 2772         }
 2773 
 2774         nbp = 0;
 2775         dr6 = rdr6();
 2776         bp = dr6 & 0x0000000f;
 2777 
 2778         if (!bp) {
 2779                 /*
 2780                  * None of the breakpoint bits are set meaning this
 2781                  * trap was not caused by any of the debug registers
 2782                  */
 2783                 return 0;
 2784         }
 2785 
 2786         /*
 2787          * at least one of the breakpoints were hit, check to see
 2788          * which ones and if any of them are user space addresses
 2789          */
 2790 
 2791         if (bp & 0x01) {
 2792                 addr[nbp++] = (caddr_t)rdr0();
 2793         }
 2794         if (bp & 0x02) {
 2795                 addr[nbp++] = (caddr_t)rdr1();
 2796         }
 2797         if (bp & 0x04) {
 2798                 addr[nbp++] = (caddr_t)rdr2();
 2799         }
 2800         if (bp & 0x08) {
 2801                 addr[nbp++] = (caddr_t)rdr3();
 2802         }
 2803 
 2804         for (i=0; i<nbp; i++) {
 2805                 if (addr[i] <
 2806                     (caddr_t)VM_MAXUSER_ADDRESS) {
 2807                         /*
 2808                          * addr[i] is in user space
 2809                          */
 2810                         return nbp;
 2811                 }
 2812         }
 2813 
 2814         /*
 2815          * None of the breakpoints are in user space.
 2816          */
 2817         return 0;
 2818 }
 2819 
 2820 #ifndef DEV_APIC
 2821 #include <machine/apicvar.h>
 2822 
 2823 /*
 2824  * Provide stub functions so that the MADT APIC enumerator in the acpi
 2825  * kernel module will link against a kernel without 'device apic'.
 2826  *
 2827  * XXX - This is a gross hack.
 2828  */
 2829 void
 2830 apic_register_enumerator(struct apic_enumerator *enumerator)
 2831 {
 2832 }
 2833 
 2834 void *
 2835 ioapic_create(uintptr_t addr, int32_t id, int intbase)
 2836 {
 2837         return (NULL);
 2838 }
 2839 
 2840 int
 2841 ioapic_disable_pin(void *cookie, u_int pin)
 2842 {
 2843         return (ENXIO);
 2844 }
 2845 
 2846 void
 2847 ioapic_enable_mixed_mode(void)
 2848 {
 2849 }
 2850 
 2851 int
 2852 ioapic_get_vector(void *cookie, u_int pin)
 2853 {
 2854         return (-1);
 2855 }
 2856 
 2857 void
 2858 ioapic_register(void *cookie)
 2859 {
 2860 }
 2861 
 2862 int
 2863 ioapic_remap_vector(void *cookie, u_int pin, int vector)
 2864 {
 2865         return (ENXIO);
 2866 }
 2867 
 2868 int
 2869 ioapic_set_extint(void *cookie, u_int pin)
 2870 {
 2871         return (ENXIO);
 2872 }
 2873 
 2874 int
 2875 ioapic_set_nmi(void *cookie, u_int pin)
 2876 {
 2877         return (ENXIO);
 2878 }
 2879 
 2880 int
 2881 ioapic_set_polarity(void *cookie, u_int pin, enum intr_polarity pol)
 2882 {
 2883         return (ENXIO);
 2884 }
 2885 
 2886 int
 2887 ioapic_set_triggermode(void *cookie, u_int pin, enum intr_trigger trigger)
 2888 {
 2889         return (ENXIO);
 2890 }
 2891 
 2892 void
 2893 lapic_create(u_int apic_id, int boot_cpu)
 2894 {
 2895 }
 2896 
 2897 void
 2898 lapic_init(uintptr_t addr)
 2899 {
 2900 }
 2901 
 2902 int
 2903 lapic_set_lvt_mode(u_int apic_id, u_int lvt, u_int32_t mode)
 2904 {
 2905         return (ENXIO);
 2906 }
 2907 
 2908 int
 2909 lapic_set_lvt_polarity(u_int apic_id, u_int lvt, enum intr_polarity pol)
 2910 {
 2911         return (ENXIO);
 2912 }
 2913 
 2914 int
 2915 lapic_set_lvt_triggermode(u_int apic_id, u_int lvt, enum intr_trigger trigger)
 2916 {
 2917         return (ENXIO);
 2918 }
 2919 #endif
 2920 
 2921 #ifdef KDB
 2922 
 2923 /*
 2924  * Provide inb() and outb() as functions.  They are normally only
 2925  * available as macros calling inlined functions, thus cannot be
 2926  * called from the debugger.
 2927  *
 2928  * The actual code is stolen from <machine/cpufunc.h>, and de-inlined.
 2929  */
 2930 
 2931 #undef inb
 2932 #undef outb
 2933 
 2934 /* silence compiler warnings */
 2935 u_char inb(u_int);
 2936 void outb(u_int, u_char);
 2937 
 2938 u_char
 2939 inb(u_int port)
 2940 {
 2941         u_char  data;
 2942         /*
 2943          * We use %%dx and not %1 here because i/o is done at %dx and not at
 2944          * %edx, while gcc generates inferior code (movw instead of movl)
 2945          * if we tell it to load (u_short) port.
 2946          */
 2947         __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
 2948         return (data);
 2949 }
 2950 
 2951 void
 2952 outb(u_int port, u_char data)
 2953 {
 2954         u_char  al;
 2955         /*
 2956          * Use an unnecessary assignment to help gcc's register allocator.
 2957          * This make a large difference for gcc-1.40 and a tiny difference
 2958          * for gcc-2.6.0.  For gcc-1.40, al had to be ``asm("ax")'' for
 2959          * best results.  gcc-2.6.0 can't handle this.
 2960          */
 2961         al = data;
 2962         __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
 2963 }
 2964 
 2965 #endif /* KDB */

Cache object: 24963eaf0ca5b6705800e9da8e46a275


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.