FreeBSD/Linux Kernel Cross Reference
sys/pc98/pc98/machdep.c


    1 /*-
    2  * Copyright (c) 1992 Terrence R. Lambert.
    3  * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
    4  * All rights reserved.
    5  *
    6  * This code is derived from software contributed to Berkeley by
    7  * William Jolitz.
    8  *
    9  * Redistribution and use in source and binary forms, with or without
   10  * modification, are permitted provided that the following conditions
   11  * are met:
   12  * 1. Redistributions of source code must retain the above copyright
   13  *    notice, this list of conditions and the following disclaimer.
   14  * 2. Redistributions in binary form must reproduce the above copyright
   15  *    notice, this list of conditions and the following disclaimer in the
   16  *    documentation and/or other materials provided with the distribution.
   17  * 3. All advertising materials mentioning features or use of this software
   18  *    must display the following acknowledgement:
   19  *      This product includes software developed by the University of
   20  *      California, Berkeley and its contributors.
   21  * 4. Neither the name of the University nor the names of its contributors
   22  *    may be used to endorse or promote products derived from this software
   23  *    without specific prior written permission.
   24  *
   25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   35  * SUCH DAMAGE.
   36  *
   37  *      from: @(#)machdep.c     7.4 (Berkeley) 6/3/91
   38  */
   39 
   40 #include <sys/cdefs.h>
   41 __FBSDID("$FreeBSD: releng/8.0/sys/pc98/pc98/machdep.c 194784 2009-06-23 22:42:39Z jeff $");
   42 
   43 #include "opt_atalk.h"
   44 #include "opt_compat.h"
   45 #include "opt_cpu.h"
   46 #include "opt_ddb.h"
   47 #include "opt_inet.h"
   48 #include "opt_ipx.h"
   49 #include "opt_isa.h"
   50 #include "opt_kstack_pages.h"
   51 #include "opt_maxmem.h"
   52 #include "opt_msgbuf.h"
   53 #include "opt_npx.h"
   54 #include "opt_perfmon.h"
   55 
   56 #include <sys/param.h>
   57 #include <sys/proc.h>
   58 #include <sys/systm.h>
   59 #include <sys/bio.h>
   60 #include <sys/buf.h>
   61 #include <sys/bus.h>
   62 #include <sys/callout.h>
   63 #include <sys/cons.h>
   64 #include <sys/cpu.h>
   65 #include <sys/eventhandler.h>
   66 #include <sys/exec.h>
   67 #include <sys/imgact.h>
   68 #include <sys/kdb.h>
   69 #include <sys/kernel.h>
   70 #include <sys/ktr.h>
   71 #include <sys/linker.h>
   72 #include <sys/lock.h>
   73 #include <sys/malloc.h>
   74 #include <sys/memrange.h>
   75 #include <sys/msgbuf.h>
   76 #include <sys/mutex.h>
   77 #include <sys/pcpu.h>
   78 #include <sys/ptrace.h>
   79 #include <sys/reboot.h>
   80 #include <sys/sched.h>
   81 #include <sys/signalvar.h>
   82 #include <sys/sysctl.h>
   83 #include <sys/sysent.h>
   84 #include <sys/sysproto.h>
   85 #include <sys/ucontext.h>
   86 #include <sys/vmmeter.h>
   87 
   88 #include <vm/vm.h>
   89 #include <vm/vm_extern.h>
   90 #include <vm/vm_kern.h>
   91 #include <vm/vm_page.h>
   92 #include <vm/vm_map.h>
   93 #include <vm/vm_object.h>
   94 #include <vm/vm_pager.h>
   95 #include <vm/vm_param.h>
   96 
   97 #ifdef DDB
   98 #ifndef KDB
   99 #error KDB must be enabled in order for DDB to work!
  100 #endif
  101 #include <ddb/ddb.h>
  102 #include <ddb/db_sym.h>
  103 #endif
  104 
  105 #include <pc98/pc98/pc98_machdep.h>
  106 
  107 #include <net/netisr.h>
  108 
  109 #include <machine/bootinfo.h>
  110 #include <machine/clock.h>
  111 #include <machine/cpu.h>
  112 #include <machine/cputypes.h>
  113 #include <machine/intr_machdep.h>
  114 #include <machine/mca.h>
  115 #include <machine/md_var.h>
  116 #include <machine/pc/bios.h>
  117 #include <machine/pcb.h>
  118 #include <machine/pcb_ext.h>
  119 #include <machine/proc.h>
  120 #include <machine/reg.h>
  121 #include <machine/sigframe.h>
  122 #include <machine/specialreg.h>
  123 #include <machine/vm86.h>
  124 #ifdef PERFMON
  125 #include <machine/perfmon.h>
  126 #endif
  127 #ifdef SMP
  128 #include <machine/smp.h>
  129 #endif
  130 
  131 #ifdef DEV_ISA
  132 #include <i386/isa/icu.h>
  133 #endif
  134 
  135 /* Sanity check for __curthread() */
  136 CTASSERT(offsetof(struct pcpu, pc_curthread) == 0);
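
/*
 * Editor's note (not part of the original file): the assertion above
 * guards the i386 __curthread() idiom, which fetches the per-CPU
 * curthread pointer straight from %fs:0 and therefore requires
 * pc_curthread to sit at offset 0 of struct pcpu.  A sketch of the
 * equivalent inline (the real one lives in machine/pcpu.h):
 */
#ifdef notdef
static __inline struct thread *
__curthread(void)
{
	struct thread *td;

	/* %fs points at this CPU's struct pcpu; offset 0 is pc_curthread. */
	__asm("movl %%fs:0,%0" : "=r" (td));
	return (td);
}
#endif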
  137 
  138 extern void init386(int first);
  139 extern void dblfault_handler(void);
  140 
  141 extern void printcpuinfo(void); /* XXX header file */
  142 extern void finishidentcpu(void);
  143 extern void panicifcpuunsupported(void);
  144 extern void initializecpu(void);
  145 
  146 #define CS_SECURE(cs)           (ISPL(cs) == SEL_UPL)
  147 #define EFL_SECURE(ef, oef)     ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
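
/*
 * Editor's sketch (hypothetical function, not in the original file):
 * CS_SECURE() accepts a %cs only if its privilege level is user
 * (RPL 3), so a frame smuggling in a ring-0 selector fails.
 * EFL_SECURE() passes only when every bit outside PSL_USERCHANGE is
 * unchanged: flipping an arithmetic flag is fine, raising IOPL is not
 * (assuming the frame came from user mode, where IOPL is 0).
 */
#ifdef notdef
static void
efl_secure_example(struct trapframe *regs)
{
	register_t old = regs->tf_eflags;

	/* A change confined to the carry flag passes the check... */
	KASSERT(EFL_SECURE(old | PSL_C, old), ("carry change allowed"));
	/* ...but IOPL lies outside PSL_USERCHANGE and is rejected. */
	KASSERT(!EFL_SECURE(old | PSL_IOPL, old), ("IOPL change refused"));
}
#endif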
  148 
  149 #if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
  150 #define CPU_ENABLE_SSE
  151 #endif
  152 
  153 static void cpu_startup(void *);
  154 static void fpstate_drop(struct thread *td);
  155 static void get_fpcontext(struct thread *td, mcontext_t *mcp);
  156 static int  set_fpcontext(struct thread *td, const mcontext_t *mcp);
  157 #ifdef CPU_ENABLE_SSE
  158 static void set_fpregs_xmm(struct save87 *, struct savexmm *);
  159 static void fill_fpregs_xmm(struct savexmm *, struct save87 *);
  160 #endif /* CPU_ENABLE_SSE */
  161 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
  162 
   163 int     need_pre_dma_flush;     /* If 1, use wbinvd before DMA transfer. */
  164 int     need_post_dma_flush;    /* If 1, use invd after DMA transfer. */
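
/*
 * Editor's sketch (hypothetical helper, not in the original file):
 * a pc98 DMA path would consult these flags around a transfer roughly
 * like this; wbinvd()/invd() are the cpufunc.h cache-control wrappers.
 */
#ifdef notdef
static void
dma_cache_sync(int before)
{
	if (before) {
		if (need_pre_dma_flush)
			wbinvd();	/* write back and invalidate caches */
	} else {
		if (need_post_dma_flush)
			invd();		/* discard now-stale cache lines */
	}
}
#endif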
  165 
  166 #ifdef DDB
  167 extern vm_offset_t ksym_start, ksym_end;
  168 #endif
  169 
  170 int     _udatasel, _ucodesel;
  171 u_int   basemem;
  172 
  173 static int      ispc98 = 1;
  174 SYSCTL_INT(_machdep, OID_AUTO, ispc98, CTLFLAG_RD, &ispc98, 0, "");
  175 
  176 int cold = 1;
  177 
  178 #ifdef COMPAT_43
  179 static void osendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask);
  180 #endif
  181 #ifdef COMPAT_FREEBSD4
  182 static void freebsd4_sendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask);
  183 #endif
  184 
  185 long Maxmem = 0;
  186 long realmem = 0;
  187 
  188 /*
  189  * The number of PHYSMAP entries must be one less than the number of
  190  * PHYSSEG entries because the PHYSMAP entry that spans the largest
  191  * physical address that is accessible by ISA DMA is split into two
  192  * PHYSSEG entries.
  193  */
  194 #define PHYSMAP_SIZE    (2 * (VM_PHYSSEG_MAX - 1))
  195 
  196 vm_paddr_t phys_avail[PHYSMAP_SIZE + 2];
  197 vm_paddr_t dump_avail[PHYSMAP_SIZE + 2];
  198 
   199 /* Must be 2 less so that a (0, 0) pair can mark the end of the chunk list. */
  200 #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2)
  201 #define DUMP_AVAIL_ARRAY_END ((sizeof(dump_avail) / sizeof(dump_avail[0])) - 2)
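
/*
 * Editor's sketch (hypothetical helper, not in the original file):
 * phys_avail[] holds (start, end) pairs terminated by a zero pair,
 * so a walker mirrors the bootverbose loop in cpu_startup() below:
 */
#ifdef notdef
static vm_paddr_t
phys_avail_total(void)
{
	vm_paddr_t total;
	int i;

	total = 0;
	for (i = 0; phys_avail[i + 1] != 0; i += 2)
		total += phys_avail[i + 1] - phys_avail[i];
	return (total);
}
#endif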
  202 
  203 struct kva_md_info kmi;
  204 
  205 static struct trapframe proc0_tf;
  206 struct pcpu __pcpu[MAXCPU];
  207 
  208 struct mtx icu_lock;
  209 
  210 struct mem_range_softc mem_range_softc;
  211 
  212 static void
  213 cpu_startup(dummy)
  214         void *dummy;
  215 {
  216         /*
  217          * Good {morning,afternoon,evening,night}.
  218          */
  219         startrtclock();
  220         printcpuinfo();
  221         panicifcpuunsupported();
  222 #ifdef PERFMON
  223         perfmon_init();
  224 #endif
  225         printf("real memory  = %ju (%ju MB)\n", ptoa((uintmax_t)Maxmem),
  226             ptoa((uintmax_t)Maxmem) / 1048576);
  227         realmem = Maxmem;
  228         /*
  229          * Display any holes after the first chunk of extended memory.
  230          */
  231         if (bootverbose) {
  232                 int indx;
  233 
  234                 printf("Physical memory chunk(s):\n");
  235                 for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
  236                         vm_paddr_t size;
  237 
  238                         size = phys_avail[indx + 1] - phys_avail[indx];
  239                         printf(
  240                             "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n",
  241                             (uintmax_t)phys_avail[indx],
  242                             (uintmax_t)phys_avail[indx + 1] - 1,
  243                             (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
  244                 }
  245         }
  246 
  247         vm_ksubmap_init(&kmi);
  248 
  249         printf("avail memory = %ju (%ju MB)\n",
  250             ptoa((uintmax_t)cnt.v_free_count),
  251             ptoa((uintmax_t)cnt.v_free_count) / 1048576);
  252 
  253         /*
  254          * Set up buffers, so they can be used to read disk labels.
  255          */
  256         bufinit();
  257         vm_pager_bufferinit();
  258 
  259         cpu_setregs();
  260 
  261         mca_init();
  262 }
  263 
  264 /*
  265  * Send an interrupt to process.
  266  *
  267  * Stack is set up to allow sigcode stored
  268  * at top to call routine, followed by kcall
  269  * to sigreturn routine below.  After sigreturn
  270  * resets the signal mask, the stack, and the
  271  * frame pointer, it returns to the user
  272  * specified pc, psl.
  273  */
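
/*
 * Editor's sketch (userland counterpart, not part of the original
 * file): the frames built below carry the three arguments a handler
 * installed with SA_SIGINFO receives, matching sf_signum, sf_siginfo
 * and sf_ucontext in the sigframe layouts.
 */
#ifdef notdef
#include <signal.h>

static void
handler(int signo, siginfo_t *si, void *ucp)
{
	/* signo/si/ucp arrive via the sigframe sendsig() copies out. */
}

static void
install_handler(void)
{
	struct sigaction sa;

	sa.sa_sigaction = handler;
	sa.sa_flags = SA_SIGINFO;
	sigemptyset(&sa.sa_mask);
	sigaction(SIGUSR1, &sa, NULL);
}
#endif
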
  274 #ifdef COMPAT_43
  275 static void
  276 osendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
  277 {
  278         struct osigframe sf, *fp;
  279         struct proc *p;
  280         struct thread *td;
  281         struct sigacts *psp;
  282         struct trapframe *regs;
  283         int sig;
  284         int oonstack;
  285 
  286         td = curthread;
  287         p = td->td_proc;
  288         PROC_LOCK_ASSERT(p, MA_OWNED);
  289         sig = ksi->ksi_signo;
  290         psp = p->p_sigacts;
  291         mtx_assert(&psp->ps_mtx, MA_OWNED);
  292         regs = td->td_frame;
  293         oonstack = sigonstack(regs->tf_esp);
  294 
  295         /* Allocate space for the signal handler context. */
  296         if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
  297             SIGISMEMBER(psp->ps_sigonstack, sig)) {
  298                 fp = (struct osigframe *)(td->td_sigstk.ss_sp +
  299                     td->td_sigstk.ss_size - sizeof(struct osigframe));
  300 #if defined(COMPAT_43)
  301                 td->td_sigstk.ss_flags |= SS_ONSTACK;
  302 #endif
  303         } else
  304                 fp = (struct osigframe *)regs->tf_esp - 1;
  305 
  306         /* Translate the signal if appropriate. */
  307         if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
  308                 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
  309 
  310         /* Build the argument list for the signal handler. */
  311         sf.sf_signum = sig;
  312         sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc;
  313         if (SIGISMEMBER(psp->ps_siginfo, sig)) {
  314                 /* Signal handler installed with SA_SIGINFO. */
  315                 sf.sf_arg2 = (register_t)&fp->sf_siginfo;
  316                 sf.sf_siginfo.si_signo = sig;
  317                 sf.sf_siginfo.si_code = ksi->ksi_code;
  318                 sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher;
  319         } else {
  320                 /* Old FreeBSD-style arguments. */
  321                 sf.sf_arg2 = ksi->ksi_code;
  322                 sf.sf_addr = (register_t)ksi->ksi_addr;
  323                 sf.sf_ahu.sf_handler = catcher;
  324         }
  325         mtx_unlock(&psp->ps_mtx);
  326         PROC_UNLOCK(p);
  327 
  328         /* Save most if not all of trap frame. */
  329         sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax;
  330         sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx;
  331         sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx;
  332         sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx;
  333         sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi;
  334         sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi;
  335         sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs;
  336         sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds;
  337         sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss;
  338         sf.sf_siginfo.si_sc.sc_es = regs->tf_es;
  339         sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs;
  340         sf.sf_siginfo.si_sc.sc_gs = rgs();
  341         sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp;
  342 
  343         /* Build the signal context to be used by osigreturn(). */
  344         sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0;
  345         SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask);
  346         sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp;
  347         sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp;
  348         sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip;
  349         sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags;
  350         sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno;
  351         sf.sf_siginfo.si_sc.sc_err = regs->tf_err;
  352 
  353         /*
  354          * If we're a vm86 process, we want to save the segment registers.
  355          * We also change eflags to be our emulated eflags, not the actual
  356          * eflags.
  357          */
  358         if (regs->tf_eflags & PSL_VM) {
  359                 /* XXX confusing names: `tf' isn't a trapframe; `regs' is. */
  360                 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
  361                 struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86;
  362 
  363                 sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs;
  364                 sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs;
  365                 sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es;
  366                 sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds;
  367 
  368                 if (vm86->vm86_has_vme == 0)
  369                         sf.sf_siginfo.si_sc.sc_ps =
  370                             (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
  371                             (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
  372 
  373                 /* See sendsig() for comments. */
  374                 tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
  375         }
  376 
  377         /*
  378          * Copy the sigframe out to the user's stack.
  379          */
  380         if (copyout(&sf, fp, sizeof(*fp)) != 0) {
  381 #ifdef DEBUG
  382                 printf("process %ld has trashed its stack\n", (long)p->p_pid);
  383 #endif
  384                 PROC_LOCK(p);
  385                 sigexit(td, SIGILL);
  386         }
  387 
  388         regs->tf_esp = (int)fp;
  389         regs->tf_eip = PS_STRINGS - szosigcode;
  390         regs->tf_eflags &= ~(PSL_T | PSL_D);
  391         regs->tf_cs = _ucodesel;
  392         regs->tf_ds = _udatasel;
  393         regs->tf_es = _udatasel;
  394         regs->tf_fs = _udatasel;
  395         load_gs(_udatasel);
  396         regs->tf_ss = _udatasel;
  397         PROC_LOCK(p);
  398         mtx_lock(&psp->ps_mtx);
  399 }
  400 #endif /* COMPAT_43 */
  401 
  402 #ifdef COMPAT_FREEBSD4
  403 static void
  404 freebsd4_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
  405 {
  406         struct sigframe4 sf, *sfp;
  407         struct proc *p;
  408         struct thread *td;
  409         struct sigacts *psp;
  410         struct trapframe *regs;
  411         int sig;
  412         int oonstack;
  413 
  414         td = curthread;
  415         p = td->td_proc;
  416         PROC_LOCK_ASSERT(p, MA_OWNED);
  417         sig = ksi->ksi_signo;
  418         psp = p->p_sigacts;
  419         mtx_assert(&psp->ps_mtx, MA_OWNED);
  420         regs = td->td_frame;
  421         oonstack = sigonstack(regs->tf_esp);
  422 
  423         /* Save user context. */
  424         bzero(&sf, sizeof(sf));
  425         sf.sf_uc.uc_sigmask = *mask;
  426         sf.sf_uc.uc_stack = td->td_sigstk;
  427         sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
  428             ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
  429         sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
  430         sf.sf_uc.uc_mcontext.mc_gs = rgs();
  431         bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs));
  432 
  433         /* Allocate space for the signal handler context. */
  434         if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
  435             SIGISMEMBER(psp->ps_sigonstack, sig)) {
  436                 sfp = (struct sigframe4 *)(td->td_sigstk.ss_sp +
  437                     td->td_sigstk.ss_size - sizeof(struct sigframe4));
  438 #if defined(COMPAT_43)
  439                 td->td_sigstk.ss_flags |= SS_ONSTACK;
  440 #endif
  441         } else
  442                 sfp = (struct sigframe4 *)regs->tf_esp - 1;
  443 
  444         /* Translate the signal if appropriate. */
  445         if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
  446                 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
  447 
  448         /* Build the argument list for the signal handler. */
  449         sf.sf_signum = sig;
  450         sf.sf_ucontext = (register_t)&sfp->sf_uc;
  451         if (SIGISMEMBER(psp->ps_siginfo, sig)) {
  452                 /* Signal handler installed with SA_SIGINFO. */
  453                 sf.sf_siginfo = (register_t)&sfp->sf_si;
  454                 sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;
  455 
  456                 /* Fill in POSIX parts */
  457                 sf.sf_si.si_signo = sig;
  458                 sf.sf_si.si_code = ksi->ksi_code;
  459                 sf.sf_si.si_addr = ksi->ksi_addr;
  460         } else {
  461                 /* Old FreeBSD-style arguments. */
  462                 sf.sf_siginfo = ksi->ksi_code;
  463                 sf.sf_addr = (register_t)ksi->ksi_addr;
  464                 sf.sf_ahu.sf_handler = catcher;
  465         }
  466         mtx_unlock(&psp->ps_mtx);
  467         PROC_UNLOCK(p);
  468 
  469         /*
  470          * If we're a vm86 process, we want to save the segment registers.
  471          * We also change eflags to be our emulated eflags, not the actual
  472          * eflags.
  473          */
  474         if (regs->tf_eflags & PSL_VM) {
  475                 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
  476                 struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86;
  477 
  478                 sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs;
  479                 sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs;
  480                 sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es;
  481                 sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds;
  482 
  483                 if (vm86->vm86_has_vme == 0)
  484                         sf.sf_uc.uc_mcontext.mc_eflags =
  485                             (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
  486                             (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
  487 
  488                 /*
  489                  * Clear PSL_NT to inhibit T_TSSFLT faults on return from
  490                  * syscalls made by the signal handler.  This just avoids
  491                  * wasting time for our lazy fixup of such faults.  PSL_NT
  492                  * does nothing in vm86 mode, but vm86 programs can set it
  493                  * almost legitimately in probes for old cpu types.
  494                  */
  495                 tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
  496         }
  497 
  498         /*
  499          * Copy the sigframe out to the user's stack.
  500          */
  501         if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
  502 #ifdef DEBUG
  503                 printf("process %ld has trashed its stack\n", (long)p->p_pid);
  504 #endif
  505                 PROC_LOCK(p);
  506                 sigexit(td, SIGILL);
  507         }
  508 
  509         regs->tf_esp = (int)sfp;
  510         regs->tf_eip = PS_STRINGS - szfreebsd4_sigcode;
  511         regs->tf_eflags &= ~(PSL_T | PSL_D);
  512         regs->tf_cs = _ucodesel;
  513         regs->tf_ds = _udatasel;
  514         regs->tf_es = _udatasel;
  515         regs->tf_fs = _udatasel;
  516         regs->tf_ss = _udatasel;
  517         PROC_LOCK(p);
  518         mtx_lock(&psp->ps_mtx);
  519 }
  520 #endif  /* COMPAT_FREEBSD4 */
  521 
  522 void
  523 sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
  524 {
  525         struct sigframe sf, *sfp;
  526         struct proc *p;
  527         struct thread *td;
  528         struct sigacts *psp;
  529         char *sp;
  530         struct trapframe *regs;
  531         struct segment_descriptor *sdp;
  532         int sig;
  533         int oonstack;
  534 
  535         td = curthread;
  536         p = td->td_proc;
  537         PROC_LOCK_ASSERT(p, MA_OWNED);
  538         sig = ksi->ksi_signo;
  539         psp = p->p_sigacts;
  540         mtx_assert(&psp->ps_mtx, MA_OWNED);
  541 #ifdef COMPAT_FREEBSD4
  542         if (SIGISMEMBER(psp->ps_freebsd4, sig)) {
  543                 freebsd4_sendsig(catcher, ksi, mask);
  544                 return;
  545         }
  546 #endif
  547 #ifdef COMPAT_43
  548         if (SIGISMEMBER(psp->ps_osigset, sig)) {
  549                 osendsig(catcher, ksi, mask);
  550                 return;
  551         }
  552 #endif
  553         regs = td->td_frame;
  554         oonstack = sigonstack(regs->tf_esp);
  555 
  556         /* Save user context. */
  557         bzero(&sf, sizeof(sf));
  558         sf.sf_uc.uc_sigmask = *mask;
  559         sf.sf_uc.uc_stack = td->td_sigstk;
  560         sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
  561             ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
  562         sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
  563         sf.sf_uc.uc_mcontext.mc_gs = rgs();
  564         bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs));
  565         sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
  566         get_fpcontext(td, &sf.sf_uc.uc_mcontext);
  567         fpstate_drop(td);
  568         /*
  569          * Unconditionally fill the fsbase and gsbase into the mcontext.
  570          */
   571         sdp = &td->td_pcb->pcb_fsd;
  572         sf.sf_uc.uc_mcontext.mc_fsbase = sdp->sd_hibase << 24 |
  573             sdp->sd_lobase;
   574         sdp = &td->td_pcb->pcb_gsd;
  575         sf.sf_uc.uc_mcontext.mc_gsbase = sdp->sd_hibase << 24 |
  576             sdp->sd_lobase;
  577 
  578         /* Allocate space for the signal handler context. */
  579         if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
  580             SIGISMEMBER(psp->ps_sigonstack, sig)) {
  581                 sp = td->td_sigstk.ss_sp +
  582                     td->td_sigstk.ss_size - sizeof(struct sigframe);
  583 #if defined(COMPAT_43)
  584                 td->td_sigstk.ss_flags |= SS_ONSTACK;
  585 #endif
  586         } else
  587                 sp = (char *)regs->tf_esp - sizeof(struct sigframe);
  588         /* Align to 16 bytes. */
  589         sfp = (struct sigframe *)((unsigned int)sp & ~0xF);
  590 
  591         /* Translate the signal if appropriate. */
  592         if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
  593                 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
  594 
  595         /* Build the argument list for the signal handler. */
  596         sf.sf_signum = sig;
  597         sf.sf_ucontext = (register_t)&sfp->sf_uc;
  598         if (SIGISMEMBER(psp->ps_siginfo, sig)) {
  599                 /* Signal handler installed with SA_SIGINFO. */
  600                 sf.sf_siginfo = (register_t)&sfp->sf_si;
  601                 sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;
  602 
  603                 /* Fill in POSIX parts */
  604                 sf.sf_si = ksi->ksi_info;
  605                 sf.sf_si.si_signo = sig; /* maybe a translated signal */
  606         } else {
  607                 /* Old FreeBSD-style arguments. */
  608                 sf.sf_siginfo = ksi->ksi_code;
  609                 sf.sf_addr = (register_t)ksi->ksi_addr;
  610                 sf.sf_ahu.sf_handler = catcher;
  611         }
  612         mtx_unlock(&psp->ps_mtx);
  613         PROC_UNLOCK(p);
  614 
  615         /*
  616          * If we're a vm86 process, we want to save the segment registers.
  617          * We also change eflags to be our emulated eflags, not the actual
  618          * eflags.
  619          */
  620         if (regs->tf_eflags & PSL_VM) {
  621                 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
  622                 struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86;
  623 
  624                 sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs;
  625                 sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs;
  626                 sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es;
  627                 sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds;
  628 
  629                 if (vm86->vm86_has_vme == 0)
  630                         sf.sf_uc.uc_mcontext.mc_eflags =
  631                             (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
  632                             (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
  633 
  634                 /*
  635                  * Clear PSL_NT to inhibit T_TSSFLT faults on return from
  636                  * syscalls made by the signal handler.  This just avoids
  637                  * wasting time for our lazy fixup of such faults.  PSL_NT
  638                  * does nothing in vm86 mode, but vm86 programs can set it
  639                  * almost legitimately in probes for old cpu types.
  640                  */
  641                 tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
  642         }
  643 
  644         /*
  645          * Copy the sigframe out to the user's stack.
  646          */
  647         if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
  648 #ifdef DEBUG
  649                 printf("process %ld has trashed its stack\n", (long)p->p_pid);
  650 #endif
  651                 PROC_LOCK(p);
  652                 sigexit(td, SIGILL);
  653         }
  654 
  655         regs->tf_esp = (int)sfp;
  656         regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
  657         regs->tf_eflags &= ~(PSL_T | PSL_D);
  658         regs->tf_cs = _ucodesel;
  659         regs->tf_ds = _udatasel;
  660         regs->tf_es = _udatasel;
  661         regs->tf_fs = _udatasel;
  662         regs->tf_ss = _udatasel;
  663         PROC_LOCK(p);
  664         mtx_lock(&psp->ps_mtx);
  665 }
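
/*
 * Editor's note on the "Align to 16 bytes" step above: masking the
 * low bits rounds the frame address down, never up, so the frame
 * stays within the space just reserved.  A hedged, generic form of
 * the idiom (hypothetical macro, not in the original file):
 */
#ifdef notdef
#define	ALIGN_DOWN(p, a)	((uintptr_t)(p) & ~((uintptr_t)(a) - 1))
/* ALIGN_DOWN(0xbfbfed1c, 16) == 0xbfbfed10, matching (sp & ~0xF). */
#endif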
  666 
  667 /*
  668  * System call to cleanup state after a signal
  669  * has been taken.  Reset signal mask and
  670  * stack state from context left by sendsig (above).
  671  * Return to previous pc and psl as specified by
  672  * context left by sendsig. Check carefully to
  673  * make sure that the user has not modified the
  674  * state to gain improper privileges.
  675  *
  676  * MPSAFE
  677  */
  678 #ifdef COMPAT_43
  679 int
  680 osigreturn(td, uap)
  681         struct thread *td;
  682         struct osigreturn_args /* {
  683                 struct osigcontext *sigcntxp;
  684         } */ *uap;
  685 {
  686         struct osigcontext sc;
  687         struct trapframe *regs;
  688         struct osigcontext *scp;
  689         struct proc *p = td->td_proc;
  690         int eflags, error;
  691         ksiginfo_t ksi;
  692 
  693         regs = td->td_frame;
  694         error = copyin(uap->sigcntxp, &sc, sizeof(sc));
  695         if (error != 0)
  696                 return (error);
  697         scp = &sc;
  698         eflags = scp->sc_ps;
  699         if (eflags & PSL_VM) {
  700                 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
  701                 struct vm86_kernel *vm86;
  702 
  703                 /*
  704                  * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
  705                  * set up the vm86 area, and we can't enter vm86 mode.
  706                  */
  707                 if (td->td_pcb->pcb_ext == 0)
  708                         return (EINVAL);
  709                 vm86 = &td->td_pcb->pcb_ext->ext_vm86;
  710                 if (vm86->vm86_inited == 0)
  711                         return (EINVAL);
  712 
  713                 /* Go back to user mode if both flags are set. */
  714                 if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) {
  715                         ksiginfo_init_trap(&ksi);
  716                         ksi.ksi_signo = SIGBUS;
  717                         ksi.ksi_code = BUS_OBJERR;
  718                         ksi.ksi_addr = (void *)regs->tf_eip;
  719                         trapsignal(td, &ksi);
  720                 }
  721 
  722                 if (vm86->vm86_has_vme) {
  723                         eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
  724                             (eflags & VME_USERCHANGE) | PSL_VM;
  725                 } else {
  726                         vm86->vm86_eflags = eflags;     /* save VIF, VIP */
  727                         eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
  728                             (eflags & VM_USERCHANGE) | PSL_VM;
  729                 }
  730                 tf->tf_vm86_ds = scp->sc_ds;
  731                 tf->tf_vm86_es = scp->sc_es;
  732                 tf->tf_vm86_fs = scp->sc_fs;
  733                 tf->tf_vm86_gs = scp->sc_gs;
  734                 tf->tf_ds = _udatasel;
  735                 tf->tf_es = _udatasel;
  736                 tf->tf_fs = _udatasel;
  737         } else {
  738                 /*
  739                  * Don't allow users to change privileged or reserved flags.
  740                  */
  741                 /*
  742                  * XXX do allow users to change the privileged flag PSL_RF.
  743                  * The cpu sets PSL_RF in tf_eflags for faults.  Debuggers
  744                  * should sometimes set it there too.  tf_eflags is kept in
  745                  * the signal context during signal handling and there is no
  746                  * other place to remember it, so the PSL_RF bit may be
  747                  * corrupted by the signal handler without us knowing.
  748                  * Corruption of the PSL_RF bit at worst causes one more or
  749                  * one less debugger trap, so allowing it is fairly harmless.
  750                  */
  751                 if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
  752                         return (EINVAL);
  753                 }
  754 
  755                 /*
  756                  * Don't allow users to load a valid privileged %cs.  Let the
  757                  * hardware check for invalid selectors, excess privilege in
  758                  * other selectors, invalid %eip's and invalid %esp's.
  759                  */
  760                 if (!CS_SECURE(scp->sc_cs)) {
  761                         ksiginfo_init_trap(&ksi);
  762                         ksi.ksi_signo = SIGBUS;
  763                         ksi.ksi_code = BUS_OBJERR;
  764                         ksi.ksi_trapno = T_PROTFLT;
  765                         ksi.ksi_addr = (void *)regs->tf_eip;
  766                         trapsignal(td, &ksi);
  767                         return (EINVAL);
  768                 }
  769                 regs->tf_ds = scp->sc_ds;
  770                 regs->tf_es = scp->sc_es;
  771                 regs->tf_fs = scp->sc_fs;
  772         }
  773 
  774         /* Restore remaining registers. */
  775         regs->tf_eax = scp->sc_eax;
  776         regs->tf_ebx = scp->sc_ebx;
  777         regs->tf_ecx = scp->sc_ecx;
  778         regs->tf_edx = scp->sc_edx;
  779         regs->tf_esi = scp->sc_esi;
  780         regs->tf_edi = scp->sc_edi;
  781         regs->tf_cs = scp->sc_cs;
  782         regs->tf_ss = scp->sc_ss;
  783         regs->tf_isp = scp->sc_isp;
  784         regs->tf_ebp = scp->sc_fp;
  785         regs->tf_esp = scp->sc_sp;
  786         regs->tf_eip = scp->sc_pc;
  787         regs->tf_eflags = eflags;
  788 
  789         PROC_LOCK(p);
  790 #if defined(COMPAT_43)
  791         if (scp->sc_onstack & 1)
  792                 td->td_sigstk.ss_flags |= SS_ONSTACK;
  793         else
  794                 td->td_sigstk.ss_flags &= ~SS_ONSTACK;
  795 #endif
  796         SIGSETOLD(td->td_sigmask, scp->sc_mask);
  797         SIG_CANTMASK(td->td_sigmask);
  798         signotify(td);
  799         PROC_UNLOCK(p);
  800         return (EJUSTRETURN);
  801 }
  802 #endif /* COMPAT_43 */
  803 
  804 #ifdef COMPAT_FREEBSD4
  805 /*
  806  * MPSAFE
  807  */
  808 int
  809 freebsd4_sigreturn(td, uap)
  810         struct thread *td;
  811         struct freebsd4_sigreturn_args /* {
  812                 const ucontext4 *sigcntxp;
  813         } */ *uap;
  814 {
  815         struct ucontext4 uc;
  816         struct proc *p = td->td_proc;
  817         struct trapframe *regs;
  818         const struct ucontext4 *ucp;
  819         int cs, eflags, error;
  820         ksiginfo_t ksi;
  821 
  822         error = copyin(uap->sigcntxp, &uc, sizeof(uc));
  823         if (error != 0)
  824                 return (error);
  825         ucp = &uc;
  826         regs = td->td_frame;
  827         eflags = ucp->uc_mcontext.mc_eflags;
  828         if (eflags & PSL_VM) {
  829                 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
  830                 struct vm86_kernel *vm86;
  831 
  832                 /*
  833                  * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
  834                  * set up the vm86 area, and we can't enter vm86 mode.
  835                  */
  836                 if (td->td_pcb->pcb_ext == 0)
  837                         return (EINVAL);
  838                 vm86 = &td->td_pcb->pcb_ext->ext_vm86;
  839                 if (vm86->vm86_inited == 0)
  840                         return (EINVAL);
  841 
  842                 /* Go back to user mode if both flags are set. */
  843                 if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) {
  844                         ksiginfo_init_trap(&ksi);
  845                         ksi.ksi_signo = SIGBUS;
  846                         ksi.ksi_code = BUS_OBJERR;
  847                         ksi.ksi_addr = (void *)regs->tf_eip;
  848                         trapsignal(td, &ksi);
  849                 }
  850                 if (vm86->vm86_has_vme) {
  851                         eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
  852                             (eflags & VME_USERCHANGE) | PSL_VM;
  853                 } else {
  854                         vm86->vm86_eflags = eflags;     /* save VIF, VIP */
  855                         eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
  856                             (eflags & VM_USERCHANGE) | PSL_VM;
  857                 }
  858                 bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe));
  859                 tf->tf_eflags = eflags;
  860                 tf->tf_vm86_ds = tf->tf_ds;
  861                 tf->tf_vm86_es = tf->tf_es;
  862                 tf->tf_vm86_fs = tf->tf_fs;
  863                 tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs;
  864                 tf->tf_ds = _udatasel;
  865                 tf->tf_es = _udatasel;
  866                 tf->tf_fs = _udatasel;
  867         } else {
  868                 /*
  869                  * Don't allow users to change privileged or reserved flags.
  870                  */
  871                 /*
  872                  * XXX do allow users to change the privileged flag PSL_RF.
  873                  * The cpu sets PSL_RF in tf_eflags for faults.  Debuggers
  874                  * should sometimes set it there too.  tf_eflags is kept in
  875                  * the signal context during signal handling and there is no
  876                  * other place to remember it, so the PSL_RF bit may be
  877                  * corrupted by the signal handler without us knowing.
  878                  * Corruption of the PSL_RF bit at worst causes one more or
  879                  * one less debugger trap, so allowing it is fairly harmless.
  880                  */
  881                 if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
  882                         printf("freebsd4_sigreturn: eflags = 0x%x\n", eflags);
  883                         return (EINVAL);
  884                 }
  885 
  886                 /*
  887                  * Don't allow users to load a valid privileged %cs.  Let the
  888                  * hardware check for invalid selectors, excess privilege in
  889                  * other selectors, invalid %eip's and invalid %esp's.
  890                  */
  891                 cs = ucp->uc_mcontext.mc_cs;
  892                 if (!CS_SECURE(cs)) {
  893                         printf("freebsd4_sigreturn: cs = 0x%x\n", cs);
  894                         ksiginfo_init_trap(&ksi);
  895                         ksi.ksi_signo = SIGBUS;
  896                         ksi.ksi_code = BUS_OBJERR;
  897                         ksi.ksi_trapno = T_PROTFLT;
  898                         ksi.ksi_addr = (void *)regs->tf_eip;
  899                         trapsignal(td, &ksi);
  900                         return (EINVAL);
  901                 }
  902 
  903                 bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
  904         }
  905 
  906         PROC_LOCK(p);
  907 #if defined(COMPAT_43)
  908         if (ucp->uc_mcontext.mc_onstack & 1)
  909                 td->td_sigstk.ss_flags |= SS_ONSTACK;
  910         else
  911                 td->td_sigstk.ss_flags &= ~SS_ONSTACK;
  912 #endif
  913 
  914         td->td_sigmask = ucp->uc_sigmask;
  915         SIG_CANTMASK(td->td_sigmask);
  916         signotify(td);
  917         PROC_UNLOCK(p);
  918         return (EJUSTRETURN);
  919 }
  920 #endif  /* COMPAT_FREEBSD4 */
  921 
  922 /*
  923  * MPSAFE
  924  */
  925 int
  926 sigreturn(td, uap)
  927         struct thread *td;
  928         struct sigreturn_args /* {
  929                 const struct __ucontext *sigcntxp;
  930         } */ *uap;
  931 {
  932         ucontext_t uc;
  933         struct proc *p = td->td_proc;
  934         struct trapframe *regs;
  935         const ucontext_t *ucp;
  936         int cs, eflags, error, ret;
  937         ksiginfo_t ksi;
  938 
  939         error = copyin(uap->sigcntxp, &uc, sizeof(uc));
  940         if (error != 0)
  941                 return (error);
  942         ucp = &uc;
  943         regs = td->td_frame;
  944         eflags = ucp->uc_mcontext.mc_eflags;
  945         if (eflags & PSL_VM) {
  946                 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
  947                 struct vm86_kernel *vm86;
  948 
  949                 /*
  950                  * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
  951                  * set up the vm86 area, and we can't enter vm86 mode.
  952                  */
  953                 if (td->td_pcb->pcb_ext == 0)
  954                         return (EINVAL);
  955                 vm86 = &td->td_pcb->pcb_ext->ext_vm86;
  956                 if (vm86->vm86_inited == 0)
  957                         return (EINVAL);
  958 
  959                 /* Go back to user mode if both flags are set. */
  960                 if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) {
  961                         ksiginfo_init_trap(&ksi);
  962                         ksi.ksi_signo = SIGBUS;
  963                         ksi.ksi_code = BUS_OBJERR;
  964                         ksi.ksi_addr = (void *)regs->tf_eip;
  965                         trapsignal(td, &ksi);
  966                 }
  967 
  968                 if (vm86->vm86_has_vme) {
  969                         eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
  970                             (eflags & VME_USERCHANGE) | PSL_VM;
  971                 } else {
  972                         vm86->vm86_eflags = eflags;     /* save VIF, VIP */
  973                         eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
  974                             (eflags & VM_USERCHANGE) | PSL_VM;
  975                 }
  976                 bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe));
  977                 tf->tf_eflags = eflags;
  978                 tf->tf_vm86_ds = tf->tf_ds;
  979                 tf->tf_vm86_es = tf->tf_es;
  980                 tf->tf_vm86_fs = tf->tf_fs;
  981                 tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs;
  982                 tf->tf_ds = _udatasel;
  983                 tf->tf_es = _udatasel;
  984                 tf->tf_fs = _udatasel;
  985         } else {
  986                 /*
  987                  * Don't allow users to change privileged or reserved flags.
  988                  */
  989                 /*
  990                  * XXX do allow users to change the privileged flag PSL_RF.
  991                  * The cpu sets PSL_RF in tf_eflags for faults.  Debuggers
  992                  * should sometimes set it there too.  tf_eflags is kept in
  993                  * the signal context during signal handling and there is no
  994                  * other place to remember it, so the PSL_RF bit may be
  995                  * corrupted by the signal handler without us knowing.
  996                  * Corruption of the PSL_RF bit at worst causes one more or
  997                  * one less debugger trap, so allowing it is fairly harmless.
  998                  */
  999                 if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
 1000                         printf("sigreturn: eflags = 0x%x\n", eflags);
 1001                         return (EINVAL);
 1002                 }
 1003 
 1004                 /*
 1005                  * Don't allow users to load a valid privileged %cs.  Let the
 1006                  * hardware check for invalid selectors, excess privilege in
 1007                  * other selectors, invalid %eip's and invalid %esp's.
 1008                  */
 1009                 cs = ucp->uc_mcontext.mc_cs;
 1010                 if (!CS_SECURE(cs)) {
 1011                         printf("sigreturn: cs = 0x%x\n", cs);
 1012                         ksiginfo_init_trap(&ksi);
 1013                         ksi.ksi_signo = SIGBUS;
 1014                         ksi.ksi_code = BUS_OBJERR;
 1015                         ksi.ksi_trapno = T_PROTFLT;
 1016                         ksi.ksi_addr = (void *)regs->tf_eip;
 1017                         trapsignal(td, &ksi);
 1018                         return (EINVAL);
 1019                 }
 1020 
 1021                 ret = set_fpcontext(td, &ucp->uc_mcontext);
 1022                 if (ret != 0)
 1023                         return (ret);
 1024                 bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
 1025         }
 1026 
 1027         PROC_LOCK(p);
 1028 #if defined(COMPAT_43)
 1029         if (ucp->uc_mcontext.mc_onstack & 1)
 1030                 td->td_sigstk.ss_flags |= SS_ONSTACK;
 1031         else
 1032                 td->td_sigstk.ss_flags &= ~SS_ONSTACK;
 1033 #endif
 1034 
 1035         td->td_sigmask = ucp->uc_sigmask;
 1036         SIG_CANTMASK(td->td_sigmask);
 1037         signotify(td);
 1038         PROC_UNLOCK(p);
 1039         return (EJUSTRETURN);
 1040 }
 1041 
 1042 /*
 1043  * Machine dependent boot() routine
 1044  *
 1045  * I haven't seen anything to put here yet
 1046  * Possibly some stuff might be grafted back here from boot()
 1047  */
 1048 void
 1049 cpu_boot(int howto)
 1050 {
 1051 }
 1052 
 1053 /*
 1054  * Flush the D-cache for non-DMA I/O so that the I-cache can
 1055  * be made coherent later.
 1056  */
 1057 void
 1058 cpu_flush_dcache(void *ptr, size_t len)
 1059 {
 1060         /* Not applicable */
 1061 }
 1062 
 1063 /* Get current clock frequency for the given cpu id. */
 1064 int
 1065 cpu_est_clockrate(int cpu_id, uint64_t *rate)
 1066 {
 1067         register_t reg;
 1068         uint64_t tsc1, tsc2;
 1069 
 1070         if (pcpu_find(cpu_id) == NULL || rate == NULL)
 1071                 return (EINVAL);
 1072         if (!tsc_present)
 1073                 return (EOPNOTSUPP);
 1074 
 1075         /* If we're booting, trust the rate calibrated moments ago. */
 1076         if (cold) {
 1077                 *rate = tsc_freq;
 1078                 return (0);
 1079         }
 1080 
 1081 #ifdef SMP
 1082         /* Schedule ourselves on the indicated cpu. */
 1083         thread_lock(curthread);
 1084         sched_bind(curthread, cpu_id);
 1085         thread_unlock(curthread);
 1086 #endif
 1087 
 1088         /* Calibrate by measuring a short delay. */
 1089         reg = intr_disable();
 1090         tsc1 = rdtsc();
 1091         DELAY(1000);
 1092         tsc2 = rdtsc();
 1093         intr_restore(reg);
 1094 
 1095 #ifdef SMP
 1096         thread_lock(curthread);
 1097         sched_unbind(curthread);
 1098         thread_unlock(curthread);
 1099 #endif
 1100 
 1101         /*
  1102          * Calculate the difference in readings, convert to Hz, and
 1103          * subtract 0.5% of the total.  Empirical testing has shown that
 1104          * overhead in DELAY() works out to approximately this value.
 1105          */
 1106         tsc2 -= tsc1;
 1107         *rate = tsc2 * 1000 - tsc2 * 5;
 1108         return (0);
 1109 }
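
/*
 * Editor's worked example for the arithmetic above (assumed CPU, not
 * part of the original file): DELAY(1000) waits 1000 us = 1 ms, so
 * tsc2 holds TSC ticks per millisecond.  On a 1 GHz TSC, tsc2 is
 * about 1000000; tsc2 * 1000 is about 1e9 ticks/s (Hz), and
 * tsc2 * 5 is 0.5% of that product, the measured DELAY() overhead,
 * giving *rate of roughly 995000000.
 */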
 1110 
 1111 /*
 1112  * Shutdown the CPU as much as possible
 1113  */
 1114 void
 1115 cpu_halt(void)
 1116 {
 1117         for (;;)
 1118                 __asm__ ("hlt");
 1119 }
 1120 
 1121 /*
 1122  * Hook to idle the CPU when possible.  In the SMP case we default to
 1123  * off because a halted cpu will not currently pick up a new thread in the
 1124  * run queue until the next timer tick.  If turned on this will result in
 1125  * approximately a 4.2% loss in real time performance in buildworld tests
 1126  * (but improves user and sys times oddly enough), and saves approximately
 1127  * 5% in power consumption on an idle machine (tests w/2xCPU 1.1GHz P3).
 1128  *
 1129  * XXX we need to have a cpu mask of idle cpus and generate an IPI or
 1130  * otherwise generate some sort of interrupt to wake up cpus sitting in HLT.
 1131  * Then we can have our cake and eat it too.
 1132  *
 1133  * XXX I'm turning it on for SMP as well by default for now.  It seems to
 1134  * help lock contention somewhat, and this is critical for HTT. -Peter
 1135  */
 1136 static int      cpu_idle_hlt = 1;
 1137 TUNABLE_INT("machdep.cpu_idle_hlt", &cpu_idle_hlt);
 1138 SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW,
 1139     &cpu_idle_hlt, 0, "Idle loop HLT enable");
 1140 
 1141 static void
 1142 cpu_idle_default(void)
 1143 {
 1144         /*
  1145          * We must absolutely guarantee that hlt is the
  1146          * next instruction after sti, or we introduce
  1147          * a timing window.
 1148          */
 1149         __asm __volatile("sti; hlt");
 1150 }
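
/*
 * Editor's sketch of the window the comment above closes: if anything
 * separates sti from hlt, an interrupt landing in the gap is serviced
 * first and the CPU halts anyway, sleeping past runnable work:
 *
 *	sti
 *	(interrupt fires here and marks a thread runnable)
 *	hlt	(halts regardless, until the *next* interrupt)
 *
 * The one-instruction interrupt shadow after sti makes "sti; hlt"
 * effectively atomic with respect to interrupt delivery.
 */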
 1151 
 1152 /*
 1153  * Note that we have to be careful here to avoid a race between checking
 1154  * sched_runnable() and actually halting.  If we don't do this, we may waste
 1155  * the time between calling hlt and the next interrupt even though there
 1156  * is a runnable process.
 1157  */
 1158 void
 1159 cpu_idle(int busy)
 1160 {
 1161 
 1162 #ifdef SMP
 1163         if (mp_grab_cpu_hlt())
 1164                 return;
 1165 #endif
 1166 
 1167         if (cpu_idle_hlt) {
 1168                 disable_intr();
 1169                 if (sched_runnable())
 1170                         enable_intr();
 1171                 else
 1172                         (*cpu_idle_hook)();
 1173         }
 1174 }
 1175 
 1176 int
 1177 cpu_idle_wakeup(int cpu)
 1178 {
 1179 
 1180         return (0);
 1181 }
 1182 
 1183 /* Other subsystems (e.g., ACPI) can hook this later. */
 1184 void (*cpu_idle_hook)(void) = cpu_idle_default;
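
/*
 * Editor's sketch (hypothetical, not in the original file): a
 * subsystem overrides the idle hook by storing its own routine, the
 * way the ACPI cpu driver substitutes a C-state-aware idle loop:
 */
#ifdef notdef
static void
acpi_style_idle(void)
{
	__asm __volatile("sti; hlt");	/* stand-in for a C-state entry */
}
/* ... at attach time: cpu_idle_hook = acpi_style_idle; */
#endif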
 1185 
 1186 /*
 1187  * Reset registers to default values on exec.
 1188  */
 1189 void
 1190 exec_setregs(td, entry, stack, ps_strings)
 1191         struct thread *td;
 1192         u_long entry;
 1193         u_long stack;
 1194         u_long ps_strings;
 1195 {
 1196         struct trapframe *regs = td->td_frame;
 1197         struct pcb *pcb = td->td_pcb;
 1198 
  1199         /* Reset pcb->pcb_gs and %gs before possibly invalidating it. */
 1200         pcb->pcb_gs = _udatasel;
 1201         load_gs(_udatasel);
 1202 
 1203         mtx_lock_spin(&dt_lock);
 1204         if (td->td_proc->p_md.md_ldt)
 1205                 user_ldt_free(td);
 1206         else
 1207                 mtx_unlock_spin(&dt_lock);
 1208   
 1209         bzero((char *)regs, sizeof(struct trapframe));
 1210         regs->tf_eip = entry;
 1211         regs->tf_esp = stack;
 1212         regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T);
 1213         regs->tf_ss = _udatasel;
 1214         regs->tf_ds = _udatasel;
 1215         regs->tf_es = _udatasel;
 1216         regs->tf_fs = _udatasel;
 1217         regs->tf_cs = _ucodesel;
 1218 
 1219         /* PS_STRINGS value for BSD/OS binaries.  It is 0 for non-BSD/OS. */
 1220         regs->tf_ebx = ps_strings;
 1221 
 1222         /*
 1223          * Reset the hardware debug registers if they were in use.
 1224          * They won't have any meaning for the newly exec'd process.  
 1225          */
 1226         if (pcb->pcb_flags & PCB_DBREGS) {
 1227                 pcb->pcb_dr0 = 0;
 1228                 pcb->pcb_dr1 = 0;
 1229                 pcb->pcb_dr2 = 0;
 1230                 pcb->pcb_dr3 = 0;
 1231                 pcb->pcb_dr6 = 0;
 1232                 pcb->pcb_dr7 = 0;
 1233                 if (pcb == PCPU_GET(curpcb)) {
 1234                         /*
 1235                          * Clear the debug registers on the running
 1236                          * CPU, otherwise they will end up affecting
 1237                          * the next process we switch to.
 1238                          */
 1239                         reset_dbregs();
 1240                 }
 1241                 pcb->pcb_flags &= ~PCB_DBREGS;
 1242         }
 1243 
 1244         /*
 1245          * Initialize the math emulator (if any) for the current process.
 1246          * Actually, just clear the bit that says that the emulator has
 1247          * been initialized.  Initialization is delayed until the process
 1248          * traps to the emulator (if it is done at all) mainly because
 1249          * emulators don't provide an entry point for initialization.
 1250          */
 1251         td->td_pcb->pcb_flags &= ~FP_SOFTFP;
 1252         pcb->pcb_initial_npxcw = __INITIAL_NPXCW__;
 1253 
 1254         /*
 1255          * Drop the FP state if we hold it, so that the process gets a
 1256          * clean FP state if it uses the FPU again.
 1257          */
 1258         fpstate_drop(td);
 1259 
 1260         /*
 1261          * XXX - Linux emulator
  1262          * Make sure edx is 0x0 on entry. Linux binaries depend
 1263          * on it.
 1264          */
 1265         td->td_retval[1] = 0;
 1266 }
 1267 
 1268 void
 1269 cpu_setregs(void)
 1270 {
 1271         unsigned int cr0;
 1272 
 1273         cr0 = rcr0();
 1274 
 1275         /*
 1276          * CR0_MP, CR0_NE and CR0_TS are set for NPX (FPU) support:
 1277          *
 1278          * Prepare to trap all ESC (i.e., NPX) instructions and all WAIT
 1279          * instructions.  We must set the CR0_MP bit and use the CR0_TS
 1280          * bit to control the trap, because setting the CR0_EM bit does
 1281          * not cause WAIT instructions to trap.  It's important to trap
 1282          * WAIT instructions - otherwise the "wait" variants of no-wait
 1283          * control instructions would degenerate to the "no-wait" variants
 1284          * after FP context switches but work correctly otherwise.  It's
 1285          * particularly important to trap WAITs when there is no NPX -
 1286          * otherwise the "wait" variants would always degenerate.
 1287          *
 1288          * Try setting CR0_NE to get correct error reporting on 486DX's.
 1289          * Setting it should fail or do nothing on lesser processors.
 1290          */
 1291         cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM;
 1292         load_cr0(cr0);
 1293         load_gs(_udatasel);
 1294 }
 1295 
 1296 u_long bootdev;         /* not a struct cdev *- encoding is different */
 1297 SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev,
  1298         CTLFLAG_RD, &bootdev, 0, "Guessed boot device (not in struct cdev * format)");
 1299 
 1300 /*
 1301  * Initialize 386 and configure to run kernel
 1302  */
 1303 
 1304 /*
 1305  * Initialize segments & interrupt table
 1306  */
 1307 
 1308 int _default_ldt;
 1309 union descriptor gdt[NGDT * MAXCPU];    /* global descriptor table */
 1310 static struct gate_descriptor idt0[NIDT];
 1311 struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */
 1312 union descriptor ldt[NLDT];             /* local descriptor table */
 1313 struct region_descriptor r_gdt, r_idt;  /* table descriptors */
 1314 struct mtx dt_lock;                     /* lock for GDT and LDT */
 1315 
 1316 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
 1317 extern int has_f00f_bug;
 1318 #endif
 1319 
 1320 static struct i386tss dblfault_tss;
 1321 static char dblfault_stack[PAGE_SIZE];
 1322 
 1323 extern  vm_offset_t     proc0kstack;
 1324 
 1325 
 1326 /*
 1327  * software prototypes -- in more palatable form.
 1328  *
 1329  * GCODE_SEL through GUDATA_SEL must be in this order for syscall/sysret
 1330  * GUFS_SEL and GUGS_SEL must be in this order (swtch.s knows it)
 1331  */
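
/*
 * Editor's note: these soft descriptors are packed into the hardware
 * descriptor format at boot; a sketch of the conversion init386()
 * performs via ssdtosd() (loop paraphrased, not copied from below):
 */
#ifdef notdef
	int x;

	for (x = 0; x < NGDT; x++)
		ssdtosd(&gdt_segs[x], &gdt[x].sd);
#endif
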
 1332 struct soft_segment_descriptor gdt_segs[] = {
 1333 /* GNULL_SEL    0 Null Descriptor */
 1334 {       .ssd_base = 0x0,
 1335         .ssd_limit = 0x0,
 1336         .ssd_type = 0,
 1337         .ssd_dpl = 0,
 1338         .ssd_p = 0,
 1339         .ssd_xx = 0, .ssd_xx1 = 0,
 1340         .ssd_def32 = 0,
 1341         .ssd_gran = 0           },
 1342 /* GPRIV_SEL    1 SMP Per-Processor Private Data Descriptor */
 1343 {       .ssd_base = 0x0,
 1344         .ssd_limit = 0xfffff,
 1345         .ssd_type = SDT_MEMRWA,
 1346         .ssd_dpl = 0,
 1347         .ssd_p = 1,
 1348         .ssd_xx = 0, .ssd_xx1 = 0,
 1349         .ssd_def32 = 1,
 1350         .ssd_gran = 1           },
 1351 /* GUFS_SEL     2 %fs Descriptor for user */
 1352 {       .ssd_base = 0x0,
 1353         .ssd_limit = 0xfffff,
 1354         .ssd_type = SDT_MEMRWA,
 1355         .ssd_dpl = SEL_UPL,
 1356         .ssd_p = 1,
 1357         .ssd_xx = 0, .ssd_xx1 = 0,
 1358         .ssd_def32 = 1,
 1359         .ssd_gran = 1           },
 1360 /* GUGS_SEL     3 %gs Descriptor for user */
 1361 {       .ssd_base = 0x0,
 1362         .ssd_limit = 0xfffff,
 1363         .ssd_type = SDT_MEMRWA,
 1364         .ssd_dpl = SEL_UPL,
 1365         .ssd_p = 1,
 1366         .ssd_xx = 0, .ssd_xx1 = 0,
 1367         .ssd_def32 = 1,
 1368         .ssd_gran = 1           },
 1369 /* GCODE_SEL    4 Code Descriptor for kernel */
 1370 {       .ssd_base = 0x0,
 1371         .ssd_limit = 0xfffff,
 1372         .ssd_type = SDT_MEMERA,
 1373         .ssd_dpl = 0,
 1374         .ssd_p = 1,
 1375         .ssd_xx = 0, .ssd_xx1 = 0,
 1376         .ssd_def32 = 1,
 1377         .ssd_gran = 1           },
 1378 /* GDATA_SEL    5 Data Descriptor for kernel */
 1379 {       .ssd_base = 0x0,
 1380         .ssd_limit = 0xfffff,
 1381         .ssd_type = SDT_MEMRWA,
 1382         .ssd_dpl = 0,
 1383         .ssd_p = 1,
 1384         .ssd_xx = 0, .ssd_xx1 = 0,
 1385         .ssd_def32 = 1,
 1386         .ssd_gran = 1           },
 1387 /* GUCODE_SEL   6 Code Descriptor for user */
 1388 {       .ssd_base = 0x0,
 1389         .ssd_limit = 0xfffff,
 1390         .ssd_type = SDT_MEMERA,
 1391         .ssd_dpl = SEL_UPL,
 1392         .ssd_p = 1,
 1393         .ssd_xx = 0, .ssd_xx1 = 0,
 1394         .ssd_def32 = 1,
 1395         .ssd_gran = 1           },
 1396 /* GUDATA_SEL   7 Data Descriptor for user */
 1397 {       .ssd_base = 0x0,
 1398         .ssd_limit = 0xfffff,
 1399         .ssd_type = SDT_MEMRWA,
 1400         .ssd_dpl = SEL_UPL,
 1401         .ssd_p = 1,
 1402         .ssd_xx = 0, .ssd_xx1 = 0,
 1403         .ssd_def32 = 1,
 1404         .ssd_gran = 1           },
 1405 /* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */
 1406 {       .ssd_base = 0x400,
 1407         .ssd_limit = 0xfffff,
 1408         .ssd_type = SDT_MEMRWA,
 1409         .ssd_dpl = 0,
 1410         .ssd_p = 1,
 1411         .ssd_xx = 0, .ssd_xx1 = 0,
 1412         .ssd_def32 = 1,
 1413         .ssd_gran = 1           },
 1414 /* GPROC0_SEL   9 Proc 0 Tss Descriptor */
 1415 {
 1416         .ssd_base = 0x0,
 1417         .ssd_limit = sizeof(struct i386tss)-1,
 1418         .ssd_type = SDT_SYS386TSS,
 1419         .ssd_dpl = 0,
 1420         .ssd_p = 1,
 1421         .ssd_xx = 0, .ssd_xx1 = 0,
 1422         .ssd_def32 = 0,
 1423         .ssd_gran = 0           },
 1424 /* GLDT_SEL     10 LDT Descriptor */
 1425 {       .ssd_base = (int) ldt,
 1426         .ssd_limit = sizeof(ldt)-1,
 1427         .ssd_type = SDT_SYSLDT,
 1428         .ssd_dpl = SEL_UPL,
 1429         .ssd_p = 1,
 1430         .ssd_xx = 0, .ssd_xx1 = 0,
 1431         .ssd_def32 = 0,
 1432         .ssd_gran = 0           },
 1433 /* GUSERLDT_SEL 11 User LDT Descriptor per process */
 1434 {       .ssd_base = (int) ldt,
 1435         .ssd_limit = (512 * sizeof(union descriptor)-1),
 1436         .ssd_type = SDT_SYSLDT,
 1437         .ssd_dpl = 0,
 1438         .ssd_p = 1,
 1439         .ssd_xx = 0, .ssd_xx1 = 0,
 1440         .ssd_def32 = 0,
 1441         .ssd_gran = 0           },
 1442 /* GPANIC_SEL   12 Panic Tss Descriptor */
 1443 {       .ssd_base = (int) &dblfault_tss,
 1444         .ssd_limit = sizeof(struct i386tss)-1,
 1445         .ssd_type = SDT_SYS386TSS,
 1446         .ssd_dpl = 0,
 1447         .ssd_p = 1,
 1448         .ssd_xx = 0, .ssd_xx1 = 0,
 1449         .ssd_def32 = 0,
 1450         .ssd_gran = 0           },
 1451 /* GBIOSCODE32_SEL 13 BIOS 32-bit interface (32bit Code) */
 1452 {       .ssd_base = 0,
 1453         .ssd_limit = 0xfffff,
 1454         .ssd_type = SDT_MEMERA,
 1455         .ssd_dpl = 0,
 1456         .ssd_p = 1,
 1457         .ssd_xx = 0, .ssd_xx1 = 0,
 1458         .ssd_def32 = 0,
 1459         .ssd_gran = 1           },
 1460 /* GBIOSCODE16_SEL 14 BIOS 32-bit interface (16bit Code) */
 1461 {       .ssd_base = 0,
 1462         .ssd_limit = 0xfffff,
 1463         .ssd_type = SDT_MEMERA,
 1464         .ssd_dpl = 0,
 1465         .ssd_p = 1,
 1466         .ssd_xx = 0, .ssd_xx1 = 0,
 1467         .ssd_def32 = 0,
 1468         .ssd_gran = 1           },
 1469 /* GBIOSDATA_SEL 15 BIOS 32-bit interface (Data) */
 1470 {       .ssd_base = 0,
 1471         .ssd_limit = 0xfffff,
 1472         .ssd_type = SDT_MEMRWA,
 1473         .ssd_dpl = 0,
 1474         .ssd_p = 1,
 1475         .ssd_xx = 0, .ssd_xx1 = 0,
 1476         .ssd_def32 = 1,
 1477         .ssd_gran = 1           },
 1478 /* GBIOSUTIL_SEL 16 BIOS 16-bit interface (Utility) */
 1479 {       .ssd_base = 0,
 1480         .ssd_limit = 0xfffff,
 1481         .ssd_type = SDT_MEMRWA,
 1482         .ssd_dpl = 0,
 1483         .ssd_p = 1,
 1484         .ssd_xx = 0, .ssd_xx1 = 0,
 1485         .ssd_def32 = 0,
 1486         .ssd_gran = 1           },
 1487 /* GBIOSARGS_SEL 17 BIOS 16-bit interface (Arguments) */
 1488 {       .ssd_base = 0,
 1489         .ssd_limit = 0xfffff,
 1490         .ssd_type = SDT_MEMRWA,
 1491         .ssd_dpl = 0,
 1492         .ssd_p = 1,
 1493         .ssd_xx = 0, .ssd_xx1 = 0,
 1494         .ssd_def32 = 0,
 1495         .ssd_gran = 1           },
 1496 /* GNDIS_SEL    18 NDIS Descriptor */
 1497 {       .ssd_base = 0x0,
 1498         .ssd_limit = 0x0,
 1499         .ssd_type = 0,
 1500         .ssd_dpl = 0,
 1501         .ssd_p = 0,
 1502         .ssd_xx = 0, .ssd_xx1 = 0,
 1503         .ssd_def32 = 0,
 1504         .ssd_gran = 0           },
 1505 };
 1506 
 1507 static struct soft_segment_descriptor ldt_segs[] = {
 1508         /* Null Descriptor - overwritten by call gate */
 1509 {       .ssd_base = 0x0,
 1510         .ssd_limit = 0x0,
 1511         .ssd_type = 0,
 1512         .ssd_dpl = 0,
 1513         .ssd_p = 0,
 1514         .ssd_xx = 0, .ssd_xx1 = 0,
 1515         .ssd_def32 = 0,
 1516         .ssd_gran = 0           },
 1517         /* Null Descriptor - overwritten by call gate */
 1518 {       .ssd_base = 0x0,
 1519         .ssd_limit = 0x0,
 1520         .ssd_type = 0,
 1521         .ssd_dpl = 0,
 1522         .ssd_p = 0,
 1523         .ssd_xx = 0, .ssd_xx1 = 0,
 1524         .ssd_def32 = 0,
 1525         .ssd_gran = 0           },
 1526         /* Null Descriptor - overwritten by call gate */
 1527 {       .ssd_base = 0x0,
 1528         .ssd_limit = 0x0,
 1529         .ssd_type = 0,
 1530         .ssd_dpl = 0,
 1531         .ssd_p = 0,
 1532         .ssd_xx = 0, .ssd_xx1 = 0,
 1533         .ssd_def32 = 0,
 1534         .ssd_gran = 0           },
 1535         /* Code Descriptor for user */
 1536 {       .ssd_base = 0x0,
 1537         .ssd_limit = 0xfffff,
 1538         .ssd_type = SDT_MEMERA,
 1539         .ssd_dpl = SEL_UPL,
 1540         .ssd_p = 1,
 1541         .ssd_xx = 0, .ssd_xx1 = 0,
 1542         .ssd_def32 = 1,
 1543         .ssd_gran = 1           },
 1544         /* Null Descriptor - overwritten by call gate */
 1545 {       .ssd_base = 0x0,
 1546         .ssd_limit = 0x0,
 1547         .ssd_type = 0,
 1548         .ssd_dpl = 0,
 1549         .ssd_p = 0,
 1550         .ssd_xx = 0, .ssd_xx1 = 0,
 1551         .ssd_def32 = 0,
 1552         .ssd_gran = 0           },
 1553         /* Data Descriptor for user */
 1554 {       .ssd_base = 0x0,
 1555         .ssd_limit = 0xfffff,
 1556         .ssd_type = SDT_MEMRWA,
 1557         .ssd_dpl = SEL_UPL,
 1558         .ssd_p = 1,
 1559         .ssd_xx = 0, .ssd_xx1 = 0,
 1560         .ssd_def32 = 1,
 1561         .ssd_gran = 1           },
 1562 };
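/*
 * Editor's note: in the tables above, .ssd_gran = 1 means the 20-bit limit
 * counts 4KB pages, so .ssd_limit = 0xfffff describes the full 4GB address
 * space and protection is left to the page level.  A standalone sketch of
 * that arithmetic (illustrative only, not built):
 */
#if 0
#include <stdio.h>

int
main(void)
{
        unsigned long long limit = 0xfffff;             /* 20-bit limit */
        unsigned long long bytes = (limit + 1) << 12;   /* gran=1: 4KB units */

        printf("segment spans %#llx bytes\n", bytes);   /* 0x100000000 = 4GB */
        return (0);
}
#endif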
 1563 
 1564 void
 1565 setidt(idx, func, typ, dpl, selec)
 1566         int idx;
 1567         inthand_t *func;
 1568         int typ;
 1569         int dpl;
 1570         int selec;
 1571 {
 1572         struct gate_descriptor *ip;
 1573 
 1574         ip = idt + idx;
 1575         ip->gd_looffset = (int)func;
 1576         ip->gd_selector = selec;
 1577         ip->gd_stkcpy = 0;
 1578         ip->gd_xx = 0;
 1579         ip->gd_type = typ;
 1580         ip->gd_dpl = dpl;
 1581         ip->gd_p = 1;
 1582         ip->gd_hioffset = ((int)func) >> 16;
 1583 }
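/*
 * Editor's sketch: setidt() splits the 32-bit handler address across the
 * two 16-bit offset halves of a gate descriptor (gd_looffset and
 * gd_hioffset).  A standalone round-trip check with a hypothetical handler
 * address (illustrative only, not built):
 */
#if 0
#include <assert.h>
#include <stdint.h>

int
main(void)
{
        uint32_t func = 0xc0123456;             /* hypothetical handler */
        uint16_t lo = func & 0xffff;            /* gd_looffset */
        uint16_t hi = func >> 16;               /* gd_hioffset */

        assert(((uint32_t)hi << 16 | lo) == func);
        return (0);
}
#endif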
 1584 
 1585 extern inthand_t
 1586         IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
 1587         IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
 1588         IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
 1589         IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
 1590         IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);
 1591 
 1592 #ifdef DDB
 1593 /*
 1594  * Display the index and function name of any IDT entries that don't use
 1595  * the default 'rsvd' entry point.
 1596  */
 1597 DB_SHOW_COMMAND(idt, db_show_idt)
 1598 {
 1599         struct gate_descriptor *ip;
 1600         int idx;
 1601         uintptr_t func;
 1602 
 1603         ip = idt;
 1604         for (idx = 0; idx < NIDT && !db_pager_quit; idx++) {
 1605                 func = (ip->gd_hioffset << 16 | ip->gd_looffset);
 1606                 if (func != (uintptr_t)&IDTVEC(rsvd)) {
 1607                         db_printf("%3d\t", idx);
 1608                         db_printsym(func, DB_STGY_PROC);
 1609                         db_printf("\n");
 1610                 }
 1611                 ip++;
 1612         }
 1613 }
 1614 
 1615 /* Show privileged registers. */
 1616 DB_SHOW_COMMAND(sysregs, db_show_sysregs)
 1617 {
 1618         uint64_t idtr, gdtr;
 1619 
 1620         idtr = ridt();
 1621         db_printf("idtr\t0x%08x/%04x\n",
 1622             (u_int)(idtr >> 16), (u_int)idtr & 0xffff);
 1623         gdtr = rgdt();
 1624         db_printf("gdtr\t0x%08x/%04x\n",
 1625             (u_int)(gdtr >> 16), (u_int)gdtr & 0xffff);
 1626         db_printf("ldtr\t0x%04x\n", rldt());
 1627         db_printf("tr\t0x%04x\n", rtr());
 1628         db_printf("cr0\t0x%08x\n", rcr0());
 1629         db_printf("cr2\t0x%08x\n", rcr2());
 1630         db_printf("cr3\t0x%08x\n", rcr3());
 1631         db_printf("cr4\t0x%08x\n", rcr4());
 1632 }
 1633 #endif
 1634 
 1635 void
 1636 sdtossd(sd, ssd)
 1637         struct segment_descriptor *sd;
 1638         struct soft_segment_descriptor *ssd;
 1639 {
 1640         ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
 1641         ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
 1642         ssd->ssd_type  = sd->sd_type;
 1643         ssd->ssd_dpl   = sd->sd_dpl;
 1644         ssd->ssd_p     = sd->sd_p;
 1645         ssd->ssd_def32 = sd->sd_def32;
 1646         ssd->ssd_gran  = sd->sd_gran;
 1647 }
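/*
 * Editor's sketch: the hardware descriptor keeps the 32-bit base as a
 * 24-bit low part and an 8-bit high part, which sdtossd() reassembles as
 * (sd_hibase << 24) | sd_lobase; the 20-bit limit is split 16/4 the same
 * way.  A standalone round-trip check of the base (illustrative only):
 */
#if 0
#include <assert.h>
#include <stdint.h>

int
main(void)
{
        uint32_t base = 0xc0ffee42;             /* hypothetical base */
        uint32_t lobase = base & 0xffffff;      /* sd_lobase: low 24 bits */
        uint32_t hibase = base >> 24;           /* sd_hibase: high 8 bits */

        assert((hibase << 24 | lobase) == base);
        return (0);
}
#endif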
 1648 
 1649 /*
 1650  * Populate the (physmap) array with base/bound pairs describing the
 1651  * available physical memory in the system, then test this memory and
 1652  * build the phys_avail array describing the actually-available memory.
 1653  *
 1654  * If we cannot accurately determine the physical memory map, then use
 1655  * the value from the 0xE801 call, and failing that, the RTC.
 1656  *
 1657  * Total memory size may be set by the kernel environment variable
 1658  * hw.physmem or the compile-time define MAXMEM.
 1659  *
 1660  * XXX first should be vm_paddr_t.
 1661  */
 1662 static void
 1663 getmemsize(int first)
 1664 {
 1665         int i, off, physmap_idx, pa_indx, da_indx;
 1666         int pg_n;
 1667         u_long physmem_tunable;
 1668         u_int extmem, under16;
 1669         vm_paddr_t pa, physmap[PHYSMAP_SIZE];
 1670         pt_entry_t *pte;
 1671         quad_t dcons_addr, dcons_size;
 1672 
 1673         bzero(physmap, sizeof(physmap));
 1674 
 1675         /* XXX - some EPSON machines can't use PG_N */
 1676         pg_n = PG_N;
 1677         if (pc98_machine_type & M_EPSON_PC98) {
 1678                 switch (epson_machine_id) {
 1679 #ifdef WB_CACHE
 1680                 default:
 1681 #endif
 1682                 case EPSON_PC486_HX:
 1683                 case EPSON_PC486_HG:
 1684                 case EPSON_PC486_HA:
 1685                         pg_n = 0;
 1686                         break;
 1687                 }
 1688         }
 1689 
 1690         /*
 1691          * Perform "base memory" related probes & setup
 1692          */
 1693         under16 = pc98_getmemsize(&basemem, &extmem);
 1694         if (basemem > 640) {
 1695                 printf("Preposterous BIOS basemem of %uK, truncating to 640K\n",
 1696                         basemem);
 1697                 basemem = 640;
 1698         }
 1699 
 1700         /*
 1701          * XXX if biosbasemem is now < 640, there is a `hole'
 1702          * between the end of base memory and the start of
 1703          * ISA memory.  The hole may be empty or it may
 1704          * contain BIOS code or data.  Map it read/write so
 1705          * that the BIOS can write to it.  (Memory from 0 to
 1706          * the physical end of the kernel is mapped read-only
 1707          * to begin with and then parts of it are remapped.
 1708          * The parts that aren't remapped form holes that
 1709          * remain read-only and are unused by the kernel.
 1710          * The base memory area is below the physical end of
 1711          * the kernel and right now forms a read-only hole.
 1712          * The part of it from PAGE_SIZE to
 1713          * (trunc_page(biosbasemem * 1024) - 1) will be
 1714          * remapped and used by the kernel later.)
 1715          *
 1716          * This code is similar to the code used in
 1717          * pmap_mapdev, but since no memory needs to be
 1718          * allocated we simply change the mapping.
 1719          */
 1720         for (pa = trunc_page(basemem * 1024);
 1721              pa < ISA_HOLE_START; pa += PAGE_SIZE)
 1722                 pmap_kenter(KERNBASE + pa, pa);
 1723 
 1724         /*
 1725          * if basemem != 640, map pages r/w into vm86 page table so 
 1726          * that the bios can scribble on it.
 1727          */
 1728         pte = (pt_entry_t *)vm86paddr;
 1729         for (i = basemem / 4; i < 160; i++)
 1730                 pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U;
 1731 
 1732         physmap[0] = 0;
 1733         physmap[1] = basemem * 1024;
 1734         physmap_idx = 2;
 1735         physmap[physmap_idx] = 0x100000;
 1736         physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024;
 1737 
 1738         /*
 1739          * Now, physmap contains a map of physical memory.
 1740          */
 1741 
 1742 #ifdef SMP
 1743         /* make hole for AP bootstrap code */
 1744         physmap[1] = mp_bootaddress(physmap[1]);
 1745 #endif
 1746 
 1747         /*
 1748          * Maxmem isn't the "maximum memory", it's one larger than the
 1749          * highest page of the physical address space.  It should be
 1750          * called something like "Maxphyspage".  We may adjust this 
 1751          * based on ``hw.physmem'' and the results of the memory test.
 1752          */
 1753         Maxmem = atop(physmap[physmap_idx + 1]);
 1754 
 1755 #ifdef MAXMEM
 1756         Maxmem = MAXMEM / 4;
 1757 #endif
 1758 
 1759         if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable))
 1760                 Maxmem = atop(physmem_tunable);
 1761 
 1762         if (atop(physmap[physmap_idx + 1]) != Maxmem &&
 1763             (boothowto & RB_VERBOSE))
 1764                 printf("Physical memory use set to %ldK\n", Maxmem * 4);
 1765 
 1766         /*
 1767          * If Maxmem has been increased beyond what the system has detected,
 1768          * extend the last memory segment to the new limit.
 1769          */ 
 1770         if (atop(physmap[physmap_idx + 1]) < Maxmem)
 1771                 physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem);
 1772 
 1773         /*
 1774          * We need to divide the chunk if Maxmem is larger than 16MB and
 1775          * the area under 16MB is not entirely memory, i.e. when
 1776          * (1) the system area (the 15-16MB region) is cut off, or
 1777          * (2) the extended memory lies only above 16MB (e.g. Melco "HYPERMEMORY").
 1778          */
 1779         if ((under16 != 16 * 1024) && (extmem > 15 * 1024)) {
 1780                 /* 15M - 16M region is cut off, so need to divide chunk */
 1781                 physmap[physmap_idx + 1] = under16 * 1024;
 1782                 physmap_idx += 2;
 1783                 physmap[physmap_idx] = 0x1000000;
 1784                 physmap[physmap_idx + 1] = physmap[2] + extmem * 1024;
 1785         }
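        /*
         * Editor's worked example (illustrative numbers): with 640K of
         * base memory, 32MB of extended memory, and the 15-16MB system
         * area cut off (under16 == 15 * 1024), the split above turns
         *
         *      physmap = { 0, 640K, 1M, 33M }
         * into
         *      physmap = { 0, 640K, 1M, 15M, 16M, 33M }
         *
         * so the 15-16MB region is never treated as RAM.
         */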
 1786 
 1787         /* call pmap initialization to make new kernel address space */
 1788         pmap_bootstrap(first);
 1789 
 1790         /*
 1791          * Size up each available chunk of physical memory.
 1792          */
 1793         physmap[0] = PAGE_SIZE;         /* mask off page 0 */
 1794         pa_indx = 0;
 1795         da_indx = 1;
 1796         phys_avail[pa_indx++] = physmap[0];
 1797         phys_avail[pa_indx] = physmap[0];
 1798         dump_avail[da_indx] = physmap[0];
 1799         pte = CMAP1;
 1800 
 1801         /*
 1802          * Get dcons buffer address
 1803          */
 1804         if (getenv_quad("dcons.addr", &dcons_addr) == 0 ||
 1805             getenv_quad("dcons.size", &dcons_size) == 0)
 1806                 dcons_addr = 0;
 1807 
 1808         /*
 1809          * physmap is in bytes, so when converting to page boundaries,
 1810          * round up the start address and round down the end address.
 1811          */
 1812         for (i = 0; i <= physmap_idx; i += 2) {
 1813                 vm_paddr_t end;
 1814 
 1815                 end = ptoa((vm_paddr_t)Maxmem);
 1816                 if (physmap[i + 1] < end)
 1817                         end = trunc_page(physmap[i + 1]);
 1818                 for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
 1819                         int tmp, page_bad, full;
 1820                         int *ptr = (int *)CADDR1;
 1821 
 1822                         full = FALSE;
 1823                         /*
 1824                          * block out kernel memory as not available.
 1825                          */
 1826                         if (pa >= KERNLOAD && pa < first)
 1827                                 goto do_dump_avail;
 1828 
 1829                         /*
 1830                          * block out dcons buffer
 1831                          */
 1832                         if (dcons_addr > 0
 1833                             && pa >= trunc_page(dcons_addr)
 1834                             && pa < dcons_addr + dcons_size)
 1835                                 goto do_dump_avail;
 1836 
 1837                         page_bad = FALSE;
 1838 
 1839                         /*
 1840                          * map page into kernel: valid, read/write,non-cacheable
 1841                          */
 1842                         *pte = pa | PG_V | PG_RW | pg_n;
 1843                         invltlb();
 1844 
 1845                         tmp = *(int *)ptr;
 1846                         /*
 1847                          * Test for alternating 1's and 0's
 1848                          */
 1849                         *(volatile int *)ptr = 0xaaaaaaaa;
 1850                         if (*(volatile int *)ptr != 0xaaaaaaaa)
 1851                                 page_bad = TRUE;
 1852                         /*
 1853                          * Test for alternating 0's and 1's
 1854                          */
 1855                         *(volatile int *)ptr = 0x55555555;
 1856                         if (*(volatile int *)ptr != 0x55555555)
 1857                                 page_bad = TRUE;
 1858                         /*
 1859                          * Test for all 1's
 1860                          */
 1861                         *(volatile int *)ptr = 0xffffffff;
 1862                         if (*(volatile int *)ptr != 0xffffffff)
 1863                                 page_bad = TRUE;
 1864                         /*
 1865                          * Test for all 0's
 1866                          */
 1867                         *(volatile int *)ptr = 0x0;
 1868                         if (*(volatile int *)ptr != 0x0)
 1869                                 page_bad = TRUE;
 1870                         /*
 1871                          * Restore original value.
 1872                          */
 1873                         *(int *)ptr = tmp;
 1874 
 1875                         /*
 1876                          * Adjust array of valid/good pages.
 1877                          */
 1878                         if (page_bad == TRUE)
 1879                                 continue;
 1880                         /*
 1881                          * If this good page is a continuation of the
 1882                          * previous set of good pages, then just increase
 1883                          * the end pointer. Otherwise start a new chunk.
 1884                          * Note that the recorded "end" points one past
 1885                          * the last page, making the range >= start and < end.
 1886                          * If we're also doing a speculative memory
 1887                          * test and we are at or past the end, bump up Maxmem
 1888                          * so that we keep going. The first bad page
 1889                          * will terminate the loop.
 1890                          */
 1891                         if (phys_avail[pa_indx] == pa) {
 1892                                 phys_avail[pa_indx] += PAGE_SIZE;
 1893                         } else {
 1894                                 pa_indx++;
 1895                                 if (pa_indx == PHYS_AVAIL_ARRAY_END) {
 1896                                         printf(
 1897                 "Too many holes in the physical address space, giving up\n");
 1898                                         pa_indx--;
 1899                                         full = TRUE;
 1900                                         goto do_dump_avail;
 1901                                 }
 1902                                 phys_avail[pa_indx++] = pa;     /* start */
 1903                                 phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */
 1904                         }
 1905                         physmem++;
 1906 do_dump_avail:
 1907                         if (dump_avail[da_indx] == pa) {
 1908                                 dump_avail[da_indx] += PAGE_SIZE;
 1909                         } else {
 1910                                 da_indx++;
 1911                                 if (da_indx == DUMP_AVAIL_ARRAY_END) {
 1912                                         da_indx--;
 1913                                         goto do_next;
 1914                                 }
 1915                                 dump_avail[da_indx++] = pa;     /* start */
 1916                                 dump_avail[da_indx] = pa + PAGE_SIZE; /* end */
 1917                         }
 1918 do_next:
 1919                         if (full)
 1920                                 break;
 1921                 }
 1922         }
 1923         *pte = 0;
 1924         invltlb();
 1925 
 1926         /*
 1927          * XXX
 1928          * The last chunk must contain at least one page plus the message
 1929          * buffer to avoid complicating other code (message buffer address
 1930          * calculation, etc.).
 1931          */
 1932         while (phys_avail[pa_indx - 1] + PAGE_SIZE +
 1933             round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) {
 1934                 physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
 1935                 phys_avail[pa_indx--] = 0;
 1936                 phys_avail[pa_indx--] = 0;
 1937         }
 1938 
 1939         Maxmem = atop(phys_avail[pa_indx]);
 1940 
 1941         /* Trim off space for the message buffer. */
 1942         phys_avail[pa_indx] -= round_page(MSGBUF_SIZE);
 1943 
 1944         /* Map the message buffer. */
 1945         for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE)
 1946                 pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] +
 1947                     off);
 1948 }
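/*
 * Editor's sketch: the page test in getmemsize() above, reduced to a
 * standalone function.  It writes the same four patterns through a
 * volatile pointer, checks each read-back, and restores the original word
 * (an illustrative harness with assumed names, not built):
 */
#if 0
#include <stdio.h>

static int
word_is_good(volatile unsigned int *p)
{
        static const unsigned int patterns[] =
            { 0xaaaaaaaa, 0x55555555, 0xffffffff, 0x00000000 };
        unsigned int saved = *p;        /* preserve the original contents */
        int i, good = 1;

        for (i = 0; i < 4; i++) {
                *p = patterns[i];
                if (*p != patterns[i])
                        good = 0;       /* stuck or mirrored bits */
        }
        *p = saved;
        return (good);
}

int
main(void)
{
        unsigned int word;

        printf("page word is %s\n", word_is_good(&word) ? "good" : "bad");
        return (0);
}
#endif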
 1949 
 1950 void
 1951 init386(first)
 1952         int first;
 1953 {
 1954         struct gate_descriptor *gdp;
 1955         int gsel_tss, metadata_missing, x;
 1956         struct pcpu *pc;
 1957         int pa;
 1958 
 1959         thread0.td_kstack = proc0kstack;
 1960         thread0.td_pcb = (struct pcb *)
 1961            (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1;
 1962 
 1963         /*
 1964          * This may be done better later if it gets more high level
 1965          * components in it. If so just link td->td_proc here.
 1966          */
 1967         proc_linkup0(&proc0, &thread0);
 1968 
 1969         /*
 1970          * Initialize DMAC
 1971          */
 1972         pc98_init_dmac();
 1973 
 1974         metadata_missing = 0;
 1975         if (bootinfo.bi_modulep) {
 1976                 preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE;
 1977                 preload_bootstrap_relocate(KERNBASE);
 1978         } else {
 1979                 metadata_missing = 1;
 1980         }
 1981         if (envmode == 1)
 1982                 kern_envp = static_env;
 1983         else if (bootinfo.bi_envp)
 1984                 kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE;
 1985 
 1986         /* Init basic tunables, hz etc */
 1987         init_param1();
 1988 
 1989         /*
 1990          * Make gdt memory segments.  All segments cover the full 4GB
 1991          * of address space and permissions are enforced at page level.
 1992          */
 1993         gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1);
 1994         gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1);
 1995         gdt_segs[GUCODE_SEL].ssd_limit = atop(0 - 1);
 1996         gdt_segs[GUDATA_SEL].ssd_limit = atop(0 - 1);
 1997         gdt_segs[GUFS_SEL].ssd_limit = atop(0 - 1);
 1998         gdt_segs[GUGS_SEL].ssd_limit = atop(0 - 1);
 1999 
 2000         pc = &__pcpu[0];
 2001         gdt_segs[GPRIV_SEL].ssd_limit = atop(0 - 1);
 2002         gdt_segs[GPRIV_SEL].ssd_base = (int) pc;
 2003         gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;
 2004 
 2005         for (x = 0; x < NGDT; x++)
 2006                 ssdtosd(&gdt_segs[x], &gdt[x].sd);
 2007 
 2008         r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
 2009         r_gdt.rd_base =  (int) gdt;
 2010         mtx_init(&dt_lock, "descriptor tables", NULL, MTX_SPIN);
 2011         lgdt(&r_gdt);
 2012 
 2013         pcpu_init(pc, 0, sizeof(struct pcpu));
 2014         for (pa = first; pa < first + DPCPU_SIZE; pa += PAGE_SIZE)
 2015                 pmap_kenter(pa + KERNBASE, pa);
 2016         dpcpu_init((void *)(first + KERNBASE), 0);
 2017         first += DPCPU_SIZE;
 2018 
 2019         PCPU_SET(prvspace, pc);
 2020         PCPU_SET(curthread, &thread0);
 2021         PCPU_SET(curpcb, thread0.td_pcb);
 2022 
 2023         /*
 2024          * Initialize mutexes.
 2025          *
 2026          * icu_lock: in order to allow an interrupt to occur in a critical
 2027          *           section, to set pcpu->ipending (etc...) properly, we
 2028          *           must be able to get the icu lock, so it can't be
 2029          *           under witness.
 2030          */
 2031         mutex_init();
 2032         mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS | MTX_NOPROFILE);
 2033 
 2034         /* make ldt memory segments */
 2035         ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1);
 2036         ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1);
 2037         for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++)
 2038                 ssdtosd(&ldt_segs[x], &ldt[x].sd);
 2039 
 2040         _default_ldt = GSEL(GLDT_SEL, SEL_KPL);
 2041         lldt(_default_ldt);
 2042         PCPU_SET(currentldt, _default_ldt);
 2043 
 2044         /* exceptions */
 2045         for (x = 0; x < NIDT; x++)
 2046                 setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL,
 2047                     GSEL(GCODE_SEL, SEL_KPL));
 2048         setidt(IDT_DE, &IDTVEC(div),  SDT_SYS386TGT, SEL_KPL,
 2049             GSEL(GCODE_SEL, SEL_KPL));
 2050         setidt(IDT_DB, &IDTVEC(dbg),  SDT_SYS386IGT, SEL_KPL,
 2051             GSEL(GCODE_SEL, SEL_KPL));
 2052         setidt(IDT_NMI, &IDTVEC(nmi),  SDT_SYS386IGT, SEL_KPL,
 2053             GSEL(GCODE_SEL, SEL_KPL));
 2054         setidt(IDT_BP, &IDTVEC(bpt),  SDT_SYS386IGT, SEL_UPL,
 2055             GSEL(GCODE_SEL, SEL_KPL));
 2056         setidt(IDT_OF, &IDTVEC(ofl),  SDT_SYS386TGT, SEL_UPL,
 2057             GSEL(GCODE_SEL, SEL_KPL));
 2058         setidt(IDT_BR, &IDTVEC(bnd),  SDT_SYS386TGT, SEL_KPL,
 2059             GSEL(GCODE_SEL, SEL_KPL));
 2060         setidt(IDT_UD, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL,
 2061             GSEL(GCODE_SEL, SEL_KPL));
 2062         setidt(IDT_NM, &IDTVEC(dna),  SDT_SYS386TGT, SEL_KPL,
 2063             GSEL(GCODE_SEL, SEL_KPL));
 2064         setidt(IDT_DF, 0,  SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL));
 2065         setidt(IDT_FPUGP, &IDTVEC(fpusegm),  SDT_SYS386TGT, SEL_KPL,
 2066             GSEL(GCODE_SEL, SEL_KPL));
 2067         setidt(IDT_TS, &IDTVEC(tss),  SDT_SYS386TGT, SEL_KPL,
 2068             GSEL(GCODE_SEL, SEL_KPL));
 2069         setidt(IDT_NP, &IDTVEC(missing),  SDT_SYS386TGT, SEL_KPL,
 2070             GSEL(GCODE_SEL, SEL_KPL));
 2071         setidt(IDT_SS, &IDTVEC(stk),  SDT_SYS386TGT, SEL_KPL,
 2072             GSEL(GCODE_SEL, SEL_KPL));
 2073         setidt(IDT_GP, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL,
 2074             GSEL(GCODE_SEL, SEL_KPL));
 2075         setidt(IDT_PF, &IDTVEC(page),  SDT_SYS386IGT, SEL_KPL,
 2076             GSEL(GCODE_SEL, SEL_KPL));
 2077         setidt(IDT_MF, &IDTVEC(fpu),  SDT_SYS386TGT, SEL_KPL,
 2078             GSEL(GCODE_SEL, SEL_KPL));
 2079         setidt(IDT_AC, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL,
 2080             GSEL(GCODE_SEL, SEL_KPL));
 2081         setidt(IDT_MC, &IDTVEC(mchk),  SDT_SYS386TGT, SEL_KPL,
 2082             GSEL(GCODE_SEL, SEL_KPL));
 2083         setidt(IDT_XF, &IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL,
 2084             GSEL(GCODE_SEL, SEL_KPL));
 2085         setidt(IDT_SYSCALL, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL,
 2086             GSEL(GCODE_SEL, SEL_KPL));
 2087 
 2088         r_idt.rd_limit = sizeof(idt0) - 1;
 2089         r_idt.rd_base = (int) idt;
 2090         lidt(&r_idt);
 2091 
 2092         /*
 2093          * Initialize the i8254 before the console so that console
 2094          * initialization can use DELAY().
 2095          */
 2096         i8254_init();
 2097 
 2098         /*
 2099          * Initialize the console before we print anything out.
 2100          */
 2101         cninit();
 2102 
 2103         if (metadata_missing)
 2104                 printf("WARNING: loader(8) metadata is missing!\n");
 2105 
 2106 #ifdef DEV_ISA
 2107         atpic_startup();
 2108 #endif
 2109 
 2110 #ifdef DDB
 2111         ksym_start = bootinfo.bi_symtab;
 2112         ksym_end = bootinfo.bi_esymtab;
 2113 #endif
 2114 
 2115         kdb_init();
 2116 
 2117 #ifdef KDB
 2118         if (boothowto & RB_KDB)
 2119                 kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
 2120 #endif
 2121 
 2122         finishidentcpu();       /* Final stage of CPU initialization */
 2123         setidt(IDT_UD, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL,
 2124             GSEL(GCODE_SEL, SEL_KPL));
 2125         setidt(IDT_GP, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL,
 2126             GSEL(GCODE_SEL, SEL_KPL));
 2127         initializecpu();        /* Initialize CPU registers */
 2128 
 2129         /* make an initial tss so cpu can get interrupt stack on syscall! */
 2130         /* Note: -16 is so we can grow the trapframe if we came from vm86 */
 2131         PCPU_SET(common_tss.tss_esp0, thread0.td_kstack +
 2132             KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb) - 16);
 2133         PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
 2134         gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
 2135         PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd);
 2136         PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
 2137         PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
 2138         ltr(gsel_tss);
 2139 
 2140         /* pointer to selector slot for %fs/%gs */
 2141         PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);
 2142 
 2143         dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 =
 2144             dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)];
 2145         dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
 2146             dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
 2147         dblfault_tss.tss_cr3 = (int)IdlePTD;
 2148         dblfault_tss.tss_eip = (int)dblfault_handler;
 2149         dblfault_tss.tss_eflags = PSL_KERNEL;
 2150         dblfault_tss.tss_ds = dblfault_tss.tss_es =
 2151             dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL);
 2152         dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL);
 2153         dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
 2154         dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
 2155 
 2156         vm86_initialize();
 2157         getmemsize(first);
 2158         init_param2(physmem);
 2159 
 2160         /* now running on new page tables, configured, and u/iom is accessible */
 2161 
 2162         msgbufinit(msgbufp, MSGBUF_SIZE);
 2163 
 2164         /* make a call gate to reenter kernel with */
 2165         gdp = &ldt[LSYS5CALLS_SEL].gd;
 2166 
 2167         x = (int) &IDTVEC(lcall_syscall);
 2168         gdp->gd_looffset = x;
 2169         gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL);
 2170         gdp->gd_stkcpy = 1;
 2171         gdp->gd_type = SDT_SYS386CGT;
 2172         gdp->gd_dpl = SEL_UPL;
 2173         gdp->gd_p = 1;
 2174         gdp->gd_hioffset = x >> 16;
 2175 
 2176         /* XXX does this work? */
 2177         /* XXX yes! */
 2178         ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL];
 2179         ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL];
 2180 
 2181         /* transfer to user mode */
 2182 
 2183         _ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
 2184         _udatasel = GSEL(GUDATA_SEL, SEL_UPL);
 2185 
 2186         /* setup proc 0's pcb */
 2187         thread0.td_pcb->pcb_flags = 0;
 2188         thread0.td_pcb->pcb_cr3 = (int)IdlePTD;
 2189         thread0.td_pcb->pcb_ext = 0;
 2190         thread0.td_frame = &proc0_tf;
 2191 }
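/*
 * Editor's sketch: the GSEL() values used throughout init386() pack a GDT
 * index and a requested privilege level into a 16-bit selector: index << 3,
 * bit 2 clear (GDT rather than LDT), RPL in the low two bits.  The macros
 * below are assumed to mirror <machine/segments.h> (illustrative only,
 * not built):
 */
#if 0
#include <stdio.h>

#define SEL_KPL         0               /* kernel privilege level */
#define SEL_UPL         3               /* user privilege level */
#define GSEL(s, r)      (((s) << 3) | (r))

int
main(void)
{
        /* GCODE_SEL is 4 and GUCODE_SEL is 6 in the gdt_segs[] table. */
        printf("kernel code selector %#x\n", GSEL(4, SEL_KPL)); /* 0x20 */
        printf("user code selector   %#x\n", GSEL(6, SEL_UPL)); /* 0x33 */
        return (0);
}
#endif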
 2192 
 2193 void
 2194 cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
 2195 {
 2196 
 2197 }
 2198 
 2199 void
 2200 spinlock_enter(void)
 2201 {
 2202         struct thread *td;
 2203 
 2204         td = curthread;
 2205         if (td->td_md.md_spinlock_count == 0)
 2206                 td->td_md.md_saved_flags = intr_disable();
 2207         td->td_md.md_spinlock_count++;
 2208         critical_enter();
 2209 }
 2210 
 2211 void
 2212 spinlock_exit(void)
 2213 {
 2214         struct thread *td;
 2215 
 2216         td = curthread;
 2217         critical_exit();
 2218         td->td_md.md_spinlock_count--;
 2219         if (td->td_md.md_spinlock_count == 0)
 2220                 intr_restore(td->td_md.md_saved_flags);
 2221 }
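/*
 * Editor's sketch: spinlock_enter()/spinlock_exit() above only touch the
 * interrupt flag on the outermost nesting level; inner pairs just count.
 * A minimal standalone model of that behaviour (assumed names, not built):
 */
#if 0
#include <assert.h>

static int spinlock_count;
static int intr_enabled = 1;            /* stands in for EFLAGS.IF */
static int saved_flags;

static void
enter(void)
{
        if (spinlock_count == 0) {
                saved_flags = intr_enabled;     /* intr_disable() */
                intr_enabled = 0;
        }
        spinlock_count++;
}

static void
leave(void)
{
        spinlock_count--;
        if (spinlock_count == 0)
                intr_enabled = saved_flags;     /* intr_restore() */
}

int
main(void)
{
        enter(); enter();               /* nested acquire */
        leave();                        /* inner release: still disabled */
        assert(intr_enabled == 0);
        leave();                        /* outermost release: restored */
        assert(intr_enabled == 1);
        return (0);
}
#endif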
 2222 
 2223 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
 2224 static void f00f_hack(void *unused);
 2225 SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL);
 2226 
 2227 static void
 2228 f00f_hack(void *unused)
 2229 {
 2230         struct gate_descriptor *new_idt;
 2231         vm_offset_t tmp;
 2232 
 2233         if (!has_f00f_bug)
 2234                 return;
 2235 
 2236         GIANT_REQUIRED;
 2237 
 2238         printf("Intel Pentium detected, installing workaround for F00F bug\n");
 2239 
 2240         tmp = kmem_alloc(kernel_map, PAGE_SIZE * 2);
 2241         if (tmp == 0)
 2242                 panic("kmem_alloc returned 0");
 2243 
 2244         /* Put the problematic entry (#6) at the end of the lower page. */
 2245         new_idt = (struct gate_descriptor*)
 2246             (tmp + PAGE_SIZE - 7 * sizeof(struct gate_descriptor));
 2247         bcopy(idt, new_idt, sizeof(idt0));
 2248         r_idt.rd_base = (u_int)new_idt;
 2249         lidt(&r_idt);
 2250         idt = new_idt;
 2251         if (vm_map_protect(kernel_map, tmp, tmp + PAGE_SIZE,
 2252                            VM_PROT_READ, FALSE) != KERN_SUCCESS)
 2253                 panic("vm_map_protect failed");
 2254 }
 2255 #endif /* defined(I586_CPU) && !defined(NO_F00F_HACK) */
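/*
 * Editor's worked example: with 8-byte gate descriptors and 4KB pages,
 * placing new_idt at tmp + PAGE_SIZE - 7 * sizeof(struct gate_descriptor)
 * puts entries 0-6 in the last 56 bytes of the read-only lower page, so
 * entry #6 (the one the F00F lockup touches) ends exactly on the page
 * boundary and the locked IDT access faults instead of wedging the CPU.
 * Standalone check of the arithmetic (illustrative only, not built):
 */
#if 0
#include <assert.h>

#define PAGE_SIZE       4096
#define GATE_SIZE       8       /* sizeof(struct gate_descriptor), i386 */

int
main(void)
{
        unsigned int base = PAGE_SIZE - 7 * GATE_SIZE;  /* entry 0 offset */

        assert(base + 6 * GATE_SIZE == PAGE_SIZE - GATE_SIZE);
        assert(base + 7 * GATE_SIZE == PAGE_SIZE);      /* #6 ends at page end */
        return (0);
}
#endif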
 2256 
 2257 /*
 2258  * Construct a PCB from a trapframe. This is called from kdb_trap() where
 2259  * we want to start a backtrace from the function that caused us to enter
 2260  * the debugger. We have the context in the trapframe, but base the trace
 2261  * on the PCB. The PCB doesn't have to be perfect, as long as it contains
 2262  * enough for a backtrace.
 2263  */
 2264 void
 2265 makectx(struct trapframe *tf, struct pcb *pcb)
 2266 {
 2267 
 2268         pcb->pcb_edi = tf->tf_edi;
 2269         pcb->pcb_esi = tf->tf_esi;
 2270         pcb->pcb_ebp = tf->tf_ebp;
 2271         pcb->pcb_ebx = tf->tf_ebx;
 2272         pcb->pcb_eip = tf->tf_eip;
 2273         pcb->pcb_esp = (ISPL(tf->tf_cs)) ? tf->tf_esp : (int)(tf + 1) - 8;
 2274 }
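/*
 * Editor's note on the esp computation above: a trap taken in kernel mode
 * pushes no esp/ss, so the bottom two trapframe slots are absent and the
 * pre-trap stack pointer is their address, i.e. 8 bytes short of the
 * frame's end.  A sketch with a hypothetical frame tail (illustrative
 * only, not built):
 */
#if 0
#include <assert.h>
#include <stdint.h>

struct frame_tail {                     /* last trapframe members (sketch) */
        int tf_eip, tf_cs, tf_eflags;
        int tf_esp, tf_ss;              /* only pushed on traps from user mode */
};

int
main(void)
{
        struct frame_tail tf;

        assert((intptr_t)(&tf + 1) - 8 == (intptr_t)&tf.tf_esp);
        return (0);
}
#endif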
 2275 
 2276 int
 2277 ptrace_set_pc(struct thread *td, u_long addr)
 2278 {
 2279 
 2280         td->td_frame->tf_eip = addr;
 2281         return (0);
 2282 }
 2283 
 2284 int
 2285 ptrace_single_step(struct thread *td)
 2286 {
 2287         td->td_frame->tf_eflags |= PSL_T;
 2288         return (0);
 2289 }
 2290 
 2291 int
 2292 ptrace_clear_single_step(struct thread *td)
 2293 {
 2294         td->td_frame->tf_eflags &= ~PSL_T;
 2295         return (0);
 2296 }
 2297 
 2298 int
 2299 fill_regs(struct thread *td, struct reg *regs)
 2300 {
 2301         struct pcb *pcb;
 2302         struct trapframe *tp;
 2303 
 2304         tp = td->td_frame;
 2305         pcb = td->td_pcb;
 2306         regs->r_fs = tp->tf_fs;
 2307         regs->r_es = tp->tf_es;
 2308         regs->r_ds = tp->tf_ds;
 2309         regs->r_edi = tp->tf_edi;
 2310         regs->r_esi = tp->tf_esi;
 2311         regs->r_ebp = tp->tf_ebp;
 2312         regs->r_ebx = tp->tf_ebx;
 2313         regs->r_edx = tp->tf_edx;
 2314         regs->r_ecx = tp->tf_ecx;
 2315         regs->r_eax = tp->tf_eax;
 2316         regs->r_eip = tp->tf_eip;
 2317         regs->r_cs = tp->tf_cs;
 2318         regs->r_eflags = tp->tf_eflags;
 2319         regs->r_esp = tp->tf_esp;
 2320         regs->r_ss = tp->tf_ss;
 2321         regs->r_gs = pcb->pcb_gs;
 2322         return (0);
 2323 }
 2324 
 2325 int
 2326 set_regs(struct thread *td, struct reg *regs)
 2327 {
 2328         struct pcb *pcb;
 2329         struct trapframe *tp;
 2330 
 2331         tp = td->td_frame;
 2332         if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) ||
 2333             !CS_SECURE(regs->r_cs))
 2334                 return (EINVAL);
 2335         pcb = td->td_pcb;
 2336         tp->tf_fs = regs->r_fs;
 2337         tp->tf_es = regs->r_es;
 2338         tp->tf_ds = regs->r_ds;
 2339         tp->tf_edi = regs->r_edi;
 2340         tp->tf_esi = regs->r_esi;
 2341         tp->tf_ebp = regs->r_ebp;
 2342         tp->tf_ebx = regs->r_ebx;
 2343         tp->tf_edx = regs->r_edx;
 2344         tp->tf_ecx = regs->r_ecx;
 2345         tp->tf_eax = regs->r_eax;
 2346         tp->tf_eip = regs->r_eip;
 2347         tp->tf_cs = regs->r_cs;
 2348         tp->tf_eflags = regs->r_eflags;
 2349         tp->tf_esp = regs->r_esp;
 2350         tp->tf_ss = regs->r_ss;
 2351         pcb->pcb_gs = regs->r_gs;
 2352         return (0);
 2353 }
 2354 
 2355 #ifdef CPU_ENABLE_SSE
 2356 static void
 2357 fill_fpregs_xmm(sv_xmm, sv_87)
 2358         struct savexmm *sv_xmm;
 2359         struct save87 *sv_87;
 2360 {
 2361         register struct env87 *penv_87 = &sv_87->sv_env;
 2362         register struct envxmm *penv_xmm = &sv_xmm->sv_env;
 2363         int i;
 2364 
 2365         bzero(sv_87, sizeof(*sv_87));
 2366 
 2367         /* FPU control/status */
 2368         penv_87->en_cw = penv_xmm->en_cw;
 2369         penv_87->en_sw = penv_xmm->en_sw;
 2370         penv_87->en_tw = penv_xmm->en_tw;
 2371         penv_87->en_fip = penv_xmm->en_fip;
 2372         penv_87->en_fcs = penv_xmm->en_fcs;
 2373         penv_87->en_opcode = penv_xmm->en_opcode;
 2374         penv_87->en_foo = penv_xmm->en_foo;
 2375         penv_87->en_fos = penv_xmm->en_fos;
 2376 
 2377         /* FPU registers */
 2378         for (i = 0; i < 8; ++i)
 2379                 sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc;
 2380 }
 2381 
 2382 static void
 2383 set_fpregs_xmm(sv_87, sv_xmm)
 2384         struct save87 *sv_87;
 2385         struct savexmm *sv_xmm;
 2386 {
 2387         register struct env87 *penv_87 = &sv_87->sv_env;
 2388         register struct envxmm *penv_xmm = &sv_xmm->sv_env;
 2389         int i;
 2390 
 2391         /* FPU control/status */
 2392         penv_xmm->en_cw = penv_87->en_cw;
 2393         penv_xmm->en_sw = penv_87->en_sw;
 2394         penv_xmm->en_tw = penv_87->en_tw;
 2395         penv_xmm->en_fip = penv_87->en_fip;
 2396         penv_xmm->en_fcs = penv_87->en_fcs;
 2397         penv_xmm->en_opcode = penv_87->en_opcode;
 2398         penv_xmm->en_foo = penv_87->en_foo;
 2399         penv_xmm->en_fos = penv_87->en_fos;
 2400 
 2401         /* FPU registers */
 2402         for (i = 0; i < 8; ++i)
 2403                 sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i];
 2404 }
 2405 #endif /* CPU_ENABLE_SSE */
 2406 
 2407 int
 2408 fill_fpregs(struct thread *td, struct fpreg *fpregs)
 2409 {
 2410 #ifdef CPU_ENABLE_SSE
 2411         if (cpu_fxsr) {
 2412                 fill_fpregs_xmm(&td->td_pcb->pcb_save.sv_xmm,
 2413                                                 (struct save87 *)fpregs);
 2414                 return (0);
 2415         }
 2416 #endif /* CPU_ENABLE_SSE */
 2417         bcopy(&td->td_pcb->pcb_save.sv_87, fpregs, sizeof *fpregs);
 2418         return (0);
 2419 }
 2420 
 2421 int
 2422 set_fpregs(struct thread *td, struct fpreg *fpregs)
 2423 {
 2424 #ifdef CPU_ENABLE_SSE
 2425         if (cpu_fxsr) {
 2426                 set_fpregs_xmm((struct save87 *)fpregs,
 2427                                            &td->td_pcb->pcb_save.sv_xmm);
 2428                 return (0);
 2429         }
 2430 #endif /* CPU_ENABLE_SSE */
 2431         bcopy(fpregs, &td->td_pcb->pcb_save.sv_87, sizeof *fpregs);
 2432         return (0);
 2433 }
 2434 
 2435 /*
 2436  * Get machine context.
 2437  */
 2438 int
 2439 get_mcontext(struct thread *td, mcontext_t *mcp, int flags)
 2440 {
 2441         struct trapframe *tp;
 2442         struct segment_descriptor *sdp;
 2443 
 2444         tp = td->td_frame;
 2445 
 2446         PROC_LOCK(curthread->td_proc);
 2447         mcp->mc_onstack = sigonstack(tp->tf_esp);
 2448         PROC_UNLOCK(curthread->td_proc);
 2449         mcp->mc_gs = td->td_pcb->pcb_gs;
 2450         mcp->mc_fs = tp->tf_fs;
 2451         mcp->mc_es = tp->tf_es;
 2452         mcp->mc_ds = tp->tf_ds;
 2453         mcp->mc_edi = tp->tf_edi;
 2454         mcp->mc_esi = tp->tf_esi;
 2455         mcp->mc_ebp = tp->tf_ebp;
 2456         mcp->mc_isp = tp->tf_isp;
 2457         mcp->mc_eflags = tp->tf_eflags;
 2458         if (flags & GET_MC_CLEAR_RET) {
 2459                 mcp->mc_eax = 0;
 2460                 mcp->mc_edx = 0;
 2461                 mcp->mc_eflags &= ~PSL_C;
 2462         } else {
 2463                 mcp->mc_eax = tp->tf_eax;
 2464                 mcp->mc_edx = tp->tf_edx;
 2465         }
 2466         mcp->mc_ebx = tp->tf_ebx;
 2467         mcp->mc_ecx = tp->tf_ecx;
 2468         mcp->mc_eip = tp->tf_eip;
 2469         mcp->mc_cs = tp->tf_cs;
 2470         mcp->mc_esp = tp->tf_esp;
 2471         mcp->mc_ss = tp->tf_ss;
 2472         mcp->mc_len = sizeof(*mcp);
 2473         get_fpcontext(td, mcp);
 2474         sdp = &td->td_pcb->pcb_fsd;
 2475         mcp->mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase;
 2476         sdp = &td->td_pcb->pcb_gsd;
 2477         mcp->mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase;
 2478 
 2479         return (0);
 2480 }
 2481 
 2482 /*
 2483  * Set machine context.
 2484  *
 2485  * Note that we set only the user-modifiable flags, and we don't
 2486  * touch the cs selector.
 2487  */
 2488 int
 2489 set_mcontext(struct thread *td, const mcontext_t *mcp)
 2490 {
 2491         struct trapframe *tp;
 2492         int eflags, ret;
 2493 
 2494         tp = td->td_frame;
 2495         if (mcp->mc_len != sizeof(*mcp))
 2496                 return (EINVAL);
 2497         eflags = (mcp->mc_eflags & PSL_USERCHANGE) |
 2498             (tp->tf_eflags & ~PSL_USERCHANGE);
 2499         if ((ret = set_fpcontext(td, mcp)) == 0) {
 2500                 tp->tf_fs = mcp->mc_fs;
 2501                 tp->tf_es = mcp->mc_es;
 2502                 tp->tf_ds = mcp->mc_ds;
 2503                 tp->tf_edi = mcp->mc_edi;
 2504                 tp->tf_esi = mcp->mc_esi;
 2505                 tp->tf_ebp = mcp->mc_ebp;
 2506                 tp->tf_ebx = mcp->mc_ebx;
 2507                 tp->tf_edx = mcp->mc_edx;
 2508                 tp->tf_ecx = mcp->mc_ecx;
 2509                 tp->tf_eax = mcp->mc_eax;
 2510                 tp->tf_eip = mcp->mc_eip;
 2511                 tp->tf_eflags = eflags;
 2512                 tp->tf_esp = mcp->mc_esp;
 2513                 tp->tf_ss = mcp->mc_ss;
 2514                 td->td_pcb->pcb_gs = mcp->mc_gs;
 2515                 ret = 0;
 2516         }
 2517         return (ret);
 2518 }
 2519 
 2520 static void
 2521 get_fpcontext(struct thread *td, mcontext_t *mcp)
 2522 {
 2523 #ifndef DEV_NPX
 2524         mcp->mc_fpformat = _MC_FPFMT_NODEV;
 2525         mcp->mc_ownedfp = _MC_FPOWNED_NONE;
 2526 #else
 2527         union savefpu *addr;
 2528 
 2529         /*
 2530          * XXX mc_fpstate might be misaligned, since its declaration is not
 2531          * unportabilized using __attribute__((aligned(16))) like the
 2532  * declaration of struct savexmm, and anyway, alignment doesn't work
 2533          * for auto variables since we don't use gcc's pessimal stack
 2534          * alignment.  Work around this by abusing the spare fields after
 2535          * mcp->mc_fpstate.
 2536          *
 2537          * XXX unpessimize most cases by only aligning when fxsave might be
 2538          * called, although this requires knowing too much about
 2539          * npxgetregs()'s internals.
 2540          */
 2541         addr = (union savefpu *)&mcp->mc_fpstate;
 2542         if (td == PCPU_GET(fpcurthread) &&
 2543 #ifdef CPU_ENABLE_SSE
 2544             cpu_fxsr &&
 2545 #endif
 2546             ((uintptr_t)(void *)addr & 0xF)) {
 2547                 do
 2548                         addr = (void *)((char *)addr + 4);
 2549                 while ((uintptr_t)(void *)addr & 0xF);
 2550         }
 2551         mcp->mc_ownedfp = npxgetregs(td, addr);
 2552         if (addr != (union savefpu *)&mcp->mc_fpstate) {
 2553                 bcopy(addr, &mcp->mc_fpstate, sizeof(mcp->mc_fpstate));
 2554                 bzero(&mcp->mc_spare2, sizeof(mcp->mc_spare2));
 2555         }
 2556         mcp->mc_fpformat = npxformat();
 2557 #endif
 2558 }
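/*
 * Editor's sketch: the workaround above bumps a 4-byte-aligned pointer in
 * 4-byte steps until it is 16-byte aligned, which is equivalent to the
 * usual round-up idiom.  A standalone check over one 16-byte window
 * (illustrative only, not built):
 */
#if 0
#include <assert.h>
#include <stdint.h>

int
main(void)
{
        uintptr_t p, q;

        for (p = 0x1000; p < 0x1010; p += 4) {  /* 4-byte-aligned starts */
                q = p;
                while (q & 0xF)                 /* the loop used above */
                        q += 4;
                assert(q == ((p + 0xF) & ~(uintptr_t)0xF));
        }
        return (0);
}
#endif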
 2559 
 2560 static int
 2561 set_fpcontext(struct thread *td, const mcontext_t *mcp)
 2562 {
 2563         union savefpu *addr;
 2564 
 2565         if (mcp->mc_fpformat == _MC_FPFMT_NODEV)
 2566                 return (0);
 2567         else if (mcp->mc_fpformat != _MC_FPFMT_387 &&
 2568             mcp->mc_fpformat != _MC_FPFMT_XMM)
 2569                 return (EINVAL);
 2570         else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE)
 2571                 /* We don't care what state is left in the FPU or PCB. */
 2572                 fpstate_drop(td);
 2573         else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU ||
 2574             mcp->mc_ownedfp == _MC_FPOWNED_PCB) {
 2575                 /* XXX align as above. */
 2576                 addr = (union savefpu *)&mcp->mc_fpstate;
 2577                 if (td == PCPU_GET(fpcurthread) &&
 2578 #ifdef CPU_ENABLE_SSE
 2579                     cpu_fxsr &&
 2580 #endif
 2581                     ((uintptr_t)(void *)addr & 0xF)) {
 2582                         do
 2583                                 addr = (void *)((char *)addr + 4);
 2584                         while ((uintptr_t)(void *)addr & 0xF);
 2585                         bcopy(&mcp->mc_fpstate, addr, sizeof(mcp->mc_fpstate));
 2586                 }
 2587 #ifdef DEV_NPX
 2588 #ifdef CPU_ENABLE_SSE
 2589                 if (cpu_fxsr)
 2590                         addr->sv_xmm.sv_env.en_mxcsr &= cpu_mxcsr_mask;
 2591 #endif
 2592                 /*
 2593                  * XXX we violate the dubious requirement that npxsetregs()
 2594                  * be called with interrupts disabled.
 2595                  */
 2596                 npxsetregs(td, addr);
 2597 #endif
 2598                 /*
 2599                  * Don't bother putting things back where they were in the
 2600                  * misaligned case, since we know that the caller won't use
 2601                  * them again.
 2602                  */
 2603         } else
 2604                 return (EINVAL);
 2605         return (0);
 2606 }
 2607 
 2608 static void
 2609 fpstate_drop(struct thread *td)
 2610 {
 2611         register_t s;
 2612 
 2613         s = intr_disable();
 2614 #ifdef DEV_NPX
 2615         if (PCPU_GET(fpcurthread) == td)
 2616                 npxdrop();
 2617 #endif
 2618         /*
 2619          * XXX force a full drop of the npx.  The above only drops it if we
 2620          * owned it.  npxgetregs() has the same bug in the !cpu_fxsr case.
 2621          *
 2622          * XXX I don't much like npxgetregs()'s semantics of doing a full
 2623          * drop.  Dropping only to the pcb matches fnsave's behaviour.
 2624          * We only need to drop to !PCB_INITDONE in sendsig().  But
 2625          * sendsig() is the only caller of npxgetregs()... perhaps we just
 2626          * have too many layers.
 2627          */
 2628         curthread->td_pcb->pcb_flags &= ~PCB_NPXINITDONE;
 2629         intr_restore(s);
 2630 }
 2631 
 2632 int
 2633 fill_dbregs(struct thread *td, struct dbreg *dbregs)
 2634 {
 2635         struct pcb *pcb;
 2636 
 2637         if (td == NULL) {
 2638                 dbregs->dr[0] = rdr0();
 2639                 dbregs->dr[1] = rdr1();
 2640                 dbregs->dr[2] = rdr2();
 2641                 dbregs->dr[3] = rdr3();
 2642                 dbregs->dr[4] = rdr4();
 2643                 dbregs->dr[5] = rdr5();
 2644                 dbregs->dr[6] = rdr6();
 2645                 dbregs->dr[7] = rdr7();
 2646         } else {
 2647                 pcb = td->td_pcb;
 2648                 dbregs->dr[0] = pcb->pcb_dr0;
 2649                 dbregs->dr[1] = pcb->pcb_dr1;
 2650                 dbregs->dr[2] = pcb->pcb_dr2;
 2651                 dbregs->dr[3] = pcb->pcb_dr3;
 2652                 dbregs->dr[4] = 0;
 2653                 dbregs->dr[5] = 0;
 2654                 dbregs->dr[6] = pcb->pcb_dr6;
 2655                 dbregs->dr[7] = pcb->pcb_dr7;
 2656         }
 2657         return (0);
 2658 }
 2659 
 2660 int
 2661 set_dbregs(struct thread *td, struct dbreg *dbregs)
 2662 {
 2663         struct pcb *pcb;
 2664         int i;
 2665 
 2666         if (td == NULL) {
 2667                 load_dr0(dbregs->dr[0]);
 2668                 load_dr1(dbregs->dr[1]);
 2669                 load_dr2(dbregs->dr[2]);
 2670                 load_dr3(dbregs->dr[3]);
 2671                 load_dr4(dbregs->dr[4]);
 2672                 load_dr5(dbregs->dr[5]);
 2673                 load_dr6(dbregs->dr[6]);
 2674                 load_dr7(dbregs->dr[7]);
 2675         } else {
 2676                 /*
 2677                  * Don't let an illegal value for dr7 get set.  Specifically,
 2678                  * check for undefined settings.  Setting these bit patterns
 2679                  * results in undefined behaviour and can lead to an unexpected
 2680                  * TRCTRAP.
 2681                  */
 2682                 for (i = 0; i < 4; i++) {
 2683                         if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02)
 2684                                 return (EINVAL);
 2685                         if (DBREG_DR7_LEN(dbregs->dr[7], i) == 0x02)
 2686                                 return (EINVAL);
 2687                 }
 2688                 
 2689                 pcb = td->td_pcb;
 2690                 
 2691                 /*
 2692                  * Don't let a process set a breakpoint that is not within the
 2693                  * process's address space.  If a process could do this, it
 2694                  * could halt the system by setting a breakpoint in the kernel
 2695                  * (if ddb was enabled).  Thus, we need to check to make sure
 2696                  * that no breakpoints are being enabled for addresses outside
 2697                  * the process's address space.
 2698                  *
 2699                  * XXX - what about when the watched area of the user's
 2700                  * address space is written into from within the kernel
 2701                  * ... wouldn't that still cause a breakpoint to be generated
 2702                  * from within kernel mode?
 2703                  */
 2704 
 2705                 if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) {
 2706                         /* dr0 is enabled */
 2707                         if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS)
 2708                                 return (EINVAL);
 2709                 }
 2710                         
 2711                 if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) {
 2712                         /* dr1 is enabled */
 2713                         if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS)
 2714                                 return (EINVAL);
 2715                 }
 2716                         
 2717                 if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) {
 2718                         /* dr2 is enabled */
 2719                         if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS)
 2720                                 return (EINVAL);
 2721                 }
 2722                         
 2723                 if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) {
 2724                         /* dr3 is enabled */
 2725                         if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS)
 2726                                 return (EINVAL);
 2727                 }
 2728 
 2729                 pcb->pcb_dr0 = dbregs->dr[0];
 2730                 pcb->pcb_dr1 = dbregs->dr[1];
 2731                 pcb->pcb_dr2 = dbregs->dr[2];
 2732                 pcb->pcb_dr3 = dbregs->dr[3];
 2733                 pcb->pcb_dr6 = dbregs->dr[6];
 2734                 pcb->pcb_dr7 = dbregs->dr[7];
 2735 
 2736                 pcb->pcb_flags |= PCB_DBREGS;
 2737         }
 2738 
 2739         return (0);
 2740 }
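/*
 * Editor's sketch of the DR7 layout that the DBREG_DR7_* macros above
 * decode: bits 0-7 hold the per-breakpoint local/global enables, and for
 * breakpoint i a 2-bit access type sits at bit 16 + 4*i with a 2-bit
 * length just above it; the 0x02 patterns rejected above are the
 * architecturally undefined encodings.  The macros below are assumed to
 * mirror <machine/reg.h> (illustrative only, not built):
 */
#if 0
#include <stdio.h>

#define DR7_ENABLED(d, i)  (((d) >> ((i) * 2)) & 0x3)        /* Li/Gi bits */
#define DR7_ACCESS(d, i)   (((d) >> (16 + (i) * 4)) & 0x3)   /* 00=x 01=w 11=rw */
#define DR7_LEN(d, i)      (((d) >> (18 + (i) * 4)) & 0x3)   /* 00=1 01=2 11=4 */

int
main(void)
{
        unsigned int dr7 = 0x000d0001;  /* dr0: local enable, write, 4 bytes */

        printf("enabled=%x access=%x len=%x\n",
            DR7_ENABLED(dr7, 0), DR7_ACCESS(dr7, 0), DR7_LEN(dr7, 0));
        return (0);
}
#endif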
 2741 
 2742 /*
 2743  * Return > 0 if a hardware breakpoint has been hit, and the
 2744  * breakpoint was in user space.  Return 0 otherwise.
 2745  */
 2746 int
 2747 user_dbreg_trap(void)
 2748 {
 2749         u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */
 2750         u_int32_t bp;       /* breakpoint bits extracted from dr6 */
 2751         int nbp;            /* number of breakpoints that triggered */
 2752         caddr_t addr[4];    /* breakpoint addresses */
 2753         int i;
 2754         
 2755         dr7 = rdr7();
 2756         if ((dr7 & 0x000000ff) == 0) {
 2757                 /*
 2758                  * all local and global enable bits in the dr7 register
 2759                  * are zero, thus the trap couldn't have been caused by the
 2760                  * hardware debug registers
 2761                  */
 2762                 return 0;
 2763         }
 2764 
 2765         nbp = 0;
 2766         dr6 = rdr6();
 2767         bp = dr6 & 0x0000000f;
 2768 
 2769         if (!bp) {
 2770                 /*
 2771                  * None of the breakpoint bits are set, meaning this
 2772                  * trap was not caused by any of the debug registers
 2773                  */
 2774                 return 0;
 2775         }
 2776 
 2777         /*
 2778          * at least one of the breakpoints was hit; check to see
 2779          * which ones, and whether any of them are user space addresses
 2780          */
 2781 
 2782         if (bp & 0x01) {
 2783                 addr[nbp++] = (caddr_t)rdr0();
 2784         }
 2785         if (bp & 0x02) {
 2786                 addr[nbp++] = (caddr_t)rdr1();
 2787         }
 2788         if (bp & 0x04) {
 2789                 addr[nbp++] = (caddr_t)rdr2();
 2790         }
 2791         if (bp & 0x08) {
 2792                 addr[nbp++] = (caddr_t)rdr3();
 2793         }
 2794 
 2795         for (i = 0; i < nbp; i++) {
 2796                 if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) {
 2797                         /*
 2798                          * addr[i] is in user space
 2799                          */
 2800                         return nbp;
 2801                 }
 2802         }
 2803 
 2804         /*
 2805          * None of the breakpoints are in user space.
 2806          */
 2807         return 0;
 2808 }
 2809 
 2810 #ifdef KDB
 2811 
 2812 /*
 2813  * Provide inb() and outb() as functions.  They are normally only available as
 2814  * inline functions, thus cannot be called from the debugger.
 2815  */
 2816 
 2817 /* silence compiler warnings */
 2818 u_char inb_(u_short);
 2819 void outb_(u_short, u_char);
 2820 
 2821 u_char
 2822 inb_(u_short port)
 2823 {
 2824         return inb(port);
 2825 }
 2826 
 2827 void
 2828 outb_(u_short port, u_char data)
 2829 {
 2830         outb(port, data);
 2831 }
 2832 
 2833 #endif /* KDB */
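/*
 * Editor's sketch: the inline inb()/outb() that the wrappers above make
 * callable from the debugger are, on i386, thin wrappers around the
 * in/out instructions, roughly as in <machine/cpufunc.h> (a sketch, not
 * the verbatim header):
 */
#if 0
static __inline unsigned char
inb_sketch(unsigned short port)
{
        unsigned char data;

        __asm __volatile("inb %w1, %0" : "=a" (data) : "Nd" (port));
        return (data);
}

static __inline void
outb_sketch(unsigned short port, unsigned char data)
{
        __asm __volatile("outb %0, %w1" : : "a" (data), "Nd" (port));
}
#endif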
