The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/i386/i386/machdep.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1992 Terrence R. Lambert.
    3  * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
    4  * All rights reserved.
    5  *
    6  * This code is derived from software contributed to Berkeley by
    7  * William Jolitz.
    8  *
    9  * Redistribution and use in source and binary forms, with or without
   10  * modification, are permitted provided that the following conditions
   11  * are met:
   12  * 1. Redistributions of source code must retain the above copyright
   13  *    notice, this list of conditions and the following disclaimer.
   14  * 2. Redistributions in binary form must reproduce the above copyright
   15  *    notice, this list of conditions and the following disclaimer in the
   16  *    documentation and/or other materials provided with the distribution.
   17  * 3. All advertising materials mentioning features or use of this software
   18  *    must display the following acknowledgement:
   19  *      This product includes software developed by the University of
   20  *      California, Berkeley and its contributors.
   21  * 4. Neither the name of the University nor the names of its contributors
   22  *    may be used to endorse or promote products derived from this software
   23  *    without specific prior written permission.
   24  *
   25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   35  * SUCH DAMAGE.
   36  *
   37  *      from: @(#)machdep.c     7.4 (Berkeley) 6/3/91
   38  * $FreeBSD$
   39  */
   40 
   41 #include "apm.h"
   42 #include "ether.h"
   43 #include "npx.h"
   44 #include "opt_atalk.h"
   45 #include "opt_compat.h"
   46 #include "opt_cpu.h"
   47 #include "opt_ddb.h"
   48 #include "opt_directio.h"
   49 #include "opt_inet.h"
   50 #include "opt_ipx.h"
   51 #include "opt_maxmem.h"
   52 #include "opt_msgbuf.h"
   53 #include "opt_perfmon.h"
   54 #include "opt_swap.h"
   55 #include "opt_user_ldt.h"
   56 #include "opt_userconfig.h"
   57 
   58 #include <sys/param.h>
   59 #include <sys/systm.h>
   60 #include <sys/sysproto.h>
   61 #include <sys/signalvar.h>
   62 #include <sys/kernel.h>
   63 #include <sys/linker.h>
   64 #include <sys/malloc.h>
   65 #include <sys/proc.h>
   66 #include <sys/buf.h>
   67 #include <sys/reboot.h>
   68 #include <sys/callout.h>
   69 #include <sys/mbuf.h>
   70 #include <sys/msgbuf.h>
   71 #include <sys/sysent.h>
   72 #include <sys/sysctl.h>
   73 #include <sys/vmmeter.h>
   74 #include <sys/bus.h>
   75 
   76 #include <vm/vm.h>
   77 #include <vm/vm_param.h>
   78 #include <sys/lock.h>
   79 #include <vm/vm_kern.h>
   80 #include <vm/vm_object.h>
   81 #include <vm/vm_page.h>
   82 #include <vm/vm_map.h>
   83 #include <vm/vm_pager.h>
   84 #include <vm/vm_extern.h>
   85 
   86 #include <sys/user.h>
   87 #include <sys/exec.h>
   88 #include <sys/cons.h>
   89 
   90 #include <ddb/ddb.h>
   91 
   92 #include <net/netisr.h>
   93 
   94 #include <machine/cpu.h>
   95 #include <machine/reg.h>
   96 #include <machine/clock.h>
   97 #include <machine/specialreg.h>
   98 #include <machine/bootinfo.h>
   99 #include <machine/ipl.h>
  100 #include <machine/md_var.h>
  101 #include <machine/pcb_ext.h>            /* pcb.h included via sys/user.h */
  102 #ifdef SMP
  103 #include <machine/smp.h>
  104 #include <machine/globaldata.h>
  105 #endif
  106 #ifdef PERFMON
  107 #include <machine/perfmon.h>
  108 #endif
  109 #include <machine/cputypes.h>
  110 
  111 #ifdef OLD_BUS_ARCH
  112 #include <i386/isa/isa_device.h>
  113 #endif
  114 #include <i386/isa/intr_machdep.h>
  115 #include <isa/rtc.h>
  116 #include <machine/vm86.h>
  117 #include <sys/random.h>
  118 #include <sys/ptrace.h>
  119 #include <machine/sigframe.h>
  120 
/* Entry points implemented in locore.s / identcpu.c / initcpu.c. */
extern void init386 __P((int first));
extern void dblfault_handler __P((void));

extern void printcpuinfo(void);	/* XXX header file */
extern void finishidentcpu(void);
extern void panicifcpuunsupported(void);
extern void initializecpu(void);

static void cpu_startup __P((void *));
#ifdef CPU_ENABLE_SSE
static void set_fpregs_xmm __P((struct save87 *, struct savexmm *));
static void fill_fpregs_xmm __P((struct savexmm *, struct save87 *));
#endif /* CPU_ENABLE_SSE */
#ifdef DIRECTIO
extern void ffs_rawread_setup(void);
#endif /* DIRECTIO */

/* Run cpu_startup() first in the SI_SUB_CPU stage of boot. */
SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)

/* Malloc type used for the mbuf cluster reference-count array (cpu_startup). */
static MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf");

int     _udatasel, _ucodesel;	/* user-mode data/code segment selectors */
u_int   atdevbase;		/* presumably KVA of ISA device memory; set elsewhere -- confirm */

#if defined(SWTCH_OPTIM_STATS)
/* Read-only debug counters for context-switch optimization statistics. */
extern int swtch_optim_stats;
SYSCTL_INT(_debug, OID_AUTO, swtch_optim_stats,
        CTLFLAG_RD, &swtch_optim_stats, 0, "");
SYSCTL_INT(_debug, OID_AUTO, tlb_flush_count,
        CTLFLAG_RD, &tlb_flush_count, 0, "");
#endif

/* Non-zero when built for the NEC PC-98 architecture; exported read-only. */
#ifdef PC98
static int      ispc98 = 1;
#else
static int      ispc98 = 0;
#endif
SYSCTL_INT(_machdep, OID_AUTO, ispc98, CTLFLAG_RD, &ispc98, 0, "");

int physmem = 0;	/* physical memory, in pages (see ctob() users below) */
int cold = 1;		/* presumably cleared once boot completes -- confirm */
  162 
  163 static int
  164 sysctl_hw_physmem(SYSCTL_HANDLER_ARGS)
  165 {
  166         int error = sysctl_handle_int(oidp, 0, ctob(physmem), req);
  167         return (error);
  168 }
  169 
  170 SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD,
  171         0, 0, sysctl_hw_physmem, "IU", "");
  172 
  173 static int
  174 sysctl_hw_usermem(SYSCTL_HANDLER_ARGS)
  175 {
  176         int error = sysctl_handle_int(oidp, 0,
  177                 ctob(physmem - cnt.v_wire_count), req);
  178         return (error);
  179 }
  180 
  181 SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD,
  182         0, 0, sysctl_hw_usermem, "IU", "");
  183 
  184 static int
  185 sysctl_hw_availpages(SYSCTL_HANDLER_ARGS)
  186 {
  187         int error = sysctl_handle_int(oidp, 0,
  188                 i386_btop(avail_end - avail_start), req);
  189         return (error);
  190 }
  191 
  192 SYSCTL_PROC(_hw, OID_AUTO, availpages, CTLTYPE_INT|CTLFLAG_RD,
  193         0, 0, sysctl_hw_availpages, "I", "");
  194 
  195 static int
  196 sysctl_machdep_msgbuf(SYSCTL_HANDLER_ARGS)
  197 {
  198         int error;
  199 
  200         /* Unwind the buffer, so that it's linear (possibly starting with
  201          * some initial nulls).
  202          */
  203         error=sysctl_handle_opaque(oidp,msgbufp->msg_ptr+msgbufp->msg_bufr,
  204                 msgbufp->msg_size-msgbufp->msg_bufr,req);
  205         if(error) return(error);
  206         if(msgbufp->msg_bufr>0) {
  207                 error=sysctl_handle_opaque(oidp,msgbufp->msg_ptr,
  208                         msgbufp->msg_bufr,req);
  209         }
  210         return(error);
  211 }
  212 
  213 SYSCTL_PROC(_machdep, OID_AUTO, msgbuf, CTLTYPE_STRING|CTLFLAG_RD,
  214         0, 0, sysctl_machdep_msgbuf, "A","Contents of kernel message buffer");
  215 
  216 static int msgbuf_clear;
  217 
  218 static int
  219 sysctl_machdep_msgbuf_clear(SYSCTL_HANDLER_ARGS)
  220 {
  221         int error;
  222         error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
  223                 req);
  224         if (!error && req->newptr) {
  225                 /* Clear the buffer and reset write pointer */
  226                 bzero(msgbufp->msg_ptr,msgbufp->msg_size);
  227                 msgbufp->msg_bufr=msgbufp->msg_bufx=0;
  228                 msgbuf_clear=0;
  229         }
  230         return (error);
  231 }
  232 
  233 SYSCTL_PROC(_machdep, OID_AUTO, msgbuf_clear, CTLTYPE_INT|CTLFLAG_RW,
  234         &msgbuf_clear, 0, sysctl_machdep_msgbuf_clear, "I",
  235         "Clear kernel message buffer");
  236 
int bootverbose = 0, Maxmem = 0;	/* Maxmem: top of RAM, in pages (see ptoa() users) */
long dumplo;				/* presumably kernel-dump offset on the dump device -- confirm */

/* Usable physical memory as (start, end) address pairs; 0-terminated. */
vm_paddr_t phys_avail[10];

/* must be 2 less so 0 0 can signal end of chunks */
#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2)

/* VA bounds of the buffer/clean/pager submaps carved out in cpu_startup(). */
static vm_offset_t buffer_sva, buffer_eva;
vm_offset_t clean_sva, clean_eva;
static vm_offset_t pager_sva, pager_eva;
static struct trapframe proc0_tf;	/* trap frame storage for process 0 */
  249 
/*
 * cpu_startup() - machine-dependent boot-time initialization, run via
 * SYSINIT at SI_SUB_CPU.  Announces the CPU and memory configuration,
 * sizes and allocates the kernel's dynamic tables (callout wheel,
 * buffer cache, mbuf pool) with a two-pass size-then-assign scheme,
 * carves the VM submaps out of the kernel map, and finally starts any
 * application processors (SMP).
 */
static void
cpu_startup(dummy)
        void *dummy;
{
        register unsigned i;
        register caddr_t v;
        vm_offset_t maxaddr;
        vm_size_t size = 0;
        int firstaddr;
        vm_offset_t minaddr;

        if (boothowto & RB_VERBOSE)
                bootverbose++;

        /*
         * Good {morning,afternoon,evening,night}.
         */
        printf("%s", version);
        startrtclock();
        printcpuinfo();
        panicifcpuunsupported();
#ifdef PERFMON
        perfmon_init();
#endif
        printf("real memory  = %llu (%lluK bytes)\n",
            ptoa((u_int64_t)Maxmem), ptoa((u_int64_t)Maxmem) / 1024);
        /*
         * Display any holes after the first chunk of extended memory.
         */
        if (bootverbose) {
                int indx;

                printf("Physical memory chunk(s):\n");
                for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
                        vm_paddr_t size1;

                        size1 = phys_avail[indx + 1] - phys_avail[indx];
                        printf("0x%09llx - 0x%09llx, %llu bytes (%llu pages)\n",
                            (u_int64_t)phys_avail[indx],
                            (u_int64_t)phys_avail[indx + 1] - 1,
                            (u_int64_t)size1,
                            (u_int64_t)size1 / PAGE_SIZE);
                }
        }

        /*
         * Calculate callout wheel size: the smallest power of two that
         * is >= ncallout; callwheelmask is then used to hash ticks into
         * wheel slots.
         */
        for (callwheelsize = 1, callwheelbits = 0;
             callwheelsize < ncallout;
             callwheelsize <<= 1, ++callwheelbits)
                ;
        callwheelmask = callwheelsize - 1;

        /*
         * Allocate space for system data structures.
         * The first available kernel virtual address is in "v".
         * As pages of kernel virtual memory are allocated, "v" is incremented.
         * As pages of memory are allocated and cleared,
         * "firstaddr" is incremented.
         * An index into the kernel page table corresponding to the
         * virtual memory address maintained in "v" is kept in "mapaddr".
         */

        /*
         * Make two passes.  The first pass calculates how much memory is
         * needed and allocates it.  The second pass assigns virtual
         * addresses to the various data structures.
         */
        firstaddr = 0;
again:
        v = (caddr_t)firstaddr;

#define valloc(name, type, num) \
            (name) = (type *)v; v = (caddr_t)((name)+(num))
#define valloclim(name, type, num, lim) \
            (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))

        valloc(callout, struct callout, ncallout);
        valloc(callwheel, struct callout_tailq, callwheelsize);

        /*
         * The nominal buffer size (and minimum KVA allocation) is BKVASIZE.
         * For the first 64MB of ram nominally allocate sufficient buffers to
         * cover 1/4 of our ram.  Beyond the first 64MB allocate additional
         * buffers to cover 1/20 of our ram over 64MB.  When auto-sizing
         * the buffer cache we limit the eventual kva reservation to
         * maxbcache bytes.
         *
         * factor represents the 1/4 x ram conversion.
         */
        if (nbuf == 0) {
                int factor = 4 * BKVASIZE / 1024;
                int kbytes = physmem * (PAGE_SIZE / 1024);

                nbuf = 50;
                if (kbytes > 4096)
                        nbuf += min((kbytes - 4096) / factor, 65536 / factor);
                if (kbytes > 65536)
                        nbuf += (kbytes - 65536) * 2 / (factor * 5);
                if (maxbcache && nbuf > maxbcache / BKVASIZE)
                        nbuf = maxbcache / BKVASIZE;
        }

        /*
         * Do not allow the buffer_map to be more then 1/2 the size of the
         * kernel_map.
         */
        if (nbuf > (kernel_map->max_offset - kernel_map->min_offset) / 
            (BKVASIZE * 2)) {
                nbuf = (kernel_map->max_offset - kernel_map->min_offset) / 
                    (BKVASIZE * 2);
                printf("Warning: nbufs capped at %d\n", nbuf);
        }

        /* Swap buffers: nominally nbuf/4, clamped to the range [16, 256]. */
        nswbuf = max(min(nbuf/4, 256), 16);
#ifdef NSWBUF_MIN
        if (nswbuf < NSWBUF_MIN)
                nswbuf = NSWBUF_MIN;
#endif
#ifdef DIRECTIO
        ffs_rawread_setup();
#endif

        valloc(swbuf, struct buf, nswbuf);
        valloc(buf, struct buf, nbuf);
        v = bufhashinit(v);

        /*
         * End of first pass, size has been calculated so allocate memory
         */
        if (firstaddr == 0) {
                size = (vm_size_t)(v - firstaddr);
                firstaddr = (int)kmem_alloc(kernel_map, round_page(size));
                if (firstaddr == 0)
                        panic("startup: no room for tables");
                goto again;
        }

        /*
         * End of second pass, addresses have been assigned
         */
        if ((vm_size_t)(v - firstaddr) != size)
                panic("startup: table size inconsistency");

        /* Carve the clean/buffer/pager/exec submaps out of kernel VA. */
        clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
                        (nbuf*BKVASIZE) + (nswbuf*MAXPHYS) + pager_map_size);
        buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva,
                                (nbuf*BKVASIZE));
        buffer_map->system_map = 1;
        pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva,
                                (nswbuf*MAXPHYS) + pager_map_size);
        pager_map->system_map = 1;
        exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
                                (16*(ARG_MAX+(PAGE_SIZE*3))));

        /*
         * Finally, allocate mbuf pool.  Since mclrefcnt is an off-size
         * we use the more space efficient malloc in place of kmem_alloc.
         */
        {
                vm_offset_t mb_map_size;

                mb_map_size = nmbufs * MSIZE + nmbclusters * MCLBYTES;
                mb_map_size = roundup2(mb_map_size, max(MCLBYTES, PAGE_SIZE));
                /*
                 * NOTE(review): the M_NOWAIT allocation result is not
                 * checked before bzero(); presumably it cannot fail
                 * this early in boot -- confirm.
                 */
                mclrefcnt = malloc(mb_map_size / MCLBYTES, M_MBUF, M_NOWAIT);
                bzero(mclrefcnt, mb_map_size / MCLBYTES);
                mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr,
                        mb_map_size);
                mb_map->system_map = 1;
                mbutltop = mbutl;
        }

        /*
         * Initialize callouts: every slot starts on the free list, and
         * every wheel bucket starts empty.
         */
        SLIST_INIT(&callfree);
        for (i = 0; i < ncallout; i++) {
                callout_init(&callout[i]);
                callout[i].c_flags = CALLOUT_LOCAL_ALLOC;
                SLIST_INSERT_HEAD(&callfree, &callout[i], c_links.sle);
        }

        for (i = 0; i < callwheelsize; i++) {
                TAILQ_INIT(&callwheel[i]);
        }

#if defined(USERCONFIG)
        userconfig();
        cninit();               /* the preferred console may have changed */
#endif

        printf("avail memory = %llu (%lluK bytes)\n",
            ptoa((u_int64_t)cnt.v_free_count),
            ptoa((u_int64_t)cnt.v_free_count) / 1024);

        /*
         * Set up buffers, so they can be used to read disk labels.
         */
        bufinit();
        vm_pager_bufferinit();

#ifdef SMP
        /*
         * OK, enough kmem_alloc/malloc state should be up, lets get on with it!
         */
        mp_start();                     /* fire up the APs and APICs */
        mp_announce();
#endif  /* SMP */
        cpu_setregs();
}
  461 
  462 int
  463 register_netisr(num, handler)
  464         int num;
  465         netisr_t *handler;
  466 {
  467         
  468         if (num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs)) ) {
  469                 printf("register_netisr: bad isr number: %d\n", num);
  470                 return (EINVAL);
  471         }
  472         netisrs[num] = handler;
  473         return (0);
  474 }
  475 
  476 int
  477 unregister_netisr(num)
  478         int num;
  479 {
  480 
  481         if (num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs)) ) {
  482                 printf("unregister_netisr: bad isr number: %d\n", num);
  483                 return (EINVAL);
  484         }
  485         netisrs[num] = NULL;
  486         return (0);
  487 }
  488 
  489 /*
  490  * Send an interrupt to process.
  491  *
  492  * Stack is set up to allow sigcode stored
  493  * at top to call routine, followed by kcall
  494  * to sigreturn routine below.  After sigreturn
  495  * resets the signal mask, the stack, and the
  496  * frame pointer, it returns to the user
  497  * specified pc, psl.
  498  */
/*
 * osendsig() - old-style (osigcontext/osigframe) variant of sendsig(),
 * used for handlers installed via the pre-sigset_t signal API.  Builds
 * an osigframe on the user's stack (or alternate signal stack), copies
 * it out, and redirects the trapframe so userland resumes in the old
 * signal trampoline (osigcode).
 */
static void
osendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
{
        register struct proc *p = curproc;
        register struct trapframe *regs;
        register struct osigframe *fp;
        struct osigframe sf;
        struct sigacts *psp = p->p_sigacts;
        int oonstack;

        regs = p->p_md.md_regs;
        oonstack = (p->p_sigstk.ss_flags & SS_ONSTACK) ? 1 : 0;

        /* Allocate and validate space for the signal handler context. */
        if ((p->p_flag & P_ALTSTACK) && !oonstack &&
            SIGISMEMBER(psp->ps_sigonstack, sig)) {
                fp = (struct osigframe *)(p->p_sigstk.ss_sp +
                    p->p_sigstk.ss_size - sizeof(struct osigframe));
                p->p_sigstk.ss_flags |= SS_ONSTACK;
        }
        else
                fp = (struct osigframe *)regs->tf_esp - 1;

        /* Translate the signal number if this ABI uses its own table. */
        if (p->p_sysent->sv_sigtbl) {
                if (sig <= p->p_sysent->sv_sigsize)
                        sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
        }

        /* Build the argument list for the signal handler. */
        sf.sf_signum = sig;
        sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc;
        if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
                /* Signal handler installed with SA_SIGINFO. */
                sf.sf_arg2 = (register_t)&fp->sf_siginfo;
                sf.sf_siginfo.si_signo = sig;
                sf.sf_siginfo.si_code = code;
                sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher;
        }
        else {
                /* Old FreeBSD-style arguments. */
                sf.sf_arg2 = code;
                sf.sf_addr = regs->tf_err;
                sf.sf_ahu.sf_handler = catcher;
        }

        /* save scratch registers */
        sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax;
        sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx;
        sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx;
        sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx;
        sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi;
        sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi;
        sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs;
        sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds;
        sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss;
        sf.sf_siginfo.si_sc.sc_es = regs->tf_es;
        sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs;
        sf.sf_siginfo.si_sc.sc_gs = rgs();	/* %gs read directly, not from the frame */
        sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp;

        /* Build the signal context to be used by sigreturn. */
        sf.sf_siginfo.si_sc.sc_onstack = oonstack;
        SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask);
        sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp;
        sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp;
        sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip;
        sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags;
        sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno;
        sf.sf_siginfo.si_sc.sc_err = regs->tf_err;

        /*
         * If we're a vm86 process, we want to save the segment registers.
         * We also change eflags to be our emulated eflags, not the actual
         * eflags.
         */
        if (regs->tf_eflags & PSL_VM) {
                struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
                struct vm86_kernel *vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86;

                sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs;
                sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs;
                sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es;
                sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds;

                if (vm86->vm86_has_vme == 0)
                        sf.sf_siginfo.si_sc.sc_ps =
                            (tf->tf_eflags & ~(PSL_VIF | PSL_VIP))
                            | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
                /* see sendsig for comment */
                tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
        }

        /* Copy the sigframe out to the user's stack. */
        if (copyout(&sf, fp, sizeof(struct osigframe)) != 0) {
                /*
                 * Something is wrong with the stack pointer.
                 * ...Kill the process.
                 */
                sigexit(p, SIGILL);
        }

        /* Resume in the old trampoline with flat user-mode segments. */
        regs->tf_esp = (int)fp;
        regs->tf_eip = PS_STRINGS - szosigcode;
        regs->tf_eflags &= ~PSL_T;
        regs->tf_cs = _ucodesel;
        regs->tf_ds = _udatasel;
        regs->tf_es = _udatasel;
        regs->tf_fs = _udatasel;
        load_gs(_udatasel);
        regs->tf_ss = _udatasel;
}
  611 
/*
 * sendsig() - machine-dependent signal delivery (new sigset_t ABI).
 *
 * Saves the user context into a struct sigframe, copies the frame to
 * the user's stack (or alternate signal stack), and rewrites the
 * trapframe so the process resumes in the signal trampoline, which
 * invokes the handler and then sigreturn().  Delegates to osendsig()
 * for handlers registered through the old signal API.
 */
void
sendsig(catcher, sig, mask, code)
        sig_t catcher;
        int sig;
        sigset_t *mask;
        u_long code;
{
        struct proc *p = curproc;
        struct trapframe *regs;
        struct sigacts *psp = p->p_sigacts;
        struct sigframe sf, *sfp;
        int oonstack;

        if (SIGISMEMBER(psp->ps_osigset, sig)) {
                osendsig(catcher, sig, mask, code);
                return;
        }

        regs = p->p_md.md_regs;
        oonstack = (p->p_sigstk.ss_flags & SS_ONSTACK) ? 1 : 0;

        /* save user context */
        bzero(&sf, sizeof(struct sigframe));
        sf.sf_uc.uc_sigmask = *mask;
        sf.sf_uc.uc_stack = p->p_sigstk;
        sf.sf_uc.uc_mcontext.mc_onstack = oonstack;
        sf.sf_uc.uc_mcontext.mc_gs = rgs();
        /* The mcontext register block mirrors the trapframe starting at mc_fs. */
        bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(struct trapframe));

        /* Allocate and validate space for the signal handler context. */
        if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack &&
            SIGISMEMBER(psp->ps_sigonstack, sig)) {
                sfp = (struct sigframe *)(p->p_sigstk.ss_sp +
                    p->p_sigstk.ss_size - sizeof(struct sigframe));
                p->p_sigstk.ss_flags |= SS_ONSTACK;
        }
        else
                sfp = (struct sigframe *)regs->tf_esp - 1;

        /* Translate the signal if appropriate */
        if (p->p_sysent->sv_sigtbl) {
                if (sig <= p->p_sysent->sv_sigsize)
                        sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
        }

        /* Build the argument list for the signal handler. */
        sf.sf_signum = sig;
        sf.sf_ucontext = (register_t)&sfp->sf_uc;
        if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
                /* Signal handler installed with SA_SIGINFO. */
                sf.sf_siginfo = (register_t)&sfp->sf_si;
                sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;

                /* fill siginfo structure */
                sf.sf_si.si_signo = sig;
                sf.sf_si.si_code = code;
                sf.sf_si.si_addr = (void*)regs->tf_err;
        }
        else {
                /* Old FreeBSD-style arguments. */
                sf.sf_siginfo = code;
                sf.sf_addr = regs->tf_err;
                sf.sf_ahu.sf_handler = catcher;
        }

        /*
         * If we're a vm86 process, we want to save the segment registers.
         * We also change eflags to be our emulated eflags, not the actual
         * eflags.
         */
        if (regs->tf_eflags & PSL_VM) {
                struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
                struct vm86_kernel *vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86;

                sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs;
                sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs;
                sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es;
                sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds;

                if (vm86->vm86_has_vme == 0)
                        sf.sf_uc.uc_mcontext.mc_eflags =
                            (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
                            (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));

                /*
                 * Clear PSL_NT to inhibit T_TSSFLT faults on return from
                 * syscalls made by the signal handler.  This just avoids
                 * wasting time for our lazy fixup of such faults.  PSL_NT
                 * does nothing in vm86 mode, but vm86 programs can set it
                 * almost legitimately in probes for old cpu types.
                 */
                tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
        }

        /*
         * Copy the sigframe out to the user's stack.
         */
        if (copyout(&sf, sfp, sizeof(struct sigframe)) != 0) {
                /*
                 * Something is wrong with the stack pointer.
                 * ...Kill the process.
                 */
                sigexit(p, SIGILL);
        }

        /* Resume in the ABI's trampoline with flat user-mode segments. */
        regs->tf_esp = (int)sfp;
        regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
        regs->tf_eflags &= ~PSL_T;
        regs->tf_cs = _ucodesel;
        regs->tf_ds = _udatasel;
        regs->tf_es = _udatasel;
        regs->tf_ss = _udatasel;
}
  725 
  726 /*
  727  * System call to cleanup state after a signal
  728  * has been taken.  Reset signal mask and
  729  * stack state from context left by sendsig (above).
  730  * Return to previous pc and psl as specified by
  731  * context left by sendsig. Check carefully to
  732  * make sure that the user has not modified the
  733  * state to gain improper privileges.
  734  */
/*
 * EFL_SECURE(ef, oef): true iff the new eflags "ef" differs from the
 * old "oef" only in bits userland may legitimately change
 * (PSL_USERCHANGE).
 * CS_SECURE(cs): true iff the code selector requests user privilege
 * (ring 3), so sigreturn cannot be used to enter a privileged segment.
 */
#define EFL_SECURE(ef, oef)     ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
#define CS_SECURE(cs)           (ISPL(cs) == SEL_UPL)
  737 
/*
 * Old-style (osigcontext) signal return syscall.
 *
 * Restores the register state that the old sendsig saved into the
 * user-level sigcontext, after validating that the user has not edited
 * the saved state to gain improper privileges (a privileged %cs or
 * privileged eflags bits).  Returns EJUSTRETURN on success so the
 * syscall return path leaves the freshly restored trapframe untouched.
 */
int
osigreturn(p, uap)
        struct proc *p;
        struct osigreturn_args /* {
                struct osigcontext *sigcntxp;
        } */ *uap;
{
        register struct osigcontext *scp;
        register struct trapframe *regs = p->p_md.md_regs;
        int eflags;

        scp = uap->sigcntxp;

        /* The whole user-supplied context must be readable. */
        if (!useracc((caddr_t)scp, sizeof (struct osigcontext), VM_PROT_READ))
                return(EFAULT);

        eflags = scp->sc_ps;
        if (eflags & PSL_VM) {
                /* Returning into vm86 mode: the frame has extra segment slots. */
                struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
                struct vm86_kernel *vm86;

                /*
                 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
                 * set up the vm86 area, and we can't enter vm86 mode.
                 */
                if (p->p_addr->u_pcb.pcb_ext == 0)
                        return (EINVAL);
                vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86;
                if (vm86->vm86_inited == 0)
                        return (EINVAL);

                /* go back to user mode if both flags are set */
                if ((eflags & PSL_VIP) && (eflags & PSL_VIF))
                        trapsignal(p, SIGBUS, 0);

                if (vm86->vm86_has_vme) {
                        /* CPU supports VME: VIF/VIP may live in real eflags. */
                        eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
                            (eflags & VME_USERCHANGE) | PSL_VM;
                } else {
                        vm86->vm86_eflags = eflags;     /* save VIF, VIP */
                        eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
                            (eflags & VM_USERCHANGE) | PSL_VM;
                }
                tf->tf_vm86_ds = scp->sc_ds;
                tf->tf_vm86_es = scp->sc_es;
                tf->tf_vm86_fs = scp->sc_fs;
                tf->tf_vm86_gs = scp->sc_gs;
                /* Kernel-visible segment slots revert to the user selectors. */
                tf->tf_ds = _udatasel;
                tf->tf_es = _udatasel;
                tf->tf_fs = _udatasel;
        } else {
                /*
                 * Don't allow users to change privileged or reserved flags.
                 */
                /*
                 * XXX do allow users to change the privileged flag PSL_RF.
                 * The cpu sets PSL_RF in tf_eflags for faults.  Debuggers
                 * should sometimes set it there too.  tf_eflags is kept in
                 * the signal context during signal handling and there is no
                 * other place to remember it, so the PSL_RF bit may be
                 * corrupted by the signal handler without us knowing.
                 * Corruption of the PSL_RF bit at worst causes one more or
                 * one less debugger trap, so allowing it is fairly harmless.
                 */
                if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
                        return(EINVAL);
                }

                /*
                 * Don't allow users to load a valid privileged %cs.  Let the
                 * hardware check for invalid selectors, excess privilege in
                 * other selectors, invalid %eip's and invalid %esp's.
                 */
                if (!CS_SECURE(scp->sc_cs)) {
                        trapsignal(p, SIGBUS, T_PROTFLT);
                        return(EINVAL);
                }
                regs->tf_ds = scp->sc_ds;
                regs->tf_es = scp->sc_es;
                regs->tf_fs = scp->sc_fs;
        }

        /* restore scratch registers */
        regs->tf_eax = scp->sc_eax;
        regs->tf_ebx = scp->sc_ebx;
        regs->tf_ecx = scp->sc_ecx;
        regs->tf_edx = scp->sc_edx;
        regs->tf_esi = scp->sc_esi;
        regs->tf_edi = scp->sc_edi;
        regs->tf_cs = scp->sc_cs;
        regs->tf_ss = scp->sc_ss;
        regs->tf_isp = scp->sc_isp;

        /* Restore the signal-stack flag recorded at delivery time. */
        if (scp->sc_onstack & 01)
                p->p_sigstk.ss_flags |= SS_ONSTACK;
        else
                p->p_sigstk.ss_flags &= ~SS_ONSTACK;

        /* Restore the old-style signal mask, minus unmaskable signals. */
        SIGSETOLD(p->p_sigmask, scp->sc_mask);
        SIG_CANTMASK(p->p_sigmask);
        regs->tf_ebp = scp->sc_fp;
        regs->tf_esp = scp->sc_sp;
        regs->tf_eip = scp->sc_pc;
        regs->tf_eflags = eflags;
        return(EJUSTRETURN);
}
  843 
/*
 * New-style (ucontext_t) signal return syscall.
 *
 * Validates and restores the machine context saved by sendsig.  If the
 * user frame is actually an old-style osigcontext, the call is punted
 * to osigreturn().  As in osigreturn(), the user-supplied eflags and
 * %cs are screened so the user cannot gain improper privileges, and
 * EJUSTRETURN preserves the restored trapframe on the way out.
 */
int
sigreturn(p, uap)
        struct proc *p;
        struct sigreturn_args /* {
                ucontext_t *sigcntxp;
        } */ *uap;
{
        struct trapframe *regs;
        ucontext_t *ucp;
        int cs, eflags;

        ucp = uap->sigcntxp;

        /* First only require the smaller, old-style context to be readable. */
        if (!useracc((caddr_t)ucp, sizeof(struct osigcontext), VM_PROT_READ))
                return (EFAULT);
        /*
         * 0x01d516 is a magic tag identifying an old-style frame --
         * presumably planted in sc_trapno by the old signal trampoline;
         * TODO(review): confirm against osendsig/the osigcode trampoline.
         */
        if (((struct osigcontext *)ucp)->sc_trapno == 0x01d516)
                return (osigreturn(p, (struct osigreturn_args *)uap));

        /*
         * Since ucp is not an osigcontext but a ucontext_t, we have to
         * check again if all of it is accessible.  A ucontext_t is
         * much larger, so instead of just checking for the pointer
         * being valid for the size of an osigcontext, now check for
         * it being valid for a whole, new-style ucontext_t.
         */
        if (!useracc((caddr_t)ucp, sizeof(ucontext_t), VM_PROT_READ))
                return (EFAULT);

        regs = p->p_md.md_regs;
        eflags = ucp->uc_mcontext.mc_eflags;

        if (eflags & PSL_VM) {
                /* Returning into vm86 mode: the frame has extra segment slots. */
                struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
                struct vm86_kernel *vm86;

                /*
                 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
                 * set up the vm86 area, and we can't enter vm86 mode.
                 */
                if (p->p_addr->u_pcb.pcb_ext == 0)
                        return (EINVAL);
                vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86;
                if (vm86->vm86_inited == 0)
                        return (EINVAL);

                /* go back to user mode if both flags are set */
                if ((eflags & PSL_VIP) && (eflags & PSL_VIF))
                        trapsignal(p, SIGBUS, 0);

                if (vm86->vm86_has_vme) {
                        /* CPU supports VME: VIF/VIP may live in real eflags. */
                        eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
                            (eflags & VME_USERCHANGE) | PSL_VM;
                } else {
                        vm86->vm86_eflags = eflags;     /* save VIF, VIP */
                        eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
                            (eflags & VM_USERCHANGE) | PSL_VM;
                }
                /* Bulk-copy the mcontext register block, then patch it up. */
                bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe));
                tf->tf_eflags = eflags;
                tf->tf_vm86_ds = tf->tf_ds;
                tf->tf_vm86_es = tf->tf_es;
                tf->tf_vm86_fs = tf->tf_fs;
                tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs;
                /* Kernel-visible segment slots revert to the user selectors. */
                tf->tf_ds = _udatasel;
                tf->tf_es = _udatasel;
                tf->tf_fs = _udatasel;
        } else {
                /*
                 * Don't allow users to change privileged or reserved flags.
                 */
                /*
                 * XXX do allow users to change the privileged flag PSL_RF.
                 * The cpu sets PSL_RF in tf_eflags for faults.  Debuggers
                 * should sometimes set it there too.  tf_eflags is kept in
                 * the signal context during signal handling and there is no
                 * other place to remember it, so the PSL_RF bit may be
                 * corrupted by the signal handler without us knowing.
                 * Corruption of the PSL_RF bit at worst causes one more or
                 * one less debugger trap, so allowing it is fairly harmless.
                 */
                if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
                        printf("sigreturn: eflags = 0x%x\n", eflags);
                        return(EINVAL);
                }

                /*
                 * Don't allow users to load a valid privileged %cs.  Let the
                 * hardware check for invalid selectors, excess privilege in
                 * other selectors, invalid %eip's and invalid %esp's.
                 */
                cs = ucp->uc_mcontext.mc_cs;
                if (!CS_SECURE(cs)) {
                        printf("sigreturn: cs = 0x%x\n", cs);
                        trapsignal(p, SIGBUS, T_PROTFLT);
                        return(EINVAL);
                }
                bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(struct trapframe));
        }

        /* Restore the signal-stack flag recorded at delivery time. */
        if (ucp->uc_mcontext.mc_onstack & 1)
                p->p_sigstk.ss_flags |= SS_ONSTACK;
        else
                p->p_sigstk.ss_flags &= ~SS_ONSTACK;

        /* Restore the signal mask, minus unmaskable signals. */
        p->p_sigmask = ucp->uc_sigmask;
        SIG_CANTMASK(p->p_sigmask);
        return(EJUSTRETURN);
}
  951 
/*
 * Machine dependent boot() routine
 *
 * I haven't seen anything to put here yet
 * Possibly some stuff might be grafted back here from boot()
 */
void
cpu_boot(int howto)
{
        /* Intentionally empty; howto is currently unused. */
}
  962 
  963 /*
  964  * Shutdown the CPU as much as possible
  965  */
  966 void
  967 cpu_halt(void)
  968 {
  969         for (;;)
  970                 __asm__ ("hlt");
  971 }
  972 
/*
 * Hook to idle the CPU when possible.   This is disabled by default for
 * the SMP case as there is a small window of opportunity whereby a ready
 * process is delayed to the next clock tick.  It should be safe to enable
 * for SMP if power is a concern.
 *
 * On -stable, cpu_idle() is called with interrupts disabled and must
 * return with them enabled.
 */
/* Tunable: non-zero (the default) lets the idle loop HLT to save power. */
static int      cpu_idle_hlt = 1;
SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW,
    &cpu_idle_hlt, 0, "Idle loop HLT enable");
  985 
/*
 * Idle the CPU.  Entered with interrupts disabled; always re-enables
 * them (with STI) before returning to the idle loop.
 */
void
cpu_idle(void)
{

#ifdef SMP
        /* On SMP, another CPU may take over the halt duty instead. */
        if (mp_grab_cpu_hlt())
                return;
#endif

        if (cpu_idle_hlt) {
                /*
                 * We must guarantee that hlt is exactly the instruction
                 * following the sti: STI keeps interrupts inhibited for
                 * one more instruction, so an interrupt arriving in that
                 * window still wakes the HLT instead of being lost.
                 */
                __asm __volatile("sti; hlt");
        } else {
                __asm __volatile("sti");
        }
}
 1005 
/*
 * Clear registers on exec
 *
 * Reset the process's machine state for a freshly exec'd image: load
 * user segment selectors, zero the trapframe and point %eip/%esp at the
 * new entry point and stack, clear any hardware debug registers left
 * over from the old image, and arrange for the FPU/npx to trap on first
 * use.
 */
void
setregs(p, entry, stack, ps_strings)
        struct proc *p;
        u_long entry;
        u_long stack;
        u_long ps_strings;
{
        struct trapframe *regs = p->p_md.md_regs;
        struct pcb *pcb = &p->p_addr->u_pcb;

        /* Reset pc->pcb_gs and %gs before possibly invalidating it. */
        pcb->pcb_gs = _udatasel;
        load_gs(_udatasel);

#ifdef USER_LDT
        /* was i386_user_cleanup() in NetBSD */
        user_ldt_free(pcb);
#endif
  
        bzero((char *)regs, sizeof(struct trapframe));
        regs->tf_eip = entry;
        regs->tf_esp = stack;
        /*
         * NOTE(review): regs was just bzero'd above, so
         * (regs->tf_eflags & PSL_T) is always 0 here; if the intent was
         * to preserve the trace flag across exec, it is defeated --
         * confirm before relying on single-step surviving execve.
         */
        regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T);
        regs->tf_ss = _udatasel;
        regs->tf_ds = _udatasel;
        regs->tf_es = _udatasel;
        regs->tf_fs = _udatasel;
        regs->tf_cs = _ucodesel;

        /* PS_STRINGS value for BSD/OS binaries.  It is 0 for non-BSD/OS. */
        regs->tf_ebx = ps_strings;

        /*
         * Reset the hardware debug registers if they were in use.
         * They won't have any meaning for the newly exec'd process.  
         */
        if (pcb->pcb_flags & PCB_DBREGS) {
                pcb->pcb_dr0 = 0;
                pcb->pcb_dr1 = 0;
                pcb->pcb_dr2 = 0;
                pcb->pcb_dr3 = 0;
                pcb->pcb_dr6 = 0;
                pcb->pcb_dr7 = 0;
                if (pcb == curpcb) {
                        /*
                         * Clear the debug registers on the running
                         * CPU, otherwise they will end up affecting
                         * the next process we switch to.
                         */
                        reset_dbregs();
                }
                pcb->pcb_flags &= ~PCB_DBREGS;
        }

        /*
         * Initialize the math emulator (if any) for the current process.
         * Actually, just clear the bit that says that the emulator has
         * been initialized.  Initialization is delayed until the process
         * traps to the emulator (if it is done at all) mainly because
         * emulators don't provide an entry point for initialization.
         */
        p->p_addr->u_pcb.pcb_flags &= ~FP_SOFTFP;

        /*
         * Arrange to trap the next npx or `fwait' instruction (see npx.c
         * for why fwait must be trapped at least if there is an npx or an
         * emulator).  This is mainly to handle the case where npx0 is not
         * configured, since the npx routines normally set up the trap
         * otherwise.  It should be done only at boot time, but doing it
         * here allows modifying `npx_exists' for testing the emulator on
         * systems with an npx.
         */
        load_cr0(rcr0() | CR0_MP | CR0_TS);

#if NNPX > 0
        /* Initialize the npx (if any) for the current process. */
        npxinit(__INITIAL_NPXCW__);
#endif

      /*
       * XXX - Linux emulator
       * Make sure sure edx is 0x0 on entry. Linux binaries depend
       * on it.
       */
      p->p_retval[1] = 0;
}
 1095 
 1096 void
 1097 cpu_setregs(void)
 1098 {
 1099         unsigned int cr0;
 1100 
 1101         cr0 = rcr0();
 1102         cr0 |= CR0_NE;                  /* Done by npxinit() */
 1103         cr0 |= CR0_MP | CR0_TS;         /* Done at every execve() too. */
 1104 #ifdef I386_CPU
 1105         if (cpu_class != CPUCLASS_386)
 1106 #endif
 1107                 cr0 |= CR0_WP | CR0_AM;
 1108         load_cr0(cr0);
 1109         load_gs(_udatasel);
 1110 }
 1111 
 1112 static int
 1113 sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS)
 1114 {
 1115         int error;
 1116         error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
 1117                 req);
 1118         if (!error && req->newptr)
 1119                 resettodr();
 1120         return (error);
 1121 }
 1122 
/* machdep.adjkerntz: kernel/CMOS offset; a write also updates the RTC. */
SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW,
        &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "");

/* machdep.disable_rtc_set: non-zero blocks writes to the real-time clock. */
SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
        CTLFLAG_RW, &disable_rtc_set, 0, "");

/* machdep.bootinfo: read-only copy of the boot-loader-supplied bootinfo. */
SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo, 
        CTLFLAG_RD, &bootinfo, bootinfo, "");

/* machdep.wall_cmos_clock: non-zero means the CMOS clock holds wall time. */
SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock,
        CTLFLAG_RW, &wall_cmos_clock, 0, "");

extern u_long bootdev;          /* not a dev_t - encoding is different */
SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev,
        CTLFLAG_RD, &bootdev, 0, "Boot device (not in dev_t format)");
 1138 
/*
 * Initialize 386 and configure to run kernel
 */

/*
 * Initialize segments & interrupt table
 */

int _default_ldt;                       /* selector of the default LDT */
union descriptor gdt[NGDT * MAXCPU];    /* global descriptor table */
static struct gate_descriptor idt0[NIDT];
struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */
union descriptor ldt[NLDT];             /* local descriptor table */
#ifdef SMP
/* table descriptors - used to load tables by microp */
struct region_descriptor r_gdt, r_idt;
#endif

#ifndef SMP
extern struct segment_descriptor common_tssd, *tss_gdt;
#endif
int private_tss;                        /* flag indicating private tss */

#if defined(I586_CPU) && !defined(NO_F00F_HACK)
extern int has_f00f_bug;
#endif

/* Dedicated TSS and stack used to survive a double fault. */
static struct i386tss dblfault_tss;
static char dblfault_stack[PAGE_SIZE];

extern  struct user *proc0paddr;

 1171 
/* software prototypes -- in more palatable form */
/*
 * Boot-time GDT layout.  Entry order must match the G*_SEL selector
 * indices used elsewhere; in particular the BIOS low-memory segment
 * must stay at slot 8 (see the GBIOSLOWMEM_SEL entry's own comment).
 */
struct soft_segment_descriptor gdt_segs[] = {
/* GNULL_SEL    0 Null Descriptor */
{       0x0,                    /* segment base address  */
        0x0,                    /* length */
        0,                      /* segment type */
        0,                      /* segment descriptor priority level */
        0,                      /* segment descriptor present */
        0, 0,
        0,                      /* default 32 vs 16 bit size */
        0                       /* limit granularity (byte/page units)*/ },
/* GCODE_SEL    1 Code Descriptor for kernel */
{       0x0,                    /* segment base address  */
        0xfffff,                /* length - all address space */
        SDT_MEMERA,             /* segment type */
        0,                      /* segment descriptor priority level */
        1,                      /* segment descriptor present */
        0, 0,
        1,                      /* default 32 vs 16 bit size */
        1                       /* limit granularity (byte/page units)*/ },
/* GDATA_SEL    2 Data Descriptor for kernel */
{       0x0,                    /* segment base address  */
        0xfffff,                /* length - all address space */
        SDT_MEMRWA,             /* segment type */
        0,                      /* segment descriptor priority level */
        1,                      /* segment descriptor present */
        0, 0,
        1,                      /* default 32 vs 16 bit size */
        1                       /* limit granularity (byte/page units)*/ },
/* GPRIV_SEL    3 SMP Per-Processor Private Data Descriptor */
{       0x0,                    /* segment base address  */
        0xfffff,                /* length - all address space */
        SDT_MEMRWA,             /* segment type */
        0,                      /* segment descriptor priority level */
        1,                      /* segment descriptor present */
        0, 0,
        1,                      /* default 32 vs 16 bit size */
        1                       /* limit granularity (byte/page units)*/ },
/* GPROC0_SEL   4 Proc 0 Tss Descriptor */
{
        0x0,                    /* segment base address */
        sizeof(struct i386tss)-1,/* length - all address space */
        SDT_SYS386TSS,          /* segment type */
        0,                      /* segment descriptor priority level */
        1,                      /* segment descriptor present */
        0, 0,
        0,                      /* unused - default 32 vs 16 bit size */
        0                       /* limit granularity (byte/page units)*/ },
/* GLDT_SEL     5 LDT Descriptor */
{       (int) ldt,              /* segment base address  */
        sizeof(ldt)-1,          /* length - all address space */
        SDT_SYSLDT,             /* segment type */
        SEL_UPL,                /* segment descriptor priority level */
        1,                      /* segment descriptor present */
        0, 0,
        0,                      /* unused - default 32 vs 16 bit size */
        0                       /* limit granularity (byte/page units)*/ },
/* GUSERLDT_SEL 6 User LDT Descriptor per process */
{       (int) ldt,              /* segment base address  */
        (512 * sizeof(union descriptor)-1),             /* length */
        SDT_SYSLDT,             /* segment type */
        0,                      /* segment descriptor priority level */
        1,                      /* segment descriptor present */
        0, 0,
        0,                      /* unused - default 32 vs 16 bit size */
        0                       /* limit granularity (byte/page units)*/ },
/* GTGATE_SEL   7 Null Descriptor - Placeholder */
{       0x0,                    /* segment base address  */
        0x0,                    /* length - all address space */
        0,                      /* segment type */
        0,                      /* segment descriptor priority level */
        0,                      /* segment descriptor present */
        0, 0,
        0,                      /* default 32 vs 16 bit size */
        0                       /* limit granularity (byte/page units)*/ },
/* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */
{       0x400,                  /* segment base address */
        0xfffff,                /* length */
        SDT_MEMRWA,             /* segment type */
        0,                      /* segment descriptor priority level */
        1,                      /* segment descriptor present */
        0, 0,
        1,                      /* default 32 vs 16 bit size */
        1                       /* limit granularity (byte/page units)*/ },
/* GPANIC_SEL   9 Panic Tss Descriptor */
{       (int) &dblfault_tss,    /* segment base address  */
        sizeof(struct i386tss)-1,/* length - all address space */
        SDT_SYS386TSS,          /* segment type */
        0,                      /* segment descriptor priority level */
        1,                      /* segment descriptor present */
        0, 0,
        0,                      /* unused - default 32 vs 16 bit size */
        0                       /* limit granularity (byte/page units)*/ },
/* GBIOSCODE32_SEL 10 BIOS 32-bit interface (32bit Code) */
{       0,                      /* segment base address (overwritten)  */
        0xfffff,                /* length */
        SDT_MEMERA,             /* segment type */
        0,                      /* segment descriptor priority level */
        1,                      /* segment descriptor present */
        0, 0,
        0,                      /* default 32 vs 16 bit size */
        1                       /* limit granularity (byte/page units)*/ },
/* GBIOSCODE16_SEL 11 BIOS 32-bit interface (16bit Code) */
{       0,                      /* segment base address (overwritten)  */
        0xfffff,                /* length */
        SDT_MEMERA,             /* segment type */
        0,                      /* segment descriptor priority level */
        1,                      /* segment descriptor present */
        0, 0,
        0,                      /* default 32 vs 16 bit size */
        1                       /* limit granularity (byte/page units)*/ },
/* GBIOSDATA_SEL 12 BIOS 32-bit interface (Data) */
{       0,                      /* segment base address (overwritten) */
        0xfffff,                /* length */
        SDT_MEMRWA,             /* segment type */
        0,                      /* segment descriptor priority level */
        1,                      /* segment descriptor present */
        0, 0,
        1,                      /* default 32 vs 16 bit size */
        1                       /* limit granularity (byte/page units)*/ },
/* GBIOSUTIL_SEL 13 BIOS 16-bit interface (Utility) */
{       0,                      /* segment base address (overwritten) */
        0xfffff,                /* length */
        SDT_MEMRWA,             /* segment type */
        0,                      /* segment descriptor priority level */
        1,                      /* segment descriptor present */
        0, 0,
        0,                      /* default 32 vs 16 bit size */
        1                       /* limit granularity (byte/page units)*/ },
/* GBIOSARGS_SEL 14 BIOS 16-bit interface (Arguments) */
{       0,                      /* segment base address (overwritten) */
        0xfffff,                /* length */
        SDT_MEMRWA,             /* segment type */
        0,                      /* segment descriptor priority level */
        1,                      /* segment descriptor present */
        0, 0,
        0,                      /* default 32 vs 16 bit size */
        1                       /* limit granularity (byte/page units)*/ },
};
 1311 
/*
 * Default LDT layout.  Most slots are null placeholders that are
 * overwritten at runtime with call gates; only the user code and user
 * data descriptors are populated here.
 */
static struct soft_segment_descriptor ldt_segs[] = {
        /* Null Descriptor - overwritten by call gate */
{       0x0,                    /* segment base address  */
        0x0,                    /* length - all address space */
        0,                      /* segment type */
        0,                      /* segment descriptor priority level */
        0,                      /* segment descriptor present */
        0, 0,
        0,                      /* default 32 vs 16 bit size */
        0                       /* limit granularity (byte/page units)*/ },
        /* Null Descriptor - overwritten by call gate */
{       0x0,                    /* segment base address  */
        0x0,                    /* length - all address space */
        0,                      /* segment type */
        0,                      /* segment descriptor priority level */
        0,                      /* segment descriptor present */
        0, 0,
        0,                      /* default 32 vs 16 bit size */
        0                       /* limit granularity (byte/page units)*/ },
        /* Null Descriptor - overwritten by call gate */
{       0x0,                    /* segment base address  */
        0x0,                    /* length - all address space */
        0,                      /* segment type */
        0,                      /* segment descriptor priority level */
        0,                      /* segment descriptor present */
        0, 0,
        0,                      /* default 32 vs 16 bit size */
        0                       /* limit granularity (byte/page units)*/ },
        /* Code Descriptor for user */
{       0x0,                    /* segment base address  */
        0xfffff,                /* length - all address space */
        SDT_MEMERA,             /* segment type */
        SEL_UPL,                /* segment descriptor priority level */
        1,                      /* segment descriptor present */
        0, 0,
        1,                      /* default 32 vs 16 bit size */
        1                       /* limit granularity (byte/page units)*/ },
        /* Null Descriptor - overwritten by call gate */
{       0x0,                    /* segment base address  */
        0x0,                    /* length - all address space */
        0,                      /* segment type */
        0,                      /* segment descriptor priority level */
        0,                      /* segment descriptor present */
        0, 0,
        0,                      /* default 32 vs 16 bit size */
        0                       /* limit granularity (byte/page units)*/ },
        /* Data Descriptor for user */
{       0x0,                    /* segment base address  */
        0xfffff,                /* length - all address space */
        SDT_MEMRWA,             /* segment type */
        SEL_UPL,                /* segment descriptor priority level */
        1,                      /* segment descriptor present */
        0, 0,
        1,                      /* default 32 vs 16 bit size */
        1                       /* limit granularity (byte/page units)*/ },
};
 1368 
 1369 void
 1370 setidt(idx, func, typ, dpl, selec)
 1371         int idx;
 1372         inthand_t *func;
 1373         int typ;
 1374         int dpl;
 1375         int selec;
 1376 {
 1377         struct gate_descriptor *ip;
 1378 
 1379         ip = idt + idx;
 1380         ip->gd_looffset = (int)func;
 1381         ip->gd_selector = selec;
 1382         ip->gd_stkcpy = 0;
 1383         ip->gd_xx = 0;
 1384         ip->gd_type = typ;
 1385         ip->gd_dpl = dpl;
 1386         ip->gd_p = 1;
 1387         ip->gd_hioffset = ((int)func)>>16 ;
 1388 }
 1389 
/* IDTVEC(name) expands to Xname, the conventional assembly entry label. */
#define IDTVEC(name)    __CONCAT(X,name)

/* Trap/interrupt entry points implemented in assembly (not in this file). */
extern inthand_t
        IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
        IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
        IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
        IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
        IDTVEC(xmm), IDTVEC(syscall), IDTVEC(int0x80_syscall);
 1398 
 1399 void
 1400 sdtossd(sd, ssd)
 1401         struct segment_descriptor *sd;
 1402         struct soft_segment_descriptor *ssd;
 1403 {
 1404         ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
 1405         ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
 1406         ssd->ssd_type  = sd->sd_type;
 1407         ssd->ssd_dpl   = sd->sd_dpl;
 1408         ssd->ssd_p     = sd->sd_p;
 1409         ssd->ssd_def32 = sd->sd_def32;
 1410         ssd->ssd_gran  = sd->sd_gran;
 1411 }
 1412 
/* Entries in the physmap[] array: room for 8 (base, bound) pairs. */
#define PHYSMAP_SIZE    (2 * 8)
 1414 
 1415 /*
 1416  * Populate the (physmap) array with base/bound pairs describing the
 1417  * available physical memory in the system, then test this memory and
 1418  * build the phys_avail array describing the actually-available memory.
 1419  *
 1420  * If we cannot accurately determine the physical memory map, then use
 1421  * value from the 0xE801 call, and failing that, the RTC.
 1422  *
 1423  * Total memory size may be set by the kernel environment variable
 1424  * hw.physmem or the compile-time define MAXMEM.
 1425  *
 1426  * XXX first should be vm_paddr_t.
 1427  */
static void
getmemsize(int first)
{
        int i, physmap_idx, pa_indx;
        int hasbrokenint12;
        u_int basemem, extmem;
        struct vm86frame vmf;
        struct vm86context vmc;
        vm_paddr_t pa, physmap[PHYSMAP_SIZE];
        pt_entry_t *pte;
        const char *cp;
        /* One INT 15:E820 "SMAP" descriptor, as returned by the BIOS. */
        struct {
                u_int64_t base;
                u_int64_t length;
                u_int32_t type;
        } *smap;

        hasbrokenint12 = 0;
        TUNABLE_INT_FETCH("hw.hasbrokenint12", &hasbrokenint12);
        bzero(&vmf, sizeof(struct vm86frame));
        bzero(physmap, sizeof(physmap));
        basemem = 0;

        /*
         * Some newer BIOSes has broken INT 12H implementation which cause
         * kernel panic immediately. In this case, we need to scan SMAP
         * with INT 15:E820 first, then determine base memory size.
         */
        if (hasbrokenint12) {
                goto int15e820;
        }

        /*
         * Perform "base memory" related probes & setup
         */
        vm86_intcall(0x12, &vmf);
        basemem = vmf.vmf_ax;           /* INT 12h returns base memory in KB */
        if (basemem > 640) {
                printf("Preposterous BIOS basemem of %uK, truncating to 640K\n",
                        basemem);
                basemem = 640;
        }

        /*
         * XXX if biosbasemem is now < 640, there is a `hole'
         * between the end of base memory and the start of
         * ISA memory.  The hole may be empty or it may
         * contain BIOS code or data.  Map it read/write so
         * that the BIOS can write to it.  (Memory from 0 to
         * the physical end of the kernel is mapped read-only
         * to begin with and then parts of it are remapped.
         * The parts that aren't remapped form holes that
         * remain read-only and are unused by the kernel.
         * The base memory area is below the physical end of
         * the kernel and right now forms a read-only hole.
         * The part of it from PAGE_SIZE to
         * (trunc_page(biosbasemem * 1024) - 1) will be
         * remapped and used by the kernel later.)
         *
         * This code is similar to the code used in
         * pmap_mapdev, but since no memory needs to be
         * allocated we simply change the mapping.
         */
        for (pa = trunc_page(basemem * 1024);
             pa < ISA_HOLE_START; pa += PAGE_SIZE) {
                pte = vtopte(pa + KERNBASE);
                *pte = pa | PG_RW | PG_V;
        }

        /*
         * if basemem != 640, map pages r/w into vm86 page table so 
         * that the bios can scribble on it.
         */
        pte = (pt_entry_t *)vm86paddr;
        for (i = basemem / 4; i < 160; i++)
                pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U;

int15e820:
        /*
         * map page 1 R/W into the kernel page table so we can use it
         * as a buffer.  The kernel will unmap this page later.
         */
        pte = vtopte(KERNBASE + (1 << PAGE_SHIFT));
        *pte = (1 << PAGE_SHIFT) | PG_RW | PG_V;

        /*
         * get memory map with INT 15:E820
         */
#define SMAPSIZ         sizeof(*smap)
#define SMAP_SIG        0x534D4150                      /* 'SMAP' */

        vmc.npages = 0;
        smap = (void *)vm86_addpage(&vmc, 1, KERNBASE + (1 << PAGE_SHIFT));
        vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di);

        physmap_idx = 0;
        vmf.vmf_ebx = 0;                /* continuation value; 0 restarts */
        do {
                vmf.vmf_eax = 0xE820;
                vmf.vmf_edx = SMAP_SIG;
                vmf.vmf_ecx = SMAPSIZ;
                i = vm86_datacall(0x15, &vmf, &vmc);
                if (i || vmf.vmf_eax != SMAP_SIG)
                        break;
                if (boothowto & RB_VERBOSE)
                        printf("SMAP type=%02x base=%016llx len=%016llx\n",
                            smap->type, smap->base, smap->length);

                /* Only type 1 (usable RAM) descriptors are of interest. */
                if (smap->type != 0x01)
                        goto next_run;

                if (smap->length == 0)
                        goto next_run;

#ifndef PAE
                if (smap->base >= 0xffffffff) {
                        printf("%uK of memory above 4GB ignored\n",
                            (u_int)(smap->length / 1024));
                        goto next_run;
                }
#endif

                for (i = 0; i <= physmap_idx; i += 2) {
                        if (smap->base < physmap[i + 1]) {
                                if (boothowto & RB_VERBOSE)
                                        printf(
        "Overlapping or non-montonic memory region, ignoring second region\n");
                                goto next_run;
                        }
                }

                /* Coalesce a region that abuts the previous one. */
                if (smap->base == physmap[physmap_idx + 1]) {
                        physmap[physmap_idx + 1] += smap->length;
                        goto next_run;
                }

                physmap_idx += 2;
                if (physmap_idx == PHYSMAP_SIZE) {
                        printf(
                "Too many segments in the physical address map, giving up\n");
                        break;
                }
                physmap[physmap_idx] = smap->base;
                physmap[physmap_idx + 1] = smap->base + smap->length;
next_run: ;
        } while (vmf.vmf_ebx != 0);

        /*
         * Perform "base memory" related probes & setup based on SMAP
         */
        if (basemem == 0) {
                for (i = 0; i <= physmap_idx; i += 2) {
                        if (physmap[i] == 0x00000000) {
                                basemem = physmap[i + 1] / 1024;
                                break;
                        }
                }

                if (basemem == 0) {
                        basemem = 640;
                }

                if (basemem > 640) {
                        printf("Preposterous BIOS basemem of %uK, truncating to 640K\n",
                                basemem);
                        basemem = 640;
                }

                /* Same base-memory hole remap as the INT 12h path above. */
                for (pa = trunc_page(basemem * 1024);
                     pa < ISA_HOLE_START; pa += PAGE_SIZE) {
                        pte = vtopte(pa + KERNBASE);
                        *pte = pa | PG_RW | PG_V;
                }

                pte = (pt_entry_t *)vm86paddr;
                for (i = basemem / 4; i < 160; i++)
                        pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U;
        }

        if (physmap[1] != 0)
                goto physmap_done;

        /*
         * If we failed above, try memory map with INT 15:E801
         */
        vmf.vmf_ax = 0xE801;
        if (vm86_intcall(0x15, &vmf) == 0) {
                /* %cx: 1K blocks below 16MB; %dx: 64K blocks above. */
                extmem = vmf.vmf_cx + vmf.vmf_dx * 64;
        } else {
#if 0
                vmf.vmf_ah = 0x88;
                vm86_intcall(0x15, &vmf);
                extmem = vmf.vmf_ax;
#else
                /*
                 * Prefer the RTC value for extended memory.
                 */
                extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8);
#endif
        }

        /*
         * Special hack for chipsets that still remap the 384k hole when
         * there's 16MB of memory - this really confuses people that
         * are trying to use bus mastering ISA controllers with the
         * "16MB limit"; they only have 16MB, but the remapping puts
         * them beyond the limit.
         *
         * If extended memory is between 15-16MB (16-17MB phys address range),
         *      chop it to 15MB.
         */
        if ((extmem > 15 * 1024) && (extmem < 16 * 1024))
                extmem = 15 * 1024;

        physmap[0] = 0;
        physmap[1] = basemem * 1024;
        physmap_idx = 2;
        physmap[physmap_idx] = 0x100000;
        physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024;

physmap_done:
        /*
         * Now, physmap contains a map of physical memory.
         */

#ifdef SMP
        /* make hole for AP bootstrap code */
        physmap[1] = mp_bootaddress(physmap[1] / 1024);

        /* look for the MP hardware - needed for apic addresses */
        mp_probe();
#endif

        /*
         * Maxmem isn't the "maximum memory", it's one larger than the
         * highest page of the physical address space.  It should be
         * called something like "Maxphyspage".  We may adjust this 
         * based on ``hw.physmem'' and the results of the memory test.
         */
        Maxmem = atop(physmap[physmap_idx + 1]);

#ifdef MAXMEM
        Maxmem = MAXMEM / 4;    /* MAXMEM is in KB; /4 gives pages (4K) */
#endif

        /*
         * hw.physmem is a size in bytes; we also allow k, m, and g suffixes
         * for the appropriate modifiers.  This overrides MAXMEM.
         */
        if ((cp = getenv("hw.physmem")) != NULL) {
                u_int64_t AllowMem, sanity;
                char *ep;

                sanity = AllowMem = strtouq(cp, &ep, 0);
                if ((ep != cp) && (*ep != 0)) {
                        switch(*ep) {
                        case 'g':
                        case 'G':
                                AllowMem <<= 10;
                                /* FALLTHROUGH */
                        case 'm':
                        case 'M':
                                AllowMem <<= 10;
                                /* FALLTHROUGH */
                        case 'k':
                        case 'K':
                                AllowMem <<= 10;
                                break;
                        default:
                                AllowMem = sanity = 0;
                        }
                        /* Shift overflow implies an absurd value; reject. */
                        if (AllowMem < sanity)
                                AllowMem = 0;
                }
                if (AllowMem == 0)
                        printf("Ignoring invalid memory size of '%s'\n", cp);
                else
                        Maxmem = atop(AllowMem);
        }

        if (atop(physmap[physmap_idx + 1]) != Maxmem &&
            (boothowto & RB_VERBOSE))
                printf("Physical memory use set to %uK\n", Maxmem * 4);

        /*
         * If Maxmem has been increased beyond what the system has detected,
         * extend the last memory segment to the new limit.
         */ 
        if (atop(physmap[physmap_idx + 1]) < Maxmem)
                physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem);

        /*
         * Size up each available chunk of physical memory.
         */
        physmap[0] = PAGE_SIZE;         /* mask off page 0 */
        pa_indx = 0;
        phys_avail[pa_indx++] = physmap[0];
        phys_avail[pa_indx] = physmap[0];
        pte = vtopte(KERNBASE + PAGE_SIZE);

        /*
         * physmap is in bytes, so when converting to page boundaries,
         * round up the start address and round down the end address.
         */
        for (i = 0; i <= physmap_idx; i += 2) {
                vm_paddr_t end;

                end = ptoa((vm_paddr_t)Maxmem);
                if (physmap[i + 1] < end)
                        end = trunc_page(physmap[i + 1]);
                for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
                        int tmp, page_bad;
                        /* Window at KERNBASE+PAGE_SIZE remapped per page. */
                        volatile int *ptr = (int *)(KERNBASE + PAGE_SIZE);

                        /*
                         * block out kernel memory as not available.
                         */
                        if (pa >= 0x100000 && pa < first)
                                continue;

                        page_bad = FALSE;

                        /*
                         * map page into kernel: valid, read/write,non-cacheable
                         */
                        *pte = pa | PG_V | PG_RW | PG_N;
                        invltlb();

                        tmp = *ptr;
                        /*
                         * Test for alternating 1's and 0's
                         */
                        *ptr = 0xaaaaaaaa;
                        if (*ptr != 0xaaaaaaaa) {
                                page_bad = TRUE;
                        }
                        /*
                         * Test for alternating 0's and 1's
                         */
                        *ptr = 0x55555555;
                        if (*ptr != 0x55555555) {
                                page_bad = TRUE;
                        }
                        /*
                         * Test for all 1's
                         */
                        *ptr = 0xffffffff;
                        if (*ptr != 0xffffffff) {
                                page_bad = TRUE;
                        }
                        /*
                         * Test for all 0's
                         */
                        *ptr = 0x0;
                        if (*ptr != 0x0) {
                                page_bad = TRUE;
                        }
                        /*
                         * Restore original value.
                         */
                        *ptr = tmp;

                        /*
                         * Adjust array of valid/good pages.
                         */
                        if (page_bad == TRUE) {
                                continue;
                        }
                        /*
                         * If this good page is a continuation of the
                         * previous set of good pages, then just increase
                         * the end pointer. Otherwise start a new chunk.
                         * Note that "end" points one higher than end,
                         * making the range >= start and < end.
                         * If we're also doing a speculative memory
                         * test and we at or past the end, bump up Maxmem
                         * so that we keep going. The first bad page
                         * will terminate the loop.
                         */
                        if (phys_avail[pa_indx] == pa) {
                                phys_avail[pa_indx] += PAGE_SIZE;
                        } else {
                                pa_indx++;
                                if (pa_indx == PHYS_AVAIL_ARRAY_END) {
                                        printf("Too many holes in the physical address space, giving up\n");
                                        pa_indx--;
                                        break;
                                }
                                phys_avail[pa_indx++] = pa;     /* start */
                                phys_avail[pa_indx] = pa + PAGE_SIZE;   /* end */
                        }
                        physmem++;
                }
        }
        /* Tear down the temporary test mapping. */
        *pte = 0;
        invltlb();

        /*
         * XXX
         * The last chunk must contain at least one page plus the message
         * buffer to avoid complicating other code (message buffer address
         * calculation, etc.).
         */
        while (phys_avail[pa_indx - 1] + PAGE_SIZE +
            round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) {
                physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
                phys_avail[pa_indx--] = 0;
                phys_avail[pa_indx--] = 0;
        }

        Maxmem = atop(phys_avail[pa_indx]);

        /* Trim off space for the message buffer. */
        phys_avail[pa_indx] -= round_page(MSGBUF_SIZE);

        avail_end = phys_avail[pa_indx];  /* msgbuf is mapped here later */
}
 1843 
/*
 * init386: machine-dependent initialization run very early in boot.
 * Sets up the GDT, LDT, IDT, TSS and double-fault TSS, initializes the
 * console, sizes physical memory (getmemsize), bootstraps the pmap and
 * prepares proc0's pcb for the transfer to user mode.
 * 'first' is the first physical address available past the kernel image.
 */
void
init386(first)
        int first;
{
        struct gate_descriptor *gdp;
        int gsel_tss, metadata_missing, off, x;
#ifndef SMP
        /* table descriptors - used to load tables by microp */
        struct region_descriptor r_gdt, r_idt;
#endif

        /*
         * Prevent lowering of the ipl if we call tsleep() early.
         */
        safepri = cpl;

        proc0.p_addr = proc0paddr;

        atdevbase = ISA_HOLE_START + KERNBASE;

        /* Locate loader(8)-provided metadata and environment, if any. */
        metadata_missing = 0;
        if (bootinfo.bi_modulep) {
                preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE;
                preload_bootstrap_relocate(KERNBASE);
        } else {
                metadata_missing = 1;
        }
        if (bootinfo.bi_envp)
                kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE;

        /* Init basic tunables, hz etc */
        init_param1();

        /*
         * make gdt memory segments, the code segment goes up to end of the
         * page with etext in it, the data segment goes to the end of
         * the address space
         */
        /*
         * XXX text protection is temporarily (?) disabled.  The limit was
         * i386_btop(round_page(etext)) - 1.
         */
        gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1);
        gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1);
#ifdef SMP
        gdt_segs[GPRIV_SEL].ssd_limit =
                atop(sizeof(struct privatespace) - 1);
        gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[0];
        gdt_segs[GPROC0_SEL].ssd_base =
                (int) &SMP_prvspace[0].globaldata.gd_common_tss;
        SMP_prvspace[0].globaldata.gd_prvspace = &SMP_prvspace[0];
#else
        gdt_segs[GPRIV_SEL].ssd_limit = atop(0 - 1);
        gdt_segs[GPROC0_SEL].ssd_base = (int) &common_tss;
#endif

        for (x = 0; x < NGDT; x++) {
#ifdef BDE_DEBUGGER
                /* avoid overwriting db entries with APM ones */
                if (x >= GAPMCODE32_SEL && x <= GAPMDATA_SEL)
                        continue;
#endif
                ssdtosd(&gdt_segs[x], &gdt[x].sd);
        }

        r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
        r_gdt.rd_base =  (int) gdt;
        lgdt(&r_gdt);

        /* make ldt memory segments */
        /*
         * XXX - VM_MAXUSER_ADDRESS is an end address, not a max.  And it
         * should be spelled ...MAX_USER...
         */
        ldt_segs[LUCODE_SEL].ssd_limit = atop(VM_MAXUSER_ADDRESS - 1);
        ldt_segs[LUDATA_SEL].ssd_limit = atop(VM_MAXUSER_ADDRESS - 1);
        for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++)
                ssdtosd(&ldt_segs[x], &ldt[x].sd);

        _default_ldt = GSEL(GLDT_SEL, SEL_KPL);
        lldt(_default_ldt);
#ifdef USER_LDT
        currentldt = _default_ldt;
#endif

        /* exceptions */
        /* Default every vector to the reserved-trap handler first. */
        for (x = 0; x < NIDT; x++)
                setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
        setidt(0, &IDTVEC(div),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
        setidt(1, &IDTVEC(dbg),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
        setidt(2, &IDTVEC(nmi),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
        /* #BP and #OF use SEL_UPL so user code may raise them directly. */
        setidt(3, &IDTVEC(bpt),  SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
        setidt(4, &IDTVEC(ofl),  SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
        setidt(5, &IDTVEC(bnd),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
        setidt(6, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
        setidt(7, &IDTVEC(dna),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
        /* Double fault goes through its own task gate (GPANIC_SEL). */
        setidt(8, 0,  SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL));
        setidt(9, &IDTVEC(fpusegm),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
        setidt(10, &IDTVEC(tss),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
        setidt(11, &IDTVEC(missing),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
        setidt(12, &IDTVEC(stk),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
        setidt(13, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
        setidt(14, &IDTVEC(page),  SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
        setidt(15, &IDTVEC(rsvd),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
        setidt(16, &IDTVEC(fpu),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
        setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
        setidt(18, &IDTVEC(mchk),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
        setidt(19, &IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
        setidt(0x80, &IDTVEC(int0x80_syscall),
                        SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));

        r_idt.rd_limit = sizeof(idt0) - 1;
        r_idt.rd_base = (int) idt;
        lidt(&r_idt);

        /*
         * Initialize the console before we print anything out.
         */
        cninit();

        if (metadata_missing)
                printf("WARNING: loader(8) metadata is missing!\n");

        /* XXX historical: #include inside a function body, for NISA only. */
#include        "isa.h"
#if     NISA >0
        isa_defaultirq();
#endif
        rand_initialize();

#ifdef DDB
        kdb_init();
        if (boothowto & RB_KDB)
                Debugger("Boot flags requested debugger");
#endif

        finishidentcpu();       /* Final stage of CPU initialization */
        /*
         * NOTE(review): gates 6 (#UD) and 13 (#GP) are re-installed here,
         * presumably because finishidentcpu() used temporary handlers for
         * CPU probing -- confirm against identcpu.c.
         */
        setidt(6, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
        setidt(13, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
        initializecpu();        /* Initialize CPU registers */

        /* make an initial tss so cpu can get interrupt stack on syscall! */
        common_tss.tss_esp0 = (int) proc0.p_addr + UPAGES*PAGE_SIZE - 16;
        common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ;
        gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
        private_tss = 0;
        tss_gdt = &gdt[GPROC0_SEL].sd;
        common_tssd = *tss_gdt;
        common_tss.tss_ioopt = (sizeof common_tss) << 16;
        ltr(gsel_tss);

        /* Dedicated task state for taking double faults on a known stack. */
        dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 =
            dblfault_tss.tss_esp2 = (int) &dblfault_stack[sizeof(dblfault_stack)];
        dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
            dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
#ifdef PAE
        dblfault_tss.tss_cr3 = (int)IdlePDPT - KERNBASE;
#else
        dblfault_tss.tss_cr3 = (int)IdlePTD;
#endif
        dblfault_tss.tss_eip = (int) dblfault_handler;
        dblfault_tss.tss_eflags = PSL_KERNEL;
        dblfault_tss.tss_ds = dblfault_tss.tss_es =
            dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL);
        dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL);
        dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
        dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);

        vm86_initialize();
        getmemsize(first);
        pmap_bootstrap(first, 0);
        init_param2(physmem);

        /* now running on new page tables, configured,and u/iom is accessible */

        /* Map the message buffer. */
        for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE)
                pmap_kenter((vm_offset_t)msgbufp + off, avail_end + off);

        msgbufinit(msgbufp, MSGBUF_SIZE);

        /* make a call gate to reenter kernel with */
        gdp = &ldt[LSYS5CALLS_SEL].gd;

        x = (int) &IDTVEC(syscall);
        /* NOTE(review): the post-increment of x here is never used. */
        gdp->gd_looffset = x++;
        gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL);
        gdp->gd_stkcpy = 1;
        gdp->gd_type = SDT_SYS386CGT;
        gdp->gd_dpl = SEL_UPL;
        gdp->gd_p = 1;
        gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >>16;

        /* XXX does this work? */
        ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL];
        ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL];

        /* transfer to user mode */

        _ucodesel = LSEL(LUCODE_SEL, SEL_UPL);
        _udatasel = LSEL(LUDATA_SEL, SEL_UPL);

        /* setup proc 0's pcb */
        proc0.p_addr->u_pcb.pcb_flags = 0;
#ifdef PAE
        proc0.p_addr->u_pcb.pcb_cr3 = (int)IdlePDPT - KERNBASE;
#else
        proc0.p_addr->u_pcb.pcb_cr3 = (int)IdlePTD;
#endif
#ifdef SMP
        proc0.p_addr->u_pcb.pcb_mpnest = 1;
#endif
        proc0.p_addr->u_pcb.pcb_ext = 0;
        proc0.p_md.md_regs = &proc0_tf;
}
 2058 
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
static void f00f_hack(void *unused);
SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL);

/*
 * Workaround for the Intel Pentium "F00F" erratum (invalid lockup on the
 * LOCK CMPXCHG8B instruction encoding 0xF0 0x0F 0xC7 0xC8).  The IDT is
 * relocated into freshly allocated memory whose first page is then made
 * read-only, which converts the lockup into a recoverable fault.
 * Run once at SI_SUB_INTRINSIC via SYSINIT; a no-op unless the CPU probe
 * set has_f00f_bug.
 */
static void
f00f_hack(void *unused) {
        struct gate_descriptor *new_idt;
#ifndef SMP
        struct region_descriptor r_idt;
#endif
        vm_offset_t tmp;

        if (!has_f00f_bug)
                return;

        printf("Intel Pentium detected, installing workaround for F00F bug\n");

        r_idt.rd_limit = sizeof(idt0) - 1;

        tmp = kmem_alloc(kernel_map, PAGE_SIZE * 2);
        if (tmp == 0)
                panic("kmem_alloc returned 0");
        if (((unsigned int)tmp & (PAGE_SIZE-1)) != 0)
                panic("kmem_alloc returned non-page-aligned memory");
        /* Put the first seven entries in the lower page */
        /* (each gate descriptor is 8 bytes, so 7*8 bytes precede the page
         * boundary; the remaining entries land in the second page). */
        new_idt = (struct gate_descriptor*)(tmp + PAGE_SIZE - (7*8));
        bcopy(idt, new_idt, sizeof(idt0));
        r_idt.rd_base = (int)new_idt;
        lidt(&r_idt);           /* switch the CPU to the relocated IDT */
        idt = new_idt;
        /* Write-protect the first page containing the low IDT entries. */
        if (vm_map_protect(kernel_map, tmp, tmp + PAGE_SIZE,
                           VM_PROT_READ, FALSE) != KERN_SUCCESS)
                panic("vm_map_protect failed");
        return;
}
#endif /* defined(I586_CPU) && !NO_F00F_HACK */
 2095 
 2096 int
 2097 ptrace_set_pc(p, addr)
 2098         struct proc *p;
 2099         unsigned long addr;
 2100 {
 2101         p->p_md.md_regs->tf_eip = addr;
 2102         return (0);
 2103 }
 2104 
 2105 int
 2106 ptrace_single_step(p)
 2107         struct proc *p;
 2108 {
 2109         p->p_md.md_regs->tf_eflags |= PSL_T;
 2110         return (0);
 2111 }
 2112 
/*
 * Decide whether a ptrace read of [addr, addr + len) from the user
 * area should be permitted.  Only two regions are readable: the
 * beginning of struct user, and the saved trapframe.  Returns 0 when
 * the range is allowed, EPERM otherwise.
 */
int ptrace_read_u_check(p, addr, len)
        struct proc *p;
        vm_offset_t addr;
        size_t len;
{
        vm_offset_t gap;

        /* Reject ranges whose end wraps past the top of the address space. */
        if ((vm_offset_t) (addr + len) < addr)
                return EPERM;
        /* Reads that stay within the start of struct user are always OK. */
        if ((vm_offset_t) (addr + len) <= sizeof(struct user))
                return 0;

        /* Byte offset of the saved trapframe within the user area. */
        gap = (char *) p->p_md.md_regs - (char *) p->p_addr;
        
        if ((vm_offset_t) addr < gap)
                return EPERM;
        /* Permit only if the range lies entirely inside the trapframe. */
        if ((vm_offset_t) (addr + len) <= 
            (vm_offset_t) (gap + sizeof(struct trapframe)))
                return 0;
        return EPERM;
}
 2134 
/*
 * Perform a ptrace write of one word ('data') at offset 'off' in the
 * user area.  Only two windows are writable: the saved trapframe
 * (subject to eflags/%cs security checks) and the FPU save area in
 * the pcb.  Returns 0 on success, EINVAL for an insecure register
 * value, EFAULT for an offset outside the writable windows.
 */
int ptrace_write_u(p, off, data)
        struct proc *p;
        vm_offset_t off;
        long data;
{
        struct trapframe frame_copy;
        vm_offset_t min;
        struct trapframe *tp;

        /*
         * Privileged kernel state is scattered all over the user area.
         * Only allow write access to parts of regs and to fpregs.
         */
        min = (char *)p->p_md.md_regs - (char *)p->p_addr;
        if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) {
                tp = p->p_md.md_regs;
                /*
                 * Apply the write to a copy first so the security checks
                 * see the final eflags/%cs values before anything is
                 * committed to the live frame.
                 */
                frame_copy = *tp;
                *(int *)((char *)&frame_copy + (off - min)) = data;
                if (!EFL_SECURE(frame_copy.tf_eflags, tp->tf_eflags) ||
                    !CS_SECURE(frame_copy.tf_cs))
                        return (EINVAL);
                *(int*)((char *)p->p_addr + off) = data;
                return (0);
        }
        /* FPU state (pcb_save) is harmless and freely writable. */
        min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_save);
        if (off >= min && off <= min + sizeof(union savefpu) - sizeof(int)) {
                *(int*)((char *)p->p_addr + off) = data;
                return (0);
        }
        return (EFAULT);
}
 2166 
 2167 int
 2168 fill_regs(p, regs)
 2169         struct proc *p;
 2170         struct reg *regs;
 2171 {
 2172         struct pcb *pcb;
 2173         struct trapframe *tp;
 2174 
 2175         tp = p->p_md.md_regs;
 2176         regs->r_fs = tp->tf_fs;
 2177         regs->r_es = tp->tf_es;
 2178         regs->r_ds = tp->tf_ds;
 2179         regs->r_edi = tp->tf_edi;
 2180         regs->r_esi = tp->tf_esi;
 2181         regs->r_ebp = tp->tf_ebp;
 2182         regs->r_ebx = tp->tf_ebx;
 2183         regs->r_edx = tp->tf_edx;
 2184         regs->r_ecx = tp->tf_ecx;
 2185         regs->r_eax = tp->tf_eax;
 2186         regs->r_eip = tp->tf_eip;
 2187         regs->r_cs = tp->tf_cs;
 2188         regs->r_eflags = tp->tf_eflags;
 2189         regs->r_esp = tp->tf_esp;
 2190         regs->r_ss = tp->tf_ss;
 2191         pcb = &p->p_addr->u_pcb;
 2192         regs->r_gs = pcb->pcb_gs;
 2193         return (0);
 2194 }
 2195 
 2196 int
 2197 set_regs(p, regs)
 2198         struct proc *p;
 2199         struct reg *regs;
 2200 {
 2201         struct pcb *pcb;
 2202         struct trapframe *tp;
 2203 
 2204         tp = p->p_md.md_regs;
 2205         if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) ||
 2206             !CS_SECURE(regs->r_cs))
 2207                 return (EINVAL);
 2208         tp->tf_fs = regs->r_fs;
 2209         tp->tf_es = regs->r_es;
 2210         tp->tf_ds = regs->r_ds;
 2211         tp->tf_edi = regs->r_edi;
 2212         tp->tf_esi = regs->r_esi;
 2213         tp->tf_ebp = regs->r_ebp;
 2214         tp->tf_ebx = regs->r_ebx;
 2215         tp->tf_edx = regs->r_edx;
 2216         tp->tf_ecx = regs->r_ecx;
 2217         tp->tf_eax = regs->r_eax;
 2218         tp->tf_eip = regs->r_eip;
 2219         tp->tf_cs = regs->r_cs;
 2220         tp->tf_eflags = regs->r_eflags;
 2221         tp->tf_esp = regs->r_esp;
 2222         tp->tf_ss = regs->r_ss;
 2223         pcb = &p->p_addr->u_pcb;
 2224         pcb->pcb_gs = regs->r_gs;
 2225         return (0);
 2226 }
 2227 
 2228 #ifdef CPU_ENABLE_SSE
 2229 static void
 2230 fill_fpregs_xmm(sv_xmm, sv_87)
 2231         struct savexmm *sv_xmm;
 2232         struct save87 *sv_87;
 2233 {
 2234         register struct env87 *penv_87 = &sv_87->sv_env;
 2235         register struct envxmm *penv_xmm = &sv_xmm->sv_env;
 2236         int i;
 2237 
 2238         /* FPU control/status */
 2239         penv_87->en_cw = penv_xmm->en_cw;
 2240         penv_87->en_sw = penv_xmm->en_sw;
 2241         penv_87->en_tw = penv_xmm->en_tw;
 2242         penv_87->en_fip = penv_xmm->en_fip;
 2243         penv_87->en_fcs = penv_xmm->en_fcs;
 2244         penv_87->en_opcode = penv_xmm->en_opcode;
 2245         penv_87->en_foo = penv_xmm->en_foo;
 2246         penv_87->en_fos = penv_xmm->en_fos;
 2247 
 2248         /* FPU registers */
 2249         for (i = 0; i < 8; ++i)
 2250                 sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc;
 2251 
 2252         sv_87->sv_ex_sw = sv_xmm->sv_ex_sw;
 2253 }
 2254 
 2255 static void
 2256 set_fpregs_xmm(sv_87, sv_xmm)
 2257         struct save87 *sv_87;
 2258         struct savexmm *sv_xmm;
 2259 {
 2260         register struct env87 *penv_87 = &sv_87->sv_env;
 2261         register struct envxmm *penv_xmm = &sv_xmm->sv_env;
 2262         int i;
 2263 
 2264         /* FPU control/status */
 2265         penv_xmm->en_cw = penv_87->en_cw;
 2266         penv_xmm->en_sw = penv_87->en_sw;
 2267         penv_xmm->en_tw = penv_87->en_tw;
 2268         penv_xmm->en_fip = penv_87->en_fip;
 2269         penv_xmm->en_fcs = penv_87->en_fcs;
 2270         penv_xmm->en_opcode = penv_87->en_opcode;
 2271         penv_xmm->en_foo = penv_87->en_foo;
 2272         penv_xmm->en_fos = penv_87->en_fos;
 2273 
 2274         /* FPU registers */
 2275         for (i = 0; i < 8; ++i)
 2276                 sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i];
 2277 
 2278         sv_xmm->sv_ex_sw = sv_87->sv_ex_sw;
 2279 }
 2280 #endif /* CPU_ENABLE_SSE */
 2281 
 2282 int
 2283 fill_fpregs(p, fpregs)
 2284         struct proc *p;
 2285         struct fpreg *fpregs;
 2286 {
 2287 #ifdef CPU_ENABLE_SSE
 2288         if (cpu_fxsr) {
 2289                 fill_fpregs_xmm(&p->p_addr->u_pcb.pcb_save.sv_xmm,
 2290                                                 (struct save87 *)fpregs);
 2291                 return (0);
 2292         }
 2293 #endif /* CPU_ENABLE_SSE */
 2294         bcopy(&p->p_addr->u_pcb.pcb_save.sv_87, fpregs, sizeof *fpregs);
 2295         return (0);
 2296 }
 2297 
 2298 int
 2299 set_fpregs(p, fpregs)
 2300         struct proc *p;
 2301         struct fpreg *fpregs;
 2302 {
 2303 #ifdef CPU_ENABLE_SSE
 2304         if (cpu_fxsr) {
 2305                 set_fpregs_xmm((struct save87 *)fpregs,
 2306                                            &p->p_addr->u_pcb.pcb_save.sv_xmm);
 2307                 return (0);
 2308         }
 2309 #endif /* CPU_ENABLE_SSE */
 2310         bcopy(fpregs, &p->p_addr->u_pcb.pcb_save.sv_87, sizeof *fpregs);
 2311         return (0);
 2312 }
 2313 
 2314 int
 2315 fill_dbregs(p, dbregs)
 2316         struct proc *p;
 2317         struct dbreg *dbregs;
 2318 {
 2319         struct pcb *pcb;
 2320 
 2321         if (p == NULL) {
 2322                 dbregs->dr0 = rdr0();
 2323                 dbregs->dr1 = rdr1();
 2324                 dbregs->dr2 = rdr2();
 2325                 dbregs->dr3 = rdr3();
 2326                 dbregs->dr4 = rdr4();
 2327                 dbregs->dr5 = rdr5();
 2328                 dbregs->dr6 = rdr6();
 2329                 dbregs->dr7 = rdr7();
 2330         }
 2331         else {
 2332                 pcb = &p->p_addr->u_pcb;
 2333                 dbregs->dr0 = pcb->pcb_dr0;
 2334                 dbregs->dr1 = pcb->pcb_dr1;
 2335                 dbregs->dr2 = pcb->pcb_dr2;
 2336                 dbregs->dr3 = pcb->pcb_dr3;
 2337                 dbregs->dr4 = 0;
 2338                 dbregs->dr5 = 0;
 2339                 dbregs->dr6 = pcb->pcb_dr6;
 2340                 dbregs->dr7 = pcb->pcb_dr7;
 2341         }
 2342         return (0);
 2343 }
 2344 
/*
 * Install the debug registers from *dbregs.  With p == NULL the values
 * are loaded straight into the CPU; otherwise they are validated,
 * stored in the process's pcb, and PCB_DBREGS is set so the saved
 * values will be loaded on context switch.
 *
 * Returns 0 on success, or EINVAL if dr7 contains an undefined bit
 * pattern or (for non-superuser callers) a breakpoint address outside
 * the user address space.
 */
int
set_dbregs(p, dbregs)
        struct proc *p;
        struct dbreg *dbregs;
{
        struct pcb *pcb;
        int i;
        u_int32_t mask1, mask2;

        if (p == NULL) {
                /* No process context: load the CPU registers directly. */
                load_dr0(dbregs->dr0);
                load_dr1(dbregs->dr1);
                load_dr2(dbregs->dr2);
                load_dr3(dbregs->dr3);
                load_dr4(dbregs->dr4);
                load_dr5(dbregs->dr5);
                load_dr6(dbregs->dr6);
                load_dr7(dbregs->dr7);
        }
        else {
                /*
                 * Don't let an illegal value for dr7 get set.  Specifically,
                 * check for undefined settings.  Setting these bit patterns
                 * result in undefined behaviour and can lead to an unexpected
                 * TRCTRAP.
                 *
                 * The loop walks the eight 2-bit R/W and LEN fields of the
                 * four breakpoints (bits 16..31 of dr7), rejecting the
                 * reserved binary "10" encoding in any of them.
                 */
                for (i = 0, mask1 = 0x3<<16, mask2 = 0x2<<16; i < 8; 
                     i++, mask1 <<= 2, mask2 <<= 2)
                        if ((dbregs->dr7 & mask1) == mask2)
                                return (EINVAL);
                
                pcb = &p->p_addr->u_pcb;
                
                /*
                 * Don't let a process set a breakpoint that is not within the
                 * process's address space.  If a process could do this, it
                 * could halt the system by setting a breakpoint in the kernel
                 * (if ddb was enabled).  Thus, we need to check to make sure
                 * that no breakpoints are being enabled for addresses outside
                 * process's address space, unless, perhaps, we were called by
                 * uid 0.
                 *
                 * XXX - what about when the watched area of the user's
                 * address space is written into from within the kernel
                 * ... wouldn't that still cause a breakpoint to be generated
                 * from within kernel mode?
                 */
                
                if (suser(p) != 0) {
                        /* Non-superuser: each enabled breakpoint (L/G bit
                         * pair in the low byte of dr7) must point below
                         * VM_MAXUSER_ADDRESS. */
                        if (dbregs->dr7 & 0x3) {
                                /* dr0 is enabled */
                                if (dbregs->dr0 >= VM_MAXUSER_ADDRESS)
                                        return (EINVAL);
                        }
                        
                        if (dbregs->dr7 & (0x3<<2)) {
                                /* dr1 is enabled */
                                if (dbregs->dr1 >= VM_MAXUSER_ADDRESS)
                                        return (EINVAL);
                        }
                        
                        if (dbregs->dr7 & (0x3<<4)) {
                                /* dr2 is enabled */
                                if (dbregs->dr2 >= VM_MAXUSER_ADDRESS)
                                        return (EINVAL);
                        }
                        
                        if (dbregs->dr7 & (0x3<<6)) {
                                /* dr3 is enabled */
                                if (dbregs->dr3 >= VM_MAXUSER_ADDRESS)
                                        return (EINVAL);
                        }
                }
                
                pcb->pcb_dr0 = dbregs->dr0;
                pcb->pcb_dr1 = dbregs->dr1;
                pcb->pcb_dr2 = dbregs->dr2;
                pcb->pcb_dr3 = dbregs->dr3;
                pcb->pcb_dr6 = dbregs->dr6;
                pcb->pcb_dr7 = dbregs->dr7;
                
                /* Tell the context switch code to load these on switch-in. */
                pcb->pcb_flags |= PCB_DBREGS;
        }

        return (0);
}
 2431 
 2432 /*
 2433  * Return > 0 if a hardware breakpoint has been hit, and the
 2434  * breakpoint was in user space.  Return 0, otherwise.
 2435  */
 2436 int
 2437 user_dbreg_trap(void)
 2438 {
 2439         u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */
 2440         u_int32_t bp;       /* breakpoint bits extracted from dr6 */
 2441         int nbp;            /* number of breakpoints that triggered */
 2442         caddr_t addr[4];    /* breakpoint addresses */
 2443         int i;
 2444         
 2445         dr7 = rdr7();
 2446         if ((dr7 & 0x000000ff) == 0) {
 2447                 /*
 2448                  * all GE and LE bits in the dr7 register are zero,
 2449                  * thus the trap couldn't have been caused by the
 2450                  * hardware debug registers
 2451                  */
 2452                 return 0;
 2453         }
 2454 
 2455         nbp = 0;
 2456         dr6 = rdr6();
 2457         bp = dr6 & 0x0000000f;
 2458 
 2459         if (!bp) {
 2460                 /*
 2461                  * None of the breakpoint bits are set meaning this
 2462                  * trap was not caused by any of the debug registers
 2463                  */
 2464                 return 0;
 2465         }
 2466 
 2467         /*
 2468          * at least one of the breakpoints were hit, check to see
 2469          * which ones and if any of them are user space addresses
 2470          */
 2471 
 2472         if (bp & 0x01) {
 2473                 addr[nbp++] = (caddr_t)rdr0();
 2474         }
 2475         if (bp & 0x02) {
 2476                 addr[nbp++] = (caddr_t)rdr1();
 2477         }
 2478         if (bp & 0x04) {
 2479                 addr[nbp++] = (caddr_t)rdr2();
 2480         }
 2481         if (bp & 0x08) {
 2482                 addr[nbp++] = (caddr_t)rdr3();
 2483         }
 2484 
 2485         for (i=0; i<nbp; i++) {
 2486                 if (addr[i] <
 2487                     (caddr_t)VM_MAXUSER_ADDRESS) {
 2488                         /*
 2489                          * addr[i] is in user space
 2490                          */
 2491                         return nbp;
 2492                 }
 2493         }
 2494 
 2495         /*
 2496          * None of the breakpoints are in user space.
 2497          */
 2498         return 0;
 2499 }
 2500 
 2501 
 2502 #ifndef DDB
/*
 * Stub for kernels built without DDB: log that the debugger was
 * requested (with the caller's message) and return, instead of
 * dropping into an interactive debugger session.
 */
void
Debugger(const char *msg)
{
        printf("Debugger(\"%s\") called.\n", msg);
}
 2508 #endif /* no DDB */
 2509 
 2510 #include <sys/disklabel.h>
 2511 
 2512 /*
 2513  * Determine the size of the transfer, and make sure it is
 2514  * within the boundaries of the partition. Adjust transfer
 2515  * if needed, and signal errors or early completion.
 2516  */
 2517 int
 2518 bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel)
 2519 {
 2520         struct partition *p = lp->d_partitions + dkpart(bp->b_dev);
 2521         int labelsect = lp->d_partitions[0].p_offset;
 2522         int maxsz = p->p_size,
 2523                 sz = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT;
 2524 
 2525         /* overwriting disk label ? */
 2526         /* XXX should also protect bootstrap in first 8K */
 2527         if (bp->b_blkno + p->p_offset <= LABELSECTOR + labelsect &&
 2528 #if LABELSECTOR != 0
 2529             bp->b_blkno + p->p_offset + sz > LABELSECTOR + labelsect &&
 2530 #endif
 2531             (bp->b_flags & B_READ) == 0 && wlabel == 0) {
 2532                 bp->b_error = EROFS;
 2533                 goto bad;
 2534         }
 2535 
 2536 #if     defined(DOSBBSECTOR) && defined(notyet)
 2537         /* overwriting master boot record? */
 2538         if (bp->b_blkno + p->p_offset <= DOSBBSECTOR &&
 2539             (bp->b_flags & B_READ) == 0 && wlabel == 0) {
 2540                 bp->b_error = EROFS;
 2541                 goto bad;
 2542         }
 2543 #endif
 2544 
 2545         /* beyond partition? */
 2546         if (bp->b_blkno < 0 || bp->b_blkno + sz > maxsz) {
 2547                 /* if exactly at end of disk, return an EOF */
 2548                 if (bp->b_blkno == maxsz) {
 2549                         bp->b_resid = bp->b_bcount;
 2550                         return(0);
 2551                 }
 2552                 /* or truncate if part of it fits */
 2553                 sz = maxsz - bp->b_blkno;
 2554                 if (sz <= 0) {
 2555                         bp->b_error = EINVAL;
 2556                         goto bad;
 2557                 }
 2558                 bp->b_bcount = sz << DEV_BSHIFT;
 2559         }
 2560 
 2561         bp->b_pblkno = bp->b_blkno + p->p_offset;
 2562         return(1);
 2563 
 2564 bad:
 2565         bp->b_flags |= B_ERROR;
 2566         return(-1);
 2567 }
 2568 
 2569 #ifdef DDB
 2570 
 2571 /*
 2572  * Provide inb() and outb() as functions.  They are normally only
 2573  * available as macros calling inlined functions, thus cannot be
 2574  * called inside DDB.
 2575  *
 2576  * The actual code is stolen from <machine/cpufunc.h>, and de-inlined.
 2577  */
 2578 
 2579 #undef inb
 2580 #undef outb
 2581 
 2582 /* silence compiler warnings */
 2583 u_char inb(u_int);
 2584 void outb(u_int, u_char);
 2585 
 2586 u_char
 2587 inb(u_int port)
 2588 {
 2589         u_char  data;
 2590         /*
 2591          * We use %%dx and not %1 here because i/o is done at %dx and not at
 2592          * %edx, while gcc generates inferior code (movw instead of movl)
 2593          * if we tell it to load (u_short) port.
 2594          */
 2595         __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
 2596         return (data);
 2597 }
 2598 
/*
 * Write one byte to an I/O port.  Out-of-line variant for DDB use;
 * the normal outb() is an inline macro from <machine/cpufunc.h>.
 */
void
outb(u_int port, u_char data)
{
        u_char  al;
        /*
         * Use an unnecessary assignment to help gcc's register allocator.
         * This make a large difference for gcc-1.40 and a tiny difference
         * for gcc-2.6.0.  For gcc-1.40, al had to be ``asm("ax")'' for
         * best results.  gcc-2.6.0 can't handle this.
         * NOTE(review): intentionally left as-is; do not "simplify".
         */
        al = data;
        __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
}
 2612 
 2613 #endif /* DDB */

Cache object: f75339b86403efe4c805413a9a202a2b


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.