The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/i386/i386/machdep.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1992 Terrence R. Lambert.
    3  * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
    4  * All rights reserved.
    5  *
    6  * This code is derived from software contributed to Berkeley by
    7  * William Jolitz.
    8  *
    9  * Redistribution and use in source and binary forms, with or without
   10  * modification, are permitted provided that the following conditions
   11  * are met:
   12  * 1. Redistributions of source code must retain the above copyright
   13  *    notice, this list of conditions and the following disclaimer.
   14  * 2. Redistributions in binary form must reproduce the above copyright
   15  *    notice, this list of conditions and the following disclaimer in the
   16  *    documentation and/or other materials provided with the distribution.
   17  * 3. All advertising materials mentioning features or use of this software
   18  *    must display the following acknowledgement:
   19  *      This product includes software developed by the University of
   20  *      California, Berkeley and its contributors.
   21  * 4. Neither the name of the University nor the names of its contributors
   22  *    may be used to endorse or promote products derived from this software
   23  *    without specific prior written permission.
   24  *
   25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   35  * SUCH DAMAGE.
   36  *
   37  *      from: @(#)machdep.c     7.4 (Berkeley) 6/3/91
   38  * $FreeBSD: src/sys/i386/i386/machdep.c,v 1.209.2.26 1999/09/05 08:11:10 peter Exp $
   39  */
   40 
   41 #include "npx.h"
   42 #include "opt_bounce.h"
   43 #include "opt_cpu.h"
   44 #include "opt_ddb.h"
   45 #include "opt_machdep.h"
   46 #include "opt_msgbuf.h"
   47 #include "opt_perfmon.h"
   48 #include "opt_sysvipc.h"
   49 #include "opt_userconfig.h"
   50 
   51 #include <sys/param.h>
   52 #include <sys/systm.h>
   53 #include <sys/sysproto.h>
   54 #include <sys/signalvar.h>
   55 #include <sys/kernel.h>
   56 #include <sys/proc.h>
   57 #include <sys/buf.h>
   58 #include <sys/reboot.h>
   59 #include <sys/conf.h>
   60 #include <sys/file.h>
   61 #include <sys/callout.h>
   62 #include <sys/malloc.h>
   63 #include <sys/mbuf.h>
   64 #include <sys/mount.h>
   65 #include <sys/msgbuf.h>
   66 #include <sys/ioctl.h>
   67 #include <sys/sysent.h>
   68 #include <sys/tty.h>
   69 #include <sys/sysctl.h>
   70 #include <sys/vmmeter.h>
   71 
   72 #ifdef SYSVSHM
   73 #include <sys/shm.h>
   74 #endif
   75 
   76 #ifdef SYSVMSG
   77 #include <sys/msg.h>
   78 #endif
   79 
   80 #ifdef SYSVSEM
   81 #include <sys/sem.h>
   82 #endif
   83 
   84 #include <vm/vm.h>
   85 #include <vm/vm_param.h>
   86 #include <vm/vm_prot.h>
   87 #include <vm/lock.h>
   88 #include <vm/vm_kern.h>
   89 #include <vm/vm_object.h>
   90 #include <vm/vm_page.h>
   91 #include <vm/vm_map.h>
   92 #include <vm/vm_pager.h>
   93 #include <vm/vm_extern.h>
   94 
   95 #include <sys/user.h>
   96 #include <sys/exec.h>
   97 #include <sys/vnode.h>
   98 
   99 #include <ddb/ddb.h>
  100 
  101 #include <net/netisr.h>
  102 
  103 #include <machine/cpu.h>
  104 #include <machine/npx.h>
  105 #include <machine/reg.h>
  106 #include <machine/psl.h>
  107 #include <machine/clock.h>
  108 #include <machine/specialreg.h>
  109 #include <machine/sysarch.h>
  110 #include <machine/cons.h>
  111 #include <machine/bootinfo.h>
  112 #include <machine/md_var.h>
  113 #ifdef PERFMON
  114 #include <machine/perfmon.h>
  115 #endif
  116 
  117 #include <i386/isa/isa_device.h>
  118 #include <i386/isa/rtc.h>
  119 #include <machine/random.h>
  120 
/*
 * Forward declarations and early global state for machine-dependent
 * startup.  The __P() macro wraps prototypes for pre-ANSI compilers.
 */
  121 extern void init386 __P((int first));
  122 extern int ptrace_set_pc __P((struct proc *p, unsigned int addr));
  123 extern int ptrace_single_step __P((struct proc *p));
  124 extern int ptrace_write_u __P((struct proc *p, vm_offset_t off, int data));
  125 extern void dblfault_handler __P((void));
  126 
  127 extern void printcpuinfo(void); /* XXX header file */
  128 extern void earlysetcpuclass(void);     /* same header file */
  129 extern void finishidentcpu(void);
  130 extern void panicifcpuunsupported(void);
  131 extern void initializecpu(void);
  132 
  133 static void cpu_startup __P((void *));
/* Run cpu_startup() first in the SI_SUB_CPU stage of system initialization. */
  134 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)
  135 
  136 
/* Bounce-buffer state for ISA DMA below 16MB (only with BOUNCE_BUFFERS). */
  137 #ifdef BOUNCE_BUFFERS
  138 extern char *bouncememory;
  139 extern int maxbkva;
  140 #ifdef BOUNCEPAGES
  141 int     bouncepages = BOUNCEPAGES;
  142 #else
  143 int     bouncepages = 0;
  144 #endif
  145 #endif  /* BOUNCE_BUFFERS */
  146 
  147 extern int freebufspace;
/* User-mode data/code segment selectors (presumably set during early init). */
  148 int _udatasel, _ucodesel;
  149 u_int   atdevbase;
  150 
/* Total physical memory in pages; 0 until sized at startup. */
  151 int physmem = 0;
/* Nonzero while the system is still cold-booting. */
  152 int cold = 1;
  153 
  154 static int
  155 sysctl_hw_physmem SYSCTL_HANDLER_ARGS
  156 {
  157         int error = sysctl_handle_int(oidp, 0, ctob(physmem), req);
  158         return (error);
  159 }
  160 
  161 SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD,
  162         0, 0, sysctl_hw_physmem, "I", "");
  163 
  164 static int
  165 sysctl_hw_usermem SYSCTL_HANDLER_ARGS
  166 {
  167         int error = sysctl_handle_int(oidp, 0,
  168                 ctob(physmem - cnt.v_wire_count), req);
  169         return (error);
  170 }
  171 
  172 SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD,
  173         0, 0, sysctl_hw_usermem, "I", "");
  174 
/* Boot flags and memory sizing, filled in during early machine setup. */
  175 int boothowto = 0, bootverbose = 0, Maxmem = 0;
  176 long dumplo;
  177 extern int bootdev;
  178 
/*
 * Pairs of (start, end) physical addresses of usable RAM chunks,
 * terminated by a 0/0 pair (see PHYS_AVAIL_ARRAY_END below).
 */
  179 vm_offset_t phys_avail[10];
  180 
  181 /* must be 2 less so 0 0 can signal end of chunks */
  182 #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2)
  183 
  184 static void setup_netisrs __P((struct linker_set *)); /* XXX declare elsewhere */
  185 
/* KVA ranges carved out of the clean submap for buffer and pager use. */
  186 static vm_offset_t buffer_sva, buffer_eva;
  187 vm_offset_t clean_sva, clean_eva;
  188 static vm_offset_t pager_sva, pager_eva;
  189 extern struct linker_set netisr_set;
  190 
/* Byte offset of `member' within `type' (classic null-pointer idiom). */
  191 #define offsetof(type, member)  ((size_t)(&((type *)0)->member))
  192 
/*
 * cpu_startup - machine-dependent startup code, run once at boot via
 * SYSINIT (SI_SUB_CPU): announce the CPU and memory configuration,
 * size and allocate the kernel's variable-sized tables (two-pass
 * valloc scheme below), create the kernel VM submaps, allocate the
 * mbuf pool, and initialize callouts, netisrs and the buffer cache.
 */
  193 static void
  194 cpu_startup(dummy)
  195         void *dummy;
  196 {
  197         register unsigned i;
  198         register caddr_t v;
  199         vm_offset_t maxaddr;
  200         vm_size_t size = 0;
  201         int firstaddr;
  202         vm_offset_t minaddr;
  203 
  204         if (boothowto & RB_VERBOSE)
  205                 bootverbose++;
  206 
  207         /*
  208          * Good {morning,afternoon,evening,night}.
  209          */
  210         printf(version);
  211         earlysetcpuclass();
  212         startrtclock();
  213         printcpuinfo();
  214         panicifcpuunsupported();
  215 #ifdef PERFMON
  216         perfmon_init();
  217 #endif
  218         printf("real memory  = %d (%dK bytes)\n", ptoa(Maxmem), ptoa(Maxmem) / 1024);
  219         /*
  220          * Display any holes after the first chunk of extended memory.
  221          */
  222         if (bootverbose) {
  223                 int indx;
  224 
  225                 printf("Physical memory chunk(s):\n");
  226                 for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
  227                         int size = phys_avail[indx + 1] - phys_avail[indx];
  228 
  229                         printf("0x%08lx - 0x%08lx, %d bytes (%d pages)\n", phys_avail[indx],
  230                             phys_avail[indx + 1] - 1, size, size / PAGE_SIZE);
  231                 }
  232         }
  233 
  234         /*
  235          * Quickly wire in netisrs.
  236          */
  237         setup_netisrs(&netisr_set);
  238 
  239         /*
  240          * Allocate space for system data structures.
  241          * The first available kernel virtual address is in "v".
  242          * As pages of kernel virtual memory are allocated, "v" is incremented.
  243          * As pages of memory are allocated and cleared,
  244          * "firstaddr" is incremented.
  245          * An index into the kernel page table corresponding to the
  246          * virtual memory address maintained in "v" is kept in "mapaddr".
  247          */
  248 
  249         /*
  250          * Make two passes.  The first pass calculates how much memory is
  251          * needed and allocates it.  The second pass assigns virtual
  252          * addresses to the various data structures.
  253          */
  254         firstaddr = 0;
  255 again:
  256         v = (caddr_t)firstaddr;
  257 
/* Bump-pointer "allocation": advance v past `num' objects of `type'. */
  258 #define valloc(name, type, num) \
  259             (name) = (type *)v; v = (caddr_t)((name)+(num))
  260 #define valloclim(name, type, num, lim) \
  261             (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
  262         valloc(callout, struct callout, ncallout);
  263 #ifdef SYSVSHM
  264         valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
  265 #endif
  266 #ifdef SYSVSEM
  267         valloc(sema, struct semid_ds, seminfo.semmni);
  268         valloc(sem, struct sem, seminfo.semmns);
  269         /* This is pretty disgusting! */
  270         valloc(semu, int, (seminfo.semmnu * seminfo.semusz) / sizeof(int));
  271 #endif
  272 #ifdef SYSVMSG
  273         valloc(msgpool, char, msginfo.msgmax);
  274         valloc(msgmaps, struct msgmap, msginfo.msgseg);
  275         valloc(msghdrs, struct msg, msginfo.msgtql);
  276         valloc(msqids, struct msqid_ds, msginfo.msgmni);
  277 #endif
  278 
/* Auto-size the buffer cache from physical memory if not set by config. */
  279         if (nbuf == 0) {
  280                 nbuf = 30;
  281                 if( physmem > 1024)
  282                         nbuf += min((physmem - 1024) / 8, 2048);
  283         }
  284         nswbuf = max(min(nbuf/4, 128), 16);
  285 
  286         valloc(swbuf, struct buf, nswbuf);
  287         valloc(buf, struct buf, nbuf);
  288 
  289 #ifdef BOUNCE_BUFFERS
  290         /*
  291          * If there is more than 16MB of memory, allocate some bounce buffers
  292          */
/* NOTE: 4096 here is a page count (16MB assuming 4K pages) — confirm. */
  293         if (Maxmem > 4096) {
  294                 if (bouncepages == 0) {
  295                         bouncepages = 64;
  296                 }
  297                 v = (caddr_t)((vm_offset_t)round_page(v));
  298                 valloc(bouncememory, char, bouncepages * PAGE_SIZE);
  299         }
  300 #endif
  301 
  302         /*
  303          * End of first pass, size has been calculated so allocate memory
  304          */
  305         if (firstaddr == 0) {
  306                 size = (vm_size_t)(v - firstaddr);
  307                 firstaddr = (int)kmem_alloc(kernel_map, round_page(size));
  308                 if (firstaddr == 0)
  309                         panic("startup: no room for tables");
  310                 goto again;
  311         }
  312 
  313         /*
  314          * End of second pass, addresses have been assigned
  315          */
  316         if ((vm_size_t)(v - firstaddr) != size)
  317                 panic("startup: table size inconsistency");
  318 
/*
 * Carve the kernel VM submaps: clean_map holds buffer and pager KVA
 * (plus bounce-buffer KVA when configured), with buffer_map and
 * pager_map sub-allocated from it.
 */
  319 #ifdef BOUNCE_BUFFERS
  320         clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
  321                         (nbuf*BKVASIZE) + (nswbuf*MAXPHYS) +
  322                                 maxbkva + pager_map_size, TRUE);
  323         io_map = kmem_suballoc(clean_map, &minaddr, &maxaddr, maxbkva, FALSE);
  324 #else
  325         clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
  326                         (nbuf*BKVASIZE) + (nswbuf*MAXPHYS) + pager_map_size, TRUE);
  327 #endif
  328         buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva,
  329                                 (nbuf*BKVASIZE), TRUE);
  330         pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva,
  331                                 (nswbuf*MAXPHYS) + pager_map_size, TRUE);
  332         exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
  333                                 (16*ARG_MAX), TRUE);
  334         u_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
  335                                 (maxproc*UPAGES*PAGE_SIZE), FALSE);
  336 
  337         /*
  338          * Finally, allocate mbuf pool.  Since mclrefcnt is an off-size
  339          * we use the more space efficient malloc in place of kmem_alloc.
  340          */
  341         {
  342                 vm_offset_t mb_map_size;
  343 
  344                 mb_map_size = nmbufs * MSIZE + nmbclusters * MCLBYTES;
  345                 mb_map_size = roundup2(mb_map_size, max(MCLBYTES, PAGE_SIZE));
  346                 mclrefcnt = malloc(mb_map_size / MCLBYTES, M_MBUF, M_NOWAIT);
  347                 bzero(mclrefcnt, mb_map_size / MCLBYTES);
  348                 mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr,
  349                         mb_map_size, FALSE);
  350         }
  351 
  352         /*
  353          * Initialize callouts
  354          */
/* Thread all callout entries onto the free list. */
  355         callfree = callout;
  356         for (i = 1; i < ncallout; i++)
  357                 callout[i-1].c_next = &callout[i];
  358 
  359 #if defined(USERCONFIG)
  360 #if defined(USERCONFIG_BOOT)
  361         if (1) {
  362 #else
  363         if (boothowto & RB_CONFIG) {
  364 #endif
  365                 userconfig();
  366                 cninit();       /* the preferred console may have changed */
  367         }
  368 #endif
  369 
  370 #ifdef BOUNCE_BUFFERS
  371         /*
  372          * init bounce buffers
  373          */
  374         vm_bounce_init();
  375 #endif
  376 
  377         printf("avail memory = %d (%dK bytes)\n", ptoa(cnt.v_free_count),
  378             ptoa(cnt.v_free_count) / 1024);
  379 
  380         /*
  381          * Set up buffers, so they can be used to read disk labels.
  382          */
  383         bufinit();
  384         vm_pager_bufferinit();
  385 }
  386 
  387 int
  388 register_netisr(num, handler)
  389         int num;
  390         netisr_t *handler;
  391 {
  392         
  393         if (num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs)) ) {
  394                 printf("register_netisr: bad isr number: %d\n", num);
  395                 return (EINVAL);
  396         }
  397         netisrs[num] = handler;
  398         return (0);
  399 }
  400 
  401 static void
  402 setup_netisrs(ls)
  403         struct linker_set *ls;
  404 {
  405         int i;
  406         const struct netisrtab *nit;
  407 
  408         for(i = 0; ls->ls_items[i]; i++) {
  409                 nit = (const struct netisrtab *)ls->ls_items[i];
  410                 register_netisr(nit->nit_num, nit->nit_isr);
  411         }
  412 }
  413 
  414 
  415 /*
  416  * Send an interrupt to process.
  417  *
  418  * Stack is set up to allow sigcode stored
  419  * at top to call routine, followed by kcall
  420  * to sigreturn routine below.  After sigreturn
  421  * resets the signal mask, the stack, and the
  422  * frame pointer, it returns to the user
  423  * specified pc, psl.
  424  */
  425 void
  426 sendsig(catcher, sig, mask, code)
  427         sig_t catcher;
  428         int sig, mask;
  429         u_long code;
  430 {
  431         register struct proc *p = curproc;
  432         register int *regs;
  433         register struct sigframe *fp;
  434         struct sigframe sf;
  435         struct sigacts *psp = p->p_sigacts;
  436         int oonstack;
  437 
  438         regs = p->p_md.md_regs;
  439         oonstack = psp->ps_sigstk.ss_flags & SS_ONSTACK;
  440         /*
  441          * Allocate and validate space for the signal handler context.
  442          */
/*
 * Place the frame on the alternate signal stack if one is configured,
 * we are not already on it, and this signal asked for it; otherwise
 * carve the frame just below the current user stack pointer.
 */
  443         if ((psp->ps_flags & SAS_ALTSTACK) && !oonstack &&
  444             (psp->ps_sigonstack & sigmask(sig))) {
  445                 fp = (struct sigframe *)(psp->ps_sigstk.ss_sp +
  446                     psp->ps_sigstk.ss_size - sizeof(struct sigframe));
  447                 psp->ps_sigstk.ss_flags |= SS_ONSTACK;
  448         } else {
  449                 fp = (struct sigframe *)regs[tESP] - 1;
  450         }
  451 
  452         /*
  453          * grow() will return FALSE if the fp will not fit inside the stack
  454          *      and the stack can not be grown. useracc will return FALSE
  455          *      if access is denied.
  456          */
  457         if ((grow(p, (int)fp) == FALSE) ||
  458             (useracc((caddr_t)fp, sizeof (struct sigframe), B_WRITE) == FALSE)) {
  459                 /*
  460                  * Process has trashed its stack; give it an illegal
  461                  * instruction to halt it in its tracks.
  462                  */
  463                 SIGACTION(p, SIGILL) = SIG_DFL;
  464                 sig = sigmask(SIGILL);
  465                 p->p_sigignore &= ~sig;
  466                 p->p_sigcatch &= ~sig;
  467                 p->p_sigmask &= ~sig;
  468                 psignal(p, SIGILL);
  469                 return;
  470         }
  471 
  472         /*
  473          * Build the argument list for the signal handler.
  474          */
/* Translate the signal number through the ABI's table if one exists
 * (presumably for emulated binaries — confirm against sysent users). */
  475         if (p->p_sysent->sv_sigtbl) {
  476                 if (sig < p->p_sysent->sv_sigsize)
  477                         sig = p->p_sysent->sv_sigtbl[sig];
  478                 else
  479                         sig = p->p_sysent->sv_sigsize + 1;
  480         }
  481         sf.sf_signum = sig;
  482         sf.sf_code = code;
  483         sf.sf_scp = &fp->sf_sc;
  484         sf.sf_addr = (char *) regs[tERR];
  485         sf.sf_handler = catcher;
  486 
  487         /* save scratch registers */
  488         sf.sf_sc.sc_eax = regs[tEAX];
  489         sf.sf_sc.sc_ebx = regs[tEBX];
  490         sf.sf_sc.sc_ecx = regs[tECX];
  491         sf.sf_sc.sc_edx = regs[tEDX];
  492         sf.sf_sc.sc_esi = regs[tESI];
  493         sf.sf_sc.sc_edi = regs[tEDI];
  494         sf.sf_sc.sc_cs = regs[tCS];
  495         sf.sf_sc.sc_ds = regs[tDS];
  496         sf.sf_sc.sc_ss = regs[tSS];
  497         sf.sf_sc.sc_es = regs[tES];
  498         sf.sf_sc.sc_isp = regs[tISP];
  499 
  500         /*
  501          * Build the signal context to be used by sigreturn.
  502          */
  503         sf.sf_sc.sc_onstack = oonstack;
  504         sf.sf_sc.sc_mask = mask;
  505         sf.sf_sc.sc_sp = regs[tESP];
  506         sf.sf_sc.sc_fp = regs[tEBP];
  507         sf.sf_sc.sc_pc = regs[tEIP];
  508         sf.sf_sc.sc_ps = regs[tEFLAGS];
  509 
  510         /*
  511          * Copy the sigframe out to the user's stack.
  512          */
  513         if (copyout(&sf, fp, sizeof(struct sigframe)) != 0) {
  514                 /*
  515                  * Something is wrong with the stack pointer.
  516                  * ...Kill the process.
  517                  */
  518                 sigexit(p, SIGILL);
  519         };
  520 
/*
 * Redirect the process to the signal trampoline (sigcode), which lives
 * szsigcode bytes below PS_STRINGS, with user-mode segment selectors
 * and VM86 mode cleared.
 */
  521         regs[tESP] = (int)fp;
  522         regs[tEIP] = (int)(((char *)PS_STRINGS) - *(p->p_sysent->sv_szsigcode));
  523         regs[tEFLAGS] &= ~PSL_VM;
  524         regs[tCS] = _ucodesel;
  525         regs[tDS] = _udatasel;
  526         regs[tES] = _udatasel;
  527         regs[tSS] = _udatasel;
  528 }
  529 
  530 /*
  531  * System call to cleanup state after a signal
  532  * has been taken.  Reset signal mask and
  533  * stack state from context left by sendsig (above).
  534  * Return to previous pc and psl as specified by
  535  * context left by sendsig. Check carefully to
  536  * make sure that the user has not modified the
  537  * state to gain improper privileges.
  538  */
  539 int
  540 sigreturn(p, uap, retval)
  541         struct proc *p;
  542         struct sigreturn_args /* {
  543                 struct sigcontext *sigcntxp;
  544         } */ *uap;
  545         int *retval;
  546 {
  547         register struct sigcontext *scp;
  548         register struct sigframe *fp;
  549         register int *regs = p->p_md.md_regs;
  550         int eflags;
  551 
  552         /*
  553          * (XXX old comment) regs[tESP] points to the return address.
  554          * The user scp pointer is above that.
  555          * The return address is faked in the signal trampoline code
  556          * for consistency.
  557          */
/* Recover the sigframe address from the user's sigcontext pointer. */
  558         scp = uap->sigcntxp;
  559         fp = (struct sigframe *)
  560              ((caddr_t)scp - offsetof(struct sigframe, sf_sc));
  561 
  562         if (useracc((caddr_t)fp, sizeof (*fp), B_WRITE) == 0)
  563                 return(EINVAL);
  564 
  565         /*
  566          * Don't allow users to change privileged or reserved flags.
  567          */
  568 #define EFLAGS_SECURE(ef, oef)  ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
  569         eflags = scp->sc_ps;
  570         /*
  571          * XXX do allow users to change the privileged flag PSL_RF.  The
  572          * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
  573          * sometimes set it there too.  tf_eflags is kept in the signal
  574          * context during signal handling and there is no other place
  575          * to remember it, so the PSL_RF bit may be corrupted by the
  576          * signal handler without us knowing.  Corruption of the PSL_RF
  577          * bit at worst causes one more or one less debugger trap, so
  578          * allowing it is fairly harmless.
  579          */
  580         if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs[tEFLAGS] & ~PSL_RF)) {
  581 #ifdef DEBUG
  582                 printf("sigreturn: eflags = 0x%x\n", eflags);
  583 #endif
  584                 return(EINVAL);
  585         }
  586 
  587         /*
  588          * Don't allow users to load a valid privileged %cs.  Let the
  589          * hardware check for invalid selectors, excess privilege in
  590          * other selectors, invalid %eip's and invalid %esp's.
  591          */
  592 #define CS_SECURE(cs)   (ISPL(cs) == SEL_UPL)
  593         if (!CS_SECURE(scp->sc_cs)) {
  594 #ifdef DEBUG
  595                 printf("sigreturn: cs = 0x%x\n", scp->sc_cs);
  596 #endif
  597                 trapsignal(p, SIGBUS, T_PROTFLT);
  598                 return(EINVAL);
  599         }
  600 
  601         /* restore scratch registers */
  602         regs[tEAX] = scp->sc_eax;
  603         regs[tEBX] = scp->sc_ebx;
  604         regs[tECX] = scp->sc_ecx;
  605         regs[tEDX] = scp->sc_edx;
  606         regs[tESI] = scp->sc_esi;
  607         regs[tEDI] = scp->sc_edi;
  608         regs[tCS] = scp->sc_cs;
  609         regs[tDS] = scp->sc_ds;
  610         regs[tES] = scp->sc_es;
  611         regs[tSS] = scp->sc_ss;
  612         regs[tISP] = scp->sc_isp;
  613 
/*
 * NOTE(review): the scratch registers above are written into the
 * trapframe *before* this access check, so an EINVAL return here
 * leaves the frame partially updated — confirm this is intended.
 */
  614         if (useracc((caddr_t)scp, sizeof (*scp), B_WRITE) == 0)
  615                 return(EINVAL);
  616 
/* Restore signal-stack state, signal mask, and user pc/sp/fp/flags. */
  617         if (scp->sc_onstack & 01)
  618                 p->p_sigacts->ps_sigstk.ss_flags |= SS_ONSTACK;
  619         else
  620                 p->p_sigacts->ps_sigstk.ss_flags &= ~SS_ONSTACK;
  621         p->p_sigmask = scp->sc_mask & ~sigcantmask;
  622         regs[tEBP] = scp->sc_fp;
  623         regs[tESP] = scp->sc_sp;
  624         regs[tEIP] = scp->sc_pc;
  625         regs[tEFLAGS] = eflags;
  626         return(EJUSTRETURN);
  627 }
  628 
  629 /*
  630  * Machine dependent boot() routine
  631  *
  632  * I haven't seen anything to put here yet
  633  * Possibly some stuff might be grafted back here from boot()
  634  */
  635 void
  636 cpu_boot(int howto)
  637 {
  638 }
  639 
  640 /*
  641  * Shutdown the CPU as much as possible
  642  */
  643 void
  644 cpu_halt(void)
  645 {
  646         for (;;)
  647                 __asm__ ("hlt");
  648 }
  649 
  650 /*
  651  * Clear registers on exec
  652  */
/*
 * Reset the process's user register state for a fresh image: zero the
 * trapframe, point pc/sp at the new entry point and stack, load user
 * segment selectors, and reset FPU/emulator state.  Also discards any
 * per-process LDT left over from the previous image (USER_LDT).
 */
  653 void
  654 setregs(p, entry, stack)
  655         struct proc *p;
  656         u_long entry;
  657         u_long stack;
  658 {
  659         int *regs = p->p_md.md_regs;
  660 
  661 #ifdef USER_LDT
  662         struct pcb *pcb = &p->p_addr->u_pcb;
  663 
  664         /* was i386_user_cleanup() in NetBSD */
  665         if (pcb->pcb_ldt) {
  666                 if (pcb == curpcb)
  667                         lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
  668                 kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt,
  669                         pcb->pcb_ldt_len * sizeof(union descriptor));
  670                 pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0;
  671         }
  672 #endif
  673   
  674         bzero(regs, sizeof(struct trapframe));
  675         regs[tEIP] = entry;
  676         regs[tESP] = stack;
/* NOTE(review): regs was just zeroed above, so the (regs[tEFLAGS] & PSL_T)
 * term is always 0 here — possibly meant to preserve the pre-exec trace
 * flag; confirm. */
  677         regs[tEFLAGS] = PSL_USER | (regs[tEFLAGS] & PSL_T);
  678         regs[tSS] = _udatasel;
  679         regs[tDS] = _udatasel;
  680         regs[tES] = _udatasel;
  681         regs[tCS] = _ucodesel;
  682 
  683         /*
  684          * Initialize the math emulator (if any) for the current process.
  685          * Actually, just clear the bit that says that the emulator has
  686          * been initialized.  Initialization is delayed until the process
  687          * traps to the emulator (if it is done at all) mainly because
  688          * emulators don't provide an entry point for initialization.
  689          */
  690         p->p_addr->u_pcb.pcb_flags &= ~FP_SOFTFP;
  691 
  692         /*
  693          * Arrange to trap the next npx or `fwait' instruction (see npx.c
  694          * for why fwait must be trapped at least if there is an npx or an
  695          * emulator).  This is mainly to handle the case where npx0 is not
  696          * configured, since the npx routines normally set up the trap
  697          * otherwise.  It should be done only at boot time, but doing it
  698          * here allows modifying `npx_exists' for testing the emulator on
  699          * systems with an npx.
  700          */
  701         load_cr0(rcr0() | CR0_MP | CR0_TS);
  702 
  703 #if NNPX > 0
  704         /* Initialize the npx (if any) for the current process. */
  705         npxinit(__INITIAL_NPXCW__);
  706 #endif
  707 }
  708 
  709 static int
  710 sysctl_machdep_adjkerntz SYSCTL_HANDLER_ARGS
  711 {
  712         int error;
  713         error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
  714                 req);
  715         if (!error && req->newptr)
  716                 resettodr();
  717         return (error);
  718 }
  719 
/* machdep.adjkerntz: read-write; writes also update the RTC via its handler. */
  720 SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW,
  721         &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "");
  722 
/* machdep.disable_rtc_set: read-write flag. */
  723 SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
  724         CTLFLAG_RW, &disable_rtc_set, 0, "");
  725 
/* machdep.bootinfo: read-only copy of the boot loader's bootinfo struct. */
  726 SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo, 
  727         CTLFLAG_RD, &bootinfo, bootinfo, "");
  728 
/* machdep.wall_cmos_clock: read-write flag. */
  729 SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock,
  730         CTLFLAG_RW, &wall_cmos_clock, 0, "");
  732 /*
  733  * Initialize 386 and configure to run kernel
  734  */
  735 
  736 /*
  737  * Initialize segments & interrupt table
  738  */
  739 
/* Currently-loaded and default LDT selectors. */
  740 int currentldt;
  741 int _default_ldt;
  742 union descriptor gdt[NGDT];             /* global descriptor table */
  743 struct gate_descriptor idt[NIDT];       /* interrupt descriptor table */
  744 union descriptor ldt[NLDT];             /* local descriptor table */
  745 
/* State for the Pentium F00F-bug workaround (alternate IDT). */
  746 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
  747 struct gate_descriptor *t_idt;
  748 extern int has_f00f_bug;
  749 #endif
  750 
/* TSS and private stack used for double-fault handling (see
 * dblfault_handler above). */
  751 static struct i386tss dblfault_tss;
  752 static char dblfault_stack[PAGE_SIZE];
  753 
  754 extern  struct user *proc0paddr;
  756 /* software prototypes -- in more palatable form */
  757 struct soft_segment_descriptor gdt_segs[] = {
  758 /* GNULL_SEL    0 Null Descriptor */
  759 {       0x0,                    /* segment base address  */
  760         0x0,                    /* length */
  761         0,                      /* segment type */
  762         0,                      /* segment descriptor priority level */
  763         0,                      /* segment descriptor present */
  764         0, 0,
  765         0,                      /* default 32 vs 16 bit size */
  766         0                       /* limit granularity (byte/page units)*/ },
  767 /* GCODE_SEL    1 Code Descriptor for kernel */
  768 {       0x0,                    /* segment base address  */
  769         0xfffff,                /* length - all address space */
  770         SDT_MEMERA,             /* segment type */
  771         0,                      /* segment descriptor priority level */
  772         1,                      /* segment descriptor present */
  773         0, 0,
  774         1,                      /* default 32 vs 16 bit size */
  775         1                       /* limit granularity (byte/page units)*/ },
  776 /* GDATA_SEL    2 Data Descriptor for kernel */
  777 {       0x0,                    /* segment base address  */
  778         0xfffff,                /* length - all address space */
  779         SDT_MEMRWA,             /* segment type */
  780         0,                      /* segment descriptor priority level */
  781         1,                      /* segment descriptor present */
  782         0, 0,
  783         1,                      /* default 32 vs 16 bit size */
  784         1                       /* limit granularity (byte/page units)*/ },
  785 /* GLDT_SEL     3 LDT Descriptor */
  786 {       (int) ldt,              /* segment base address  */
  787         sizeof(ldt)-1,          /* length - all address space */
  788         SDT_SYSLDT,             /* segment type */
  789         0,                      /* segment descriptor priority level */
  790         1,                      /* segment descriptor present */
  791         0, 0,
  792         0,                      /* unused - default 32 vs 16 bit size */
  793         0                       /* limit granularity (byte/page units)*/ },
  794 /* GTGATE_SEL   4 Null Descriptor - Placeholder */
  795 {       0x0,                    /* segment base address  */
  796         0x0,                    /* length - all address space */
  797         0,                      /* segment type */
  798         0,                      /* segment descriptor priority level */
  799         0,                      /* segment descriptor present */
  800         0, 0,
  801         0,                      /* default 32 vs 16 bit size */
  802         0                       /* limit granularity (byte/page units)*/ },
  803 /* GPANIC_SEL   5 Panic Tss Descriptor */
  804 {       (int) &dblfault_tss,    /* segment base address  */
  805         sizeof(struct i386tss)-1,/* length - all address space */
  806         SDT_SYS386TSS,          /* segment type */
  807         0,                      /* segment descriptor priority level */
  808         1,                      /* segment descriptor present */
  809         0, 0,
  810         0,                      /* unused - default 32 vs 16 bit size */
  811         0                       /* limit granularity (byte/page units)*/ },
  812 /* GPROC0_SEL   6 Proc 0 Tss Descriptor */
  813 {       (int) kstack,           /* segment base address  */
  814         sizeof(struct i386tss)-1,/* length - all address space */
  815         SDT_SYS386TSS,          /* segment type */
  816         0,                      /* segment descriptor priority level */
  817         1,                      /* segment descriptor present */
  818         0, 0,
  819         0,                      /* unused - default 32 vs 16 bit size */
  820         0                       /* limit granularity (byte/page units)*/ },
  821 /* GUSERLDT_SEL 7 User LDT Descriptor per process */
  822 {       (int) ldt,              /* segment base address  */
  823         (512 * sizeof(union descriptor)-1),             /* length */
  824         SDT_SYSLDT,             /* segment type */
  825         0,                      /* segment descriptor priority level */
  826         1,                      /* segment descriptor present */
  827         0, 0,
  828         0,                      /* unused - default 32 vs 16 bit size */
  829         0                       /* limit granularity (byte/page units)*/ },
  830 /* GAPMCODE32_SEL 8 APM BIOS 32-bit interface (32bit Code) */
  831 {       0,                      /* segment base address (overwritten by APM)  */
  832         0xfffff,                /* length */
  833         SDT_MEMERA,             /* segment type */
  834         0,                      /* segment descriptor priority level */
  835         1,                      /* segment descriptor present */
  836         0, 0,
  837         1,                      /* default 32 vs 16 bit size */
  838         1                       /* limit granularity (byte/page units)*/ },
  839 /* GAPMCODE16_SEL 9 APM BIOS 32-bit interface (16bit Code) */
  840 {       0,                      /* segment base address (overwritten by APM)  */
  841         0xfffff,                /* length */
  842         SDT_MEMERA,             /* segment type */
  843         0,                      /* segment descriptor priority level */
  844         1,                      /* segment descriptor present */
  845         0, 0,
  846         0,                      /* default 32 vs 16 bit size */
  847         1                       /* limit granularity (byte/page units)*/ },
  848 /* GAPMDATA_SEL 10 APM BIOS 32-bit interface (Data) */
  849 {       0,                      /* segment base address (overwritten by APM) */
  850         0xfffff,                /* length */
  851         SDT_MEMRWA,             /* segment type */
  852         0,                      /* segment descriptor priority level */
  853         1,                      /* segment descriptor present */
  854         0, 0,
  855         1,                      /* default 32 vs 16 bit size */
  856         1                       /* limit granularity (byte/page units)*/ },
  857 };
  858 
/*
 * Software prototypes for the boot-time local descriptor table (LDT).
 * init386() fills in the user code/data segment limits and then packs
 * each entry into hardware format with ssdtosd().  The three leading
 * null entries are placeholders that are later overwritten (the first
 * becomes the system-call gate -- see the call-gate setup in init386()).
 */
static struct soft_segment_descriptor ldt_segs[] = {
	/* Null Descriptor - overwritten by call gate */
{	0x0,			/* segment base address  */
	0x0,			/* length - all address space */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0			/* limit granularity (byte/page units)*/ },
	/* Null Descriptor - overwritten by call gate */
{	0x0,			/* segment base address  */
	0x0,			/* length - all address space */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0			/* limit granularity (byte/page units)*/ },
	/* Null Descriptor - overwritten by call gate */
{	0x0,			/* segment base address  */
	0x0,			/* length - all address space */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0			/* limit granularity (byte/page units)*/ },
	/* Code Descriptor for user */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMERA,		/* segment type */
	SEL_UPL,		/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
	/* Data Descriptor for user */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMRWA,		/* segment type */
	SEL_UPL,		/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
};
  906 
  907 void
  908 setidt(idx, func, typ, dpl, selec)
  909         int idx;
  910         inthand_t *func;
  911         int typ;
  912         int dpl;
  913         int selec;
  914 {
  915         struct gate_descriptor *ip;
  916 
  917 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
  918         ip = (t_idt != NULL ? t_idt : idt) + idx;
  919 #else
  920         ip = idt + idx;
  921 #endif
  922         ip->gd_looffset = (int)func;
  923         ip->gd_selector = selec;
  924         ip->gd_stkcpy = 0;
  925         ip->gd_xx = 0;
  926         ip->gd_type = typ;
  927         ip->gd_dpl = dpl;
  928         ip->gd_p = 1;
  929         ip->gd_hioffset = ((int)func)>>16 ;
  930 }
  931 
/*
 * Trap/interrupt entry symbols are referenced as X<name>; IDTVEC()
 * forms that token.  The entry points themselves are defined elsewhere
 * (presumably in the locore/exception assembly -- not visible here).
 */
#define IDTVEC(name)	__CONCAT(X,name)

extern inthand_t
	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
	IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
	IDTVEC(syscall), IDTVEC(int0x80_syscall);
  940 
  941 void
  942 sdtossd(sd, ssd)
  943         struct segment_descriptor *sd;
  944         struct soft_segment_descriptor *ssd;
  945 {
  946         ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
  947         ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
  948         ssd->ssd_type  = sd->sd_type;
  949         ssd->ssd_dpl   = sd->sd_dpl;
  950         ssd->ssd_p     = sd->sd_p;
  951         ssd->ssd_def32 = sd->sd_def32;
  952         ssd->ssd_gran  = sd->sd_gran;
  953 }
  954 
/*
 * Machine-dependent boot-time initialization for the i386:
 * console bring-up, GDT/LDT/IDT construction and loading, physical
 * memory sizing (RTC/BIOS) and per-page memory testing, message
 * buffer mapping, double-fault and proc0 TSS setup, and the system
 * call gate.  The `first' argument is handed to pmap_bootstrap().
 * Runs once, very early, before normal kernel services exist.
 */
void
init386(first)
	int first;
{
	int x;
	unsigned biosbasemem, biosextmem;
	struct gate_descriptor *gdp;
	int gsel_tss;
	struct isa_device *idp;
	/* table descriptors - used to load tables by microp */
	struct region_descriptor r_gdt, r_idt;
	int pagesinbase, pagesinext;
	int target_page, pa_indx;
	int off;
	int speculative_mprobe;

	proc0.p_addr = proc0paddr;

	atdevbase = ISA_HOLE_START + KERNBASE;

	/*
	 * Initialize the console before we print anything out.
	 */
	cninit();

	/*
	 * make gdt memory segments, the code segment goes up to end of the
	 * page with etext in it, the data segment goes to the end of
	 * the address space
	 */
	/*
	 * XXX text protection is temporarily (?) disabled.  The limit was
	 * i386_btop(round_page(etext)) - 1.
	 */
	gdt_segs[GCODE_SEL].ssd_limit = i386_btop(0) - 1;
	gdt_segs[GDATA_SEL].ssd_limit = i386_btop(0) - 1;
	for (x = 0; x < NGDT; x++)
		ssdtosd(&gdt_segs[x], &gdt[x].sd);

	/* make ldt memory segments */
	/*
	 * The data segment limit must not cover the user area because we
	 * don't want the user area to be writable in copyout() etc. (page
	 * level protection is lost in kernel mode on 386's).  Also, we
	 * don't want the user area to be writable directly (page level
	 * protection of the user area is not available on 486's with
	 * CR0_WP set, because there is no user-read/kernel-write mode).
	 *
	 * XXX - VM_MAXUSER_ADDRESS is an end address, not a max.  And it
	 * should be spelled ...MAX_USER...
	 */
#define VM_END_USER_RW_ADDRESS	VM_MAXUSER_ADDRESS
	/*
	 * The code segment limit has to cover the user area until we move
	 * the signal trampoline out of the user area.  This is safe because
	 * the code segment cannot be written to directly.
	 */
#define VM_END_USER_R_ADDRESS	(VM_END_USER_RW_ADDRESS + UPAGES * PAGE_SIZE)
	ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1;
	ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1;
	/* Note. eventually want private ldts per process */
	for (x = 0; x < NLDT; x++)
		ssdtosd(&ldt_segs[x], &ldt[x].sd);

	/* exceptions */
	/* Default every vector to the "reserved" trap, then fill in known ones. */
	for (x = 0; x < NIDT; x++)
		setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(0, &IDTVEC(div),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(1, &IDTVEC(dbg),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(2, &IDTVEC(nmi),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	/* Vectors 3 and 4 (breakpoint, overflow) are reachable from user mode. */
	setidt(3, &IDTVEC(bpt),  SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(4, &IDTVEC(ofl),  SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(5, &IDTVEC(bnd),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(6, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(7, &IDTVEC(dna),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	/* Double fault (8) goes through a task gate to the panic TSS. */
	setidt(8, 0,  SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL));
	setidt(9, &IDTVEC(fpusegm),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(10, &IDTVEC(tss),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(11, &IDTVEC(missing),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(12, &IDTVEC(stk),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(13, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(14, &IDTVEC(page),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(15, &IDTVEC(rsvd),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(16, &IDTVEC(fpu),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(18, &IDTVEC(mchk),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	/* int 0x80 system call entry, callable from user mode. */
	setidt(0x80, &IDTVEC(int0x80_syscall),
			SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));

#include	"isa.h"
#if	NISA >0
	isa_defaultirq();
#endif
	rand_initialize();

	/* Load the new GDT, IDT and default LDT into the CPU. */
	r_gdt.rd_limit = sizeof(gdt) - 1;
	r_gdt.rd_base =  (int) gdt;
	lgdt(&r_gdt);

	r_idt.rd_limit = sizeof(idt) - 1;
	r_idt.rd_base = (int) idt;
	lidt(&r_idt);

	_default_ldt = GSEL(GLDT_SEL, SEL_KPL);
	lldt(_default_ldt);
	currentldt = _default_ldt;

#ifdef DDB
	kdb_init();
	if (boothowto & RB_KDB)
		Debugger("Boot flags requested debugger");
#endif

	finishidentcpu();	/* Final stage of CPU initialization */
	/*
	 * NOTE(review): vectors 6 and 13 are re-installed here, presumably
	 * because finishidentcpu() temporarily replaced them while probing
	 * the CPU -- confirm against identcpu.c.
	 */
	setidt(6, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(13, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	initializecpu();	/* Initialize CPU registers */

	/* Use BIOS values stored in RTC CMOS RAM, since probing
	 * breaks certain 386 AT relics.
	 */
	biosbasemem = rtcin(RTC_BASELO)+ (rtcin(RTC_BASEHI)<<8);
	biosextmem = rtcin(RTC_EXTLO)+ (rtcin(RTC_EXTHI)<<8);

	/*
	 * If BIOS tells us that it has more than 640k in the basemem,
	 *	don't believe it - set it to 640k.
	 */
	if (biosbasemem > 640) {
		printf("Preposterous RTC basemem of %dK, truncating to 640K\n",
		       biosbasemem);
		biosbasemem = 640;
	}
	if (bootinfo.bi_memsizes_valid && bootinfo.bi_basemem > 640) {
		printf("Preposterous BIOS basemem of %dK, truncating to 640K\n",
		       bootinfo.bi_basemem);
		bootinfo.bi_basemem = 640;
	}

	/*
	 * Warn if the official BIOS interface disagrees with the RTC
	 * interface used above about the amount of base memory or the
	 * amount of extended memory.  Prefer the BIOS value for the base
	 * memory.  This is necessary for machines that `steal' base
	 * memory for use as BIOS memory, at least if we are going to use
	 * the BIOS for apm.  Prefer the RTC value for extended memory.
	 * Eventually the hackish interface shouldn't even be looked at.
	 */
	if (bootinfo.bi_memsizes_valid) {
		if (bootinfo.bi_basemem != biosbasemem) {
			vm_offset_t pa;

			printf(
	"BIOS basemem (%ldK) != RTC basemem (%dK), setting to BIOS value\n",
			       bootinfo.bi_basemem, biosbasemem);
			biosbasemem = bootinfo.bi_basemem;

			/*
			 * XXX if biosbasemem is now < 640, there is `hole'
			 * between the end of base memory and the start of
			 * ISA memory.  The hole may be empty or it may
			 * contain BIOS code or data.  Map it read/write so
			 * that the BIOS can write to it.  (Memory from 0 to
			 * the physical end of the kernel is mapped read-only
			 * to begin with and then parts of it are remapped.
			 * The parts that aren't remapped form holes that
			 * remain read-only and are unused by the kernel.
			 * The base memory area is below the physical end of
			 * the kernel and right now forms a read-only hole.
			 * The part of it from 0 to
			 * (trunc_page(biosbasemem * 1024) - 1) will be
			 * remapped and used by the kernel later.)
			 *
			 * This code is similar to the code used in
			 * pmap_mapdev, but since no memory needs to be
			 * allocated we simply change the mapping.
			 */
			for (pa = trunc_page(biosbasemem * 1024);
			     pa < ISA_HOLE_START; pa += PAGE_SIZE) {
				unsigned *pte;

				pte = (unsigned *)vtopte(pa + KERNBASE);
				*pte = pa | PG_RW | PG_V;
			}
		}
		if (bootinfo.bi_extmem != biosextmem)
			printf("BIOS extmem (%ldK) != RTC extmem (%dK)\n",
			       bootinfo.bi_extmem, biosextmem);
	}

	pagesinbase = biosbasemem * 1024 / PAGE_SIZE;
	pagesinext = biosextmem * 1024 / PAGE_SIZE;

	/*
	 * Special hack for chipsets that still remap the 384k hole when
	 *	there's 16MB of memory - this really confuses people that
	 *	are trying to use bus mastering ISA controllers with the
	 *	"16MB limit"; they only have 16MB, but the remapping puts
	 *	them beyond the limit.
	 */
	/*
	 * If extended memory is between 15-16MB (16-17MB phys address range),
	 *	chop it to 15MB.
	 */
	if ((pagesinext > 3840) && (pagesinext < 4096))
		pagesinext = 3840;

	/*
	 * Maxmem isn't the "maximum memory", it's one larger than the
	 * highest page of the physical address space.  It should be
	 * called something like "Maxphyspage".
	 */
	Maxmem = pagesinext + 0x100000/PAGE_SIZE;
	/*
	 * Indicate that we wish to do a speculative search for memory beyond
	 * the end of the reported size if the indicated amount is 64MB (0x4000
	 * pages) - which is the largest amount that the BIOS/bootblocks can
	 * currently report. If a specific amount of memory is indicated via
	 * the MAXMEM option or the npx0 "msize", then don't do the speculative
	 * memory probe.
	 */
	if (Maxmem >= 0x4000)
		speculative_mprobe = TRUE;
	else
		speculative_mprobe = FALSE;

#ifdef MAXMEM
	/* MAXMEM is in kilobytes; /4 converts to 4K pages. */
	Maxmem = MAXMEM/4;
	speculative_mprobe = FALSE;
#endif

#if NNPX > 0
	idp = find_isadev(isa_devtab_null, &npxdriver, 0);
	if (idp != NULL && idp->id_msize != 0) {
		Maxmem = idp->id_msize / 4;
		speculative_mprobe = FALSE;
	}
#endif

	/* call pmap initialization to make new kernel address space */
	pmap_bootstrap (first, 0);

	/*
	 * Size up each available chunk of physical memory.
	 */

	/*
	 * We currently don't bother testing base memory.
	 * XXX  ...but we probably should.
	 */
	pa_indx = 0;
	if (pagesinbase > 1) {
		phys_avail[pa_indx++] = PAGE_SIZE;	/* skip first page of memory */
		phys_avail[pa_indx] = ptoa(pagesinbase);/* memory up to the ISA hole */
		physmem = pagesinbase - 1;
	} else {
		/* point at first chunk end */
		pa_indx++;
	}

	/*
	 * Probe every page of extended memory through the CMAP1/CADDR1
	 * scratch mapping, writing test patterns and reading them back.
	 * Good pages are accumulated into phys_avail[] chunks.
	 */
	for (target_page = avail_start; target_page < ptoa(Maxmem); target_page += PAGE_SIZE) {
		int tmp, page_bad;

		page_bad = FALSE;

		/*
		 * map page into kernel: valid, read/write, non-cacheable
		 */
		*(int *)CMAP1 = PG_V | PG_RW | PG_N | target_page;
		invltlb();

		tmp = *(int *)CADDR1;
		/*
		 * Test for alternating 1's and 0's
		 */
		*(volatile int *)CADDR1 = 0xaaaaaaaa;
		if (*(volatile int *)CADDR1 != 0xaaaaaaaa) {
			page_bad = TRUE;
		}
		/*
		 * Test for alternating 0's and 1's
		 */
		*(volatile int *)CADDR1 = 0x55555555;
		if (*(volatile int *)CADDR1 != 0x55555555) {
			page_bad = TRUE;
		}
		/*
		 * Test for all 1's
		 */
		*(volatile int *)CADDR1 = 0xffffffff;
		if (*(volatile int *)CADDR1 != 0xffffffff) {
			page_bad = TRUE;
		}
		/*
		 * Test for all 0's
		 */
		*(volatile int *)CADDR1 = 0x0;
		if (*(volatile int *)CADDR1 != 0x0) {
			/*
			 * test of page failed
			 */
			page_bad = TRUE;
		}
		/*
		 * Restore original value.
		 */
		*(int *)CADDR1 = tmp;

		/*
		 * Adjust array of valid/good pages.
		 */
		if (page_bad == FALSE) {
			/*
			 * If this good page is a continuation of the
			 * previous set of good pages, then just increase
			 * the end pointer. Otherwise start a new chunk.
			 * Note that "end" points one higher than end,
			 * making the range >= start and < end.
			 * If we're also doing a speculative memory
			 * test and we at or past the end, bump up Maxmem
			 * so that we keep going. The first bad page
			 * will terminate the loop.
			 */
			if (phys_avail[pa_indx] == target_page) {
				phys_avail[pa_indx] += PAGE_SIZE;
				if (speculative_mprobe == TRUE &&
				    phys_avail[pa_indx] >= (64*1024*1024))
					Maxmem++;
			} else {
				pa_indx++;
				if (pa_indx == PHYS_AVAIL_ARRAY_END) {
					printf("Too many holes in the physical address space, giving up\n");
					pa_indx--;
					break;
				}
				phys_avail[pa_indx++] = target_page;	/* start */
				phys_avail[pa_indx] = target_page + PAGE_SIZE;	/* end */
			}
			physmem++;
		}
	}

	/* Tear down the scratch mapping used by the memory test above. */
	*(int *)CMAP1 = 0;
	invltlb();

	/*
	 * XXX
	 * The last chunk must contain at least one page plus the message
	 * buffer to avoid complicating other code (message buffer address
	 * calculation, etc.).
	 */
	while (phys_avail[pa_indx - 1] + PAGE_SIZE +
	    round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) {
		physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
		phys_avail[pa_indx--] = 0;
		phys_avail[pa_indx--] = 0;
	}

	Maxmem = atop(phys_avail[pa_indx]);

	/* Trim off space for the message buffer. */
	phys_avail[pa_indx] -= round_page(MSGBUF_SIZE);

	avail_end = phys_avail[pa_indx];

	/* now running on new page tables, configured,and u/iom is accessible */

	/* Map the message buffer. */
	for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE)
		pmap_enter(kernel_pmap, (vm_offset_t)msgbufp + off,
			   avail_end + off, VM_PROT_ALL, TRUE);
	msgbufinit(msgbufp, MSGBUF_SIZE);

	/* make a initial tss so microp can get interrupt stack on syscall! */
	proc0.p_addr->u_pcb.pcb_tss.tss_esp0 = (int) kstack + UPAGES*PAGE_SIZE;
	proc0.p_addr->u_pcb.pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ;
	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);

	/*
	 * Set up the double-fault TSS: dedicated stack, kernel segments,
	 * and dblfault_handler() as the entry point.
	 */
	dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 =
	    dblfault_tss.tss_esp2 = (int) &dblfault_stack[sizeof(dblfault_stack)];
	dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
	    dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
	dblfault_tss.tss_cr3 = IdlePTD;
	dblfault_tss.tss_eip = (int) dblfault_handler;
	dblfault_tss.tss_eflags = PSL_KERNEL;
	dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_fs = dblfault_tss.tss_gs =
		GSEL(GDATA_SEL, SEL_KPL);
	dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
	dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);

	/* Place the I/O permission bitmap offset past the end of the TSS. */
	((struct i386tss *)gdt_segs[GPROC0_SEL].ssd_base)->tss_ioopt =
		(sizeof(struct i386tss))<<16;

	ltr(gsel_tss);

	/* make a call gate to reenter kernel with */
	gdp = &ldt[LSYS5CALLS_SEL].gd;

	x = (int) &IDTVEC(syscall);
	gdp->gd_looffset = x++;
	gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL);
	gdp->gd_stkcpy = 1;
	gdp->gd_type = SDT_SYS386CGT;
	gdp->gd_dpl = SEL_UPL;
	gdp->gd_p = 1;
	gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >>16;

	/* XXX does this work? */
	ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL];

	/* transfer to user mode */

	_ucodesel = LSEL(LUCODE_SEL, SEL_UPL);
	_udatasel = LSEL(LUDATA_SEL, SEL_UPL);

	/* setup proc 0's pcb */
	proc0.p_addr->u_pcb.pcb_flags = 0;
	proc0.p_addr->u_pcb.pcb_cr3 = IdlePTD;
}
 1374 
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
/*
 * Workaround for the Pentium "F00F" erratum: relocate the IDT so that
 * its first entries sit at the end of a read-only page, turning the
 * lock-up into a recoverable fault.  Registered via SYSINIT to run
 * once during boot; does nothing unless has_f00f_bug was set by the
 * CPU identification code.
 */
static void f00f_hack(void *unused);
SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL);

static void
f00f_hack(void *unused) {
	struct region_descriptor r_idt;
	vm_offset_t tmp;

	if (!has_f00f_bug)
		return;

	printf("Intel Pentium detected, installing workaround for F00F bug\n");

	r_idt.rd_limit = sizeof(idt) - 1;

	/* Two pages so the IDT can straddle the page boundary. */
	tmp = kmem_alloc(kernel_map, PAGE_SIZE * 2);
	if (tmp == 0)
		panic("kmem_alloc returned 0");
	if (((unsigned int)tmp & (PAGE_SIZE-1)) != 0)
		panic("kmem_alloc returned non-page-aligned memory");
	/* Put the first seven entries in the lower page */
	t_idt = (struct gate_descriptor*)(tmp + PAGE_SIZE - (7*8));
	bcopy(idt, t_idt, sizeof(idt));
	r_idt.rd_base = (int)t_idt;
	lidt(&r_idt);
	/* Make the lower page (holding those first entries) read-only. */
	if (vm_map_protect(kernel_map, tmp, tmp + PAGE_SIZE,
			   VM_PROT_READ, FALSE) != KERN_SUCCESS)
		panic("vm_map_protect failed");
	return;
}
#endif /* defined(I586_CPU) && !NO_F00F_HACK */
 1407 
 1408 /*
 1409  * The registers are in the frame; the frame is in the user area of
 1410  * the process in question; when the process is active, the registers
 1411  * are in "the kernel stack"; when it's not, they're still there, but
 1412  * things get flipped around.  So, since p->p_md.md_regs is the whole address
 1413  * of the register set, take its offset from the kernel stack, and
 1414  * index into the user block.  Don't you just *love* virtual memory?
 1415  * (I'm starting to think seymour is right...)
 1416  */
 1417 #define TF_REGP(p)      ((struct trapframe *) \
 1418                          ((char *)(p)->p_addr \
 1419                           + ((char *)(p)->p_md.md_regs - kstack)))
 1420 
 1421 int
 1422 ptrace_set_pc(p, addr)
 1423         struct proc *p;
 1424         unsigned int addr;
 1425 {
 1426         TF_REGP(p)->tf_eip = addr;
 1427         return (0);
 1428 }
 1429 
 1430 int
 1431 ptrace_single_step(p)
 1432         struct proc *p;
 1433 {
 1434         TF_REGP(p)->tf_eflags |= PSL_T;
 1435         return (0);
 1436 }
 1437 
 1438 int ptrace_write_u(p, off, data)
 1439         struct proc *p;
 1440         vm_offset_t off;
 1441         int data;
 1442 {
 1443         struct trapframe frame_copy;
 1444         vm_offset_t min;
 1445         struct trapframe *tp;
 1446 
 1447         /*
 1448          * Privileged kernel state is scattered all over the user area.
 1449          * Only allow write access to parts of regs and to fpregs.
 1450          */
 1451         min = (char *)p->p_md.md_regs - kstack;
 1452         if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) {
 1453                 tp = TF_REGP(p);
 1454                 frame_copy = *tp;
 1455                 *(int *)((char *)&frame_copy + (off - min)) = data;
 1456                 if (!EFLAGS_SECURE(frame_copy.tf_eflags, tp->tf_eflags) ||
 1457                     !CS_SECURE(frame_copy.tf_cs))
 1458                         return (EINVAL);
 1459                 *(int*)((char *)p->p_addr + off) = data;
 1460                 return (0);
 1461         }
 1462         min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu);
 1463         if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) {
 1464                 *(int*)((char *)p->p_addr + off) = data;
 1465                 return (0);
 1466         }
 1467         return (EFAULT);
 1468 }
 1469 
 1470 int
 1471 fill_regs(p, regs)
 1472         struct proc *p;
 1473         struct reg *regs;
 1474 {
 1475         struct trapframe *tp;
 1476 
 1477         tp = TF_REGP(p);
 1478         regs->r_es = tp->tf_es;
 1479         regs->r_ds = tp->tf_ds;
 1480         regs->r_edi = tp->tf_edi;
 1481         regs->r_esi = tp->tf_esi;
 1482         regs->r_ebp = tp->tf_ebp;
 1483         regs->r_ebx = tp->tf_ebx;
 1484         regs->r_edx = tp->tf_edx;
 1485         regs->r_ecx = tp->tf_ecx;
 1486         regs->r_eax = tp->tf_eax;
 1487         regs->r_eip = tp->tf_eip;
 1488         regs->r_cs = tp->tf_cs;
 1489         regs->r_eflags = tp->tf_eflags;
 1490         regs->r_esp = tp->tf_esp;
 1491         regs->r_ss = tp->tf_ss;
 1492         return (0);
 1493 }
 1494 
 1495 int
 1496 set_regs(p, regs)
 1497         struct proc *p;
 1498         struct reg *regs;
 1499 {
 1500         struct trapframe *tp;
 1501 
 1502         tp = TF_REGP(p);
 1503         if (!EFLAGS_SECURE(regs->r_eflags, tp->tf_eflags) ||
 1504             !CS_SECURE(regs->r_cs))
 1505                 return (EINVAL);
 1506         tp->tf_es = regs->r_es;
 1507         tp->tf_ds = regs->r_ds;
 1508         tp->tf_edi = regs->r_edi;
 1509         tp->tf_esi = regs->r_esi;
 1510         tp->tf_ebp = regs->r_ebp;
 1511         tp->tf_ebx = regs->r_ebx;
 1512         tp->tf_edx = regs->r_edx;
 1513         tp->tf_ecx = regs->r_ecx;
 1514         tp->tf_eax = regs->r_eax;
 1515         tp->tf_eip = regs->r_eip;
 1516         tp->tf_cs = regs->r_cs;
 1517         tp->tf_eflags = regs->r_eflags;
 1518         tp->tf_esp = regs->r_esp;
 1519         tp->tf_ss = regs->r_ss;
 1520         return (0);
 1521 }
 1522 
#ifndef DDB
/*
 * Stub for the kernel debugger entry point, compiled in only when the
 * DDB in-kernel debugger is absent: report the call and carry on.
 */
void
Debugger(const char *msg)
{
        printf("Debugger(\"%s\") called.\n", msg);
}
#endif /* no DDB */
 1530 
#include <sys/disklabel.h>
/*
 * The cylinder number computed for disksort (see bounds_check_with_label
 * below) is stashed in the buf's b_resid field.
 */
#define b_cylin b_resid
 1533 /*
 1534  * Determine the size of the transfer, and make sure it is
 1535  * within the boundaries of the partition. Adjust transfer
 1536  * if needed, and signal errors or early completion.
 1537  */
int
bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel)
{
        /* Returns 1 to proceed, 0 for EOF at partition end, -1 on error. */
        struct partition *p = lp->d_partitions + dkpart(bp->b_dev);
        int labelsect = lp->d_partitions[0].p_offset;
        /* sz: transfer length in sectors, rounded up to whole sectors. */
        int maxsz = p->p_size,
                sz = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT;

        /* overwriting disk label ? */
        /* XXX should also protect bootstrap in first 8K */
        /*
         * A write that covers the label sector is refused (EROFS) unless
         * the caller explicitly enabled label writing via wlabel.  The
         * overlap-end test is compiled out when LABELSECTOR is 0.
         */
        if (bp->b_blkno + p->p_offset <= LABELSECTOR + labelsect &&
#if LABELSECTOR != 0
            bp->b_blkno + p->p_offset + sz > LABELSECTOR + labelsect &&
#endif
            (bp->b_flags & B_READ) == 0 && wlabel == 0) {
                bp->b_error = EROFS;
                goto bad;
        }

#if     defined(DOSBBSECTOR) && defined(notyet)
        /* overwriting master boot record? */
        if (bp->b_blkno + p->p_offset <= DOSBBSECTOR &&
            (bp->b_flags & B_READ) == 0 && wlabel == 0) {
                bp->b_error = EROFS;
                goto bad;
        }
#endif

        /* beyond partition? */
        if (bp->b_blkno < 0 || bp->b_blkno + sz > maxsz) {
                /* if exactly at end of disk, return an EOF */
                if (bp->b_blkno == maxsz) {
                        bp->b_resid = bp->b_bcount;
                        return(0);
                }
                /* or truncate if part of it fits */
                sz = maxsz - bp->b_blkno;
                if (sz <= 0) {
                        /* Starts past the end (or negative blkno): error. */
                        bp->b_error = EINVAL;
                        goto bad;
                }
                /* Shrink the request to the part inside the partition. */
                bp->b_bcount = sz << DEV_BSHIFT;
        }

        /* calculate cylinder for disksort to order transfers with */
        bp->b_pblkno = bp->b_blkno + p->p_offset;
        bp->b_cylin = bp->b_pblkno / lp->d_secpercyl;
        return(1);

bad:
        bp->b_flags |= B_ERROR;
        return(-1);
}

Cache object: f62efcda179097df0de9aba1178cbf76


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.