The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/amd64/amd64/machdep.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2003 Peter Wemm.
    3  * Copyright (c) 1992 Terrence R. Lambert.
    4  * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
    5  * All rights reserved.
    6  *
    7  * This code is derived from software contributed to Berkeley by
    8  * William Jolitz.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 3. All advertising materials mentioning features or use of this software
   19  *    must display the following acknowledgement:
   20  *      This product includes software developed by the University of
   21  *      California, Berkeley and its contributors.
   22  * 4. Neither the name of the University nor the names of its contributors
   23  *    may be used to endorse or promote products derived from this software
   24  *    without specific prior written permission.
   25  *
   26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   36  * SUCH DAMAGE.
   37  *
   38  *      from: @(#)machdep.c     7.4 (Berkeley) 6/3/91
   39  */
   40 
   41 #include <sys/cdefs.h>
   42 __FBSDID("$FreeBSD: releng/6.2/sys/amd64/amd64/machdep.c 165477 2006-12-23 01:11:13Z davidxu $");
   43 
   44 #include "opt_atalk.h"
   45 #include "opt_atpic.h"
   46 #include "opt_compat.h"
   47 #include "opt_cpu.h"
   48 #include "opt_ddb.h"
   49 #include "opt_inet.h"
   50 #include "opt_ipx.h"
   51 #include "opt_isa.h"
   52 #include "opt_kstack_pages.h"
   53 #include "opt_maxmem.h"
   54 #include "opt_msgbuf.h"
   55 #include "opt_perfmon.h"
   56 
   57 #include <sys/param.h>
   58 #include <sys/proc.h>
   59 #include <sys/systm.h>
   60 #include <sys/bio.h>
   61 #include <sys/buf.h>
   62 #include <sys/bus.h>
   63 #include <sys/callout.h>
   64 #include <sys/cons.h>
   65 #include <sys/cpu.h>
   66 #include <sys/eventhandler.h>
   67 #include <sys/exec.h>
   68 #include <sys/imgact.h>
   69 #include <sys/kdb.h>
   70 #include <sys/kernel.h>
   71 #include <sys/ktr.h>
   72 #include <sys/linker.h>
   73 #include <sys/lock.h>
   74 #include <sys/malloc.h>
   75 #include <sys/memrange.h>
   76 #include <sys/msgbuf.h>
   77 #include <sys/mutex.h>
   78 #include <sys/pcpu.h>
   79 #include <sys/ptrace.h>
   80 #include <sys/reboot.h>
   81 #include <sys/sched.h>
   82 #include <sys/signalvar.h>
   83 #include <sys/sysctl.h>
   84 #include <sys/sysent.h>
   85 #include <sys/sysproto.h>
   86 #include <sys/ucontext.h>
   87 #include <sys/vmmeter.h>
   88 
   89 #include <vm/vm.h>
   90 #include <vm/vm_extern.h>
   91 #include <vm/vm_kern.h>
   92 #include <vm/vm_page.h>
   93 #include <vm/vm_map.h>
   94 #include <vm/vm_object.h>
   95 #include <vm/vm_pager.h>
   96 #include <vm/vm_param.h>
   97 
   98 #ifdef DDB
   99 #ifndef KDB
  100 #error KDB must be enabled in order for DDB to work!
  101 #endif
  102 #endif
  103 #include <ddb/ddb.h>
  104 
  105 #include <net/netisr.h>
  106 
  107 #include <machine/clock.h>
  108 #include <machine/cpu.h>
  109 #include <machine/cputypes.h>
  110 #include <machine/intr_machdep.h>
  111 #include <machine/md_var.h>
  112 #include <machine/metadata.h>
  113 #include <machine/pc/bios.h>
  114 #include <machine/pcb.h>
  115 #include <machine/proc.h>
  116 #include <machine/reg.h>
  117 #include <machine/sigframe.h>
  118 #include <machine/specialreg.h>
  119 #ifdef PERFMON
  120 #include <machine/perfmon.h>
  121 #endif
  122 #include <machine/tss.h>
  123 #ifdef SMP
  124 #include <machine/smp.h>
  125 #endif
  126 
  127 #ifdef DEV_ATPIC
  128 #include <amd64/isa/icu.h>
  129 #else
  130 #include <machine/apicvar.h>
  131 #endif
  132 
  133 #include <isa/isareg.h>
  134 #include <isa/rtc.h>
  135 
/* Sanity check for __curthread(): pc_curthread must be the first pcpu field. */
CTASSERT(offsetof(struct pcpu, pc_curthread) == 0);

/* Early MD bootstrap entry; NOTE(review): presumably called from locore -- confirm. */
extern u_int64_t hammer_time(u_int64_t, u_int64_t);
extern void dblfault_handler(void);

extern void printcpuinfo(void); /* XXX header file */
extern void identify_cpu(void);
extern void panicifcpuunsupported(void);

/* True iff the code selector's privilege level is user (ring 3). */
#define CS_SECURE(cs)           (ISPL(cs) == SEL_UPL)
/* True iff only user-modifiable rflags bits differ between ef and oef. */
#define EFL_SECURE(ef, oef)     ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)

static void cpu_startup(void *);
static void get_fpcontext(struct thread *td, mcontext_t *mcp);
static int  set_fpcontext(struct thread *td, const mcontext_t *mcp);
/* Run cpu_startup() early in boot, at SI_SUB_CPU. */
SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)

#ifdef DDB
extern vm_offset_t ksym_start, ksym_end;
#endif

/* User-mode segment selector values, filled in during machine init. */
int     _udatasel, _ucodesel, _ucode32sel;

/* Non-zero while booting (see cpu_est_clockrate's early-boot path). */
int cold = 1;

/* Top of usable physical memory and real memory size, in pages (see ptoa uses). */
long Maxmem = 0;
long realmem = 0;

/* Room for up to 30 (start, end) physical memory chunks, 2 slots each. */
#define PHYSMAP_SIZE    (2 * 30)

vm_paddr_t phys_avail[PHYSMAP_SIZE + 2];
vm_paddr_t dump_avail[PHYSMAP_SIZE + 2];

/* must be 2 less so 0 0 can signal end of chunks */
#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2)
#define DUMP_AVAIL_ARRAY_END ((sizeof(dump_avail) / sizeof(dump_avail[0])) - 2)

struct kva_md_info kmi;

static struct trapframe proc0_tf;       /* trapframe reserved for proc0 */
/* Region descriptors; NOTE(review): presumably loaded via lgdt/lidt -- confirm. */
struct region_descriptor r_gdt, r_idt;

struct pcpu __pcpu[MAXCPU];     /* per-CPU data, one slot per possible CPU */

struct mtx icu_lock;

struct mem_range_softc mem_range_softc;
/*
 * Early machine-dependent startup, run via the SYSINIT above.  Starts
 * the RTC, prints CPU and memory information, initializes the kernel
 * submaps and buffer cache, and loads the initial %cr0 settings.
 */
static void
cpu_startup(dummy)
        void *dummy;            /* unused SYSINIT argument */
{
        /*
         * Good {morning,afternoon,evening,night}.
         */
        startrtclock();
        printcpuinfo();
        panicifcpuunsupported();
#ifdef PERFMON
        perfmon_init();
#endif
        /* Maxmem is a page count; ptoa() converts it to bytes. */
        printf("real memory  = %ju (%ju MB)\n", ptoa((uintmax_t)Maxmem),
            ptoa((uintmax_t)Maxmem) / 1048576);
        realmem = Maxmem;
        /*
         * Display any holes after the first chunk of extended memory.
         */
        if (bootverbose) {
                int indx;

                printf("Physical memory chunk(s):\n");
                /* phys_avail[] holds (start, end) pairs, terminated by 0. */
                for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
                        vm_paddr_t size;

                        size = phys_avail[indx + 1] - phys_avail[indx];
                        printf(
                            "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n",
                            (uintmax_t)phys_avail[indx],
                            (uintmax_t)phys_avail[indx + 1] - 1,
                            (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
                }
        }

        vm_ksubmap_init(&kmi);

        printf("avail memory = %ju (%ju MB)\n",
            ptoa((uintmax_t)cnt.v_free_count),
            ptoa((uintmax_t)cnt.v_free_count) / 1048576);

        /*
         * Set up buffers, so they can be used to read disk labels.
         */
        bufinit();
        vm_pager_bufferinit();

        cpu_setregs();
}
  234 
/*
 * Send an interrupt to process.
 *
 * Stack is set up to allow sigcode stored
 * at top to call routine, followed by kcall
 * to sigreturn routine below.  After sigreturn
 * resets the signal mask, the stack, and the
 * frame pointer, it returns to the user
 * specified pc, psl.
 *
 * Called with the proc lock and ps_mtx held; both are dropped around
 * the copyout() and re-acquired before returning.
 */
void
sendsig(catcher, sig, mask, code)
        sig_t catcher;          /* user-mode handler address */
        int sig;                /* signal number (pre-translation) */
        sigset_t *mask;         /* mask to restore at sigreturn time */
        u_long code;            /* si_code / old-style code argument */
{
        struct sigframe sf, *sfp;
        struct proc *p;
        struct thread *td;
        struct sigacts *psp;
        char *sp;
        struct trapframe *regs;
        int oonstack;

        td = curthread;
        p = td->td_proc;
        PROC_LOCK_ASSERT(p, MA_OWNED);
        psp = p->p_sigacts;
        mtx_assert(&psp->ps_mtx, MA_OWNED);
        regs = td->td_frame;
        /* Are we already executing on the alternate signal stack? */
        oonstack = sigonstack(regs->tf_rsp);

        /* Save user context. */
        bzero(&sf, sizeof(sf));
        sf.sf_uc.uc_sigmask = *mask;
        sf.sf_uc.uc_stack = td->td_sigstk;
        sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
            ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
        sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
        /* The mcontext register area starting at mc_rdi mirrors the trapframe. */
        bcopy(regs, &sf.sf_uc.uc_mcontext.mc_rdi, sizeof(*regs));
        sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
        get_fpcontext(td, &sf.sf_uc.uc_mcontext);
        fpstate_drop(td);

        /* Allocate space for the signal handler context. */
        if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
            SIGISMEMBER(psp->ps_sigonstack, sig)) {
                /* Place the frame at the top of the alternate stack. */
                sp = td->td_sigstk.ss_sp +
                    td->td_sigstk.ss_size - sizeof(struct sigframe);
#if defined(COMPAT_43)
                td->td_sigstk.ss_flags |= SS_ONSTACK;
#endif
        } else
                /* The extra 128 bytes skip the amd64 ABI red zone. */
                sp = (char *)regs->tf_rsp - sizeof(struct sigframe) - 128;
        /* Align to 16 bytes. */
        sfp = (struct sigframe *)((unsigned long)sp & ~0xFul);

        /* Translate the signal if appropriate. */
        if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
                sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

        /* Build the argument list for the signal handler. */
        regs->tf_rdi = sig;                     /* arg 1 in %rdi */
        regs->tf_rdx = (register_t)&sfp->sf_uc; /* arg 3 in %rdx */
        if (SIGISMEMBER(psp->ps_siginfo, sig)) {
                /* Signal handler installed with SA_SIGINFO. */
                regs->tf_rsi = (register_t)&sfp->sf_si; /* arg 2 in %rsi */
                sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;

                /* Fill in POSIX parts */
                sf.sf_si.si_signo = sig;
                sf.sf_si.si_code = code;
                sf.sf_si.si_addr = (void *)regs->tf_addr;
                regs->tf_rcx = regs->tf_addr;   /* arg 4 in %rcx */
        } else {
                /* Old FreeBSD-style arguments. */
                regs->tf_rsi = code;            /* arg 2 in %rsi */
                regs->tf_rcx = regs->tf_addr;   /* arg 4 in %rcx */
                sf.sf_ahu.sf_handler = catcher;
        }
        /* Drop the locks while copyout() touches pageable user memory. */
        mtx_unlock(&psp->ps_mtx);
        PROC_UNLOCK(p);

        /*
         * Copy the sigframe out to the user's stack.
         */
        if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
#ifdef DEBUG
                printf("process %ld has trashed its stack\n", (long)p->p_pid);
#endif
                /* Unwritable signal stack: terminate rather than loop. */
                PROC_LOCK(p);
                sigexit(td, SIGILL);
        }

        regs->tf_rsp = (long)sfp;
        /* Resume in the signal trampoline (sigcode) just below PS_STRINGS. */
        regs->tf_rip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
        regs->tf_rflags &= ~PSL_T;
        regs->tf_cs = _ucodesel;
        /* Re-acquire the locks the caller expects to still hold. */
        PROC_LOCK(p);
        mtx_lock(&psp->ps_mtx);
}
  337 
  338 /*
  339  * Build siginfo_t for SA thread
  340  */
  341 void
  342 cpu_thread_siginfo(int sig, u_long code, siginfo_t *si)
  343 {
  344         struct proc *p;
  345         struct thread *td;
  346         struct trapframe *regs;
  347 
  348         td = curthread;
  349         p = td->td_proc;
  350         regs = td->td_frame;
  351         PROC_LOCK_ASSERT(p, MA_OWNED);
  352 
  353         bzero(si, sizeof(*si));
  354         si->si_signo = sig;
  355         si->si_code = code;
  356         si->si_addr = (void *)regs->tf_addr;
  357         /* XXXKSE fill other fields */
  358 }
  359 
/*
 * System call to cleanup state after a signal
 * has been taken.  Reset signal mask and
 * stack state from context left by sendsig (above).
 * Return to previous pc and psl as specified by
 * context left by sendsig. Check carefully to
 * make sure that the user has not modified the
 * state to gain improper privileges.
 *
 * MPSAFE
 */
int
sigreturn(td, uap)
        struct thread *td;
        struct sigreturn_args /* {
                const __ucontext *sigcntxp;
        } */ *uap;
{
        ucontext_t uc;
        struct proc *p = td->td_proc;
        struct trapframe *regs;
        const ucontext_t *ucp;
        long rflags;
        int cs, error, ret;

        /* Fetch the user-supplied context; it is entirely untrusted. */
        error = copyin(uap->sigcntxp, &uc, sizeof(uc));
        if (error != 0)
                return (error);
        ucp = &uc;
        regs = td->td_frame;
        rflags = ucp->uc_mcontext.mc_rflags;
        /*
         * Don't allow users to change privileged or reserved flags.
         */
        /*
         * XXX do allow users to change the privileged flag PSL_RF.
         * The cpu sets PSL_RF in tf_rflags for faults.  Debuggers
         * should sometimes set it there too.  tf_rflags is kept in
         * the signal context during signal handling and there is no
         * other place to remember it, so the PSL_RF bit may be
         * corrupted by the signal handler without us knowing.
         * Corruption of the PSL_RF bit at worst causes one more or
         * one less debugger trap, so allowing it is fairly harmless.
         */
        if (!EFL_SECURE(rflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) {
                printf("sigreturn: rflags = 0x%lx\n", rflags);
                return (EINVAL);
        }

        /*
         * Don't allow users to load a valid privileged %cs.  Let the
         * hardware check for invalid selectors, excess privilege in
         * other selectors, invalid %eip's and invalid %esp's.
         */
        cs = ucp->uc_mcontext.mc_cs;
        if (!CS_SECURE(cs)) {
                printf("sigreturn: cs = 0x%x\n", cs);
                trapsignal(td, SIGBUS, T_PROTFLT);
                return (EINVAL);
        }

        /* Restore FPU state first; it can fail without touching regs. */
        ret = set_fpcontext(td, &ucp->uc_mcontext);
        if (ret != 0)
                return (ret);
        /* The mcontext register block (from mc_rdi) mirrors the trapframe. */
        bcopy(&ucp->uc_mcontext.mc_rdi, regs, sizeof(*regs));

        PROC_LOCK(p);
#if defined(COMPAT_43)
        if (ucp->uc_mcontext.mc_onstack & 1)
                td->td_sigstk.ss_flags |= SS_ONSTACK;
        else
                td->td_sigstk.ss_flags &= ~SS_ONSTACK;
#endif

        td->td_sigmask = ucp->uc_sigmask;
        SIG_CANTMASK(td->td_sigmask);   /* strip signals that may never be masked */
        signotify(td);
        PROC_UNLOCK(p);
        /*
         * NOTE(review): PCB_FULLCTX presumably forces a full register
         * restore on the return path to user mode -- confirm in the
         * syscall return code.
         */
        td->td_pcb->pcb_flags |= PCB_FULLCTX;
        return (EJUSTRETURN);
}
  441 
  442 #ifdef COMPAT_FREEBSD4
  443 int
  444 freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap)
  445 {
  446  
  447         return sigreturn(td, (struct sigreturn_args *)uap);
  448 }
  449 #endif
  450 
  451 
  452 /*
  453  * Machine dependent boot() routine
  454  *
  455  * I haven't seen anything to put here yet
  456  * Possibly some stuff might be grafted back here from boot()
  457  */
  458 void
  459 cpu_boot(int howto)
  460 {
  461 }
  462 
/*
 * Get current clock frequency for the given cpu id.  Returns the
 * estimated TSC rate in *rate (ticks per second) or EINVAL if the
 * cpu id is unknown or rate is NULL.
 */
int
cpu_est_clockrate(int cpu_id, uint64_t *rate)
{
        register_t reg;
        uint64_t tsc1, tsc2;

        if (pcpu_find(cpu_id) == NULL || rate == NULL)
                return (EINVAL);

        /* If we're booting, trust the rate calibrated moments ago. */
        if (cold) {
                *rate = tsc_freq;
                return (0);
        }

#ifdef SMP
        /* Schedule ourselves on the indicated cpu. */
        mtx_lock_spin(&sched_lock);
        sched_bind(curthread, cpu_id);
        mtx_unlock_spin(&sched_lock);
#endif

        /* Calibrate by measuring a short delay. */
        reg = intr_disable();           /* keep interrupts out of the sample */
        tsc1 = rdtsc();
        DELAY(1000);                    /* busy-wait ~1000us */
        tsc2 = rdtsc();
        intr_restore(reg);

#ifdef SMP
        mtx_lock_spin(&sched_lock);
        sched_unbind(curthread);
        mtx_unlock_spin(&sched_lock);
#endif

        /*
         * Calculate the difference in readings, scale the ~1ms sample
         * up to ticks per second, and subtract 0.5% of the total.
         * Empirical testing has shown that overhead in DELAY() works
         * out to approximately this value.
         */
        tsc2 -= tsc1;
        *rate = tsc2 * 1000 - tsc2 * 5;
        return (0);
}
  508 
  509 /*
  510  * Shutdown the CPU as much as possible
  511  */
  512 void
  513 cpu_halt(void)
  514 {
  515         for (;;)
  516                 __asm__ ("hlt");
  517 }
  518 
/*
 * Hook to idle the CPU when possible.  In the SMP case we default to
 * off because a halted cpu will not currently pick up a new thread in the
 * run queue until the next timer tick.  If turned on this will result in
 * approximately a 4.2% loss in real time performance in buildworld tests
 * (but improves user and sys times oddly enough), and saves approximately
 * 5% in power consumption on an idle machine (tests w/2xCPU 1.1GHz P3).
 *
 * XXX we need to have a cpu mask of idle cpus and generate an IPI or
 * otherwise generate some sort of interrupt to wake up cpus sitting in HLT.
 * Then we can have our cake and eat it too.
 *
 * XXX I'm turning it on for SMP as well by default for now.  It seems to
 * help lock contention somewhat, and this is critical for HTT. -Peter
 */
/* Non-zero enables HLT in cpu_idle(); tunable at runtime via sysctl. */
static int      cpu_idle_hlt = 1;
SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW,
    &cpu_idle_hlt, 0, "Idle loop HLT enable");
  537 
/* Default idle hook: re-enable interrupts and halt, atomically. */
static void
cpu_idle_default(void)
{
        /*
         * We must absolutely guarantee that hlt is the next instruction
         * after sti or we introduce a timing window: "sti" holds off
         * interrupts for exactly one instruction, so a wakeup interrupt
         * cannot slip in before the halt and leave us sleeping.
         */
        __asm __volatile("sti; hlt");
}
  548 
/*
 * Note that we have to be careful here to avoid a race between checking
 * sched_runnable() and actually halting.  If we don't do this, we may waste
 * the time between calling hlt and the next interrupt even though there
 * is a runnable process.
 */
void
cpu_idle(void)
{

#ifdef SMP
        if (mp_grab_cpu_hlt())
                return;
#endif
        if (cpu_idle_hlt) {
                /*
                 * Disable interrupts before the runnable check so a
                 * wakeup cannot be lost in between; the default hook
                 * re-enables them with "sti" immediately before "hlt".
                 */
                disable_intr();
                if (sched_runnable())
                        enable_intr();
                else
                        (*cpu_idle_hook)();
        }
}
  571 
/*
 * Routine run by cpu_idle() to actually idle the CPU; defaults to
 * cpu_idle_default().  Other subsystems (e.g., ACPI) can hook this later.
 */
void (*cpu_idle_hook)(void) = cpu_idle_default;
  574 
/*
 * Clear registers on exec: give the newly exec'd image a pristine
 * register set, user segment selectors, and FPU/debug-register state.
 */
void
exec_setregs(td, entry, stack, ps_strings)
        struct thread *td;
        u_long entry;           /* initial user %rip */
        u_long stack;           /* initial user stack pointer */
        u_long ps_strings;      /* unused on amd64 (argv is passed in %rdi) */
{
        struct trapframe *regs = td->td_frame;
        struct pcb *pcb = td->td_pcb;
        
        /*
         * Reset the FS/GS base MSRs and the pcb copies inside a
         * critical section so a preemption cannot observe them
         * half-updated.
         */
        critical_enter();
        wrmsr(MSR_FSBASE, 0);
        wrmsr(MSR_KGSBASE, 0);  /* User value while we're in the kernel */
        pcb->pcb_fsbase = 0;
        pcb->pcb_gsbase = 0;
        critical_exit();
        /* Load the user data selector into every data segment register. */
        load_ds(_udatasel);
        load_es(_udatasel);
        load_fs(_udatasel);
        load_gs(_udatasel);
        pcb->pcb_ds = _udatasel;
        pcb->pcb_es = _udatasel;
        pcb->pcb_fs = _udatasel;
        pcb->pcb_gs = _udatasel;

        bzero((char *)regs, sizeof(struct trapframe));
        regs->tf_rip = entry;
        /* %rsp ends up 8 below a 16-byte boundary (ABI function-entry shape). */
        regs->tf_rsp = ((stack - 8) & ~0xFul) + 8;
        regs->tf_rdi = stack;           /* argv */
        regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
        regs->tf_ss = _udatasel;
        regs->tf_cs = _ucodesel;

        /*
         * Reset the hardware debug registers if they were in use.
         * They won't have any meaning for the newly exec'd process.
         */
        if (pcb->pcb_flags & PCB_DBREGS) {
                pcb->pcb_dr0 = 0;
                pcb->pcb_dr1 = 0;
                pcb->pcb_dr2 = 0;
                pcb->pcb_dr3 = 0;
                pcb->pcb_dr6 = 0;
                pcb->pcb_dr7 = 0;
                if (pcb == PCPU_GET(curpcb)) {
                        /*
                         * Clear the debug registers on the running
                         * CPU, otherwise they will end up affecting
                         * the next process we switch to.
                         */
                        reset_dbregs();
                }
                pcb->pcb_flags &= ~PCB_DBREGS;
        }

        /*
         * Drop the FP state if we hold it, so that the process gets a
         * clean FP state if it uses the FPU again.
         */
        fpstate_drop(td);
}
  639 
  640 void
  641 cpu_setregs(void)
  642 {
  643         register_t cr0;
  644 
  645         cr0 = rcr0();
  646         /*
  647          * CR0_MP, CR0_NE and CR0_TS are also set by npx_probe() for the
  648          * BSP.  See the comments there about why we set them.
  649          */
  650         cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM;
  651         load_cr0(cr0);
  652 }
  653 
  654 static int
  655 sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS)
  656 {
  657         int error;
  658         error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
  659                 req);
  660         if (!error && req->newptr)
  661                 resettodr();
  662         return (error);
  663 }
  664 
/* machdep.adjkerntz: integer sysctl whose writes also call resettodr(). */
SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW,
        &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "");

/* machdep.disable_rtc_set: NOTE(review): presumably blocks RTC writes -- confirm. */
SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
        CTLFLAG_RW, &disable_rtc_set, 0, "");

/* machdep.wall_cmos_clock: NOTE(review): presumably CMOS keeps local time -- confirm. */
SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock,
        CTLFLAG_RW, &wall_cmos_clock, 0, "");
  673 
/*
 * Initialize amd64 and configure to run kernel
 */

/*
 * Initialize segments & interrupt table
 */

struct user_segment_descriptor gdt[NGDT * MAXCPU];/* global descriptor table */
static struct gate_descriptor idt0[NIDT];
struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */

/* Dedicated stack; NOTE(review): presumably used by dblfault_handler -- confirm. */
static char dblfault_stack[PAGE_SIZE] __aligned(16);

/* One TSS per possible CPU. */
struct amd64tss common_tss[MAXCPU];
  689 
/*
 * software prototypes -- in more palatable form.
 * Entry order must match the G*_SEL selector indexes noted per entry.
 */
struct soft_segment_descriptor gdt_segs[] = {
/* GNULL_SEL    0 Null Descriptor */
{       0x0,                    /* segment base address  */
        0x0,                    /* length */
        0,                      /* segment type */
        0,                      /* segment descriptor priority level */
        0,                      /* segment descriptor present */
        0,                      /* long */
        0,                      /* default 32 vs 16 bit size */
        0                       /* limit granularity (byte/page units)*/ },
/* GCODE_SEL    1 Code Descriptor for kernel */
{       0x0,                    /* segment base address  */
        0xfffff,                /* length - all address space */
        SDT_MEMERA,             /* segment type */
        SEL_KPL,                /* segment descriptor priority level */
        1,                      /* segment descriptor present */
        1,                      /* long */
        0,                      /* default 32 vs 16 bit size */
        1                       /* limit granularity (byte/page units)*/ },
/* GDATA_SEL    2 Data Descriptor for kernel */
{       0x0,                    /* segment base address  */
        0xfffff,                /* length - all address space */
        SDT_MEMRWA,             /* segment type */
        SEL_KPL,                /* segment descriptor priority level */
        1,                      /* segment descriptor present */
        1,                      /* long */
        0,                      /* default 32 vs 16 bit size */
        1                       /* limit granularity (byte/page units)*/ },
/* GUCODE32_SEL 3 32 bit Code Descriptor for user */
{       0x0,                    /* segment base address  */
        0xfffff,                /* length - all address space */
        SDT_MEMERA,             /* segment type */
        SEL_UPL,                /* segment descriptor priority level */
        1,                      /* segment descriptor present */
        0,                      /* long */
        1,                      /* default 32 vs 16 bit size */
        1                       /* limit granularity (byte/page units)*/ },
/* GUDATA_SEL   4 32/64 bit Data Descriptor for user */
{       0x0,                    /* segment base address  */
        0xfffff,                /* length - all address space */
        SDT_MEMRWA,             /* segment type */
        SEL_UPL,                /* segment descriptor priority level */
        1,                      /* segment descriptor present */
        0,                      /* long */
        1,                      /* default 32 vs 16 bit size */
        1                       /* limit granularity (byte/page units)*/ },
/* GUCODE_SEL   5 64 bit Code Descriptor for user */
{       0x0,                    /* segment base address  */
        0xfffff,                /* length - all address space */
        SDT_MEMERA,             /* segment type */
        SEL_UPL,                /* segment descriptor priority level */
        1,                      /* segment descriptor present */
        1,                      /* long */
        0,                      /* default 32 vs 16 bit size */
        1                       /* limit granularity (byte/page units)*/ },
/* GPROC0_SEL   6 Proc 0 Tss Descriptor */
{
        0x0,                    /* segment base address */
        sizeof(struct amd64tss)-1,/* length - all address space */
        SDT_SYSTSS,             /* segment type */
        SEL_KPL,                /* segment descriptor priority level */
        1,                      /* segment descriptor present */
        0,                      /* long */
        0,                      /* unused - default 32 vs 16 bit size */
        0                       /* limit granularity (byte/page units)*/ },
/*
 * Actually, the TSS is a system descriptor which is double size, so
 * this empty slot holds the upper half of the GPROC0_SEL descriptor.
 */
{       0x0,                    /* segment base address  */
        0x0,                    /* length */
        0,                      /* segment type */
        0,                      /* segment descriptor priority level */
        0,                      /* segment descriptor present */
        0,                      /* long */
        0,                      /* default 32 vs 16 bit size */
        0                       /* limit granularity (byte/page units)*/ },
};
  766 
  767 void
  768 setidt(idx, func, typ, dpl, ist)
  769         int idx;
  770         inthand_t *func;
  771         int typ;
  772         int dpl;
  773         int ist;
  774 {
  775         struct gate_descriptor *ip;
  776 
  777         ip = idt + idx;
  778         ip->gd_looffset = (uintptr_t)func;
  779         ip->gd_selector = GSEL(GCODE_SEL, SEL_KPL);
  780         ip->gd_ist = ist;
  781         ip->gd_xx = 0;
  782         ip->gd_type = typ;
  783         ip->gd_dpl = dpl;
  784         ip->gd_p = 1;
  785         ip->gd_hioffset = ((uintptr_t)func)>>16 ;
  786 }
  787 
/* Assembly trap/interrupt entry points are named X<name>. */
#define IDTVEC(name)    __CONCAT(X,name)

/* Exception/trap handlers plus the syscall entry points, defined in assembly. */
extern inthand_t
        IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
        IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
        IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
        IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
        IDTVEC(xmm), IDTVEC(dblfault),
        IDTVEC(fast_syscall), IDTVEC(fast_syscall32);
  797 
  798 void
  799 sdtossd(sd, ssd)
  800         struct user_segment_descriptor *sd;
  801         struct soft_segment_descriptor *ssd;
  802 {
  803 
  804         ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
  805         ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
  806         ssd->ssd_type  = sd->sd_type;
  807         ssd->ssd_dpl   = sd->sd_dpl;
  808         ssd->ssd_p     = sd->sd_p;
  809         ssd->ssd_long  = sd->sd_long;
  810         ssd->ssd_def32 = sd->sd_def32;
  811         ssd->ssd_gran  = sd->sd_gran;
  812 }
  813 
  814 void
  815 ssdtosd(ssd, sd)
  816         struct soft_segment_descriptor *ssd;
  817         struct user_segment_descriptor *sd;
  818 {
  819 
  820         sd->sd_lobase = (ssd->ssd_base) & 0xffffff;
  821         sd->sd_hibase = (ssd->ssd_base >> 24) & 0xff;
  822         sd->sd_lolimit = (ssd->ssd_limit) & 0xffff;
  823         sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf;
  824         sd->sd_type  = ssd->ssd_type;
  825         sd->sd_dpl   = ssd->ssd_dpl;
  826         sd->sd_p     = ssd->ssd_p;
  827         sd->sd_long  = ssd->ssd_long;
  828         sd->sd_def32 = ssd->ssd_def32;
  829         sd->sd_gran  = ssd->ssd_gran;
  830 }
  831 
  832 void
  833 ssdtosyssd(ssd, sd)
  834         struct soft_segment_descriptor *ssd;
  835         struct system_segment_descriptor *sd;
  836 {
  837 
  838         sd->sd_lobase = (ssd->ssd_base) & 0xffffff;
  839         sd->sd_hibase = (ssd->ssd_base >> 24) & 0xfffffffffful;
  840         sd->sd_lolimit = (ssd->ssd_limit) & 0xffff;
  841         sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf;
  842         sd->sd_type  = ssd->ssd_type;
  843         sd->sd_dpl   = ssd->ssd_dpl;
  844         sd->sd_p     = ssd->ssd_p;
  845         sd->sd_gran  = ssd->ssd_gran;
  846 }
  847 
  848 #if !defined(DEV_ATPIC) && defined(DEV_ISA)
  849 #include <isa/isavar.h>
/*
 * With no ATPIC configured there is no ISA interrupt controller to
 * poll, so report that no ISA IRQs are pending.  (Only compiled when
 * DEV_ISA is present without DEV_ATPIC; see the surrounding #if.)
 */
u_int
isa_irq_pending(void)
{

	return (0);
}
  856 #endif
  857 
  858 u_int basemem;
  859 
  860 /*
  861  * Populate the (physmap) array with base/bound pairs describing the
  862  * available physical memory in the system, then test this memory and
  863  * build the phys_avail array describing the actually-available memory.
  864  *
 * If the loader does not supply an SMAP (INT 15:E820) memory map we
 * panic; unlike i386 there is no 0xE801/RTC fallback on amd64.
  867  *
  868  * Total memory size may be set by the kernel environment variable
  869  * hw.physmem or the compile-time define MAXMEM.
  870  *
  871  * XXX first should be vm_paddr_t.
  872  */
  873 static void
  874 getmemsize(caddr_t kmdp, u_int64_t first)
  875 {
  876         int i, physmap_idx, pa_indx, da_indx;
  877         vm_paddr_t pa, physmap[PHYSMAP_SIZE];
  878         u_long physmem_tunable;
  879         pt_entry_t *pte;
  880         struct bios_smap *smapbase, *smap, *smapend;
  881         u_int32_t smapsize;
  882         quad_t dcons_addr, dcons_size;
  883 
  884         bzero(physmap, sizeof(physmap));
  885         basemem = 0;
  886         physmap_idx = 0;
  887 
  888         /*
  889          * get memory map from INT 15:E820, kindly supplied by the loader.
  890          *
  891          * subr_module.c says:
  892          * "Consumer may safely assume that size value precedes data."
  893          * ie: an int32_t immediately precedes smap.
  894          */
  895         smapbase = (struct bios_smap *)preload_search_info(kmdp,
  896             MODINFO_METADATA | MODINFOMD_SMAP);
  897         if (smapbase == NULL)
  898                 panic("No BIOS smap info from loader!");
  899 
  900         smapsize = *((u_int32_t *)smapbase - 1);
  901         smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize);
  902 
  903         for (smap = smapbase; smap < smapend; smap++) {
  904                 if (boothowto & RB_VERBOSE)
  905                         printf("SMAP type=%02x base=%016lx len=%016lx\n",
  906                             smap->type, smap->base, smap->length);
  907 
  908                 if (smap->type != 0x01)
  909                         continue;
  910 
  911                 if (smap->length == 0)
  912                         continue;
  913 
  914                 for (i = 0; i <= physmap_idx; i += 2) {
  915                         if (smap->base < physmap[i + 1]) {
  916                                 if (boothowto & RB_VERBOSE)
  917                                         printf(
  918         "Overlapping or non-montonic memory region, ignoring second region\n");
  919                                 continue;
  920                         }
  921                 }
  922 
  923                 if (smap->base == physmap[physmap_idx + 1]) {
  924                         physmap[physmap_idx + 1] += smap->length;
  925                         continue;
  926                 }
  927 
  928                 physmap_idx += 2;
  929                 if (physmap_idx == PHYSMAP_SIZE) {
  930                         printf(
  931                 "Too many segments in the physical address map, giving up\n");
  932                         break;
  933                 }
  934                 physmap[physmap_idx] = smap->base;
  935                 physmap[physmap_idx + 1] = smap->base + smap->length;
  936         }
  937 
  938         /*
  939          * Find the 'base memory' segment for SMP
  940          */
  941         basemem = 0;
  942         for (i = 0; i <= physmap_idx; i += 2) {
  943                 if (physmap[i] == 0x00000000) {
  944                         basemem = physmap[i + 1] / 1024;
  945                         break;
  946                 }
  947         }
  948         if (basemem == 0)
  949                 panic("BIOS smap did not include a basemem segment!");
  950 
  951 #ifdef SMP
  952         /* make hole for AP bootstrap code */
  953         physmap[1] = mp_bootaddress(physmap[1] / 1024);
  954 #endif
  955 
  956         /*
  957          * Maxmem isn't the "maximum memory", it's one larger than the
  958          * highest page of the physical address space.  It should be
  959          * called something like "Maxphyspage".  We may adjust this
  960          * based on ``hw.physmem'' and the results of the memory test.
  961          */
  962         Maxmem = atop(physmap[physmap_idx + 1]);
  963 
  964 #ifdef MAXMEM
  965         Maxmem = MAXMEM / 4;
  966 #endif
  967 
  968         if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable))
  969                 Maxmem = atop(physmem_tunable);
  970 
  971         /*
  972          * Don't allow MAXMEM or hw.physmem to extend the amount of memory
  973          * in the system.
  974          */
  975         if (Maxmem > atop(physmap[physmap_idx + 1]))
  976                 Maxmem = atop(physmap[physmap_idx + 1]);
  977 
  978         if (atop(physmap[physmap_idx + 1]) != Maxmem &&
  979             (boothowto & RB_VERBOSE))
  980                 printf("Physical memory use set to %ldK\n", Maxmem * 4);
  981 
  982         /* call pmap initialization to make new kernel address space */
  983         pmap_bootstrap(&first);
  984 
  985         /*
  986          * Size up each available chunk of physical memory.
  987          */
  988         physmap[0] = PAGE_SIZE;         /* mask off page 0 */
  989         pa_indx = 0;
  990         da_indx = 1;
  991         phys_avail[pa_indx++] = physmap[0];
  992         phys_avail[pa_indx] = physmap[0];
  993         dump_avail[da_indx] = physmap[0];
  994         pte = CMAP1;
  995 
  996         /*
  997          * Get dcons buffer address
  998          */
  999         if (getenv_quad("dcons.addr", &dcons_addr) == 0 ||
 1000             getenv_quad("dcons.size", &dcons_size) == 0)
 1001                 dcons_addr = 0;
 1002 
 1003         /*
 1004          * physmap is in bytes, so when converting to page boundaries,
 1005          * round up the start address and round down the end address.
 1006          */
 1007         for (i = 0; i <= physmap_idx; i += 2) {
 1008                 vm_paddr_t end;
 1009 
 1010                 end = ptoa((vm_paddr_t)Maxmem);
 1011                 if (physmap[i + 1] < end)
 1012                         end = trunc_page(physmap[i + 1]);
 1013                 for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
 1014                         int tmp, page_bad, full;
 1015                         int *ptr = (int *)CADDR1;
 1016 
 1017                         full = FALSE;
 1018                         /*
 1019                          * block out kernel memory as not available.
 1020                          */
 1021                         if (pa >= 0x100000 && pa < first)
 1022                                 goto do_dump_avail;
 1023 
 1024                         /*
 1025                          * block out dcons buffer
 1026                          */
 1027                         if (dcons_addr > 0
 1028                             && pa >= trunc_page(dcons_addr)
 1029                             && pa < dcons_addr + dcons_size)
 1030                                 goto do_dump_avail;
 1031 
 1032                         page_bad = FALSE;
 1033 
 1034                         /*
 1035                          * map page into kernel: valid, read/write,non-cacheable
 1036                          */
 1037                         *pte = pa | PG_V | PG_RW | PG_N;
 1038                         invltlb();
 1039 
 1040                         tmp = *(int *)ptr;
 1041                         /*
 1042                          * Test for alternating 1's and 0's
 1043                          */
 1044                         *(volatile int *)ptr = 0xaaaaaaaa;
 1045                         if (*(volatile int *)ptr != 0xaaaaaaaa)
 1046                                 page_bad = TRUE;
 1047                         /*
 1048                          * Test for alternating 0's and 1's
 1049                          */
 1050                         *(volatile int *)ptr = 0x55555555;
 1051                         if (*(volatile int *)ptr != 0x55555555)
 1052                                 page_bad = TRUE;
 1053                         /*
 1054                          * Test for all 1's
 1055                          */
 1056                         *(volatile int *)ptr = 0xffffffff;
 1057                         if (*(volatile int *)ptr != 0xffffffff)
 1058                                 page_bad = TRUE;
 1059                         /*
 1060                          * Test for all 0's
 1061                          */
 1062                         *(volatile int *)ptr = 0x0;
 1063                         if (*(volatile int *)ptr != 0x0)
 1064                                 page_bad = TRUE;
 1065                         /*
 1066                          * Restore original value.
 1067                          */
 1068                         *(int *)ptr = tmp;
 1069 
 1070                         /*
 1071                          * Adjust array of valid/good pages.
 1072                          */
 1073                         if (page_bad == TRUE)
 1074                                 continue;
 1075                         /*
 1076                          * If this good page is a continuation of the
 1077                          * previous set of good pages, then just increase
 1078                          * the end pointer. Otherwise start a new chunk.
 1079                          * Note that "end" points one higher than end,
 1080                          * making the range >= start and < end.
 1081                          * If we're also doing a speculative memory
 1082                          * test and we at or past the end, bump up Maxmem
 1083                          * so that we keep going. The first bad page
 1084                          * will terminate the loop.
 1085                          */
 1086                         if (phys_avail[pa_indx] == pa) {
 1087                                 phys_avail[pa_indx] += PAGE_SIZE;
 1088                         } else {
 1089                                 pa_indx++;
 1090                                 if (pa_indx == PHYS_AVAIL_ARRAY_END) {
 1091                                         printf(
 1092                 "Too many holes in the physical address space, giving up\n");
 1093                                         pa_indx--;
 1094                                         full = TRUE;
 1095                                         goto do_dump_avail;
 1096                                 }
 1097                                 phys_avail[pa_indx++] = pa;     /* start */
 1098                                 phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */
 1099                         }
 1100                         physmem++;
 1101 do_dump_avail:
 1102                         if (dump_avail[da_indx] == pa) {
 1103                                 dump_avail[da_indx] += PAGE_SIZE;
 1104                         } else {
 1105                                 da_indx++;
 1106                                 if (da_indx == DUMP_AVAIL_ARRAY_END) {
 1107                                         da_indx--;
 1108                                         goto do_next;
 1109                                 }
 1110                                 dump_avail[da_indx++] = pa; /* start */
 1111                                 dump_avail[da_indx] = pa + PAGE_SIZE; /* end */
 1112                         }
 1113 do_next:
 1114                         if (full)
 1115                                 break;
 1116                 }
 1117         }
 1118         *pte = 0;
 1119         invltlb();
 1120 
 1121         /*
 1122          * XXX
 1123          * The last chunk must contain at least one page plus the message
 1124          * buffer to avoid complicating other code (message buffer address
 1125          * calculation, etc.).
 1126          */
 1127         while (phys_avail[pa_indx - 1] + PAGE_SIZE +
 1128             round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) {
 1129                 physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
 1130                 phys_avail[pa_indx--] = 0;
 1131                 phys_avail[pa_indx--] = 0;
 1132         }
 1133 
 1134         Maxmem = atop(phys_avail[pa_indx]);
 1135 
 1136         /* Trim off space for the message buffer. */
 1137         phys_avail[pa_indx] -= round_page(MSGBUF_SIZE);
 1138 
 1139         avail_end = phys_avail[pa_indx];
 1140 }
 1141 
/*
 * amd64 early machine-dependent startup, called from locore with the
 * preloaded module list physical address (`modulep') and the first
 * free physical address after the kernel (`physfree').  Sets up
 * thread0's stack/PCB, the GDT/IDT/TSS, per-CPU data, the fast
 * SYSCALL MSRs, and the physical memory map, then returns the address
 * of thread0's PCB for locore to load as the initial stack pointer.
 *
 * Statement order here is load-bearing: e.g. the GDT must be loaded
 * before MSR_GSBASE is pointed at the pcpu area, and the TSS fields
 * must be filled in before ltr().
 */
u_int64_t
hammer_time(u_int64_t modulep, u_int64_t physfree)
{
	caddr_t kmdp;
	int gsel_tss, off, x;
	struct pcpu *pc;
	u_int64_t msr;
	char *env;

	/*
	 * Carve thread0's kernel stack out of the free memory after the
	 * kernel and place its PCB at the very top of that stack.
	 */
	thread0.td_kstack = physfree + KERNBASE;
	bzero((void *)thread0.td_kstack, KSTACK_PAGES * PAGE_SIZE);
	physfree += KSTACK_PAGES * PAGE_SIZE;
	thread0.td_pcb = (struct pcb *)
	   (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1;

	/*
	 * This may be done better later if it gets more high level
	 * components in it. If so just link td->td_proc here.
	 */
	proc_linkup(&proc0, &ksegrp0, &thread0);

	/* Relocate the loader-supplied metadata and mine it for boot info. */
	preload_metadata = (caddr_t)(uintptr_t)(modulep + KERNBASE);
	preload_bootstrap_relocate(KERNBASE);
	kmdp = preload_search_by_type("elf kernel");
	if (kmdp == NULL)
		kmdp = preload_search_by_type("elf64 kernel");
	boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int);
	kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *) + KERNBASE;
#ifdef DDB
	ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t);
	ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t);
#endif

	/* Init basic tunables, hz etc */
	init_param1();

	/*
	 * make gdt memory segments
	 */
	gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&common_tss[0];

	/*
	 * GPROC0_SEL is the double-size TSS system descriptor; it and the
	 * slot after it are packed separately by ssdtosyssd() below.
	 */
	for (x = 0; x < NGDT; x++) {
		if (x != GPROC0_SEL && x != (GPROC0_SEL + 1))
			ssdtosd(&gdt_segs[x], &gdt[x]);
	}
	ssdtosyssd(&gdt_segs[GPROC0_SEL],
	    (struct system_segment_descriptor *)&gdt[GPROC0_SEL]);

	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
	r_gdt.rd_base =  (long) gdt;
	lgdt(&r_gdt);
	pc = &__pcpu[0];

	/* %gs points at the BSP's pcpu area while in the kernel. */
	wrmsr(MSR_FSBASE, 0);		/* User value */
	wrmsr(MSR_GSBASE, (u_int64_t)pc);
	wrmsr(MSR_KGSBASE, 0);		/* User value while in the kernel */

	pcpu_init(pc, 0, sizeof(struct pcpu));
	PCPU_SET(prvspace, pc);
	PCPU_SET(curthread, &thread0);
	PCPU_SET(curpcb, thread0.td_pcb);
	PCPU_SET(tssp, &common_tss[0]);

	/*
	 * Initialize mutexes.
	 *
	 * icu_lock: in order to allow an interrupt to occur in a critical
	 *           section, to set pcpu->ipending (etc...) properly, we
	 *           must be able to get the icu lock, so it can't be
	 *           under witness.
	 */
	mutex_init();
	mtx_init(&clock_lock, "clk", NULL, MTX_SPIN);
	mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS);

	/* exceptions: fill every vector, then override the defined ones */
	for (x = 0; x < NIDT; x++)
		setidt(x, &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_DE, &IDTVEC(div),  SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_DB, &IDTVEC(dbg),  SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_NMI, &IDTVEC(nmi),  SDT_SYSIGT, SEL_KPL, 0);
	/* Breakpoint gate is DPL 3 so int3 works from user mode. */
	setidt(IDT_BP, &IDTVEC(bpt),  SDT_SYSIGT, SEL_UPL, 0);
	setidt(IDT_OF, &IDTVEC(ofl),  SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_BR, &IDTVEC(bnd),  SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_UD, &IDTVEC(ill),  SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_NM, &IDTVEC(dna),  SDT_SYSIGT, SEL_KPL, 0);
	/* Double fault runs on IST slot 1 (the dedicated dblfault stack). */
	setidt(IDT_DF, &IDTVEC(dblfault), SDT_SYSIGT, SEL_KPL, 1);
	setidt(IDT_FPUGP, &IDTVEC(fpusegm),  SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_TS, &IDTVEC(tss),  SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_NP, &IDTVEC(missing),  SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_SS, &IDTVEC(stk),  SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_GP, &IDTVEC(prot),  SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_PF, &IDTVEC(page),  SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_MF, &IDTVEC(fpu),  SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_AC, &IDTVEC(align), SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_MC, &IDTVEC(mchk),  SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_XF, &IDTVEC(xmm), SDT_SYSIGT, SEL_KPL, 0);

	r_idt.rd_limit = sizeof(idt0) - 1;
	r_idt.rd_base = (long) idt;
	lidt(&r_idt);

	/*
	 * Initialize the console before we print anything out.
	 */
	cninit();

#ifdef DEV_ISA
#ifdef DEV_ATPIC
	elcr_probe();
	atpic_startup();
#else
	/* Reset and mask the atpics and leave them shut down. */
	atpic_reset();

	/*
	 * Point the ICU spurious interrupt vectors at the APIC spurious
	 * interrupt handler.
	 */
	setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0);
#endif
#else
#error "have you forgotten the isa device?";
#endif

	kdb_init();

#ifdef KDB
	if (boothowto & RB_KDB)
		kdb_enter("Boot flags requested debugger");
#endif

	identify_cpu();		/* Final stage of CPU initialization */
	initializecpu();	/* Initialize CPU registers */

	/* make an initial tss so cpu can get interrupt stack on syscall! */
	common_tss[0].tss_rsp0 = thread0.td_kstack + \
	    KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb);
	/* Ensure the stack is aligned to 16 bytes */
	common_tss[0].tss_rsp0 &= ~0xFul;
	PCPU_SET(rsp0, common_tss[0].tss_rsp0);

	/* doublefault stack space, runs on ist1 */
	common_tss[0].tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)];

	/* Set the IO permission bitmap (empty due to tss seg limit) */
	common_tss[0].tss_iobase = sizeof(struct amd64tss);

	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
	ltr(gsel_tss);

	/* Set up the fast syscall stuff */
	msr = rdmsr(MSR_EFER) | EFER_SCE;
	wrmsr(MSR_EFER, msr);
	wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall));
	wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32));
	/* STAR holds the kernel and 32-bit-user selector bases. */
	msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) |
	      ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48);
	wrmsr(MSR_STAR, msr);
	/* rflags bits cleared by hardware on SYSCALL entry. */
	wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D);

	getmemsize(kmdp, physfree);
	init_param2(physmem);

	/* now running on new page tables, configured,and u/iom is accessible */

	/* Map the message buffer. */
	for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE)
		pmap_kenter((vm_offset_t)msgbufp + off, avail_end + off);

	msgbufinit(msgbufp, MSGBUF_SIZE);
	fpuinit();

	/* transfer to user mode */

	_ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
	_udatasel = GSEL(GUDATA_SEL, SEL_UPL);
	_ucode32sel = GSEL(GUCODE32_SEL, SEL_UPL);

	/* setup proc 0's pcb */
	thread0.td_pcb->pcb_flags = 0; /* XXXKSE */
	thread0.td_pcb->pcb_cr3 = KPML4phys;
	thread0.td_frame = &proc0_tf;

	env = getenv("kernelname");
	if (env != NULL)
		strlcpy(kernelname, env, sizeof(kernelname));

	/* Location of kernel stack for locore */
	return ((u_int64_t)thread0.td_pcb);
}
 1334 
/*
 * Machine-dependent portion of per-CPU data initialization, called
 * from pcpu_init().  The only MD field set here is the ACPI CPU id,
 * initialized to the invalid sentinel 0xffffffff; presumably replaced
 * later once ACPI identifies the CPU — confirm against the ACPI code.
 */
void
cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
{

	pcpu->pc_acpi_id = 0xffffffff;
}
 1341 
/*
 * Raise this thread's spinlock nesting level.  On the outermost
 * acquisition the current interrupt state is saved and interrupts are
 * disabled; the matching spinlock_exit() restores it.  Interrupts are
 * disabled BEFORE the count is bumped so there is no window in which
 * the count claims a spinlock is held while interrupts remain enabled.
 */
void
spinlock_enter(void)
{
	struct thread *td;

	td = curthread;
	if (td->td_md.md_spinlock_count == 0)
		td->td_md.md_saved_flags = intr_disable();
	td->td_md.md_spinlock_count++;
	critical_enter();	/* also block preemption */
}
 1353 
/*
 * Drop one spinlock nesting level, restoring the interrupt state that
 * the outermost spinlock_enter() saved once the count reaches zero.
 * The tear-down mirrors spinlock_enter() in reverse order: leave the
 * critical section first, decrement, then re-enable interrupts last.
 */
void
spinlock_exit(void)
{
	struct thread *td;

	td = curthread;
	critical_exit();
	td->td_md.md_spinlock_count--;
	if (td->td_md.md_spinlock_count == 0)
		intr_restore(td->td_md.md_saved_flags);
}
 1365 
 1366 /*
 1367  * Construct a PCB from a trapframe. This is called from kdb_trap() where
 1368  * we want to start a backtrace from the function that caused us to enter
 1369  * the debugger. We have the context in the trapframe, but base the trace
 1370  * on the PCB. The PCB doesn't have to be perfect, as long as it contains
 1371  * enough for a backtrace.
 1372  */
 1373 void
 1374 makectx(struct trapframe *tf, struct pcb *pcb)
 1375 {
 1376 
 1377         pcb->pcb_r12 = tf->tf_r12;
 1378         pcb->pcb_r13 = tf->tf_r13;
 1379         pcb->pcb_r14 = tf->tf_r14;
 1380         pcb->pcb_r15 = tf->tf_r15;
 1381         pcb->pcb_rbp = tf->tf_rbp;
 1382         pcb->pcb_rbx = tf->tf_rbx;
 1383         pcb->pcb_rip = tf->tf_rip;
 1384         pcb->pcb_rsp = (ISPL(tf->tf_cs)) ? tf->tf_rsp : (long)(tf + 1) - 8;
 1385 }
 1386 
/*
 * Redirect the thread's user execution to `addr' by rewriting the
 * saved %rip in its trapframe (ptrace resume-at-address support).
 * Always returns 0.
 */
int
ptrace_set_pc(struct thread *td, unsigned long addr)
{
	td->td_frame->tf_rip = addr;
	return (0);
}
 1393 
/*
 * Arm single-stepping for the thread by setting the trap flag (PSL_T)
 * in its saved rflags; the CPU raises a debug exception after the
 * next user instruction.  Always returns 0.
 */
int
ptrace_single_step(struct thread *td)
{
	td->td_frame->tf_rflags |= PSL_T;
	return (0);
}
 1400 
/*
 * Disarm single-stepping by clearing the trap flag (PSL_T) in the
 * thread's saved rflags.  Always returns 0.
 */
int
ptrace_clear_single_step(struct thread *td)
{
	td->td_frame->tf_rflags &= ~PSL_T;
	return (0);
}
 1407 
/*
 * Externalize the thread's trapframe into a `struct reg' for
 * ptrace(2)/core-dump consumers.  Always returns 0.
 */
int
fill_regs(struct thread *td, struct reg *regs)
{
	struct trapframe *tp;

	tp = td->td_frame;
	/* General-purpose registers. */
	regs->r_r15 = tp->tf_r15;
	regs->r_r14 = tp->tf_r14;
	regs->r_r13 = tp->tf_r13;
	regs->r_r12 = tp->tf_r12;
	regs->r_r11 = tp->tf_r11;
	regs->r_r10 = tp->tf_r10;
	regs->r_r9  = tp->tf_r9;
	regs->r_r8  = tp->tf_r8;
	regs->r_rdi = tp->tf_rdi;
	regs->r_rsi = tp->tf_rsi;
	regs->r_rbp = tp->tf_rbp;
	regs->r_rbx = tp->tf_rbx;
	regs->r_rdx = tp->tf_rdx;
	regs->r_rcx = tp->tf_rcx;
	regs->r_rax = tp->tf_rax;
	/* Program counter, flags, stack, and segment selectors. */
	regs->r_rip = tp->tf_rip;
	regs->r_cs = tp->tf_cs;
	regs->r_rflags = tp->tf_rflags;
	regs->r_rsp = tp->tf_rsp;
	regs->r_ss = tp->tf_ss;
	return (0);
}
 1436 
 1437 int
 1438 set_regs(struct thread *td, struct reg *regs)
 1439 {
 1440         struct trapframe *tp;
 1441         register_t rflags;
 1442 
 1443         tp = td->td_frame;
 1444         rflags = regs->r_rflags & 0xffffffff;
 1445         if (!EFL_SECURE(rflags, tp->tf_rflags) || !CS_SECURE(regs->r_cs))
 1446                 return (EINVAL);
 1447         tp->tf_r15 = regs->r_r15;
 1448         tp->tf_r14 = regs->r_r14;
 1449         tp->tf_r13 = regs->r_r13;
 1450         tp->tf_r12 = regs->r_r12;
 1451         tp->tf_r11 = regs->r_r11;
 1452         tp->tf_r10 = regs->r_r10;
 1453         tp->tf_r9  = regs->r_r9;
 1454         tp->tf_r8  = regs->r_r8;
 1455         tp->tf_rdi = regs->r_rdi;
 1456         tp->tf_rsi = regs->r_rsi;
 1457         tp->tf_rbp = regs->r_rbp;
 1458         tp->tf_rbx = regs->r_rbx;
 1459         tp->tf_rdx = regs->r_rdx;
 1460         tp->tf_rcx = regs->r_rcx;
 1461         tp->tf_rax = regs->r_rax;
 1462         tp->tf_rip = regs->r_rip;
 1463         tp->tf_cs = regs->r_cs;
 1464         tp->tf_rflags = rflags;
 1465         tp->tf_rsp = regs->r_rsp;
 1466         tp->tf_ss = regs->r_ss;
 1467         td->td_pcb->pcb_flags |= PCB_FULLCTX;
 1468         return (0);
 1469 }
 1470 
 1471 /* XXX check all this stuff! */
 1472 /* externalize from sv_xmm */
 1473 static void
 1474 fill_fpregs_xmm(struct savefpu *sv_xmm, struct fpreg *fpregs)
 1475 {
 1476         struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env;
 1477         struct envxmm *penv_xmm = &sv_xmm->sv_env;
 1478         int i;
 1479 
 1480         /* pcb -> fpregs */
 1481         bzero(fpregs, sizeof(*fpregs));
 1482 
 1483         /* FPU control/status */
 1484         penv_fpreg->en_cw = penv_xmm->en_cw;
 1485         penv_fpreg->en_sw = penv_xmm->en_sw;
 1486         penv_fpreg->en_tw = penv_xmm->en_tw;
 1487         penv_fpreg->en_opcode = penv_xmm->en_opcode;
 1488         penv_fpreg->en_rip = penv_xmm->en_rip;
 1489         penv_fpreg->en_rdp = penv_xmm->en_rdp;
 1490         penv_fpreg->en_mxcsr = penv_xmm->en_mxcsr;
 1491         penv_fpreg->en_mxcsr_mask = penv_xmm->en_mxcsr_mask;
 1492 
 1493         /* FPU registers */
 1494         for (i = 0; i < 8; ++i)
 1495                 bcopy(sv_xmm->sv_fp[i].fp_acc.fp_bytes, fpregs->fpr_acc[i], 10);
 1496 
 1497         /* SSE registers */
 1498         for (i = 0; i < 16; ++i)
 1499                 bcopy(sv_xmm->sv_xmm[i].xmm_bytes, fpregs->fpr_xacc[i], 16);
 1500 }
 1501 
 1502 /* internalize from fpregs into sv_xmm */
 1503 static void
 1504 set_fpregs_xmm(struct fpreg *fpregs, struct savefpu *sv_xmm)
 1505 {
 1506         struct envxmm *penv_xmm = &sv_xmm->sv_env;
 1507         struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env;
 1508         int i;
 1509 
 1510         /* fpregs -> pcb */
 1511         /* FPU control/status */
 1512         penv_xmm->en_cw = penv_fpreg->en_cw;
 1513         penv_xmm->en_sw = penv_fpreg->en_sw;
 1514         penv_xmm->en_tw = penv_fpreg->en_tw;
 1515         penv_xmm->en_opcode = penv_fpreg->en_opcode;
 1516         penv_xmm->en_rip = penv_fpreg->en_rip;
 1517         penv_xmm->en_rdp = penv_fpreg->en_rdp;
 1518         penv_xmm->en_mxcsr = penv_fpreg->en_mxcsr;
 1519         penv_xmm->en_mxcsr_mask = penv_fpreg->en_mxcsr_mask & cpu_mxcsr_mask;
 1520 
 1521         /* FPU registers */
 1522         for (i = 0; i < 8; ++i)
 1523                 bcopy(fpregs->fpr_acc[i], sv_xmm->sv_fp[i].fp_acc.fp_bytes, 10);
 1524 
 1525         /* SSE registers */
 1526         for (i = 0; i < 16; ++i)
 1527                 bcopy(fpregs->fpr_xacc[i], sv_xmm->sv_xmm[i].xmm_bytes, 16);
 1528 }
 1529 
/*
 * Externalize from td->pcb: export the thread's saved FPU/SSE state
 * (its PCB fxsave image) into `struct fpreg'.  Always returns 0.
 */
int
fill_fpregs(struct thread *td, struct fpreg *fpregs)
{

	fill_fpregs_xmm(&td->td_pcb->pcb_save, fpregs);
	return (0);
}
 1538 
/*
 * Internalize to td->pcb: load a `struct fpreg' into the thread's
 * saved FPU/SSE state (its PCB fxsave image).  Always returns 0.
 */
int
set_fpregs(struct thread *td, struct fpreg *fpregs)
{

	set_fpregs_xmm(fpregs, &td->td_pcb->pcb_save);
	return (0);
}
 1547 
/*
 * Get machine context.
 *
 * Copy the thread's current register state from its trapframe into
 * `mcp' (getcontext/sendsig consumers).  With GET_MC_CLEAR_RET the
 * return-value registers (%rax, %rdx) and the carry flag are cleared,
 * so a later resume of the context observes a "success" return.
 */
int
get_mcontext(struct thread *td, mcontext_t *mcp, int flags)
{
	struct trapframe *tp;

	tp = td->td_frame;
	/*
	 * sigonstack() looks at signal-stack state, which requires the
	 * proc lock.  NOTE(review): this locks curthread's proc rather
	 * than td's — callers appear to pass td == curthread; confirm.
	 */
	PROC_LOCK(curthread->td_proc);
	mcp->mc_onstack = sigonstack(tp->tf_rsp);
	PROC_UNLOCK(curthread->td_proc);
	mcp->mc_r15 = tp->tf_r15;
	mcp->mc_r14 = tp->tf_r14;
	mcp->mc_r13 = tp->tf_r13;
	mcp->mc_r12 = tp->tf_r12;
	mcp->mc_r11 = tp->tf_r11;
	mcp->mc_r10 = tp->tf_r10;
	mcp->mc_r9  = tp->tf_r9;
	mcp->mc_r8  = tp->tf_r8;
	mcp->mc_rdi = tp->tf_rdi;
	mcp->mc_rsi = tp->tf_rsi;
	mcp->mc_rbp = tp->tf_rbp;
	mcp->mc_rbx = tp->tf_rbx;
	mcp->mc_rcx = tp->tf_rcx;
	mcp->mc_rflags = tp->tf_rflags;
	if (flags & GET_MC_CLEAR_RET) {
		/* Present a clean (zero, no-carry) syscall return. */
		mcp->mc_rax = 0;
		mcp->mc_rdx = 0;
		mcp->mc_rflags &= ~PSL_C;
	} else {
		mcp->mc_rax = tp->tf_rax;
		mcp->mc_rdx = tp->tf_rdx;
	}
	mcp->mc_rip = tp->tf_rip;
	mcp->mc_cs = tp->tf_cs;
	mcp->mc_rsp = tp->tf_rsp;
	mcp->mc_ss = tp->tf_ss;
	/* mc_len doubles as a version/sanity check in set_mcontext(). */
	mcp->mc_len = sizeof(*mcp);
	get_fpcontext(td, mcp);
	return (0);
}
 1590 
/*
 * Set machine context.
 *
 * However, we don't set any but the user modifiable flags, and we won't
 * touch the cs selector.
 *
 * Returns 0 on success, EINVAL on a size-mismatched context, or the
 * error from set_fpcontext() on a bad FPU state.
 */
int
set_mcontext(struct thread *td, const mcontext_t *mcp)
{
        struct trapframe *tp;
        long rflags;
        int ret;

        tp = td->td_frame;
        /* Reject a context built against a different mcontext layout. */
        if (mcp->mc_len != sizeof(*mcp))
                return (EINVAL);
        /*
         * Merge flags: take only the user-changeable bits from the
         * context and keep the privileged bits from the current frame.
         */
        rflags = (mcp->mc_rflags & PSL_USERCHANGE) |
            (tp->tf_rflags & ~PSL_USERCHANGE);
        /*
         * Restore FPU state first so a bad context fails before any of
         * the trapframe has been modified.
         */
        ret = set_fpcontext(td, mcp);
        if (ret != 0)
                return (ret);
        tp->tf_r15 = mcp->mc_r15;
        tp->tf_r14 = mcp->mc_r14;
        tp->tf_r13 = mcp->mc_r13;
        tp->tf_r12 = mcp->mc_r12;
        tp->tf_r11 = mcp->mc_r11;
        tp->tf_r10 = mcp->mc_r10;
        tp->tf_r9  = mcp->mc_r9;
        tp->tf_r8  = mcp->mc_r8;
        tp->tf_rdi = mcp->mc_rdi;
        tp->tf_rsi = mcp->mc_rsi;
        tp->tf_rbp = mcp->mc_rbp;
        tp->tf_rbx = mcp->mc_rbx;
        tp->tf_rdx = mcp->mc_rdx;
        tp->tf_rcx = mcp->mc_rcx;
        tp->tf_rax = mcp->mc_rax;
        tp->tf_rip = mcp->mc_rip;
        /* tf_cs is deliberately NOT taken from the context (see above). */
        tp->tf_rflags = rflags;
        tp->tf_rsp = mcp->mc_rsp;
        tp->tf_ss = mcp->mc_ss;
        /* Request a full register reload on the return to user mode. */
        td->td_pcb->pcb_flags |= PCB_FULLCTX;
        return (0);
}
 1634 
 1635 static void
 1636 get_fpcontext(struct thread *td, mcontext_t *mcp)
 1637 {
 1638 
 1639         mcp->mc_ownedfp = fpugetregs(td, (struct savefpu *)&mcp->mc_fpstate);
 1640         mcp->mc_fpformat = fpuformat();
 1641 }
 1642 
 1643 static int
 1644 set_fpcontext(struct thread *td, const mcontext_t *mcp)
 1645 {
 1646         struct savefpu *fpstate;
 1647 
 1648         if (mcp->mc_fpformat == _MC_FPFMT_NODEV)
 1649                 return (0);
 1650         else if (mcp->mc_fpformat != _MC_FPFMT_XMM)
 1651                 return (EINVAL);
 1652         else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE)
 1653                 /* We don't care what state is left in the FPU or PCB. */
 1654                 fpstate_drop(td);
 1655         else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU ||
 1656             mcp->mc_ownedfp == _MC_FPOWNED_PCB) {
 1657                 /*
 1658                  * XXX we violate the dubious requirement that fpusetregs()
 1659                  * be called with interrupts disabled.
 1660                  * XXX obsolete on trap-16 systems?
 1661                  */
 1662                 fpstate = (struct savefpu *)&mcp->mc_fpstate;
 1663                 fpstate->sv_env.en_mxcsr &= cpu_mxcsr_mask;
 1664                 fpusetregs(td, fpstate);
 1665         } else
 1666                 return (EINVAL);
 1667         return (0);
 1668 }
 1669 
/*
 * Drop the thread's FPU state so it will be freshly initialized on next
 * use.  Interrupts are disabled across the check-and-drop because FPU
 * ownership (fpcurthread) could otherwise change underneath us.
 *
 * NOTE(review): td is presumably always curthread here — the flag below
 * is cleared on curthread's pcb, not td's; confirm with callers.
 */
void
fpstate_drop(struct thread *td)
{
        register_t s;

        s = intr_disable();
        if (PCPU_GET(fpcurthread) == td)
                fpudrop();
        /*
         * XXX force a full drop of the fpu.  The above only drops it if we
         * owned it.
         *
         * XXX I don't much like fpugetregs()'s semantics of doing a full
         * drop.  Dropping only to the pcb matches fnsave's behaviour.
         * We only need to drop to !PCB_INITDONE in sendsig().  But
         * sendsig() is the only caller of fpugetregs()... perhaps we just
         * have too many layers.
         */
        curthread->td_pcb->pcb_flags &= ~PCB_FPUINITDONE;
        intr_restore(s);
}
 1691 
 1692 int
 1693 fill_dbregs(struct thread *td, struct dbreg *dbregs)
 1694 {
 1695         struct pcb *pcb;
 1696 
 1697         if (td == NULL) {
 1698                 dbregs->dr[0] = rdr0();
 1699                 dbregs->dr[1] = rdr1();
 1700                 dbregs->dr[2] = rdr2();
 1701                 dbregs->dr[3] = rdr3();
 1702                 dbregs->dr[6] = rdr6();
 1703                 dbregs->dr[7] = rdr7();
 1704         } else {
 1705                 pcb = td->td_pcb;
 1706                 dbregs->dr[0] = pcb->pcb_dr0;
 1707                 dbregs->dr[1] = pcb->pcb_dr1;
 1708                 dbregs->dr[2] = pcb->pcb_dr2;
 1709                 dbregs->dr[3] = pcb->pcb_dr3;
 1710                 dbregs->dr[6] = pcb->pcb_dr6;
 1711                 dbregs->dr[7] = pcb->pcb_dr7;
 1712         }
 1713         dbregs->dr[4] = 0;
 1714         dbregs->dr[5] = 0;
 1715         dbregs->dr[8] = 0;
 1716         dbregs->dr[9] = 0;
 1717         dbregs->dr[10] = 0;
 1718         dbregs->dr[11] = 0;
 1719         dbregs->dr[12] = 0;
 1720         dbregs->dr[13] = 0;
 1721         dbregs->dr[14] = 0;
 1722         dbregs->dr[15] = 0;
 1723         return (0);
 1724 }
 1725 
 1726 int
 1727 set_dbregs(struct thread *td, struct dbreg *dbregs)
 1728 {
 1729         struct pcb *pcb;
 1730         int i;
 1731         u_int64_t mask1, mask2;
 1732 
 1733         if (td == NULL) {
 1734                 load_dr0(dbregs->dr[0]);
 1735                 load_dr1(dbregs->dr[1]);
 1736                 load_dr2(dbregs->dr[2]);
 1737                 load_dr3(dbregs->dr[3]);
 1738                 load_dr6(dbregs->dr[6]);
 1739                 load_dr7(dbregs->dr[7]);
 1740         } else {
 1741                 /*
 1742                  * Don't let an illegal value for dr7 get set.  Specifically,
 1743                  * check for undefined settings.  Setting these bit patterns
 1744                  * result in undefined behaviour and can lead to an unexpected
 1745                  * TRCTRAP or a general protection fault right here.
 1746                  * Upper bits of dr6 and dr7 must not be set
 1747                  */
 1748                 for (i = 0, mask1 = 0x3<<16, mask2 = 0x2<<16; i < 8;
 1749                      i++, mask1 <<= 2, mask2 <<= 2)
 1750                         if ((dbregs->dr[7] & mask1) == mask2)
 1751                                 return (EINVAL);
 1752                 if ((dbregs->dr[6] & 0xffffffff00000000ul) != 0 ||
 1753                     (dbregs->dr[7] & 0xffffffff00000000ul) != 0)
 1754                         return (EINVAL);
 1755 
 1756                 pcb = td->td_pcb;
 1757 
 1758                 /*
 1759                  * Don't let a process set a breakpoint that is not within the
 1760                  * process's address space.  If a process could do this, it
 1761                  * could halt the system by setting a breakpoint in the kernel
 1762                  * (if ddb was enabled).  Thus, we need to check to make sure
 1763                  * that no breakpoints are being enabled for addresses outside
 1764                  * process's address space.
 1765                  *
 1766                  * XXX - what about when the watched area of the user's
 1767                  * address space is written into from within the kernel
 1768                  * ... wouldn't that still cause a breakpoint to be generated
 1769                  * from within kernel mode?
 1770                  */
 1771 
 1772                 if (dbregs->dr[7] & 0x3) {
 1773                         /* dr0 is enabled */
 1774                         if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS)
 1775                                 return (EINVAL);
 1776                 }
 1777                 if (dbregs->dr[7] & 0x3<<2) {
 1778                         /* dr1 is enabled */
 1779                         if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS)
 1780                                 return (EINVAL);
 1781                 }
 1782                 if (dbregs->dr[7] & 0x3<<4) {
 1783                         /* dr2 is enabled */
 1784                         if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS)
 1785                                 return (EINVAL);
 1786                 }
 1787                 if (dbregs->dr[7] & 0x3<<6) {
 1788                         /* dr3 is enabled */
 1789                         if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS)
 1790                                 return (EINVAL);
 1791                 }
 1792 
 1793                 pcb->pcb_dr0 = dbregs->dr[0];
 1794                 pcb->pcb_dr1 = dbregs->dr[1];
 1795                 pcb->pcb_dr2 = dbregs->dr[2];
 1796                 pcb->pcb_dr3 = dbregs->dr[3];
 1797                 pcb->pcb_dr6 = dbregs->dr[6];
 1798                 pcb->pcb_dr7 = dbregs->dr[7];
 1799 
 1800                 pcb->pcb_flags |= PCB_DBREGS;
 1801         }
 1802 
 1803         return (0);
 1804 }
 1805 
 1806 void
 1807 reset_dbregs(void)
 1808 {
 1809 
 1810         load_dr7(0);    /* Turn off the control bits first */
 1811         load_dr0(0);
 1812         load_dr1(0);
 1813         load_dr2(0);
 1814         load_dr3(0);
 1815         load_dr6(0);
 1816 }
 1817 
 1818 /*
 1819  * Return > 0 if a hardware breakpoint has been hit, and the
 1820  * breakpoint was in user space.  Return 0, otherwise.
 1821  */
 1822 int
 1823 user_dbreg_trap(void)
 1824 {
 1825         u_int64_t dr7, dr6; /* debug registers dr6 and dr7 */
 1826         u_int64_t bp;       /* breakpoint bits extracted from dr6 */
 1827         int nbp;            /* number of breakpoints that triggered */
 1828         caddr_t addr[4];    /* breakpoint addresses */
 1829         int i;
 1830         
 1831         dr7 = rdr7();
 1832         if ((dr7 & 0x000000ff) == 0) {
 1833                 /*
 1834                  * all GE and LE bits in the dr7 register are zero,
 1835                  * thus the trap couldn't have been caused by the
 1836                  * hardware debug registers
 1837                  */
 1838                 return 0;
 1839         }
 1840 
 1841         nbp = 0;
 1842         dr6 = rdr6();
 1843         bp = dr6 & 0x0000000f;
 1844 
 1845         if (!bp) {
 1846                 /*
 1847                  * None of the breakpoint bits are set meaning this
 1848                  * trap was not caused by any of the debug registers
 1849                  */
 1850                 return 0;
 1851         }
 1852 
 1853         /*
 1854          * at least one of the breakpoints were hit, check to see
 1855          * which ones and if any of them are user space addresses
 1856          */
 1857 
 1858         if (bp & 0x01) {
 1859                 addr[nbp++] = (caddr_t)rdr0();
 1860         }
 1861         if (bp & 0x02) {
 1862                 addr[nbp++] = (caddr_t)rdr1();
 1863         }
 1864         if (bp & 0x04) {
 1865                 addr[nbp++] = (caddr_t)rdr2();
 1866         }
 1867         if (bp & 0x08) {
 1868                 addr[nbp++] = (caddr_t)rdr3();
 1869         }
 1870 
 1871         for (i=0; i<nbp; i++) {
 1872                 if (addr[i] <
 1873                     (caddr_t)VM_MAXUSER_ADDRESS) {
 1874                         /*
 1875                          * addr[i] is in user space
 1876                          */
 1877                         return nbp;
 1878                 }
 1879         }
 1880 
 1881         /*
 1882          * None of the breakpoints are in user space.
 1883          */
 1884         return 0;
 1885 }
 1886 
 1887 #ifdef KDB
 1888 
 1889 /*
 1890  * Provide inb() and outb() as functions.  They are normally only
 1891  * available as macros calling inlined functions, thus cannot be
 1892  * called from the debugger.
 1893  *
 1894  * The actual code is stolen from <machine/cpufunc.h>, and de-inlined.
 1895  */
 1896 
 1897 #undef inb
 1898 #undef outb
 1899 
 1900 /* silence compiler warnings */
 1901 u_char inb(u_int);
 1902 void outb(u_int, u_char);
 1903 
/* Read one byte from I/O port `port` (out-of-line, debugger-callable). */
u_char
inb(u_int port)
{
        u_char  data;
        /*
         * We use %%dx and not %1 here because i/o is done at %dx and not at
         * %edx, while gcc generates inferior code (movw instead of movl)
         * if we tell it to load (u_short) port.
         */
        __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
        return (data);
}
 1916 
/* Write byte `data` to I/O port `port` (out-of-line, debugger-callable). */
void
outb(u_int port, u_char data)
{
        u_char  al;
        /*
         * Use an unnecessary assignment to help gcc's register allocator.
         * This make a large difference for gcc-1.40 and a tiny difference
         * for gcc-2.6.0.  For gcc-1.40, al had to be ``asm("ax")'' for
         * best results.  gcc-2.6.0 can't handle this.
         */
        al = data;
        __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
}
 1930 
 1931 #endif /* KDB */

Cache object: d358ff8483dbe1ce0d180f16c90f69a2


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.