The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/amd64/amd64/machdep.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-4-Clause
    3  *
    4  * Copyright (c) 2003 Peter Wemm.
    5  * Copyright (c) 1992 Terrence R. Lambert.
    6  * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
    7  * All rights reserved.
    8  *
    9  * This code is derived from software contributed to Berkeley by
   10  * William Jolitz.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice, this list of conditions and the following disclaimer.
   17  * 2. Redistributions in binary form must reproduce the above copyright
   18  *    notice, this list of conditions and the following disclaimer in the
   19  *    documentation and/or other materials provided with the distribution.
   20  * 3. All advertising materials mentioning features or use of this software
   21  *    must display the following acknowledgement:
   22  *      This product includes software developed by the University of
   23  *      California, Berkeley and its contributors.
   24  * 4. Neither the name of the University nor the names of its contributors
   25  *    may be used to endorse or promote products derived from this software
   26  *    without specific prior written permission.
   27  *
   28  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   38  * SUCH DAMAGE.
   39  *
   40  *      from: @(#)machdep.c     7.4 (Berkeley) 6/3/91
   41  */
   42 
   43 #include <sys/cdefs.h>
   44 __FBSDID("$FreeBSD$");
   45 
   46 #include "opt_atpic.h"
   47 #include "opt_cpu.h"
   48 #include "opt_ddb.h"
   49 #include "opt_inet.h"
   50 #include "opt_isa.h"
   51 #include "opt_kstack_pages.h"
   52 #include "opt_maxmem.h"
   53 #include "opt_mp_watchdog.h"
   54 #include "opt_pci.h"
   55 #include "opt_platform.h"
   56 #include "opt_sched.h"
   57 
   58 #include <sys/param.h>
   59 #include <sys/proc.h>
   60 #include <sys/systm.h>
   61 #include <sys/bio.h>
   62 #include <sys/buf.h>
   63 #include <sys/bus.h>
   64 #include <sys/callout.h>
   65 #include <sys/cons.h>
   66 #include <sys/cpu.h>
   67 #include <sys/efi.h>
   68 #include <sys/eventhandler.h>
   69 #include <sys/exec.h>
   70 #include <sys/imgact.h>
   71 #include <sys/kdb.h>
   72 #include <sys/kernel.h>
   73 #include <sys/ktr.h>
   74 #include <sys/linker.h>
   75 #include <sys/lock.h>
   76 #include <sys/malloc.h>
   77 #include <sys/memrange.h>
   78 #include <sys/msgbuf.h>
   79 #include <sys/mutex.h>
   80 #include <sys/pcpu.h>
   81 #include <sys/ptrace.h>
   82 #include <sys/reboot.h>
   83 #include <sys/rwlock.h>
   84 #include <sys/sched.h>
   85 #include <sys/signalvar.h>
   86 #ifdef SMP
   87 #include <sys/smp.h>
   88 #endif
   89 #include <sys/syscallsubr.h>
   90 #include <sys/sysctl.h>
   91 #include <sys/sysent.h>
   92 #include <sys/sysproto.h>
   93 #include <sys/ucontext.h>
   94 #include <sys/vmmeter.h>
   95 
   96 #include <vm/vm.h>
   97 #include <vm/vm_extern.h>
   98 #include <vm/vm_kern.h>
   99 #include <vm/vm_page.h>
  100 #include <vm/vm_map.h>
  101 #include <vm/vm_object.h>
  102 #include <vm/vm_pager.h>
  103 #include <vm/vm_param.h>
  104 #include <vm/vm_phys.h>
  105 
  106 #ifdef DDB
  107 #ifndef KDB
  108 #error KDB must be enabled in order for DDB to work!
  109 #endif
  110 #include <ddb/ddb.h>
  111 #include <ddb/db_sym.h>
  112 #endif
  113 
  114 #include <net/netisr.h>
  115 
  116 #include <machine/clock.h>
  117 #include <machine/cpu.h>
  118 #include <machine/cputypes.h>
  119 #include <machine/frame.h>
  120 #include <machine/intr_machdep.h>
  121 #include <x86/mca.h>
  122 #include <machine/md_var.h>
  123 #include <machine/metadata.h>
  124 #include <machine/mp_watchdog.h>
  125 #include <machine/pc/bios.h>
  126 #include <machine/pcb.h>
  127 #include <machine/proc.h>
  128 #include <machine/reg.h>
  129 #include <machine/sigframe.h>
  130 #include <machine/specialreg.h>
  131 #include <machine/trap.h>
  132 #include <machine/tss.h>
  133 #include <x86/ucode.h>
  134 #include <x86/ifunc.h>
  135 #ifdef SMP
  136 #include <machine/smp.h>
  137 #endif
  138 #ifdef FDT
  139 #include <x86/fdt.h>
  140 #endif
  141 
  142 #ifdef DEV_ATPIC
  143 #include <x86/isa/icu.h>
  144 #else
  145 #include <x86/apicvar.h>
  146 #endif
  147 
  148 #include <isa/isareg.h>
  149 #include <isa/rtc.h>
  150 #include <x86/init.h>
  151 
  152 /* Sanity check for __curthread() */
  153 CTASSERT(offsetof(struct pcpu, pc_curthread) == 0);
  154 
  155 /*
  156  * The PTI trampoline stack needs enough space for a hardware trapframe and a
  157  * couple of scratch registers, as well as the trapframe left behind after an
  158  * iret fault.
  159  */
  160 CTASSERT(PC_PTI_STACK_SZ * sizeof(register_t) >= 2 * sizeof(struct pti_frame) -
  161     offsetof(struct pti_frame, pti_rip));
  162 
  163 extern u_int64_t hammer_time(u_int64_t, u_int64_t);
  164 
  165 #define CS_SECURE(cs)           (ISPL(cs) == SEL_UPL)
  166 #define EFL_SECURE(ef, oef)     ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
  167 
  168 static void cpu_startup(void *);
  169 static void get_fpcontext(struct thread *td, mcontext_t *mcp,
  170     char *xfpusave, size_t xfpusave_len);
  171 static int  set_fpcontext(struct thread *td, mcontext_t *mcp,
  172     char *xfpustate, size_t xfpustate_len);
  173 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
  174 
  175 /* Preload data parse function */
  176 static caddr_t native_parse_preload_data(u_int64_t);
  177 
  178 /* Native function to fetch and parse the e820 map */
  179 static void native_parse_memmap(caddr_t, vm_paddr_t *, int *);
  180 
  181 /* Default init_ops implementation. */
  182 struct init_ops init_ops = {
  183         .parse_preload_data =   native_parse_preload_data,
  184         .early_clock_source_init =      i8254_init,
  185         .early_delay =                  i8254_delay,
  186         .parse_memmap =                 native_parse_memmap,
  187 #ifdef SMP
  188         .mp_bootaddress =               mp_bootaddress,
  189         .start_all_aps =                native_start_all_aps,
  190 #endif
  191 #ifdef DEV_PCI
  192         .msi_init =                     msi_init,
  193 #endif
  194 };
  195 
  196 /*
  197  * Physical address of the EFI System Table. Stashed from the metadata hints
  198  * passed into the kernel and used by the EFI code to call runtime services.
  199  */
  200 vm_paddr_t efi_systbl_phys;
  201 
  202 /* Intel ICH registers */
  203 #define ICH_PMBASE      0x400
  204 #define ICH_SMI_EN      ICH_PMBASE + 0x30
  205 
  206 int     _udatasel, _ucodesel, _ucode32sel, _ufssel, _ugssel;
  207 
  208 int cold = 1;
  209 
  210 long Maxmem = 0;
  211 long realmem = 0;
  212 
  213 /*
  214  * The number of PHYSMAP entries must be one less than the number of
  215  * PHYSSEG entries because the PHYSMAP entry that spans the largest
  216  * physical address that is accessible by ISA DMA is split into two
  217  * PHYSSEG entries.
  218  */
  219 #define PHYSMAP_SIZE    (2 * (VM_PHYSSEG_MAX - 1))
  220 
  221 vm_paddr_t phys_avail[PHYSMAP_SIZE + 2];
  222 vm_paddr_t dump_avail[PHYSMAP_SIZE + 2];
  223 
  224 /* must be 2 less so 0 0 can signal end of chunks */
  225 #define PHYS_AVAIL_ARRAY_END (nitems(phys_avail) - 2)
  226 #define DUMP_AVAIL_ARRAY_END (nitems(dump_avail) - 2)
  227 
  228 struct kva_md_info kmi;
  229 
  230 static struct trapframe proc0_tf;
  231 struct region_descriptor r_idt;
  232 
  233 struct pcpu *__pcpu;
  234 struct pcpu temp_bsp_pcpu;
  235 
  236 struct mtx icu_lock;
  237 
  238 struct mem_range_softc mem_range_softc;
  239 
  240 struct mtx dt_lock;     /* lock for GDT and LDT */
  241 
  242 void (*vmm_resume_p)(void);
  243 
  244 static void
  245 cpu_startup(dummy)
  246         void *dummy;
  247 {
  248         uintmax_t memsize;
  249         char *sysenv;
  250 
  251         /*
  252          * On MacBooks, we need to disallow the legacy USB circuit to
  253          * generate an SMI# because this can cause several problems,
  254          * namely: incorrect CPU frequency detection and failure to
  255          * start the APs.
  256          * We do this by disabling a bit in the SMI_EN (SMI Control and
  257          * Enable register) of the Intel ICH LPC Interface Bridge. 
  258          */
  259         sysenv = kern_getenv("smbios.system.product");
  260         if (sysenv != NULL) {
  261                 if (strncmp(sysenv, "MacBook1,1", 10) == 0 ||
  262                     strncmp(sysenv, "MacBook3,1", 10) == 0 ||
  263                     strncmp(sysenv, "MacBook4,1", 10) == 0 ||
  264                     strncmp(sysenv, "MacBookPro1,1", 13) == 0 ||
  265                     strncmp(sysenv, "MacBookPro1,2", 13) == 0 ||
  266                     strncmp(sysenv, "MacBookPro3,1", 13) == 0 ||
  267                     strncmp(sysenv, "MacBookPro4,1", 13) == 0 ||
  268                     strncmp(sysenv, "Macmini1,1", 10) == 0) {
  269                         if (bootverbose)
  270                                 printf("Disabling LEGACY_USB_EN bit on "
  271                                     "Intel ICH.\n");
  272                         outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8);
  273                 }
  274                 freeenv(sysenv);
  275         }
  276 
  277         /*
  278          * Good {morning,afternoon,evening,night}.
  279          */
  280         startrtclock();
  281         printcpuinfo();
  282 
  283         /*
  284          * Display physical memory if SMBIOS reports reasonable amount.
  285          */
  286         memsize = 0;
  287         sysenv = kern_getenv("smbios.memory.enabled");
  288         if (sysenv != NULL) {
  289                 memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10;
  290                 freeenv(sysenv);
  291         }
  292         if (memsize < ptoa((uintmax_t)vm_free_count()))
  293                 memsize = ptoa((uintmax_t)Maxmem);
  294         printf("real memory  = %ju (%ju MB)\n", memsize, memsize >> 20);
  295         realmem = atop(memsize);
  296 
  297         /*
  298          * Display any holes after the first chunk of extended memory.
  299          */
  300         if (bootverbose) {
  301                 int indx;
  302 
  303                 printf("Physical memory chunk(s):\n");
  304                 for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
  305                         vm_paddr_t size;
  306 
  307                         size = phys_avail[indx + 1] - phys_avail[indx];
  308                         printf(
  309                             "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n",
  310                             (uintmax_t)phys_avail[indx],
  311                             (uintmax_t)phys_avail[indx + 1] - 1,
  312                             (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
  313                 }
  314         }
  315 
  316         vm_ksubmap_init(&kmi);
  317 
  318         printf("avail memory = %ju (%ju MB)\n",
  319             ptoa((uintmax_t)vm_free_count()),
  320             ptoa((uintmax_t)vm_free_count()) / 1048576);
  321 #ifdef DEV_PCI
  322         if (bootverbose && intel_graphics_stolen_base != 0)
  323                 printf("intel stolen mem: base %#jx size %ju MB\n",
  324                     (uintmax_t)intel_graphics_stolen_base,
  325                     (uintmax_t)intel_graphics_stolen_size / 1024 / 1024);
  326 #endif
  327 
  328         /*
  329          * Set up buffers, so they can be used to read disk labels.
  330          */
  331         bufinit();
  332         vm_pager_bufferinit();
  333 
  334         cpu_setregs();
  335 }
  336 
  337 /*
  338  * Send an interrupt to process.
  339  *
  340  * Stack is set up to allow sigcode stored
  341  * at top to call routine, followed by call
  342  * to sigreturn routine below.  After sigreturn
  343  * resets the signal mask, the stack, and the
  344  * frame pointer, it returns to the user
  345  * specified pc, psl.
  346  */
void
sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
        struct sigframe sf, *sfp;
        struct pcb *pcb;
        struct proc *p;
        struct thread *td;
        struct sigacts *psp;
        char *sp;
        struct trapframe *regs;
        char *xfpusave;
        size_t xfpusave_len;
        int sig;
        int oonstack;

        td = curthread;
        pcb = td->td_pcb;
        p = td->td_proc;
        PROC_LOCK_ASSERT(p, MA_OWNED);
        sig = ksi->ksi_signo;
        psp = p->p_sigacts;
        mtx_assert(&psp->ps_mtx, MA_OWNED);
        regs = td->td_frame;
        oonstack = sigonstack(regs->tf_rsp);

        /*
         * If extended FPU state (XSAVE) is in use, reserve kernel-stack
         * scratch space for the part beyond the legacy struct savefpu so
         * it can be copied out along with the frame.
         */
        if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) {
                xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu);
                xfpusave = __builtin_alloca(xfpusave_len);
        } else {
                xfpusave_len = 0;
                xfpusave = NULL;
        }

        /* Save user context. */
        bzero(&sf, sizeof(sf));
        sf.sf_uc.uc_sigmask = *mask;
        sf.sf_uc.uc_stack = td->td_sigstk;
        sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
            ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
        sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
        /* Trapframe is copied in starting at mc_rdi; layouts must match. */
        bcopy(regs, &sf.sf_uc.uc_mcontext.mc_rdi, sizeof(*regs));
        sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
        get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len);
        fpstate_drop(td);
        update_pcb_bases(pcb);
        sf.sf_uc.uc_mcontext.mc_fsbase = pcb->pcb_fsbase;
        sf.sf_uc.uc_mcontext.mc_gsbase = pcb->pcb_gsbase;
        /* Zero the spare fields so no kernel stack memory leaks to user. */
        bzero(sf.sf_uc.uc_mcontext.mc_spare,
            sizeof(sf.sf_uc.uc_mcontext.mc_spare));
        bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__));

        /* Allocate space for the signal handler context. */
        if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
            SIGISMEMBER(psp->ps_sigonstack, sig)) {
                /* Handler requested delivery on the alternate stack. */
                sp = (char *)td->td_sigstk.ss_sp + td->td_sigstk.ss_size;
#if defined(COMPAT_43)
                td->td_sigstk.ss_flags |= SS_ONSTACK;
#endif
        } else
                /* Skip the 128-byte red zone defined by the amd64 ABI. */
                sp = (char *)regs->tf_rsp - 128;
        if (xfpusave != NULL) {
                sp -= xfpusave_len;
                /* The extended FPU save area must be 64-byte aligned. */
                sp = (char *)((unsigned long)sp & ~0x3Ful);
                sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp;
        }
        sp -= sizeof(struct sigframe);
        /* Align to 16 bytes. */
        sfp = (struct sigframe *)((unsigned long)sp & ~0xFul);

        /* Build the argument list for the signal handler. */
        regs->tf_rdi = sig;                     /* arg 1 in %rdi */
        regs->tf_rdx = (register_t)&sfp->sf_uc; /* arg 3 in %rdx */
        bzero(&sf.sf_si, sizeof(sf.sf_si));
        if (SIGISMEMBER(psp->ps_siginfo, sig)) {
                /* Signal handler installed with SA_SIGINFO. */
                regs->tf_rsi = (register_t)&sfp->sf_si; /* arg 2 in %rsi */
                sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;

                /* Fill in POSIX parts */
                sf.sf_si = ksi->ksi_info;
                sf.sf_si.si_signo = sig; /* maybe a translated signal */
                regs->tf_rcx = (register_t)ksi->ksi_addr; /* arg 4 in %rcx */
        } else {
                /* Old FreeBSD-style arguments. */
                regs->tf_rsi = ksi->ksi_code;   /* arg 2 in %rsi */
                regs->tf_rcx = (register_t)ksi->ksi_addr; /* arg 4 in %rcx */
                sf.sf_ahu.sf_handler = catcher;
        }
        /* Drop the locks before touching user memory; copyout() may sleep. */
        mtx_unlock(&psp->ps_mtx);
        PROC_UNLOCK(p);

        /*
         * Copy the sigframe out to the user's stack.  If that fails the
         * stack is unusable for signal delivery and the process is killed.
         */
        if (copyout(&sf, sfp, sizeof(*sfp)) != 0 ||
            (xfpusave != NULL && copyout(xfpusave,
            (void *)sf.sf_uc.uc_mcontext.mc_xfpustate, xfpusave_len)
            != 0)) {
#ifdef DEBUG
                printf("process %ld has trashed its stack\n", (long)p->p_pid);
#endif
                PROC_LOCK(p);
                sigexit(td, SIGILL);
        }

        /* Redirect the return to user mode into the signal trampoline. */
        regs->tf_rsp = (long)sfp;
        regs->tf_rip = p->p_sysent->sv_sigcode_base;
        regs->tf_rflags &= ~(PSL_T | PSL_D);
        regs->tf_cs = _ucodesel;
        regs->tf_ds = _udatasel;
        regs->tf_ss = _udatasel;
        regs->tf_es = _udatasel;
        regs->tf_fs = _ufssel;
        regs->tf_gs = _ugssel;
        regs->tf_flags = TF_HASSEGS;
        /* Re-acquire the locks the caller expects to still hold. */
        PROC_LOCK(p);
        mtx_lock(&psp->ps_mtx);
}
  465 
  466 /*
  467  * System call to cleanup state after a signal
  468  * has been taken.  Reset signal mask and
  469  * stack state from context left by sendsig (above).
  470  * Return to previous pc and psl as specified by
  471  * context left by sendsig. Check carefully to
  472  * make sure that the user has not modified the
  473  * state to gain improper privileges.
  474  *
  475  * MPSAFE
  476  */
  477 int
  478 sys_sigreturn(td, uap)
  479         struct thread *td;
  480         struct sigreturn_args /* {
  481                 const struct __ucontext *sigcntxp;
  482         } */ *uap;
  483 {
  484         ucontext_t uc;
  485         struct pcb *pcb;
  486         struct proc *p;
  487         struct trapframe *regs;
  488         ucontext_t *ucp;
  489         char *xfpustate;
  490         size_t xfpustate_len;
  491         long rflags;
  492         int cs, error, ret;
  493         ksiginfo_t ksi;
  494 
  495         pcb = td->td_pcb;
  496         p = td->td_proc;
  497 
  498         error = copyin(uap->sigcntxp, &uc, sizeof(uc));
  499         if (error != 0) {
  500                 uprintf("pid %d (%s): sigreturn copyin failed\n",
  501                     p->p_pid, td->td_name);
  502                 return (error);
  503         }
  504         ucp = &uc;
  505         if ((ucp->uc_mcontext.mc_flags & ~_MC_FLAG_MASK) != 0) {
  506                 uprintf("pid %d (%s): sigreturn mc_flags %x\n", p->p_pid,
  507                     td->td_name, ucp->uc_mcontext.mc_flags);
  508                 return (EINVAL);
  509         }
  510         regs = td->td_frame;
  511         rflags = ucp->uc_mcontext.mc_rflags;
  512         /*
  513          * Don't allow users to change privileged or reserved flags.
  514          */
  515         if (!EFL_SECURE(rflags, regs->tf_rflags)) {
  516                 uprintf("pid %d (%s): sigreturn rflags = 0x%lx\n", p->p_pid,
  517                     td->td_name, rflags);
  518                 return (EINVAL);
  519         }
  520 
  521         /*
  522          * Don't allow users to load a valid privileged %cs.  Let the
  523          * hardware check for invalid selectors, excess privilege in
  524          * other selectors, invalid %eip's and invalid %esp's.
  525          */
  526         cs = ucp->uc_mcontext.mc_cs;
  527         if (!CS_SECURE(cs)) {
  528                 uprintf("pid %d (%s): sigreturn cs = 0x%x\n", p->p_pid,
  529                     td->td_name, cs);
  530                 ksiginfo_init_trap(&ksi);
  531                 ksi.ksi_signo = SIGBUS;
  532                 ksi.ksi_code = BUS_OBJERR;
  533                 ksi.ksi_trapno = T_PROTFLT;
  534                 ksi.ksi_addr = (void *)regs->tf_rip;
  535                 trapsignal(td, &ksi);
  536                 return (EINVAL);
  537         }
  538 
  539         if ((uc.uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) {
  540                 xfpustate_len = uc.uc_mcontext.mc_xfpustate_len;
  541                 if (xfpustate_len > cpu_max_ext_state_size -
  542                     sizeof(struct savefpu)) {
  543                         uprintf("pid %d (%s): sigreturn xfpusave_len = 0x%zx\n",
  544                             p->p_pid, td->td_name, xfpustate_len);
  545                         return (EINVAL);
  546                 }
  547                 xfpustate = __builtin_alloca(xfpustate_len);
  548                 error = copyin((const void *)uc.uc_mcontext.mc_xfpustate,
  549                     xfpustate, xfpustate_len);
  550                 if (error != 0) {
  551                         uprintf(
  552         "pid %d (%s): sigreturn copying xfpustate failed\n",
  553                             p->p_pid, td->td_name);
  554                         return (error);
  555                 }
  556         } else {
  557                 xfpustate = NULL;
  558                 xfpustate_len = 0;
  559         }
  560         ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate, xfpustate_len);
  561         if (ret != 0) {
  562                 uprintf("pid %d (%s): sigreturn set_fpcontext err %d\n",
  563                     p->p_pid, td->td_name, ret);
  564                 return (ret);
  565         }
  566         bcopy(&ucp->uc_mcontext.mc_rdi, regs, sizeof(*regs));
  567         update_pcb_bases(pcb);
  568         pcb->pcb_fsbase = ucp->uc_mcontext.mc_fsbase;
  569         pcb->pcb_gsbase = ucp->uc_mcontext.mc_gsbase;
  570 
  571 #if defined(COMPAT_43)
  572         if (ucp->uc_mcontext.mc_onstack & 1)
  573                 td->td_sigstk.ss_flags |= SS_ONSTACK;
  574         else
  575                 td->td_sigstk.ss_flags &= ~SS_ONSTACK;
  576 #endif
  577 
  578         kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0);
  579         return (EJUSTRETURN);
  580 }
  581 
  582 #ifdef COMPAT_FREEBSD4
  583 int
  584 freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap)
  585 {
  586  
  587         return sys_sigreturn(td, (struct sigreturn_args *)uap);
  588 }
  589 #endif
  590 
  591 /*
  592  * Reset the hardware debug registers if they were in use.
  593  * They won't have any meaning for the newly exec'd process.
  594  */
  595 void
  596 x86_clear_dbregs(struct pcb *pcb)
  597 {
  598         if ((pcb->pcb_flags & PCB_DBREGS) == 0)
  599                 return;
  600 
  601         pcb->pcb_dr0 = 0;
  602         pcb->pcb_dr1 = 0;
  603         pcb->pcb_dr2 = 0;
  604         pcb->pcb_dr3 = 0;
  605         pcb->pcb_dr6 = 0;
  606         pcb->pcb_dr7 = 0;
  607 
  608         if (pcb == curpcb) {
  609                 /*
  610                  * Clear the debug registers on the running CPU,
  611                  * otherwise they will end up affecting the next
  612                  * process we switch to.
  613                  */
  614                 reset_dbregs();
  615         }
  616         clear_pcb_flags(pcb, PCB_DBREGS);
  617 }
  618 
  619 /*
  620  * Reset registers to default values on exec.
  621  */
void
exec_setregs(struct thread *td, struct image_params *imgp, u_long stack)
{
        struct trapframe *regs;
        struct pcb *pcb;
        register_t saved_rflags;

        regs = td->td_frame;
        pcb = td->td_pcb;

        /* Release any custom LDT inherited from the previous image. */
        if (td->td_proc->p_md.md_ldt != NULL)
                user_ldt_free(td);

        update_pcb_bases(pcb);
        pcb->pcb_fsbase = 0;
        pcb->pcb_gsbase = 0;
        clear_pcb_flags(pcb, PCB_32BIT);
        pcb->pcb_initial_fpucw = __INITIAL_FPUCW__;

        /* Carry the trace flag (PSL_T) across the trapframe reset. */
        saved_rflags = regs->tf_rflags & PSL_T;
        bzero((char *)regs, sizeof(struct trapframe));
        regs->tf_rip = imgp->entry_addr;
        /* Make %rsp == 8 (mod 16), the ABI stack state at function entry. */
        regs->tf_rsp = ((stack - 8) & ~0xFul) + 8;
        regs->tf_rdi = stack;           /* argv */
        regs->tf_rflags = PSL_USER | saved_rflags;
        regs->tf_ss = _udatasel;
        regs->tf_cs = _ucodesel;
        regs->tf_ds = _udatasel;
        regs->tf_es = _udatasel;
        regs->tf_fs = _ufssel;
        regs->tf_gs = _ugssel;
        regs->tf_flags = TF_HASSEGS;

        x86_clear_dbregs(pcb);

        /*
         * Drop the FP state if we hold it, so that the process gets a
         * clean FP state if it uses the FPU again.
         */
        fpstate_drop(td);
}
  663 
  664 void
  665 cpu_setregs(void)
  666 {
  667         register_t cr0;
  668 
  669         cr0 = rcr0();
  670         /*
  671          * CR0_MP, CR0_NE and CR0_TS are also set by npx_probe() for the
  672          * BSP.  See the comments there about why we set them.
  673          */
  674         cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM;
  675         load_cr0(cr0);
  676 }
  677 
  678 /*
  679  * Initialize amd64 and configure to run kernel
  680  */
  681 
  682 /*
  683  * Initialize segments & interrupt table
  684  */
  685 
  686 struct user_segment_descriptor gdt[NGDT * MAXCPU];/* global descriptor tables */
  687 static struct gate_descriptor idt0[NIDT];
  688 struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */
  689 
  690 static char dblfault_stack[DBLFAULT_STACK_SIZE] __aligned(16);
  691 static char mce0_stack[MCE_STACK_SIZE] __aligned(16);
  692 static char nmi0_stack[NMI_STACK_SIZE] __aligned(16);
  693 static char dbg0_stack[DBG_STACK_SIZE] __aligned(16);
  694 CTASSERT(sizeof(struct nmi_pcpu) == 16);
  695 
  696 struct amd64tss common_tss[MAXCPU];
  697 
  698 /*
  699  * Software prototypes -- in more palatable form.
  700  *
  701  * Keep GUFS32, GUGS32, GUCODE32 and GUDATA at the same
  702  * slots as corresponding segments for i386 kernel.
  703  */
struct soft_segment_descriptor gdt_segs[] = {
/* GNULL_SEL    0 Null Descriptor */
{       .ssd_base = 0x0,
        .ssd_limit = 0x0,
        .ssd_type = 0,
        .ssd_dpl = 0,
        .ssd_p = 0,
        .ssd_long = 0,
        .ssd_def32 = 0,
        .ssd_gran = 0           },
/* GNULL2_SEL   1 Null Descriptor */
{       .ssd_base = 0x0,
        .ssd_limit = 0x0,
        .ssd_type = 0,
        .ssd_dpl = 0,
        .ssd_p = 0,
        .ssd_long = 0,
        .ssd_def32 = 0,
        .ssd_gran = 0           },
/*
 * GUFS32_SEL   2 32 bit %gs Descriptor for user
 * NOTE(review): the %fs/%gs labels on entries 2 and 3 look swapped
 * relative to the macro names; this matches the historical layout here,
 * confirm against sys/amd64/include/segments.h before "fixing".
 */
{       .ssd_base = 0x0,
        .ssd_limit = 0xfffff,
        .ssd_type = SDT_MEMRWA,
        .ssd_dpl = SEL_UPL,
        .ssd_p = 1,
        .ssd_long = 0,
        .ssd_def32 = 1,
        .ssd_gran = 1           },
/* GUGS32_SEL   3 32 bit %fs Descriptor for user */
{       .ssd_base = 0x0,
        .ssd_limit = 0xfffff,
        .ssd_type = SDT_MEMRWA,
        .ssd_dpl = SEL_UPL,
        .ssd_p = 1,
        .ssd_long = 0,
        .ssd_def32 = 1,
        .ssd_gran = 1           },
/* GCODE_SEL    4 Code Descriptor for kernel */
{       .ssd_base = 0x0,
        .ssd_limit = 0xfffff,
        .ssd_type = SDT_MEMERA,
        .ssd_dpl = SEL_KPL,
        .ssd_p = 1,
        .ssd_long = 1,
        .ssd_def32 = 0,
        .ssd_gran = 1           },
/* GDATA_SEL    5 Data Descriptor for kernel */
{       .ssd_base = 0x0,
        .ssd_limit = 0xfffff,
        .ssd_type = SDT_MEMRWA,
        .ssd_dpl = SEL_KPL,
        .ssd_p = 1,
        .ssd_long = 1,
        .ssd_def32 = 0,
        .ssd_gran = 1           },
/* GUCODE32_SEL 6 32 bit Code Descriptor for user */
{       .ssd_base = 0x0,
        .ssd_limit = 0xfffff,
        .ssd_type = SDT_MEMERA,
        .ssd_dpl = SEL_UPL,
        .ssd_p = 1,
        .ssd_long = 0,
        .ssd_def32 = 1,
        .ssd_gran = 1           },
/* GUDATA_SEL   7 32/64 bit Data Descriptor for user */
{       .ssd_base = 0x0,
        .ssd_limit = 0xfffff,
        .ssd_type = SDT_MEMRWA,
        .ssd_dpl = SEL_UPL,
        .ssd_p = 1,
        .ssd_long = 0,
        .ssd_def32 = 1,
        .ssd_gran = 1           },
/* GUCODE_SEL   8 64 bit Code Descriptor for user */
{       .ssd_base = 0x0,
        .ssd_limit = 0xfffff,
        .ssd_type = SDT_MEMERA,
        .ssd_dpl = SEL_UPL,
        .ssd_p = 1,
        .ssd_long = 1,
        .ssd_def32 = 0,
        .ssd_gran = 1           },
/* GPROC0_SEL   9 Proc 0 Tss Descriptor */
{       .ssd_base = 0x0,
        .ssd_limit = sizeof(struct amd64tss) + IOPERM_BITMAP_SIZE - 1,
        .ssd_type = SDT_SYSTSS,
        .ssd_dpl = SEL_KPL,
        .ssd_p = 1,
        .ssd_long = 0,
        .ssd_def32 = 0,
        .ssd_gran = 0           },
/* 10: Actually, the TSS is a system descriptor which is double size */
{       .ssd_base = 0x0,
        .ssd_limit = 0x0,
        .ssd_type = 0,
        .ssd_dpl = 0,
        .ssd_p = 0,
        .ssd_long = 0,
        .ssd_def32 = 0,
        .ssd_gran = 0           },
/* GUSERLDT_SEL 11 LDT Descriptor */
{       .ssd_base = 0x0,
        .ssd_limit = 0x0,
        .ssd_type = 0,
        .ssd_dpl = 0,
        .ssd_p = 0,
        .ssd_long = 0,
        .ssd_def32 = 0,
        .ssd_gran = 0           },
/* GUSERLDT_SEL 12 LDT Descriptor, double size */
{       .ssd_base = 0x0,
        .ssd_limit = 0x0,
        .ssd_type = 0,
        .ssd_dpl = 0,
        .ssd_p = 0,
        .ssd_long = 0,
        .ssd_def32 = 0,
        .ssd_gran = 0           },
};
/* Catch mismatches between this table and the NGDT selector count. */
_Static_assert(nitems(gdt_segs) == NGDT, "Stale NGDT");
  824 
/*
 * Install an interrupt/trap handler into IDT slot 'idx'.
 *
 * idx  - IDT vector number.
 * func - handler entry point.
 * typ  - gate type for the descriptor.
 * dpl  - descriptor privilege level required to invoke the gate.
 * ist  - Interrupt Stack Table index (0 = no stack switch).
 */
void
setidt(int idx, inthand_t *func, int typ, int dpl, int ist)
{
	struct gate_descriptor *ip;

	ip = idt + idx;
	/* The 64-bit handler address is split across two offset fields. */
	ip->gd_looffset = (uintptr_t)func;
	/* All gates enter through the kernel code segment. */
	ip->gd_selector = GSEL(GCODE_SEL, SEL_KPL);
	ip->gd_ist = ist;
	ip->gd_xx = 0;			/* reserved bits, must be zero */
	ip->gd_type = typ;
	ip->gd_dpl = dpl;
	ip->gd_p = 1;			/* mark the gate present */
	ip->gd_hioffset = ((uintptr_t)func)>>16 ;
}
  840 
  841 extern inthand_t
  842         IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
  843         IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
  844         IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
  845         IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
  846         IDTVEC(xmm), IDTVEC(dblfault),
  847         IDTVEC(div_pti), IDTVEC(bpt_pti),
  848         IDTVEC(ofl_pti), IDTVEC(bnd_pti), IDTVEC(ill_pti), IDTVEC(dna_pti),
  849         IDTVEC(fpusegm_pti), IDTVEC(tss_pti), IDTVEC(missing_pti),
  850         IDTVEC(stk_pti), IDTVEC(prot_pti), IDTVEC(page_pti),
  851         IDTVEC(rsvd_pti), IDTVEC(fpu_pti), IDTVEC(align_pti),
  852         IDTVEC(xmm_pti),
  853 #ifdef KDTRACE_HOOKS
  854         IDTVEC(dtrace_ret), IDTVEC(dtrace_ret_pti),
  855 #endif
  856 #ifdef XENHVM
  857         IDTVEC(xen_intr_upcall), IDTVEC(xen_intr_upcall_pti),
  858 #endif
  859         IDTVEC(fast_syscall), IDTVEC(fast_syscall32),
  860         IDTVEC(fast_syscall_pti);
  861 
  862 #ifdef DDB
  863 /*
  864  * Display the index and function name of any IDT entries that don't use
  865  * the default 'rsvd' entry point.
  866  */
/*
 * Display the index and function name of any IDT entries that don't use
 * the default 'rsvd' entry point.  DDB-only debugging command.
 */
DB_SHOW_COMMAND(idt, db_show_idt)
{
	struct gate_descriptor *ip;
	int idx;
	uintptr_t func;

	ip = idt;
	for (idx = 0; idx < NIDT && !db_pager_quit; idx++) {
		/* Reassemble the handler address from the split fields. */
		func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
		if (func != (uintptr_t)&IDTVEC(rsvd)) {
			db_printf("%3d\t", idx);
			db_printsym(func, DB_STGY_PROC);
			db_printf("\n");
		}
		ip++;
	}
}
  884 
  885 /* Show privileged registers. */
/* Show privileged registers.  DDB-only debugging command. */
DB_SHOW_COMMAND(sysregs, db_show_sysregs)
{
	/*
	 * SIDT/SGDT store a 10-byte pseudo-descriptor: 16-bit limit
	 * followed by a 64-bit base, hence the packed layout.
	 */
	struct {
		uint16_t limit;
		uint64_t base;
	} __packed idtr, gdtr;
	uint16_t ldt, tr;

	__asm __volatile("sidt %0" : "=m" (idtr));
	db_printf("idtr\t0x%016lx/%04x\n",
	    (u_long)idtr.base, (u_int)idtr.limit);
	__asm __volatile("sgdt %0" : "=m" (gdtr));
	db_printf("gdtr\t0x%016lx/%04x\n",
	    (u_long)gdtr.base, (u_int)gdtr.limit);
	__asm __volatile("sldt %0" : "=r" (ldt));
	db_printf("ldtr\t0x%04x\n", ldt);
	__asm __volatile("str %0" : "=r" (tr));
	db_printf("tr\t0x%04x\n", tr);
	db_printf("cr0\t0x%016lx\n", rcr0());
	db_printf("cr2\t0x%016lx\n", rcr2());
	db_printf("cr3\t0x%016lx\n", rcr3());
	db_printf("cr4\t0x%016lx\n", rcr4());
	/* XCR0 only exists (and rxcr only traps cleanly) with CR4.XSAVE set. */
	if (rcr4() & CR4_XSAVE)
		db_printf("xcr0\t0x%016lx\n", rxcr(0));
	db_printf("EFER\t0x%016lx\n", rdmsr(MSR_EFER));
	/* FEATURE_CONTROL is only defined on VMX/SMX-capable CPUs. */
	if (cpu_feature2 & (CPUID2_VMX | CPUID2_SMX))
		db_printf("FEATURES_CTL\t%016lx\n",
		    rdmsr(MSR_IA32_FEATURE_CONTROL));
	db_printf("DEBUG_CTL\t0x%016lx\n", rdmsr(MSR_DEBUGCTLMSR));
	db_printf("PAT\t0x%016lx\n", rdmsr(MSR_PAT));
	db_printf("GSBASE\t0x%016lx\n", rdmsr(MSR_GSBASE));
}
  918 
/*
 * Show the hardware debug registers (breakpoint addresses dr0-dr3,
 * status dr6 and control dr7).  DDB-only debugging command.
 */
DB_SHOW_COMMAND(dbregs, db_show_dbregs)
{

	db_printf("dr0\t0x%016lx\n", rdr0());
	db_printf("dr1\t0x%016lx\n", rdr1());
	db_printf("dr2\t0x%016lx\n", rdr2());
	db_printf("dr3\t0x%016lx\n", rdr3());
	db_printf("dr6\t0x%016lx\n", rdr6());
	db_printf("dr7\t0x%016lx\n", rdr7());
}
  929 #endif
  930 
  931 void
  932 sdtossd(sd, ssd)
  933         struct user_segment_descriptor *sd;
  934         struct soft_segment_descriptor *ssd;
  935 {
  936 
  937         ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
  938         ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
  939         ssd->ssd_type  = sd->sd_type;
  940         ssd->ssd_dpl   = sd->sd_dpl;
  941         ssd->ssd_p     = sd->sd_p;
  942         ssd->ssd_long  = sd->sd_long;
  943         ssd->ssd_def32 = sd->sd_def32;
  944         ssd->ssd_gran  = sd->sd_gran;
  945 }
  946 
  947 void
  948 ssdtosd(ssd, sd)
  949         struct soft_segment_descriptor *ssd;
  950         struct user_segment_descriptor *sd;
  951 {
  952 
  953         sd->sd_lobase = (ssd->ssd_base) & 0xffffff;
  954         sd->sd_hibase = (ssd->ssd_base >> 24) & 0xff;
  955         sd->sd_lolimit = (ssd->ssd_limit) & 0xffff;
  956         sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf;
  957         sd->sd_type  = ssd->ssd_type;
  958         sd->sd_dpl   = ssd->ssd_dpl;
  959         sd->sd_p     = ssd->ssd_p;
  960         sd->sd_long  = ssd->ssd_long;
  961         sd->sd_def32 = ssd->ssd_def32;
  962         sd->sd_gran  = ssd->ssd_gran;
  963 }
  964 
  965 void
  966 ssdtosyssd(ssd, sd)
  967         struct soft_segment_descriptor *ssd;
  968         struct system_segment_descriptor *sd;
  969 {
  970 
  971         sd->sd_lobase = (ssd->ssd_base) & 0xffffff;
  972         sd->sd_hibase = (ssd->ssd_base >> 24) & 0xfffffffffful;
  973         sd->sd_lolimit = (ssd->ssd_limit) & 0xffff;
  974         sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf;
  975         sd->sd_type  = ssd->ssd_type;
  976         sd->sd_dpl   = ssd->ssd_dpl;
  977         sd->sd_p     = ssd->ssd_p;
  978         sd->sd_gran  = ssd->ssd_gran;
  979 }
  980 
  981 #if !defined(DEV_ATPIC) && defined(DEV_ISA)
  982 #include <isa/isavar.h>
  983 #include <isa/isareg.h>
  984 /*
  985  * Return a bitmap of the current interrupt requests.  This is 8259-specific
  986  * and is only suitable for use at probe time.
  987  * This is only here to pacify sio.  It is NOT FATAL if this doesn't work.
  988  * It shouldn't be here.  There should probably be an APIC centric
  989  * implementation in the apic driver code, if at all.
  990  */
/*
 * Return a bitmap of pending interrupt requests read directly from the
 * two cascaded 8259 PICs: ICU1 supplies bits 0-7, ICU2 bits 8-15.
 * Probe-time only; see the block comment above.
 */
intrmask_t
isa_irq_pending(void)
{
	u_char irr1;
	u_char irr2;

	irr1 = inb(IO_ICU1);
	irr2 = inb(IO_ICU2);
	return ((irr2 << 8) | irr1);
}
 1001 #endif
 1002 
 1003 u_int basemem;
 1004 
/*
 * Insert the range [base, base + length) into the physmap array, which
 * holds sorted, non-overlapping base/bound pairs in consecutive slots.
 *
 * Returns 1 on success (including when the entry was ignored as a
 * zero-length or overlapping region) and 0 when the array is full,
 * which tells the caller to stop adding entries.
 */
static int
add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap,
    int *physmap_idxp)
{
	int i, insert_idx, physmap_idx;

	physmap_idx = *physmap_idxp;

	if (length == 0)
		return (1);

	/*
	 * Find insertion point while checking for overlap.  Start off by
	 * assuming the new entry will be added to the end.
	 *
	 * NB: physmap_idx points to the next free slot.
	 */
	insert_idx = physmap_idx;
	for (i = 0; i <= physmap_idx; i += 2) {
		if (base < physmap[i + 1]) {
			if (base + length <= physmap[i]) {
				/* Entirely before entry i: insert here. */
				insert_idx = i;
				break;
			}
			if (boothowto & RB_VERBOSE)
				printf(
		    "Overlapping memory regions, ignoring second region\n");
			return (1);
		}
	}

	/* See if we can prepend to the next entry. */
	if (insert_idx <= physmap_idx && base + length == physmap[insert_idx]) {
		physmap[insert_idx] = base;
		return (1);
	}

	/* See if we can append to the previous entry. */
	if (insert_idx > 0 && base == physmap[insert_idx - 1]) {
		physmap[insert_idx - 1] += length;
		return (1);
	}

	physmap_idx += 2;
	*physmap_idxp = physmap_idx;
	if (physmap_idx == PHYSMAP_SIZE) {
		printf(
		"Too many segments in the physical address map, giving up\n");
		return (0);
	}

	/*
	 * Move the last 'N' entries down to make room for the new
	 * entry if needed.
	 */
	for (i = (physmap_idx - 2); i > insert_idx; i -= 2) {
		physmap[i] = physmap[i - 2];
		physmap[i + 1] = physmap[i - 1];
	}

	/* Insert the new entry. */
	physmap[insert_idx] = base;
	physmap[insert_idx + 1] = base + length;
	return (1);
}
 1070 
/*
 * Walk the BIOS INT 15h E820 system memory map supplied by the loader
 * and add each SMAP_TYPE_MEMORY (usable RAM) entry to the physmap array.
 * Stops early if add_physmap_entry() reports the array is full.
 */
void
bios_add_smap_entries(struct bios_smap *smapbase, u_int32_t smapsize,
                      vm_paddr_t *physmap, int *physmap_idx)
{
	struct bios_smap *smap, *smapend;

	/* smapsize is in bytes; compute one-past-the-end. */
	smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize);

	for (smap = smapbase; smap < smapend; smap++) {
		if (boothowto & RB_VERBOSE)
			printf("SMAP type=%02x base=%016lx len=%016lx\n",
			    smap->type, smap->base, smap->length);

		/* Only plain usable memory is handed to the VM system. */
		if (smap->type != SMAP_TYPE_MEMORY)
			continue;

		if (!add_physmap_entry(smap->base, smap->length, physmap,
		    physmap_idx))
			break;
	}
}
 1092 
/*
 * Walk the UEFI memory map supplied by the loader and add every entry
 * whose type marks it as usable RAM to the physmap array.  With
 * RB_VERBOSE, also print a human-readable dump of the whole map.
 */
static void
add_efi_map_entries(struct efi_map_header *efihdr, vm_paddr_t *physmap,
    int *physmap_idx)
{
	struct efi_md *map, *p;
	const char *type;
	size_t efisz;
	int ndesc, i;

	/* Printable names indexed by EFI memory descriptor type. */
	static const char *types[] = {
		"Reserved",
		"LoaderCode",
		"LoaderData",
		"BootServicesCode",
		"BootServicesData",
		"RuntimeServicesCode",
		"RuntimeServicesData",
		"ConventionalMemory",
		"UnusableMemory",
		"ACPIReclaimMemory",
		"ACPIMemoryNVS",
		"MemoryMappedIO",
		"MemoryMappedIOPortSpace",
		"PalCode",
		"PersistentMemory"
	};

	/*
	 * Memory map data provided by UEFI via the GetMemoryMap
	 * Boot Services API.  The descriptor array follows the header,
	 * 16-byte aligned.
	 */
	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
	map = (struct efi_md *)((uint8_t *)efihdr + efisz);

	/* Guard against a malformed header before dividing. */
	if (efihdr->descriptor_size == 0)
		return;
	ndesc = efihdr->memory_size / efihdr->descriptor_size;

	if (boothowto & RB_VERBOSE)
		printf("%23s %12s %12s %8s %4s\n",
		    "Type", "Physical", "Virtual", "#Pages", "Attr");

	/*
	 * Descriptors may be larger than struct efi_md, so step with
	 * efi_next_descriptor() using the firmware-reported size.
	 */
	for (i = 0, p = map; i < ndesc; i++,
	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
		if (boothowto & RB_VERBOSE) {
			if (p->md_type < nitems(types))
				type = types[p->md_type];
			else
				type = "<INVALID>";
			printf("%23s %012lx %12p %08lx ", type, p->md_phys,
			    p->md_virt, p->md_pages);
			if (p->md_attr & EFI_MD_ATTR_UC)
				printf("UC ");
			if (p->md_attr & EFI_MD_ATTR_WC)
				printf("WC ");
			if (p->md_attr & EFI_MD_ATTR_WT)
				printf("WT ");
			if (p->md_attr & EFI_MD_ATTR_WB)
				printf("WB ");
			if (p->md_attr & EFI_MD_ATTR_UCE)
				printf("UCE ");
			if (p->md_attr & EFI_MD_ATTR_WP)
				printf("WP ");
			if (p->md_attr & EFI_MD_ATTR_RP)
				printf("RP ");
			if (p->md_attr & EFI_MD_ATTR_XP)
				printf("XP ");
			if (p->md_attr & EFI_MD_ATTR_NV)
				printf("NV ");
			if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE)
				printf("MORE_RELIABLE ");
			if (p->md_attr & EFI_MD_ATTR_RO)
				printf("RO ");
			if (p->md_attr & EFI_MD_ATTR_RT)
				printf("RUNTIME");
			printf("\n");
		}

		switch (p->md_type) {
		case EFI_MD_TYPE_CODE:
		case EFI_MD_TYPE_DATA:
		case EFI_MD_TYPE_BS_CODE:
		case EFI_MD_TYPE_BS_DATA:
		case EFI_MD_TYPE_FREE:
			/*
			 * We're allowed to use any entry with these types.
			 */
			break;
		default:
			continue;
		}

		if (!add_physmap_entry(p->md_phys, (p->md_pages * PAGE_SIZE),
		    physmap, physmap_idx))
			break;
	}
}
 1190 
 1191 static char bootmethod[16] = "";
 1192 SYSCTL_STRING(_machdep, OID_AUTO, bootmethod, CTLFLAG_RD, bootmethod, 0,
 1193     "System firmware boot method");
 1194 
/*
 * Locate the firmware memory map passed in by the loader — preferring a
 * UEFI map over a BIOS E820 SMAP when both are present — and feed it
 * into the physmap array.  Also records the boot method ("UEFI"/"BIOS")
 * for the machdep.bootmethod sysctl.  Panics if neither map exists.
 */
static void
native_parse_memmap(caddr_t kmdp, vm_paddr_t *physmap, int *physmap_idx)
{
	struct bios_smap *smap;
	struct efi_map_header *efihdr;
	u_int32_t size;

	/*
	 * Memory map from INT 15:E820.
	 *
	 * subr_module.c says:
	 * "Consumer may safely assume that size value precedes data."
	 * ie: an int32_t immediately precedes smap.
	 */

	efihdr = (struct efi_map_header *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_MAP);
	smap = (struct bios_smap *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_SMAP);
	if (efihdr == NULL && smap == NULL)
		panic("No BIOS smap or EFI map info from loader!");

	if (efihdr != NULL) {
		add_efi_map_entries(efihdr, physmap, physmap_idx);
		strlcpy(bootmethod, "UEFI", sizeof(bootmethod));
	} else {
		/* The SMAP byte count is stored just before the data. */
		size = *((u_int32_t *)smap - 1);
		bios_add_smap_entries(smap, size, physmap, physmap_idx);
		strlcpy(bootmethod, "BIOS", sizeof(bootmethod));
	}
}
 1226 
 1227 #define PAGES_PER_GB    (1024 * 1024 * 1024 / PAGE_SIZE)
 1228 
 1229 /*
 1230  * Populate the (physmap) array with base/bound pairs describing the
 1231  * available physical memory in the system, then test this memory and
 1232  * build the phys_avail array describing the actually-available memory.
 1233  *
 1234  * Total memory size may be set by the kernel environment variable
 1235  * hw.physmem or the compile-time define MAXMEM.
 1236  *
 1237  * XXX first should be vm_paddr_t.
 1238  */
/*
 * Populate the physmap array from the firmware memory map, clamp it to
 * hw.physmem/MAXMEM, optionally run the boot-time memory test, and build
 * the phys_avail[] and dump_avail[] arrays describing usable memory.
 * Finally carves out and maps the kernel message buffer.
 *
 * kmdp  - preload metadata handle from the loader.
 * first - first physical address beyond the kernel + preloaded data.
 */
static void
getmemsize(caddr_t kmdp, u_int64_t first)
{
	int i, physmap_idx, pa_indx, da_indx;
	vm_paddr_t pa, physmap[PHYSMAP_SIZE];
	u_long physmem_start, physmem_tunable, memtest;
	pt_entry_t *pte;
	quad_t dcons_addr, dcons_size;
	int page_counter;

	/*
	 * Tell the physical memory allocator about pages used to store
	 * the kernel and preloaded data.  See kmem_bootstrap_free().
	 */
	vm_phys_add_seg((vm_paddr_t)kernphys, trunc_page(first));

	bzero(physmap, sizeof(physmap));
	physmap_idx = 0;

	init_ops.parse_memmap(kmdp, physmap, &physmap_idx);
	/* Back up from "next free slot" to the last base/bound pair. */
	physmap_idx -= 2;

	/*
	 * Find the 'base memory' segment for SMP
	 */
	basemem = 0;
	for (i = 0; i <= physmap_idx; i += 2) {
		if (physmap[i] <= 0xA0000) {
			basemem = physmap[i + 1] / 1024;
			break;
		}
	}
	if (basemem == 0 || basemem > 640) {
		if (bootverbose)
			printf(
		"Memory map doesn't contain a basemem segment, faking it");
		basemem = 640;
	}

	/*
	 * Maxmem isn't the "maximum memory", it's one larger than the
	 * highest page of the physical address space.  It should be
	 * called something like "Maxphyspage".  We may adjust this
	 * based on ``hw.physmem'' and the results of the memory test.
	 */
	Maxmem = atop(physmap[physmap_idx + 1]);

#ifdef MAXMEM
	/* MAXMEM is in kilobytes; convert to 4K pages. */
	Maxmem = MAXMEM / 4;
#endif

	if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable))
		Maxmem = atop(physmem_tunable);

	/*
	 * The boot memory test is disabled by default, as it takes a
	 * significant amount of time on large-memory systems, and is
	 * unfriendly to virtual machines as it unnecessarily touches all
	 * pages.
	 *
	 * A general name is used as the code may be extended to support
	 * additional tests beyond the current "page present" test.
	 */
	memtest = 0;
	TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest);

	/*
	 * Don't allow MAXMEM or hw.physmem to extend the amount of memory
	 * in the system.
	 */
	if (Maxmem > atop(physmap[physmap_idx + 1]))
		Maxmem = atop(physmap[physmap_idx + 1]);

	if (atop(physmap[physmap_idx + 1]) != Maxmem &&
	    (boothowto & RB_VERBOSE))
		printf("Physical memory use set to %ldK\n", Maxmem * 4);

	/*
	 * Make hole for "AP -> long mode" bootstrap code.  The
	 * mp_bootaddress vector is only available when the kernel
	 * is configured to support APs and APs for the system start
	 * in real mode mode (e.g. SMP bare metal).
	 */
	if (init_ops.mp_bootaddress)
		init_ops.mp_bootaddress(physmap, &physmap_idx);

	/* call pmap initialization to make new kernel address space */
	pmap_bootstrap(&first);

	/*
	 * Size up each available chunk of physical memory.
	 *
	 * XXX Some BIOSes corrupt low 64KB between suspend and resume.
	 * By default, mask off the first 16 pages unless we appear to be
	 * running in a VM.
	 */
	physmem_start = (vm_guest > VM_GUEST_NO ? 1 : 16) << PAGE_SHIFT;
	TUNABLE_ULONG_FETCH("hw.physmem.start", &physmem_start);
	if (physmap[0] < physmem_start) {
		if (physmem_start < PAGE_SIZE)
			physmap[0] = PAGE_SIZE;
		else if (physmem_start >= physmap[1])
			physmap[0] = round_page(physmap[1] - PAGE_SIZE);
		else
			physmap[0] = round_page(physmem_start);
	}
	pa_indx = 0;
	da_indx = 1;
	phys_avail[pa_indx++] = physmap[0];
	phys_avail[pa_indx] = physmap[0];
	dump_avail[da_indx] = physmap[0];
	/* CMAP1/CADDR1 provide a scratch mapping for the page test below. */
	pte = CMAP1;

	/*
	 * Get dcons buffer address
	 */
	if (getenv_quad("dcons.addr", &dcons_addr) == 0 ||
	    getenv_quad("dcons.size", &dcons_size) == 0)
		dcons_addr = 0;

	/*
	 * physmap is in bytes, so when converting to page boundaries,
	 * round up the start address and round down the end address.
	 */
	page_counter = 0;
	if (memtest != 0)
		printf("Testing system memory");
	for (i = 0; i <= physmap_idx; i += 2) {
		vm_paddr_t end;

		end = ptoa((vm_paddr_t)Maxmem);
		if (physmap[i + 1] < end)
			end = trunc_page(physmap[i + 1]);
		for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
			int tmp, page_bad, full;
			int *ptr = (int *)CADDR1;

			full = FALSE;
			/*
			 * block out kernel memory as not available.
			 */
			if (pa >= (vm_paddr_t)kernphys && pa < first)
				goto do_dump_avail;

			/*
			 * block out dcons buffer
			 */
			if (dcons_addr > 0
			    && pa >= trunc_page(dcons_addr)
			    && pa < dcons_addr + dcons_size)
				goto do_dump_avail;

			page_bad = FALSE;
			if (memtest == 0)
				goto skip_memtest;

			/*
			 * Print a "." every GB to show we're making
			 * progress.
			 */
			page_counter++;
			if ((page_counter % PAGES_PER_GB) == 0)
				printf(".");

			/*
			 * map page into kernel: valid, read/write,non-cacheable
			 */
			*pte = pa | PG_V | PG_RW | PG_NC_PWT | PG_NC_PCD;
			invltlb();

			tmp = *(int *)ptr;
			/*
			 * Test for alternating 1's and 0's
			 */
			*(volatile int *)ptr = 0xaaaaaaaa;
			if (*(volatile int *)ptr != 0xaaaaaaaa)
				page_bad = TRUE;
			/*
			 * Test for alternating 0's and 1's
			 */
			*(volatile int *)ptr = 0x55555555;
			if (*(volatile int *)ptr != 0x55555555)
				page_bad = TRUE;
			/*
			 * Test for all 1's
			 */
			*(volatile int *)ptr = 0xffffffff;
			if (*(volatile int *)ptr != 0xffffffff)
				page_bad = TRUE;
			/*
			 * Test for all 0's
			 */
			*(volatile int *)ptr = 0x0;
			if (*(volatile int *)ptr != 0x0)
				page_bad = TRUE;
			/*
			 * Restore original value.
			 */
			*(int *)ptr = tmp;

skip_memtest:
			/*
			 * Adjust array of valid/good pages.
			 */
			if (page_bad == TRUE)
				continue;
			/*
			 * If this good page is a continuation of the
			 * previous set of good pages, then just increase
			 * the end pointer. Otherwise start a new chunk.
			 * Note that "end" points one higher than end,
			 * making the range >= start and < end.
			 * If we're also doing a speculative memory
			 * test and we at or past the end, bump up Maxmem
			 * so that we keep going. The first bad page
			 * will terminate the loop.
			 */
			if (phys_avail[pa_indx] == pa) {
				phys_avail[pa_indx] += PAGE_SIZE;
			} else {
				pa_indx++;
				if (pa_indx == PHYS_AVAIL_ARRAY_END) {
					printf(
		"Too many holes in the physical address space, giving up\n");
					pa_indx--;
					full = TRUE;
					goto do_dump_avail;
				}
				phys_avail[pa_indx++] = pa;	/* start */
				phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */
			}
			physmem++;
do_dump_avail:
			if (dump_avail[da_indx] == pa) {
				dump_avail[da_indx] += PAGE_SIZE;
			} else {
				da_indx++;
				if (da_indx == DUMP_AVAIL_ARRAY_END) {
					da_indx--;
					goto do_next;
				}
				dump_avail[da_indx++] = pa; /* start */
				dump_avail[da_indx] = pa + PAGE_SIZE; /* end */
			}
do_next:
			if (full)
				break;
		}
	}
	/* Tear down the scratch mapping used by the memory test. */
	*pte = 0;
	invltlb();
	if (memtest != 0)
		printf("\n");

	/*
	 * XXX
	 * The last chunk must contain at least one page plus the message
	 * buffer to avoid complicating other code (message buffer address
	 * calculation, etc.).
	 */
	while (phys_avail[pa_indx - 1] + PAGE_SIZE +
	    round_page(msgbufsize) >= phys_avail[pa_indx]) {
		physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
		phys_avail[pa_indx--] = 0;
		phys_avail[pa_indx--] = 0;
	}

	Maxmem = atop(phys_avail[pa_indx]);

	/* Trim off space for the message buffer. */
	phys_avail[pa_indx] -= round_page(msgbufsize);

	/* Map the message buffer. */
	msgbufp = (struct msgbuf *)PHYS_TO_DMAP(phys_avail[pa_indx]);
}
 1514 
/*
 * Parse the module metadata passed in by the loader: relocate the
 * preload area into KVA, locate the kernel module entry, and pull out
 * boothowto, the static kernel environment, the debugger symbol table
 * (DDB kernels) and the firmware handle.
 *
 * modulep - physical address of the loader's module metadata.
 * Returns the kernel's preload metadata handle.
 */
static caddr_t
native_parse_preload_data(u_int64_t modulep)
{
	caddr_t kmdp;
	char *envp;
#ifdef DDB
	vm_offset_t ksym_start;
	vm_offset_t ksym_end;
#endif

	/* The loader hands us physical addresses; rebase into KVA. */
	preload_metadata = (caddr_t)(uintptr_t)(modulep + KERNBASE);
	preload_bootstrap_relocate(KERNBASE);
	kmdp = preload_search_by_type("elf kernel");
	if (kmdp == NULL)
		kmdp = preload_search_by_type("elf64 kernel");
	boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int);
	envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *);
	if (envp != NULL)
		envp += KERNBASE;	/* relocate the env pointer too */
	init_static_kenv(envp, 0);
#ifdef DDB
	ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t);
	ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t);
	db_fetch_ksymtab(ksym_start, ksym_end);
#endif
	efi_systbl_phys = MD_FETCH(kmdp, MODINFOMD_FW_HANDLE, vm_paddr_t);

	return (kmdp);
}
 1544 
/*
 * Initialize the kernel debugger framework and, on KDB kernels, drop
 * into the debugger immediately if the boot flags (RB_KDB) request it.
 */
static void
amd64_kdb_init(void)
{
	kdb_init();
#ifdef KDB
	if (boothowto & RB_KDB)
		kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
#endif
}
 1554 
/*
 * Set up the fast syscall stuff: enable EFER.SCE and program the
 * SYSCALL/SYSRET MSRs — entry points (64-bit and 32-bit), segment
 * selectors, and the RFLAGS mask applied on kernel entry.
 */
void
amd64_conf_fast_syscall(void)
{
	uint64_t msr;

	/* EFER_SCE turns on the SYSCALL/SYSRET instructions. */
	msr = rdmsr(MSR_EFER) | EFER_SCE;
	wrmsr(MSR_EFER, msr);
	/* 64-bit SYSCALL target; PTI kernels use the trampoline entry. */
	wrmsr(MSR_LSTAR, pti ? (u_int64_t)IDTVEC(fast_syscall_pti) :
	    (u_int64_t)IDTVEC(fast_syscall));
	/* 32-bit (compat mode) SYSCALL target. */
	wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32));
	/* Kernel and user segment selector bases for SYSCALL/SYSRET. */
	msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) |
	    ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48);
	wrmsr(MSR_STAR, msr);
	/* RFLAGS bits cleared on syscall entry (interrupts off, etc.). */
	wrmsr(MSR_SF_MASK, PSL_NT | PSL_T | PSL_I | PSL_C | PSL_D | PSL_AC);
}
 1571 
/*
 * First-stage per-CPU data setup for the BSP: point the PCPU fields at
 * thread0, the shared TSS, and the BSP's GDT descriptor slots.
 */
void
amd64_bsp_pcpu_init1(struct pcpu *pc)
{

	PCPU_SET(prvspace, pc);
	PCPU_SET(curthread, &thread0);
	PCPU_SET(tssp, &common_tss[0]);
	PCPU_SET(commontssp, &common_tss[0]);
	/* Cache pointers to the TSS/LDT system descriptors in the GDT. */
	PCPU_SET(tss, (struct system_segment_descriptor *)&gdt[GPROC0_SEL]);
	PCPU_SET(ldt, (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL]);
	/* 32-bit %fs/%gs descriptor slots for compat processes. */
	PCPU_SET(fs32p, &gdt[GUFS32_SEL]);
	PCPU_SET(gs32p, &gdt[GUGS32_SEL]);
}
 1585 
/*
 * Second-stage per-CPU data setup for the BSP: record the kernel stack
 * top (rsp0), derive the 16-byte-aligned PTI trampoline stack top from
 * the per-CPU pti_stack, and set curpcb to thread0's PCB.
 */
void
amd64_bsp_pcpu_init2(uint64_t rsp0)
{

	PCPU_SET(rsp0, rsp0);
	/* Top of the PTI stack, aligned down to 16 bytes. */
	PCPU_SET(pti_rsp0, ((vm_offset_t)PCPU_PTR(pti_stack) +
	    PC_PTI_STACK_SZ * sizeof(uint64_t)) & ~0xful);
	PCPU_SET(curpcb, thread0.td_pcb);
}
 1595 
/*
 * Program the BSP's interrupt-stack-table (IST) entries in the common
 * TSS: dedicated stacks for double fault (IST1), NMI (IST2), machine
 * check (IST3) and debug (IST4).  For the NMI/MC#/DB# stacks a
 * struct nmi_pcpu holding the pcpu pointer is placed at the very top
 * of the stack so the handlers can find their per-CPU data.
 */
void
amd64_bsp_ist_init(struct pcpu *pc)
{
	struct nmi_pcpu *np;

	/* doublefault stack space, runs on ist1 */
	common_tss[0].tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)];

	/*
	 * NMI stack, runs on ist2.  The pcpu pointer is stored just
	 * above the start of the ist2 stack.
	 */
	np = ((struct nmi_pcpu *)&nmi0_stack[sizeof(nmi0_stack)]) - 1;
	np->np_pcpu = (register_t)pc;
	common_tss[0].tss_ist2 = (long)np;

	/*
	 * MC# stack, runs on ist3.  The pcpu pointer is stored just
	 * above the start of the ist3 stack.
	 */
	np = ((struct nmi_pcpu *)&mce0_stack[sizeof(mce0_stack)]) - 1;
	np->np_pcpu = (register_t)pc;
	common_tss[0].tss_ist3 = (long)np;

	/*
	 * DB# stack, runs on ist4.
	 */
	np = ((struct nmi_pcpu *)&dbg0_stack[sizeof(dbg0_stack)]) - 1;
	np->np_pcpu = (register_t)pc;
	common_tss[0].tss_ist4 = (long)np;
}
 1627 
/*
 * Machine-dependent entry point for the boot processor, called from
 * locore with the loader module pointer and the first free physical
 * address.  Performs all early MD initialization -- CPU identification,
 * GDT/IDT/TSS setup, per-CPU data, mitigation tunables, console and
 * memory sizing -- and returns the location of thread0's kernel stack
 * for locore to switch onto.  The ordering of steps below is
 * load-bearing; do not reorder without understanding the dependencies.
 */
u_int64_t
hammer_time(u_int64_t modulep, u_int64_t physfree)
{
	caddr_t kmdp;
	int gsel_tss, x;
	struct pcpu *pc;
	struct xstate_hdr *xhdr;
	u_int64_t rsp0;
	char *env;
	struct region_descriptor r_gdt;
	size_t kstack0_sz;
	int late_console;

	TSRAW(&thread0, TS_ENTER, __func__, NULL);

	kmdp = init_ops.parse_preload_data(modulep);

	/* Apply CPU microcode updates before feature identification. */
	physfree += ucode_load_bsp(physfree + KERNBASE);
	physfree = roundup2(physfree, PAGE_SIZE);

	identify_cpu1();
	identify_hypervisor();
	identify_cpu_fixup_bsp();
	identify_cpu2();
	initializecpucache();

	/*
	 * Check for pti, pcid, and invpcid before ifuncs are
	 * resolved, to correctly select the implementation for
	 * pmap_activate_sw_mode().
	 */
	pti = pti_get_default();
	TUNABLE_INT_FETCH("vm.pmap.pti", &pti);
	TUNABLE_INT_FETCH("vm.pmap.pcid_enabled", &pmap_pcid_enabled);
	if ((cpu_feature2 & CPUID2_PCID) != 0 && pmap_pcid_enabled) {
		invpcid_works = (cpu_stdext_feature &
		    CPUID_STDEXT_INVPCID) != 0;
	} else {
		pmap_pcid_enabled = 0;
	}

	link_elf_ireloc(kmdp);

	/*
	 * This may be done better later if it gets more high level
	 * components in it. If so just link td->td_proc here.
	 */
	proc_linkup0(&proc0, &thread0);

	/* Init basic tunables, hz etc */
	init_param1();

	/* Carve thread0's kernel stack out of the free physical memory. */
	thread0.td_kstack = physfree + KERNBASE;
	thread0.td_kstack_pages = kstack_pages;
	kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE;
	bzero((void *)thread0.td_kstack, kstack0_sz);
	physfree += kstack0_sz;

	/*
	 * Initialize enough of thread0 for delayed invalidation to
	 * work very early.  Rely on thread0.td_base_pri
	 * zero-initialization, it is reset to PVM at proc0_init().
	 */
	pmap_thread_init_invl_gen(&thread0);

	/*
	 * make gdt memory segments
	 */
	for (x = 0; x < NGDT; x++) {
		/*
		 * Skip the TSS and user-LDT slots (and the slot after
		 * each); they hold 16-byte system descriptors installed
		 * separately below.
		 */
		if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) &&
		    x != GUSERLDT_SEL && x != (GUSERLDT_SEL) + 1)
			ssdtosd(&gdt_segs[x], &gdt[x]);
	}
	gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&common_tss[0];
	ssdtosyssd(&gdt_segs[GPROC0_SEL],
	    (struct system_segment_descriptor *)&gdt[GPROC0_SEL]);

	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
	r_gdt.rd_base =  (long) gdt;
	lgdt(&r_gdt);
	/* Run on a temporary pcpu until the real one is set up. */
	pc = &temp_bsp_pcpu;

	wrmsr(MSR_FSBASE, 0);		/* User value */
	wrmsr(MSR_GSBASE, (u_int64_t)pc);
	wrmsr(MSR_KGSBASE, 0);		/* User value while in the kernel */

	pcpu_init(pc, 0, sizeof(struct pcpu));
	dpcpu_init((void *)(physfree + KERNBASE), 0);
	physfree += DPCPU_SIZE;
	amd64_bsp_pcpu_init1(pc);
	/* Non-late cninit() and printf() can be moved up to here. */

	/*
	 * Initialize mutexes.
	 *
	 * icu_lock: in order to allow an interrupt to occur in a critical
	 *           section, to set pcpu->ipending (etc...) properly, we
	 *           must be able to get the icu lock, so it can't be
	 *           under witness.
	 */
	mutex_init();
	mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS);
	mtx_init(&dt_lock, "descriptor tables", NULL, MTX_DEF);

	/* exceptions */
	/* Default every vector to the reserved-trap handler first. */
	for (x = 0; x < NIDT; x++)
		setidt(x, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_SYSIGT,
		    SEL_KPL, 0);
	setidt(IDT_DE, pti ? &IDTVEC(div_pti) : &IDTVEC(div), SDT_SYSIGT,
	    SEL_KPL, 0);
	/* DB#, NMI, DF# and MC# run on dedicated IST stacks (see above). */
	setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 4);
	setidt(IDT_NMI, &IDTVEC(nmi),  SDT_SYSIGT, SEL_KPL, 2);
	setidt(IDT_BP, pti ? &IDTVEC(bpt_pti) : &IDTVEC(bpt), SDT_SYSIGT,
	    SEL_UPL, 0);
	setidt(IDT_OF, pti ? &IDTVEC(ofl_pti) : &IDTVEC(ofl), SDT_SYSIGT,
	    SEL_UPL, 0);
	setidt(IDT_BR, pti ? &IDTVEC(bnd_pti) : &IDTVEC(bnd), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_UD, pti ? &IDTVEC(ill_pti) : &IDTVEC(ill), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_NM, pti ? &IDTVEC(dna_pti) : &IDTVEC(dna), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_DF, &IDTVEC(dblfault), SDT_SYSIGT, SEL_KPL, 1);
	setidt(IDT_FPUGP, pti ? &IDTVEC(fpusegm_pti) : &IDTVEC(fpusegm),
	    SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_TS, pti ? &IDTVEC(tss_pti) : &IDTVEC(tss), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_NP, pti ? &IDTVEC(missing_pti) : &IDTVEC(missing),
	    SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_SS, pti ? &IDTVEC(stk_pti) : &IDTVEC(stk), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_GP, pti ? &IDTVEC(prot_pti) : &IDTVEC(prot), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_PF, pti ? &IDTVEC(page_pti) : &IDTVEC(page), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_MF, pti ? &IDTVEC(fpu_pti) : &IDTVEC(fpu), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_AC, pti ? &IDTVEC(align_pti) : &IDTVEC(align), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_MC, &IDTVEC(mchk), SDT_SYSIGT, SEL_KPL, 3);
	setidt(IDT_XF, pti ? &IDTVEC(xmm_pti) : &IDTVEC(xmm), SDT_SYSIGT,
	    SEL_KPL, 0);
#ifdef KDTRACE_HOOKS
	setidt(IDT_DTRACE_RET, pti ? &IDTVEC(dtrace_ret_pti) :
	    &IDTVEC(dtrace_ret), SDT_SYSIGT, SEL_UPL, 0);
#endif
#ifdef XENHVM
	setidt(IDT_EVTCHN, pti ? &IDTVEC(xen_intr_upcall_pti) :
	    &IDTVEC(xen_intr_upcall), SDT_SYSIGT, SEL_KPL, 0);
#endif
	r_idt.rd_limit = sizeof(idt0) - 1;
	r_idt.rd_base = (long) idt;
	lidt(&r_idt);

	/*
	 * Initialize the clock before the console so that console
	 * initialization can use DELAY().
	 */
	clock_init();

	/*
	 * Use vt(4) by default for UEFI boot (during the sc(4)/vt(4)
	 * transition).
	 * Once bootblocks have updated, we can test directly for
	 * efi_systbl != NULL here...
	 */
	if (preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_EFI_MAP)
	    != NULL)
		vty_set_preferred(VTY_VT);

	/* Fetch speculative-execution mitigation tunables. */
	TUNABLE_INT_FETCH("hw.ibrs_disable", &hw_ibrs_disable);
	TUNABLE_INT_FETCH("hw.spec_store_bypass_disable", &hw_ssb_disable);
	TUNABLE_INT_FETCH("machdep.syscall_ret_l1d_flush",
	    &syscall_ret_l1d_flush_mode);
	TUNABLE_INT_FETCH("hw.mds_disable", &hw_mds_disable);
	TUNABLE_INT_FETCH("machdep.mitigations.taa.enable", &x86_taa_enable);

	TUNABLE_INT_FETCH("machdep.mitigations.rndgs.enable",
	    &x86_rngds_mitg_enable);

	finishidentcpu();	/* Final stage of CPU initialization */
	initializecpu();	/* Initialize CPU registers */

	amd64_bsp_ist_init(pc);

	/* Set the IO permission bitmap (empty due to tss seg limit) */
	common_tss[0].tss_iobase = sizeof(struct amd64tss) + IOPERM_BITMAP_SIZE;

	/* Load the task register with the BSP's TSS selector. */
	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
	ltr(gsel_tss);

	amd64_conf_fast_syscall();

	/*
	 * We initialize the PCB pointer early so that exception
	 * handlers will work.  Also set up td_critnest to short-cut
	 * the page fault handler.
	 */
	cpu_max_ext_state_size = sizeof(struct savefpu);
	set_top_of_stack_td(&thread0);
	thread0.td_pcb = get_pcb_td(&thread0);
	thread0.td_critnest = 1;

	/*
	 * The console and kdb should be initialized even earlier than here,
	 * but some console drivers don't work until after getmemsize().
	 * Default to late console initialization to support these drivers.
	 * This loses mainly printf()s in getmemsize() and early debugging.
	 */
	late_console = 1;
	TUNABLE_INT_FETCH("debug.late_console", &late_console);
	if (!late_console) {
		cninit();
		amd64_kdb_init();
	}

	getmemsize(kmdp, physfree);
	init_param2(physmem);

	/* now running on new page tables, configured,and u/iom is accessible */

#ifdef DEV_PCI
	/* This call might adjust phys_avail[]. */
	pci_early_quirks();
#endif

	if (late_console)
		cninit();

#ifdef DEV_ISA
#ifdef DEV_ATPIC
	elcr_probe();
	atpic_startup();
#else
	/* Reset and mask the atpics and leave them shut down. */
	atpic_reset();

	/*
	 * Point the ICU spurious interrupt vectors at the APIC spurious
	 * interrupt handler.
	 */
	setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0);
#endif
#else
#error "have you forgotten the isa device?";
#endif

	if (late_console)
		amd64_kdb_init();

	msgbufinit(msgbufp, msgbufsize);
	fpuinit();

	/*
	 * Set up thread0 pcb save area after fpuinit calculated fpu save
	 * area size.  Zero out the extended state header in fpu save
	 * area.
	 */
	thread0.td_pcb->pcb_save = get_pcb_user_save_td(&thread0);
	bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size);
	if (use_xsave) {
		xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) +
		    1);
		xhdr->xstate_bv = xsave_mask;
	}
	/* make an initial tss so cpu can get interrupt stack on syscall! */
	rsp0 = thread0.td_md.md_stack_base;
	/* Ensure the stack is aligned to 16 bytes */
	rsp0 &= ~0xFul;
	common_tss[0].tss_rsp0 = rsp0;
	amd64_bsp_pcpu_init2(rsp0);

	/* transfer to user mode */

	_ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
	_udatasel = GSEL(GUDATA_SEL, SEL_UPL);
	_ucode32sel = GSEL(GUCODE32_SEL, SEL_UPL);
	_ufssel = GSEL(GUFS32_SEL, SEL_UPL);
	_ugssel = GSEL(GUGS32_SEL, SEL_UPL);

	load_ds(_udatasel);
	load_es(_udatasel);
	load_fs(_ufssel);

	/* setup proc 0's pcb */
	thread0.td_pcb->pcb_flags = 0;
	thread0.td_frame = &proc0_tf;

	env = kern_getenv("kernelname");
	if (env != NULL)
		strlcpy(kernelname, env, sizeof(kernelname));

#ifdef FDT
	x86_init_fdt();
#endif
	thread0.td_critnest = 0;

	TSEXIT();

	/* Location of kernel stack for locore */
	return (thread0.td_md.md_stack_base);
}
 1931 
/*
 * MD per-CPU structure initialization hook.  Mark the ACPI CPU id as
 * unknown (all-ones) until platform enumeration fills it in.
 */
void
cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
{

	pcpu->pc_acpi_id = 0xffffffff;
}
 1938 
 1939 static int
 1940 smap_sysctl_handler(SYSCTL_HANDLER_ARGS)
 1941 {
 1942         struct bios_smap *smapbase;
 1943         struct bios_smap_xattr smap;
 1944         caddr_t kmdp;
 1945         uint32_t *smapattr;
 1946         int count, error, i;
 1947 
 1948         /* Retrieve the system memory map from the loader. */
 1949         kmdp = preload_search_by_type("elf kernel");
 1950         if (kmdp == NULL)
 1951                 kmdp = preload_search_by_type("elf64 kernel");
 1952         smapbase = (struct bios_smap *)preload_search_info(kmdp,
 1953             MODINFO_METADATA | MODINFOMD_SMAP);
 1954         if (smapbase == NULL)
 1955                 return (0);
 1956         smapattr = (uint32_t *)preload_search_info(kmdp,
 1957             MODINFO_METADATA | MODINFOMD_SMAP_XATTR);
 1958         count = *((uint32_t *)smapbase - 1) / sizeof(*smapbase);
 1959         error = 0;
 1960         for (i = 0; i < count; i++) {
 1961                 smap.base = smapbase[i].base;
 1962                 smap.length = smapbase[i].length;
 1963                 smap.type = smapbase[i].type;
 1964                 if (smapattr != NULL)
 1965                         smap.xattr = smapattr[i];
 1966                 else
 1967                         smap.xattr = 0;
 1968                 error = SYSCTL_OUT(req, &smap, sizeof(smap));
 1969         }
 1970         return (error);
 1971 }
 1972 SYSCTL_PROC(_machdep, OID_AUTO, smap, CTLTYPE_OPAQUE|CTLFLAG_RD, NULL, 0,
 1973     smap_sysctl_handler, "S,bios_smap_xattr", "Raw BIOS SMAP data");
 1974 
 1975 static int
 1976 efi_map_sysctl_handler(SYSCTL_HANDLER_ARGS)
 1977 {
 1978         struct efi_map_header *efihdr;
 1979         caddr_t kmdp;
 1980         uint32_t efisize;
 1981 
 1982         kmdp = preload_search_by_type("elf kernel");
 1983         if (kmdp == NULL)
 1984                 kmdp = preload_search_by_type("elf64 kernel");
 1985         efihdr = (struct efi_map_header *)preload_search_info(kmdp,
 1986             MODINFO_METADATA | MODINFOMD_EFI_MAP);
 1987         if (efihdr == NULL)
 1988                 return (0);
 1989         efisize = *((uint32_t *)efihdr - 1);
 1990         return (SYSCTL_OUT(req, efihdr, efisize));
 1991 }
 1992 SYSCTL_PROC(_machdep, OID_AUTO, efi_map, CTLTYPE_OPAQUE|CTLFLAG_RD, NULL, 0,
 1993     efi_map_sysctl_handler, "S,efi_map_header", "Raw EFI Memory Map");
 1994 
 1995 void
 1996 spinlock_enter(void)
 1997 {
 1998         struct thread *td;
 1999         register_t flags;
 2000 
 2001         td = curthread;
 2002         if (td->td_md.md_spinlock_count == 0) {
 2003                 flags = intr_disable();
 2004                 td->td_md.md_spinlock_count = 1;
 2005                 td->td_md.md_saved_flags = flags;
 2006                 critical_enter();
 2007         } else
 2008                 td->td_md.md_spinlock_count++;
 2009 }
 2010 
 2011 void
 2012 spinlock_exit(void)
 2013 {
 2014         struct thread *td;
 2015         register_t flags;
 2016 
 2017         td = curthread;
 2018         flags = td->td_md.md_saved_flags;
 2019         td->td_md.md_spinlock_count--;
 2020         if (td->td_md.md_spinlock_count == 0) {
 2021                 critical_exit();
 2022                 intr_restore(flags);
 2023         }
 2024 }
 2025 
 2026 /*
 2027  * Construct a PCB from a trapframe. This is called from kdb_trap() where
 2028  * we want to start a backtrace from the function that caused us to enter
 2029  * the debugger. We have the context in the trapframe, but base the trace
 2030  * on the PCB. The PCB doesn't have to be perfect, as long as it contains
 2031  * enough for a backtrace.
 2032  */
 2033 void
 2034 makectx(struct trapframe *tf, struct pcb *pcb)
 2035 {
 2036 
 2037         pcb->pcb_r12 = tf->tf_r12;
 2038         pcb->pcb_r13 = tf->tf_r13;
 2039         pcb->pcb_r14 = tf->tf_r14;
 2040         pcb->pcb_r15 = tf->tf_r15;
 2041         pcb->pcb_rbp = tf->tf_rbp;
 2042         pcb->pcb_rbx = tf->tf_rbx;
 2043         pcb->pcb_rip = tf->tf_rip;
 2044         pcb->pcb_rsp = tf->tf_rsp;
 2045 }
 2046 
/*
 * Set the instruction pointer of a traced thread.  PCB_FULL_IRET
 * forces the full return-to-usermode path so the modified frame is
 * completely reloaded.
 */
int
ptrace_set_pc(struct thread *td, unsigned long addr)
{

	td->td_frame->tf_rip = addr;
	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
	return (0);
}
 2055 
/*
 * Arm hardware single-stepping for a traced thread by setting the
 * trap flag (PSL_T) in its saved rflags.  TDB_STEP records that the
 * debugger set the flag, so it can be cleared again later.
 */
int
ptrace_single_step(struct thread *td)
{

	PROC_LOCK_ASSERT(td->td_proc, MA_OWNED);
	/* Only mark TDB_STEP when we are the ones setting PSL_T. */
	if ((td->td_frame->tf_rflags & PSL_T) == 0) {
		td->td_frame->tf_rflags |= PSL_T;
		td->td_dbgflags |= TDB_STEP;
	}
	return (0);
}
 2067 
/*
 * Disarm single-stepping for a traced thread: clear the trap flag in
 * the saved rflags and drop the TDB_STEP bookkeeping bit.
 */
int
ptrace_clear_single_step(struct thread *td)
{

	PROC_LOCK_ASSERT(td->td_proc, MA_OWNED);
	td->td_frame->tf_rflags &= ~PSL_T;
	td->td_dbgflags &= ~TDB_STEP;
	return (0);
}
 2077 
/*
 * Fill a struct reg from the thread's current trapframe for
 * ptrace(2)/core dumps; the copying itself is done by
 * fill_frame_regs().
 */
int
fill_regs(struct thread *td, struct reg *regs)
{
	struct trapframe *tp;

	tp = td->td_frame;
	return (fill_frame_regs(tp, regs));
}
 2086 
 2087 int
 2088 fill_frame_regs(struct trapframe *tp, struct reg *regs)
 2089 {
 2090 
 2091         regs->r_r15 = tp->tf_r15;
 2092         regs->r_r14 = tp->tf_r14;
 2093         regs->r_r13 = tp->tf_r13;
 2094         regs->r_r12 = tp->tf_r12;
 2095         regs->r_r11 = tp->tf_r11;
 2096         regs->r_r10 = tp->tf_r10;
 2097         regs->r_r9  = tp->tf_r9;
 2098         regs->r_r8  = tp->tf_r8;
 2099         regs->r_rdi = tp->tf_rdi;
 2100         regs->r_rsi = tp->tf_rsi;
 2101         regs->r_rbp = tp->tf_rbp;
 2102         regs->r_rbx = tp->tf_rbx;
 2103         regs->r_rdx = tp->tf_rdx;
 2104         regs->r_rcx = tp->tf_rcx;
 2105         regs->r_rax = tp->tf_rax;
 2106         regs->r_rip = tp->tf_rip;
 2107         regs->r_cs = tp->tf_cs;
 2108         regs->r_rflags = tp->tf_rflags;
 2109         regs->r_rsp = tp->tf_rsp;
 2110         regs->r_ss = tp->tf_ss;
 2111         if (tp->tf_flags & TF_HASSEGS) {
 2112                 regs->r_ds = tp->tf_ds;
 2113                 regs->r_es = tp->tf_es;
 2114                 regs->r_fs = tp->tf_fs;
 2115                 regs->r_gs = tp->tf_gs;
 2116         } else {
 2117                 regs->r_ds = 0;
 2118                 regs->r_es = 0;
 2119                 regs->r_fs = 0;
 2120                 regs->r_gs = 0;
 2121         }
 2122         regs->r_err = 0;
 2123         regs->r_trapno = 0;
 2124         return (0);
 2125 }
 2126 
/*
 * Install register values from a struct reg into the thread's
 * trapframe (ptrace PT_SETREGS).  rflags and %cs are validated with
 * EFL_SECURE()/CS_SECURE() so a debugger cannot grant the thread
 * unsafe privileges; returns EINVAL on rejection.
 */
int
set_regs(struct thread *td, struct reg *regs)
{
	struct trapframe *tp;
	register_t rflags;

	tp = td->td_frame;
	/* Only the low 32 bits of rflags are architecturally meaningful. */
	rflags = regs->r_rflags & 0xffffffff;
	if (!EFL_SECURE(rflags, tp->tf_rflags) || !CS_SECURE(regs->r_cs))
		return (EINVAL);
	tp->tf_r15 = regs->r_r15;
	tp->tf_r14 = regs->r_r14;
	tp->tf_r13 = regs->r_r13;
	tp->tf_r12 = regs->r_r12;
	tp->tf_r11 = regs->r_r11;
	tp->tf_r10 = regs->r_r10;
	tp->tf_r9  = regs->r_r9;
	tp->tf_r8  = regs->r_r8;
	tp->tf_rdi = regs->r_rdi;
	tp->tf_rsi = regs->r_rsi;
	tp->tf_rbp = regs->r_rbp;
	tp->tf_rbx = regs->r_rbx;
	tp->tf_rdx = regs->r_rdx;
	tp->tf_rcx = regs->r_rcx;
	tp->tf_rax = regs->r_rax;
	tp->tf_rip = regs->r_rip;
	tp->tf_cs = regs->r_cs;
	tp->tf_rflags = rflags;
	tp->tf_rsp = regs->r_rsp;
	tp->tf_ss = regs->r_ss;
	/* Segment register restore is deliberately disabled. */
	if (0) {	/* XXXKIB */
		tp->tf_ds = regs->r_ds;
		tp->tf_es = regs->r_es;
		tp->tf_fs = regs->r_fs;
		tp->tf_gs = regs->r_gs;
		tp->tf_flags = TF_HASSEGS;
	}
	/* Force the full iret path so the modified frame takes effect. */
	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
	return (0);
}
 2167 
 2168 /* XXX check all this stuff! */
 2169 /* externalize from sv_xmm */
 2170 static void
 2171 fill_fpregs_xmm(struct savefpu *sv_xmm, struct fpreg *fpregs)
 2172 {
 2173         struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env;
 2174         struct envxmm *penv_xmm = &sv_xmm->sv_env;
 2175         int i;
 2176 
 2177         /* pcb -> fpregs */
 2178         bzero(fpregs, sizeof(*fpregs));
 2179 
 2180         /* FPU control/status */
 2181         penv_fpreg->en_cw = penv_xmm->en_cw;
 2182         penv_fpreg->en_sw = penv_xmm->en_sw;
 2183         penv_fpreg->en_tw = penv_xmm->en_tw;
 2184         penv_fpreg->en_opcode = penv_xmm->en_opcode;
 2185         penv_fpreg->en_rip = penv_xmm->en_rip;
 2186         penv_fpreg->en_rdp = penv_xmm->en_rdp;
 2187         penv_fpreg->en_mxcsr = penv_xmm->en_mxcsr;
 2188         penv_fpreg->en_mxcsr_mask = penv_xmm->en_mxcsr_mask;
 2189 
 2190         /* FPU registers */
 2191         for (i = 0; i < 8; ++i)
 2192                 bcopy(sv_xmm->sv_fp[i].fp_acc.fp_bytes, fpregs->fpr_acc[i], 10);
 2193 
 2194         /* SSE registers */
 2195         for (i = 0; i < 16; ++i)
 2196                 bcopy(sv_xmm->sv_xmm[i].xmm_bytes, fpregs->fpr_xacc[i], 16);
 2197 }
 2198 
 2199 /* internalize from fpregs into sv_xmm */
 2200 static void
 2201 set_fpregs_xmm(struct fpreg *fpregs, struct savefpu *sv_xmm)
 2202 {
 2203         struct envxmm *penv_xmm = &sv_xmm->sv_env;
 2204         struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env;
 2205         int i;
 2206 
 2207         /* fpregs -> pcb */
 2208         /* FPU control/status */
 2209         penv_xmm->en_cw = penv_fpreg->en_cw;
 2210         penv_xmm->en_sw = penv_fpreg->en_sw;
 2211         penv_xmm->en_tw = penv_fpreg->en_tw;
 2212         penv_xmm->en_opcode = penv_fpreg->en_opcode;
 2213         penv_xmm->en_rip = penv_fpreg->en_rip;
 2214         penv_xmm->en_rdp = penv_fpreg->en_rdp;
 2215         penv_xmm->en_mxcsr = penv_fpreg->en_mxcsr;
 2216         penv_xmm->en_mxcsr_mask = penv_fpreg->en_mxcsr_mask & cpu_mxcsr_mask;
 2217 
 2218         /* FPU registers */
 2219         for (i = 0; i < 8; ++i)
 2220                 bcopy(fpregs->fpr_acc[i], sv_xmm->sv_fp[i].fp_acc.fp_bytes, 10);
 2221 
 2222         /* SSE registers */
 2223         for (i = 0; i < 16; ++i)
 2224                 bcopy(fpregs->fpr_xacc[i], sv_xmm->sv_xmm[i].xmm_bytes, 16);
 2225 }
 2226 
/* externalize from td->pcb */
int
fill_fpregs(struct thread *td, struct fpreg *fpregs)
{

	/* Safe only for curthread or a thread that cannot be running. */
	KASSERT(td == curthread || TD_IS_SUSPENDED(td) ||
	    P_SHOULDSTOP(td->td_proc),
	    ("not suspended thread %p", td));
	/* Flush live FPU contents into the PCB save area first. */
	fpugetregs(td);
	fill_fpregs_xmm(get_pcb_user_save_td(td), fpregs);
	return (0);
}
 2239 
/* internalize to td->pcb */
int
set_fpregs(struct thread *td, struct fpreg *fpregs)
{

	/* Block preemption while the save area is being rewritten. */
	critical_enter();
	set_fpregs_xmm(fpregs, get_pcb_user_save_td(td));
	fpuuserinited(td);
	critical_exit();
	return (0);
}
 2251 
/*
 * Get machine context.
 *
 * Snapshot the thread's user register state (and FPU state via
 * get_fpcontext()) into an mcontext_t for sigreturn/getcontext.
 * With GET_MC_CLEAR_RET the syscall return registers (rax/rdx) and
 * the carry flag are cleared so the resumed context sees success.
 */
int
get_mcontext(struct thread *td, mcontext_t *mcp, int flags)
{
	struct pcb *pcb;
	struct trapframe *tp;

	pcb = td->td_pcb;
	tp = td->td_frame;
	/* sigonstack() consults the signal stack state; needs proc lock. */
	PROC_LOCK(curthread->td_proc);
	mcp->mc_onstack = sigonstack(tp->tf_rsp);
	PROC_UNLOCK(curthread->td_proc);
	mcp->mc_r15 = tp->tf_r15;
	mcp->mc_r14 = tp->tf_r14;
	mcp->mc_r13 = tp->tf_r13;
	mcp->mc_r12 = tp->tf_r12;
	mcp->mc_r11 = tp->tf_r11;
	mcp->mc_r10 = tp->tf_r10;
	mcp->mc_r9  = tp->tf_r9;
	mcp->mc_r8  = tp->tf_r8;
	mcp->mc_rdi = tp->tf_rdi;
	mcp->mc_rsi = tp->tf_rsi;
	mcp->mc_rbp = tp->tf_rbp;
	mcp->mc_rbx = tp->tf_rbx;
	mcp->mc_rcx = tp->tf_rcx;
	mcp->mc_rflags = tp->tf_rflags;
	if (flags & GET_MC_CLEAR_RET) {
		/* Present a "successful syscall" state to the context. */
		mcp->mc_rax = 0;
		mcp->mc_rdx = 0;
		mcp->mc_rflags &= ~PSL_C;
	} else {
		mcp->mc_rax = tp->tf_rax;
		mcp->mc_rdx = tp->tf_rdx;
	}
	mcp->mc_rip = tp->tf_rip;
	mcp->mc_cs = tp->tf_cs;
	mcp->mc_rsp = tp->tf_rsp;
	mcp->mc_ss = tp->tf_ss;
	mcp->mc_ds = tp->tf_ds;
	mcp->mc_es = tp->tf_es;
	mcp->mc_fs = tp->tf_fs;
	mcp->mc_gs = tp->tf_gs;
	mcp->mc_flags = tp->tf_flags;
	mcp->mc_len = sizeof(*mcp);
	/* No extended-state buffer here; only the legacy area is saved. */
	get_fpcontext(td, mcp, NULL, 0);
	update_pcb_bases(pcb);
	mcp->mc_fsbase = pcb->pcb_fsbase;
	mcp->mc_gsbase = pcb->pcb_gsbase;
	mcp->mc_xfpustate = 0;
	mcp->mc_xfpustate_len = 0;
	bzero(mcp->mc_spare, sizeof(mcp->mc_spare));
	return (0);
}
 2307 
/*
 * Set machine context.
 *
 * However, we don't set any but the user modifiable flags, and we won't
 * touch the cs selector.
 *
 * Validates mc_len and mc_flags, copies in any extended FPU state from
 * userspace, installs the FPU context, then loads the register values
 * into the trapframe.  Returns EINVAL for malformed contexts or any
 * error from copyin()/set_fpcontext().
 */
int
set_mcontext(struct thread *td, mcontext_t *mcp)
{
	struct pcb *pcb;
	struct trapframe *tp;
	char *xfpustate;
	long rflags;
	int ret;

	pcb = td->td_pcb;
	tp = td->td_frame;
	if (mcp->mc_len != sizeof(*mcp) ||
	    (mcp->mc_flags & ~_MC_FLAG_MASK) != 0)
		return (EINVAL);
	/* Keep privileged rflags bits from the current frame. */
	rflags = (mcp->mc_rflags & PSL_USERCHANGE) |
	    (tp->tf_rflags & ~PSL_USERCHANGE);
	if (mcp->mc_flags & _MC_HASFPXSTATE) {
		/* Bound the alloca below by the CPU's max extended size. */
		if (mcp->mc_xfpustate_len > cpu_max_ext_state_size -
		    sizeof(struct savefpu))
			return (EINVAL);
		xfpustate = __builtin_alloca(mcp->mc_xfpustate_len);
		ret = copyin((void *)mcp->mc_xfpustate, xfpustate,
		    mcp->mc_xfpustate_len);
		if (ret != 0)
			return (ret);
	} else
		xfpustate = NULL;
	/* Install FPU state before touching the trapframe. */
	ret = set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len);
	if (ret != 0)
		return (ret);
	tp->tf_r15 = mcp->mc_r15;
	tp->tf_r14 = mcp->mc_r14;
	tp->tf_r13 = mcp->mc_r13;
	tp->tf_r12 = mcp->mc_r12;
	tp->tf_r11 = mcp->mc_r11;
	tp->tf_r10 = mcp->mc_r10;
	tp->tf_r9  = mcp->mc_r9;
	tp->tf_r8  = mcp->mc_r8;
	tp->tf_rdi = mcp->mc_rdi;
	tp->tf_rsi = mcp->mc_rsi;
	tp->tf_rbp = mcp->mc_rbp;
	tp->tf_rbx = mcp->mc_rbx;
	tp->tf_rdx = mcp->mc_rdx;
	tp->tf_rcx = mcp->mc_rcx;
	tp->tf_rax = mcp->mc_rax;
	tp->tf_rip = mcp->mc_rip;
	tp->tf_rflags = rflags;
	tp->tf_rsp = mcp->mc_rsp;
	tp->tf_ss = mcp->mc_ss;
	tp->tf_flags = mcp->mc_flags;
	if (tp->tf_flags & TF_HASSEGS) {
		tp->tf_ds = mcp->mc_ds;
		tp->tf_es = mcp->mc_es;
		tp->tf_fs = mcp->mc_fs;
		tp->tf_gs = mcp->mc_gs;
	}
	set_pcb_flags(pcb, PCB_FULL_IRET);
	if (mcp->mc_flags & _MC_HASBASES) {
		pcb->pcb_fsbase = mcp->mc_fsbase;
		pcb->pcb_gsbase = mcp->mc_gsbase;
	}
	return (0);
}
 2377 
 2378 static void
 2379 get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave,
 2380     size_t xfpusave_len)
 2381 {
 2382         size_t max_len, len;
 2383 
 2384         mcp->mc_ownedfp = fpugetregs(td);
 2385         bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0],
 2386             sizeof(mcp->mc_fpstate));
 2387         mcp->mc_fpformat = fpuformat();
 2388         if (!use_xsave || xfpusave_len == 0)
 2389                 return;
 2390         max_len = cpu_max_ext_state_size - sizeof(struct savefpu);
 2391         len = xfpusave_len;
 2392         if (len > max_len) {
 2393                 len = max_len;
 2394                 bzero(xfpusave + max_len, len - max_len);
 2395         }
 2396         mcp->mc_flags |= _MC_HASFPXSTATE;
 2397         mcp->mc_xfpustate_len = len;
 2398         bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len);
 2399 }
 2400 
 2401 static int
 2402 set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate,
 2403     size_t xfpustate_len)
 2404 {
 2405         int error;
 2406 
 2407         if (mcp->mc_fpformat == _MC_FPFMT_NODEV)
 2408                 return (0);
 2409         else if (mcp->mc_fpformat != _MC_FPFMT_XMM)
 2410                 return (EINVAL);
 2411         else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) {
 2412                 /* We don't care what state is left in the FPU or PCB. */
 2413                 fpstate_drop(td);
 2414                 error = 0;
 2415         } else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU ||
 2416             mcp->mc_ownedfp == _MC_FPOWNED_PCB) {
 2417                 error = fpusetregs(td, (struct savefpu *)&mcp->mc_fpstate,
 2418                     xfpustate, xfpustate_len);
 2419         } else
 2420                 return (EINVAL);
 2421         return (error);
 2422 }
 2423 
/*
 * Drop the thread's user FPU state so it is reinitialized on next use.
 * NOTE(review): the flags are cleared on curthread's PCB while the
 * KASSERT checks td's -- verify td == curthread at all call sites.
 */
void
fpstate_drop(struct thread *td)
{

	KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu"));
	critical_enter();
	if (PCPU_GET(fpcurthread) == td)
		fpudrop();
	/*
	 * XXX force a full drop of the fpu.  The above only drops it if we
	 * owned it.
	 *
	 * XXX I don't much like fpugetuserregs()'s semantics of doing a full
	 * drop.  Dropping only to the pcb matches fnsave's behaviour.
	 * We only need to drop to !PCB_INITDONE in sendsig().  But
	 * sendsig() is the only caller of fpugetuserregs()... perhaps we just
	 * have too many layers.
	 */
	clear_pcb_flags(curthread->td_pcb,
	    PCB_FPUINITDONE | PCB_USERFPUINITDONE);
	critical_exit();
}
 2446 
 2447 int
 2448 fill_dbregs(struct thread *td, struct dbreg *dbregs)
 2449 {
 2450         struct pcb *pcb;
 2451 
 2452         if (td == NULL) {
 2453                 dbregs->dr[0] = rdr0();
 2454                 dbregs->dr[1] = rdr1();
 2455                 dbregs->dr[2] = rdr2();
 2456                 dbregs->dr[3] = rdr3();
 2457                 dbregs->dr[6] = rdr6();
 2458                 dbregs->dr[7] = rdr7();
 2459         } else {
 2460                 pcb = td->td_pcb;
 2461                 dbregs->dr[0] = pcb->pcb_dr0;
 2462                 dbregs->dr[1] = pcb->pcb_dr1;
 2463                 dbregs->dr[2] = pcb->pcb_dr2;
 2464                 dbregs->dr[3] = pcb->pcb_dr3;
 2465                 dbregs->dr[6] = pcb->pcb_dr6;
 2466                 dbregs->dr[7] = pcb->pcb_dr7;
 2467         }
 2468         dbregs->dr[4] = 0;
 2469         dbregs->dr[5] = 0;
 2470         dbregs->dr[8] = 0;
 2471         dbregs->dr[9] = 0;
 2472         dbregs->dr[10] = 0;
 2473         dbregs->dr[11] = 0;
 2474         dbregs->dr[12] = 0;
 2475         dbregs->dr[13] = 0;
 2476         dbregs->dr[14] = 0;
 2477         dbregs->dr[15] = 0;
 2478         return (0);
 2479 }
 2480 
 2481 int
 2482 set_dbregs(struct thread *td, struct dbreg *dbregs)
 2483 {
 2484         struct pcb *pcb;
 2485         int i;
 2486 
 2487         if (td == NULL) {
 2488                 load_dr0(dbregs->dr[0]);
 2489                 load_dr1(dbregs->dr[1]);
 2490                 load_dr2(dbregs->dr[2]);
 2491                 load_dr3(dbregs->dr[3]);
 2492                 load_dr6(dbregs->dr[6]);
 2493                 load_dr7(dbregs->dr[7]);
 2494         } else {
 2495                 /*
 2496                  * Don't let an illegal value for dr7 get set.  Specifically,
 2497                  * check for undefined settings.  Setting these bit patterns
 2498                  * result in undefined behaviour and can lead to an unexpected
 2499                  * TRCTRAP or a general protection fault right here.
 2500                  * Upper bits of dr6 and dr7 must not be set
 2501                  */
 2502                 for (i = 0; i < 4; i++) {
 2503                         if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02)
 2504                                 return (EINVAL);
 2505                         if (td->td_frame->tf_cs == _ucode32sel &&
 2506                             DBREG_DR7_LEN(dbregs->dr[7], i) == DBREG_DR7_LEN_8)
 2507                                 return (EINVAL);
 2508                 }
 2509                 if ((dbregs->dr[6] & 0xffffffff00000000ul) != 0 ||
 2510                     (dbregs->dr[7] & 0xffffffff00000000ul) != 0)
 2511                         return (EINVAL);
 2512 
 2513                 pcb = td->td_pcb;
 2514 
 2515                 /*
 2516                  * Don't let a process set a breakpoint that is not within the
 2517                  * process's address space.  If a process could do this, it
 2518                  * could halt the system by setting a breakpoint in the kernel
 2519                  * (if ddb was enabled).  Thus, we need to check to make sure
 2520                  * that no breakpoints are being enabled for addresses outside
 2521                  * process's address space.
 2522                  *
 2523                  * XXX - what about when the watched area of the user's
 2524                  * address space is written into from within the kernel
 2525                  * ... wouldn't that still cause a breakpoint to be generated
 2526                  * from within kernel mode?
 2527                  */
 2528 
 2529                 if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) {
 2530                         /* dr0 is enabled */
 2531                         if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS)
 2532                                 return (EINVAL);
 2533                 }
 2534                 if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) {
 2535                         /* dr1 is enabled */
 2536                         if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS)
 2537                                 return (EINVAL);
 2538                 }
 2539                 if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) {
 2540                         /* dr2 is enabled */
 2541                         if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS)
 2542                                 return (EINVAL);
 2543                 }
 2544                 if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) {
 2545                         /* dr3 is enabled */
 2546                         if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS)
 2547                                 return (EINVAL);
 2548                 }
 2549 
 2550                 pcb->pcb_dr0 = dbregs->dr[0];
 2551                 pcb->pcb_dr1 = dbregs->dr[1];
 2552                 pcb->pcb_dr2 = dbregs->dr[2];
 2553                 pcb->pcb_dr3 = dbregs->dr[3];
 2554                 pcb->pcb_dr6 = dbregs->dr[6];
 2555                 pcb->pcb_dr7 = dbregs->dr[7];
 2556 
 2557                 set_pcb_flags(pcb, PCB_DBREGS);
 2558         }
 2559 
 2560         return (0);
 2561 }
 2562 
/*
 * Clear all hardware debug registers.  dr7 must be cleared first so that
 * every breakpoint is disabled before its address register is zeroed.
 */
void
reset_dbregs(void)
{

	load_dr7(0);	/* Turn off the control bits first */
	load_dr0(0);
	load_dr1(0);
	load_dr2(0);
	load_dr3(0);
	load_dr6(0);
}
 2574 
 2575 /*
 2576  * Return > 0 if a hardware breakpoint has been hit, and the
 2577  * breakpoint was in user space.  Return 0, otherwise.
 2578  */
 2579 int
 2580 user_dbreg_trap(register_t dr6)
 2581 {
 2582         u_int64_t dr7;
 2583         u_int64_t bp;       /* breakpoint bits extracted from dr6 */
 2584         int nbp;            /* number of breakpoints that triggered */
 2585         caddr_t addr[4];    /* breakpoint addresses */
 2586         int i;
 2587 
 2588         bp = dr6 & DBREG_DR6_BMASK;
 2589         if (bp == 0) {
 2590                 /*
 2591                  * None of the breakpoint bits are set meaning this
 2592                  * trap was not caused by any of the debug registers
 2593                  */
 2594                 return 0;
 2595         }
 2596 
 2597         dr7 = rdr7();
 2598         if ((dr7 & 0x000000ff) == 0) {
 2599                 /*
 2600                  * all GE and LE bits in the dr7 register are zero,
 2601                  * thus the trap couldn't have been caused by the
 2602                  * hardware debug registers
 2603                  */
 2604                 return 0;
 2605         }
 2606 
 2607         nbp = 0;
 2608 
 2609         /*
 2610          * at least one of the breakpoints were hit, check to see
 2611          * which ones and if any of them are user space addresses
 2612          */
 2613 
 2614         if (bp & 0x01) {
 2615                 addr[nbp++] = (caddr_t)rdr0();
 2616         }
 2617         if (bp & 0x02) {
 2618                 addr[nbp++] = (caddr_t)rdr1();
 2619         }
 2620         if (bp & 0x04) {
 2621                 addr[nbp++] = (caddr_t)rdr2();
 2622         }
 2623         if (bp & 0x08) {
 2624                 addr[nbp++] = (caddr_t)rdr3();
 2625         }
 2626 
 2627         for (i = 0; i < nbp; i++) {
 2628                 if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) {
 2629                         /*
 2630                          * addr[i] is in user space
 2631                          */
 2632                         return nbp;
 2633                 }
 2634         }
 2635 
 2636         /*
 2637          * None of the breakpoints are in user space.
 2638          */
 2639         return 0;
 2640 }
 2641 
 2642 /*
 2643  * The pcb_flags is only modified by current thread, or by other threads
 2644  * when current thread is stopped.  However, current thread may change it
 2645  * from the interrupt context in cpu_switch(), or in the trap handler.
 2646  * When we read-modify-write pcb_flags from C sources, compiler may generate
 2647  * code that is not atomic regarding the interrupt handler.  If a trap or
 2648  * interrupt happens and any flag is modified from the handler, it can be
 2649  * clobbered with the cached value later.  Therefore, we implement setting
 2650  * and clearing flags with single-instruction functions, which do not race
 2651  * with possible modification of the flags from the trap or interrupt context,
 2652  * because traps and interrupts are executed only on instruction boundary.
 2653  */
/*
 * OR the given bits into pcb->pcb_flags with a single "orl" instruction
 * so the read-modify-write cannot be torn by a trap or interrupt handler
 * that also updates the flags (see the comment above).
 */
void
set_pcb_flags_raw(struct pcb *pcb, const u_int flags)
{

	/*
	 * "=m"/"m" tie input and output to the same memory operand;
	 * "ir" allows the mask as an immediate or register.  "cc" and
	 * "memory" keep the compiler from caching flag values across
	 * the update.
	 */
	__asm __volatile("orl %1,%0"
	    : "=m" (pcb->pcb_flags) : "ir" (flags), "m" (pcb->pcb_flags)
	    : "cc", "memory");

}
 2663 
/*
 * The support for RDFSBASE, WRFSBASE and similar instructions for %gs
 * base requires that kernel saves MSR_FSBASE and MSR_{K,}GSBASE into
 * pcb if user space modified the bases.  We must save on the context
 * switch or if the return to usermode happens through the doreti.
 *
 * Tracking of both events is performed by the pcb flag PCB_FULL_IRET,
 * which have a consequence that the base MSRs must be saved each time
 * the PCB_FULL_IRET flag is set.  We disable interrupts to sync with
 * context switches.
 */
static void
set_pcb_flags_fsgsbase(struct pcb *pcb, const u_int flags)
{
	register_t r;

	/*
	 * The slow path is only needed when PCB_FULL_IRET is being
	 * newly set on the currently-running pcb: the bases must be
	 * snapshotted before the flag is published.
	 */
	if (curpcb == pcb &&
	    (flags & PCB_FULL_IRET) != 0 &&
	    (pcb->pcb_flags & PCB_FULL_IRET) == 0) {
		r = intr_disable();
		/* Re-check: the flag may have been set before intr_disable(). */
		if ((pcb->pcb_flags & PCB_FULL_IRET) == 0) {
			/* Save bases only if the user selectors are loaded. */
			if (rfs() == _ufssel)
				pcb->pcb_fsbase = rdfsbase();
			if (rgs() == _ugssel)
				pcb->pcb_gsbase = rdmsr(MSR_KGSBASE);
		}
		set_pcb_flags_raw(pcb, flags);
		intr_restore(r);
	} else {
		set_pcb_flags_raw(pcb, flags);
	}
}
 2696 
/*
 * Resolve set_pcb_flags once at boot via ifunc: CPUs advertising
 * FSGSBASE need the variant that snapshots the %fs/%gs base MSRs,
 * everything else uses the raw single-instruction update.
 */
DEFINE_IFUNC(, void, set_pcb_flags, (struct pcb *, const u_int), static)
{

	return ((cpu_stdext_feature & CPUID_STDEXT_FSGSBASE) != 0 ?
	    set_pcb_flags_fsgsbase : set_pcb_flags_raw);
}
 2703 
/*
 * Clear the given bits in pcb->pcb_flags.  A single "andl" with the
 * complemented mask keeps the read-modify-write atomic with respect to
 * traps and interrupts; see the comment above set_pcb_flags_raw().
 */
void
clear_pcb_flags(struct pcb *pcb, const u_int flags)
{

	__asm __volatile("andl %1,%0"
	    : "=m" (pcb->pcb_flags) : "ir" (~flags), "m" (pcb->pcb_flags)
	    : "cc", "memory");
}
 2712 
 2713 #ifdef KDB
 2714 
 2715 /*
 2716  * Provide inb() and outb() as functions.  They are normally only available as
 2717  * inline functions, thus cannot be called from the debugger.
 2718  */
 2719 
 2720 /* silence compiler warnings */
 2721 u_char inb_(u_short);
 2722 void outb_(u_short, u_char);
 2723 
 2724 u_char
 2725 inb_(u_short port)
 2726 {
 2727         return inb(port);
 2728 }
 2729 
 2730 void
 2731 outb_(u_short port, u_char data)
 2732 {
 2733         outb(port, data);
 2734 }
 2735 
 2736 #endif /* KDB */
 2737 
 2738 #undef memset
 2739 #undef memmove
 2740 #undef memcpy
 2741 
 2742 void    *memset_std(void *buf, int c, size_t len);
 2743 void    *memset_erms(void *buf, int c, size_t len);
 2744 DEFINE_IFUNC(, void *, memset, (void *, int, size_t), static)
 2745 {
 2746 
 2747         return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ?
 2748             memset_erms : memset_std);
 2749 }
 2750 
 2751 void    *memmove_std(void * _Nonnull dst, const void * _Nonnull src,
 2752             size_t len);
 2753 void    *memmove_erms(void * _Nonnull dst, const void * _Nonnull src,
 2754             size_t len);
 2755 DEFINE_IFUNC(, void *, memmove, (void * _Nonnull, const void * _Nonnull,
 2756     size_t), static)
 2757 {
 2758 
 2759         return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ?
 2760             memmove_erms : memmove_std);
 2761 }
 2762 
 2763 void    *memcpy_std(void * _Nonnull dst, const void * _Nonnull src,
 2764             size_t len);
 2765 void    *memcpy_erms(void * _Nonnull dst, const void * _Nonnull src,
 2766             size_t len);
 2767 DEFINE_IFUNC(, void *, memcpy, (void * _Nonnull, const void * _Nonnull,size_t),
 2768     static)
 2769 {
 2770 
 2771         return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ?
 2772             memcpy_erms : memcpy_std);
 2773 }
 2774 
 2775 void    pagezero_std(void *addr);
 2776 void    pagezero_erms(void *addr);
 2777 DEFINE_IFUNC(, void , pagezero, (void *), static)
 2778 {
 2779 
 2780         return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ?
 2781             pagezero_erms : pagezero_std);
 2782 }

Cache object: b25f8d4c88c0f10f0678f2acdec90236


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.