The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/amd64/amd64/machdep.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-4-Clause
    3  *
    4  * Copyright (c) 2003 Peter Wemm.
    5  * Copyright (c) 1992 Terrence R. Lambert.
    6  * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
    7  * All rights reserved.
    8  *
    9  * This code is derived from software contributed to Berkeley by
   10  * William Jolitz.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice, this list of conditions and the following disclaimer.
   17  * 2. Redistributions in binary form must reproduce the above copyright
   18  *    notice, this list of conditions and the following disclaimer in the
   19  *    documentation and/or other materials provided with the distribution.
   20  * 3. All advertising materials mentioning features or use of this software
   21  *    must display the following acknowledgement:
   22  *      This product includes software developed by the University of
   23  *      California, Berkeley and its contributors.
   24  * 4. Neither the name of the University nor the names of its contributors
   25  *    may be used to endorse or promote products derived from this software
   26  *    without specific prior written permission.
   27  *
   28  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   38  * SUCH DAMAGE.
   39  *
   40  *      from: @(#)machdep.c     7.4 (Berkeley) 6/3/91
   41  */
   42 
   43 #include <sys/cdefs.h>
   44 __FBSDID("$FreeBSD: stable/12/sys/amd64/amd64/machdep.c 341490 2018-12-04 19:06:11Z markj $");
   45 
   46 #include "opt_atpic.h"
   47 #include "opt_cpu.h"
   48 #include "opt_ddb.h"
   49 #include "opt_inet.h"
   50 #include "opt_isa.h"
   51 #include "opt_kstack_pages.h"
   52 #include "opt_maxmem.h"
   53 #include "opt_mp_watchdog.h"
   54 #include "opt_pci.h"
   55 #include "opt_platform.h"
   56 #include "opt_sched.h"
   57 
   58 #include <sys/param.h>
   59 #include <sys/proc.h>
   60 #include <sys/systm.h>
   61 #include <sys/bio.h>
   62 #include <sys/buf.h>
   63 #include <sys/bus.h>
   64 #include <sys/callout.h>
   65 #include <sys/cons.h>
   66 #include <sys/cpu.h>
   67 #include <sys/efi.h>
   68 #include <sys/eventhandler.h>
   69 #include <sys/exec.h>
   70 #include <sys/imgact.h>
   71 #include <sys/kdb.h>
   72 #include <sys/kernel.h>
   73 #include <sys/ktr.h>
   74 #include <sys/linker.h>
   75 #include <sys/lock.h>
   76 #include <sys/malloc.h>
   77 #include <sys/memrange.h>
   78 #include <sys/msgbuf.h>
   79 #include <sys/mutex.h>
   80 #include <sys/pcpu.h>
   81 #include <sys/ptrace.h>
   82 #include <sys/reboot.h>
   83 #include <sys/rwlock.h>
   84 #include <sys/sched.h>
   85 #include <sys/signalvar.h>
   86 #ifdef SMP
   87 #include <sys/smp.h>
   88 #endif
   89 #include <sys/syscallsubr.h>
   90 #include <sys/sysctl.h>
   91 #include <sys/sysent.h>
   92 #include <sys/sysproto.h>
   93 #include <sys/ucontext.h>
   94 #include <sys/vmmeter.h>
   95 
   96 #include <vm/vm.h>
   97 #include <vm/vm_extern.h>
   98 #include <vm/vm_kern.h>
   99 #include <vm/vm_page.h>
  100 #include <vm/vm_map.h>
  101 #include <vm/vm_object.h>
  102 #include <vm/vm_pager.h>
  103 #include <vm/vm_param.h>
  104 #include <vm/vm_phys.h>
  105 
  106 #ifdef DDB
  107 #ifndef KDB
  108 #error KDB must be enabled in order for DDB to work!
  109 #endif
  110 #include <ddb/ddb.h>
  111 #include <ddb/db_sym.h>
  112 #endif
  113 
  114 #include <net/netisr.h>
  115 
  116 #include <machine/clock.h>
  117 #include <machine/cpu.h>
  118 #include <machine/cputypes.h>
  119 #include <machine/frame.h>
  120 #include <machine/intr_machdep.h>
  121 #include <x86/mca.h>
  122 #include <machine/md_var.h>
  123 #include <machine/metadata.h>
  124 #include <machine/mp_watchdog.h>
  125 #include <machine/pc/bios.h>
  126 #include <machine/pcb.h>
  127 #include <machine/proc.h>
  128 #include <machine/reg.h>
  129 #include <machine/sigframe.h>
  130 #include <machine/specialreg.h>
  131 #include <machine/trap.h>
  132 #include <machine/tss.h>
  133 #include <x86/ucode.h>
  134 #include <x86/ifunc.h>
  135 #ifdef SMP
  136 #include <machine/smp.h>
  137 #endif
  138 #ifdef FDT
  139 #include <x86/fdt.h>
  140 #endif
  141 
  142 #ifdef DEV_ATPIC
  143 #include <x86/isa/icu.h>
  144 #else
  145 #include <x86/apicvar.h>
  146 #endif
  147 
  148 #include <isa/isareg.h>
  149 #include <isa/rtc.h>
  150 #include <x86/init.h>
  151 
/*
 * Sanity check for __curthread(): the fast curthread accessor assumes
 * pc_curthread sits at offset 0 of struct pcpu.
 */
CTASSERT(offsetof(struct pcpu, pc_curthread) == 0);

/*
 * The PTI trampoline stack needs enough space for a hardware trapframe and a
 * couple of scratch registers, as well as the trapframe left behind after an
 * iret fault.
 */
CTASSERT(PC_PTI_STACK_SZ * sizeof(register_t) >= 2 * sizeof(struct pti_frame) -
    offsetof(struct pti_frame, pti_rip));

/* Early machine-dependent startup entry point (defined elsewhere in this file). */
extern u_int64_t hammer_time(u_int64_t, u_int64_t);

/* True iff the %cs selector carries user (ring 3) privilege. */
#define CS_SECURE(cs)           (ISPL(cs) == SEL_UPL)
/* True iff only user-modifiable rflags bits differ between ef and oef. */
#define EFL_SECURE(ef, oef)     ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
  167 
static void cpu_startup(void *);
/* Capture the extended FPU state (beyond struct savefpu) into mcp/xfpusave. */
static void get_fpcontext(struct thread *td, mcontext_t *mcp,
    char *xfpusave, size_t xfpusave_len);
/* Restore FPU state from mcp/xfpustate; nonzero return indicates bad state. */
static int  set_fpcontext(struct thread *td, mcontext_t *mcp,
    char *xfpustate, size_t xfpustate_len);
/* Run cpu_startup() early in boot, first in the SI_SUB_CPU stage. */
SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);

/* Preload data parse function */
static caddr_t native_parse_preload_data(u_int64_t);

/* Native function to fetch and parse the e820 map */
static void native_parse_memmap(caddr_t, vm_paddr_t *, int *);

/*
 * Default init_ops implementation.  Platform code (e.g. Xen) may supply an
 * alternative table; this one uses the native/legacy PC facilities.
 */
struct init_ops init_ops = {
        .parse_preload_data =   native_parse_preload_data,
        .early_clock_source_init =      i8254_init,
        .early_delay =                  i8254_delay,
        .parse_memmap =                 native_parse_memmap,
#ifdef SMP
        .mp_bootaddress =               mp_bootaddress,
        .start_all_aps =                native_start_all_aps,
#endif
#ifdef DEV_PCI
        .msi_init =                     msi_init,
#endif
};
  195 
  196 /*
  197  * Physical address of the EFI System Table. Stashed from the metadata hints
  198  * passed into the kernel and used by the EFI code to call runtime services.
  199  */
  200 vm_paddr_t efi_systbl_phys;
  201 
  202 /* Intel ICH registers */
  203 #define ICH_PMBASE      0x400
  204 #define ICH_SMI_EN      ICH_PMBASE + 0x30
  205 
/* User segment selectors, loaded into trapframes by sendsig()/exec_setregs(). */
int     _udatasel, _ucodesel, _ucode32sel, _ufssel, _ugssel;

int cold = 1;           /* nonzero while booting; cleared once the system is up */

long Maxmem = 0;        /* highest usable physical page number */
long realmem = 0;       /* pages of real memory, set from SMBIOS in cpu_startup() */

/*
 * The number of PHYSMAP entries must be one less than the number of
 * PHYSSEG entries because the PHYSMAP entry that spans the largest
 * physical address that is accessible by ISA DMA is split into two
 * PHYSSEG entries.
 */
#define PHYSMAP_SIZE    (2 * (VM_PHYSSEG_MAX - 1))

/* (start, end) physical address pairs; a 0,0 pair terminates the list. */
vm_paddr_t phys_avail[PHYSMAP_SIZE + 2];
vm_paddr_t dump_avail[PHYSMAP_SIZE + 2];

/* must be 2 less so 0 0 can signal end of chunks */
#define PHYS_AVAIL_ARRAY_END (nitems(phys_avail) - 2)
#define DUMP_AVAIL_ARRAY_END (nitems(dump_avail) - 2)

struct kva_md_info kmi;         /* kernel virtual address submap layout */

static struct trapframe proc0_tf;       /* initial trapframe for proc0 */
struct region_descriptor r_gdt, r_idt;  /* pseudo-descriptors for lgdt/lidt */

struct pcpu __pcpu[MAXCPU];     /* per-CPU data, one slot per possible CPU */

struct mtx icu_lock;            /* interrupt controller lock */

struct mem_range_softc mem_range_softc;

struct mtx dt_lock;     /* lock for GDT and LDT */

/* Hook invoked on resume; presumably installed by vmm.ko — verify at caller. */
void (*vmm_resume_p)(void);
  243 static void
  244 cpu_startup(dummy)
  245         void *dummy;
  246 {
  247         uintmax_t memsize;
  248         char *sysenv;
  249 
  250         /*
  251          * On MacBooks, we need to disallow the legacy USB circuit to
  252          * generate an SMI# because this can cause several problems,
  253          * namely: incorrect CPU frequency detection and failure to
  254          * start the APs.
  255          * We do this by disabling a bit in the SMI_EN (SMI Control and
  256          * Enable register) of the Intel ICH LPC Interface Bridge. 
  257          */
  258         sysenv = kern_getenv("smbios.system.product");
  259         if (sysenv != NULL) {
  260                 if (strncmp(sysenv, "MacBook1,1", 10) == 0 ||
  261                     strncmp(sysenv, "MacBook3,1", 10) == 0 ||
  262                     strncmp(sysenv, "MacBook4,1", 10) == 0 ||
  263                     strncmp(sysenv, "MacBookPro1,1", 13) == 0 ||
  264                     strncmp(sysenv, "MacBookPro1,2", 13) == 0 ||
  265                     strncmp(sysenv, "MacBookPro3,1", 13) == 0 ||
  266                     strncmp(sysenv, "MacBookPro4,1", 13) == 0 ||
  267                     strncmp(sysenv, "Macmini1,1", 10) == 0) {
  268                         if (bootverbose)
  269                                 printf("Disabling LEGACY_USB_EN bit on "
  270                                     "Intel ICH.\n");
  271                         outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8);
  272                 }
  273                 freeenv(sysenv);
  274         }
  275 
  276         /*
  277          * Good {morning,afternoon,evening,night}.
  278          */
  279         startrtclock();
  280         printcpuinfo();
  281 
  282         /*
  283          * Display physical memory if SMBIOS reports reasonable amount.
  284          */
  285         memsize = 0;
  286         sysenv = kern_getenv("smbios.memory.enabled");
  287         if (sysenv != NULL) {
  288                 memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10;
  289                 freeenv(sysenv);
  290         }
  291         if (memsize < ptoa((uintmax_t)vm_free_count()))
  292                 memsize = ptoa((uintmax_t)Maxmem);
  293         printf("real memory  = %ju (%ju MB)\n", memsize, memsize >> 20);
  294         realmem = atop(memsize);
  295 
  296         /*
  297          * Display any holes after the first chunk of extended memory.
  298          */
  299         if (bootverbose) {
  300                 int indx;
  301 
  302                 printf("Physical memory chunk(s):\n");
  303                 for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
  304                         vm_paddr_t size;
  305 
  306                         size = phys_avail[indx + 1] - phys_avail[indx];
  307                         printf(
  308                             "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n",
  309                             (uintmax_t)phys_avail[indx],
  310                             (uintmax_t)phys_avail[indx + 1] - 1,
  311                             (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
  312                 }
  313         }
  314 
  315         vm_ksubmap_init(&kmi);
  316 
  317         printf("avail memory = %ju (%ju MB)\n",
  318             ptoa((uintmax_t)vm_free_count()),
  319             ptoa((uintmax_t)vm_free_count()) / 1048576);
  320 #ifdef DEV_PCI
  321         if (bootverbose && intel_graphics_stolen_base != 0)
  322                 printf("intel stolen mem: base %#jx size %ju MB\n",
  323                     (uintmax_t)intel_graphics_stolen_base,
  324                     (uintmax_t)intel_graphics_stolen_size / 1024 / 1024);
  325 #endif
  326 
  327         /*
  328          * Set up buffers, so they can be used to read disk labels.
  329          */
  330         bufinit();
  331         vm_pager_bufferinit();
  332 
  333         cpu_setregs();
  334 }
  335 
  336 /*
  337  * Send an interrupt to process.
  338  *
  339  * Stack is set up to allow sigcode stored
  340  * at top to call routine, followed by call
  341  * to sigreturn routine below.  After sigreturn
  342  * resets the signal mask, the stack, and the
  343  * frame pointer, it returns to the user
  344  * specified pc, psl.
  345  */
void
sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
        struct sigframe sf, *sfp;
        struct pcb *pcb;
        struct proc *p;
        struct thread *td;
        struct sigacts *psp;
        char *sp;
        struct trapframe *regs;
        char *xfpusave;
        size_t xfpusave_len;
        int sig;
        int oonstack;

        td = curthread;
        pcb = td->td_pcb;
        p = td->td_proc;
        PROC_LOCK_ASSERT(p, MA_OWNED);
        sig = ksi->ksi_signo;
        psp = p->p_sigacts;
        mtx_assert(&psp->ps_mtx, MA_OWNED);
        regs = td->td_frame;
        oonstack = sigonstack(regs->tf_rsp);

        /*
         * Stage any extended (xsave) FPU state beyond the legacy savefpu
         * area in a temporary kernel-stack buffer; it is copied out to the
         * user stack separately below.
         */
        if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) {
                xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu);
                xfpusave = __builtin_alloca(xfpusave_len);
        } else {
                xfpusave_len = 0;
                xfpusave = NULL;
        }

        /* Save user context. */
        bzero(&sf, sizeof(sf));
        sf.sf_uc.uc_sigmask = *mask;
        sf.sf_uc.uc_stack = td->td_sigstk;
        sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
            ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
        sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
        /* The mcontext register block mirrors the trapframe, from mc_rdi on. */
        bcopy(regs, &sf.sf_uc.uc_mcontext.mc_rdi, sizeof(*regs));
        sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
        get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len);
        fpstate_drop(td);
        update_pcb_bases(pcb);
        sf.sf_uc.uc_mcontext.mc_fsbase = pcb->pcb_fsbase;
        sf.sf_uc.uc_mcontext.mc_gsbase = pcb->pcb_gsbase;
        /* Zero spare fields so no kernel stack bytes leak to userland. */
        bzero(sf.sf_uc.uc_mcontext.mc_spare,
            sizeof(sf.sf_uc.uc_mcontext.mc_spare));
        bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__));

        /* Allocate space for the signal handler context. */
        if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
            SIGISMEMBER(psp->ps_sigonstack, sig)) {
                /* Use the alternate signal stack; start at its top. */
                sp = (char *)td->td_sigstk.ss_sp + td->td_sigstk.ss_size;
#if defined(COMPAT_43)
                td->td_sigstk.ss_flags |= SS_ONSTACK;
#endif
        } else
                /* Skip the 128-byte amd64 ABI red zone below %rsp. */
                sp = (char *)regs->tf_rsp - 128;
        if (xfpusave != NULL) {
                /* Reserve and 64-byte-align space for the extended FPU state. */
                sp -= xfpusave_len;
                sp = (char *)((unsigned long)sp & ~0x3Ful);
                sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp;
        }
        sp -= sizeof(struct sigframe);
        /* Align to 16 bytes. */
        sfp = (struct sigframe *)((unsigned long)sp & ~0xFul);

        /* Build the argument list for the signal handler. */
        regs->tf_rdi = sig;                     /* arg 1 in %rdi */
        regs->tf_rdx = (register_t)&sfp->sf_uc; /* arg 3 in %rdx */
        bzero(&sf.sf_si, sizeof(sf.sf_si));
        if (SIGISMEMBER(psp->ps_siginfo, sig)) {
                /* Signal handler installed with SA_SIGINFO. */
                regs->tf_rsi = (register_t)&sfp->sf_si; /* arg 2 in %rsi */
                sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;

                /* Fill in POSIX parts */
                sf.sf_si = ksi->ksi_info;
                sf.sf_si.si_signo = sig; /* maybe a translated signal */
                regs->tf_rcx = (register_t)ksi->ksi_addr; /* arg 4 in %rcx */
        } else {
                /* Old FreeBSD-style arguments. */
                regs->tf_rsi = ksi->ksi_code;   /* arg 2 in %rsi */
                regs->tf_rcx = (register_t)ksi->ksi_addr; /* arg 4 in %rcx */
                sf.sf_ahu.sf_handler = catcher;
        }
        /* Drop the locks around the copyout; they are retaken before return. */
        mtx_unlock(&psp->ps_mtx);
        PROC_UNLOCK(p);

        /*
         * Copy the sigframe out to the user's stack.  If the copy fails the
         * process has an unusable stack and is terminated with SIGILL.
         */
        if (copyout(&sf, sfp, sizeof(*sfp)) != 0 ||
            (xfpusave != NULL && copyout(xfpusave,
            (void *)sf.sf_uc.uc_mcontext.mc_xfpustate, xfpusave_len)
            != 0)) {
#ifdef DEBUG
                printf("process %ld has trashed its stack\n", (long)p->p_pid);
#endif
                PROC_LOCK(p);
                sigexit(td, SIGILL);
        }

        /* Redirect the thread to the handler via the signal trampoline. */
        regs->tf_rsp = (long)sfp;
        regs->tf_rip = p->p_sysent->sv_sigcode_base;
        regs->tf_rflags &= ~(PSL_T | PSL_D);
        regs->tf_cs = _ucodesel;
        regs->tf_ds = _udatasel;
        regs->tf_ss = _udatasel;
        regs->tf_es = _udatasel;
        regs->tf_fs = _ufssel;
        regs->tf_gs = _ugssel;
        regs->tf_flags = TF_HASSEGS;
        PROC_LOCK(p);
        mtx_lock(&psp->ps_mtx);
}
  464 
  465 /*
  466  * System call to cleanup state after a signal
  467  * has been taken.  Reset signal mask and
  468  * stack state from context left by sendsig (above).
  469  * Return to previous pc and psl as specified by
  470  * context left by sendsig. Check carefully to
  471  * make sure that the user has not modified the
  472  * state to gain improper privileges.
  473  *
  474  * MPSAFE
  475  */
  476 int
  477 sys_sigreturn(td, uap)
  478         struct thread *td;
  479         struct sigreturn_args /* {
  480                 const struct __ucontext *sigcntxp;
  481         } */ *uap;
  482 {
  483         ucontext_t uc;
  484         struct pcb *pcb;
  485         struct proc *p;
  486         struct trapframe *regs;
  487         ucontext_t *ucp;
  488         char *xfpustate;
  489         size_t xfpustate_len;
  490         long rflags;
  491         int cs, error, ret;
  492         ksiginfo_t ksi;
  493 
  494         pcb = td->td_pcb;
  495         p = td->td_proc;
  496 
  497         error = copyin(uap->sigcntxp, &uc, sizeof(uc));
  498         if (error != 0) {
  499                 uprintf("pid %d (%s): sigreturn copyin failed\n",
  500                     p->p_pid, td->td_name);
  501                 return (error);
  502         }
  503         ucp = &uc;
  504         if ((ucp->uc_mcontext.mc_flags & ~_MC_FLAG_MASK) != 0) {
  505                 uprintf("pid %d (%s): sigreturn mc_flags %x\n", p->p_pid,
  506                     td->td_name, ucp->uc_mcontext.mc_flags);
  507                 return (EINVAL);
  508         }
  509         regs = td->td_frame;
  510         rflags = ucp->uc_mcontext.mc_rflags;
  511         /*
  512          * Don't allow users to change privileged or reserved flags.
  513          */
  514         if (!EFL_SECURE(rflags, regs->tf_rflags)) {
  515                 uprintf("pid %d (%s): sigreturn rflags = 0x%lx\n", p->p_pid,
  516                     td->td_name, rflags);
  517                 return (EINVAL);
  518         }
  519 
  520         /*
  521          * Don't allow users to load a valid privileged %cs.  Let the
  522          * hardware check for invalid selectors, excess privilege in
  523          * other selectors, invalid %eip's and invalid %esp's.
  524          */
  525         cs = ucp->uc_mcontext.mc_cs;
  526         if (!CS_SECURE(cs)) {
  527                 uprintf("pid %d (%s): sigreturn cs = 0x%x\n", p->p_pid,
  528                     td->td_name, cs);
  529                 ksiginfo_init_trap(&ksi);
  530                 ksi.ksi_signo = SIGBUS;
  531                 ksi.ksi_code = BUS_OBJERR;
  532                 ksi.ksi_trapno = T_PROTFLT;
  533                 ksi.ksi_addr = (void *)regs->tf_rip;
  534                 trapsignal(td, &ksi);
  535                 return (EINVAL);
  536         }
  537 
  538         if ((uc.uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) {
  539                 xfpustate_len = uc.uc_mcontext.mc_xfpustate_len;
  540                 if (xfpustate_len > cpu_max_ext_state_size -
  541                     sizeof(struct savefpu)) {
  542                         uprintf("pid %d (%s): sigreturn xfpusave_len = 0x%zx\n",
  543                             p->p_pid, td->td_name, xfpustate_len);
  544                         return (EINVAL);
  545                 }
  546                 xfpustate = __builtin_alloca(xfpustate_len);
  547                 error = copyin((const void *)uc.uc_mcontext.mc_xfpustate,
  548                     xfpustate, xfpustate_len);
  549                 if (error != 0) {
  550                         uprintf(
  551         "pid %d (%s): sigreturn copying xfpustate failed\n",
  552                             p->p_pid, td->td_name);
  553                         return (error);
  554                 }
  555         } else {
  556                 xfpustate = NULL;
  557                 xfpustate_len = 0;
  558         }
  559         ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate, xfpustate_len);
  560         if (ret != 0) {
  561                 uprintf("pid %d (%s): sigreturn set_fpcontext err %d\n",
  562                     p->p_pid, td->td_name, ret);
  563                 return (ret);
  564         }
  565         bcopy(&ucp->uc_mcontext.mc_rdi, regs, sizeof(*regs));
  566         update_pcb_bases(pcb);
  567         pcb->pcb_fsbase = ucp->uc_mcontext.mc_fsbase;
  568         pcb->pcb_gsbase = ucp->uc_mcontext.mc_gsbase;
  569 
  570 #if defined(COMPAT_43)
  571         if (ucp->uc_mcontext.mc_onstack & 1)
  572                 td->td_sigstk.ss_flags |= SS_ONSTACK;
  573         else
  574                 td->td_sigstk.ss_flags &= ~SS_ONSTACK;
  575 #endif
  576 
  577         kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0);
  578         return (EJUSTRETURN);
  579 }
  580 
  581 #ifdef COMPAT_FREEBSD4
  582 int
  583 freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap)
  584 {
  585  
  586         return sys_sigreturn(td, (struct sigreturn_args *)uap);
  587 }
  588 #endif
  589 
  590 /*
  591  * Reset registers to default values on exec.
  592  */
/*
 * Reset the thread's register state for a freshly exec'd image: discard any
 * per-process LDT, clear the segment bases, build a clean user trapframe
 * pointing at the image entry, and scrub debug registers and FPU state.
 */
void
exec_setregs(struct thread *td, struct image_params *imgp, u_long stack)
{
        struct trapframe *regs;
        struct pcb *pcb;
        register_t saved_rflags;

        regs = td->td_frame;
        pcb = td->td_pcb;

        if (td->td_proc->p_md.md_ldt != NULL)
                user_ldt_free(td);

        update_pcb_bases(pcb);
        pcb->pcb_fsbase = 0;
        pcb->pcb_gsbase = 0;
        clear_pcb_flags(pcb, PCB_32BIT);
        pcb->pcb_initial_fpucw = __INITIAL_FPUCW__;

        /* Preserve only PSL_T so single-stepping survives the exec. */
        saved_rflags = regs->tf_rflags & PSL_T;
        bzero((char *)regs, sizeof(struct trapframe));
        regs->tf_rip = imgp->entry_addr;
        /* 16-byte-align the stack pointer, keeping an 8-byte offset for the ABI. */
        regs->tf_rsp = ((stack - 8) & ~0xFul) + 8;
        regs->tf_rdi = stack;           /* argv */
        regs->tf_rflags = PSL_USER | saved_rflags;
        regs->tf_ss = _udatasel;
        regs->tf_cs = _ucodesel;
        regs->tf_ds = _udatasel;
        regs->tf_es = _udatasel;
        regs->tf_fs = _ufssel;
        regs->tf_gs = _ugssel;
        regs->tf_flags = TF_HASSEGS;

        /*
         * Reset the hardware debug registers if they were in use.
         * They won't have any meaning for the newly exec'd process.
         */
        if (pcb->pcb_flags & PCB_DBREGS) {
                pcb->pcb_dr0 = 0;
                pcb->pcb_dr1 = 0;
                pcb->pcb_dr2 = 0;
                pcb->pcb_dr3 = 0;
                pcb->pcb_dr6 = 0;
                pcb->pcb_dr7 = 0;
                if (pcb == curpcb) {
                        /*
                         * Clear the debug registers on the running
                         * CPU, otherwise they will end up affecting
                         * the next process we switch to.
                         */
                        reset_dbregs();
                }
                clear_pcb_flags(pcb, PCB_DBREGS);
        }

        /*
         * Drop the FP state if we hold it, so that the process gets a
         * clean FP state if it uses the FPU again.
         */
        fpstate_drop(td);
}
  654 
  655 void
  656 cpu_setregs(void)
  657 {
  658         register_t cr0;
  659 
  660         cr0 = rcr0();
  661         /*
  662          * CR0_MP, CR0_NE and CR0_TS are also set by npx_probe() for the
  663          * BSP.  See the comments there about why we set them.
  664          */
  665         cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM;
  666         load_cr0(cr0);
  667 }
  668 
  669 /*
  670  * Initialize amd64 and configure to run kernel
  671  */
  672 
  673 /*
  674  * Initialize segments & interrupt table
  675  */
  676 
struct user_segment_descriptor gdt[NGDT * MAXCPU];/* global descriptor tables */
static struct gate_descriptor idt0[NIDT];
struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */

/*
 * Dedicated one-page exception stacks, named for the double-fault,
 * machine-check, NMI and debug handlers; presumably wired to IST slots by
 * code elsewhere in this file — verify against the TSS setup.
 */
static char dblfault_stack[PAGE_SIZE] __aligned(16);
static char mce0_stack[PAGE_SIZE] __aligned(16);
static char nmi0_stack[PAGE_SIZE] __aligned(16);
static char dbg0_stack[PAGE_SIZE] __aligned(16);
CTASSERT(sizeof(struct nmi_pcpu) == 16);

struct amd64tss common_tss[MAXCPU];     /* one TSS per possible CPU */
  688 
  689 /*
  690  * Software prototypes -- in more palatable form.
  691  *
  692  * Keep GUFS32, GUGS32, GUCODE32 and GUDATA at the same
  693  * slots as corresponding segments for i386 kernel.
  694  */
  695 struct soft_segment_descriptor gdt_segs[] = {
  696 /* GNULL_SEL    0 Null Descriptor */
  697 {       .ssd_base = 0x0,
  698         .ssd_limit = 0x0,
  699         .ssd_type = 0,
  700         .ssd_dpl = 0,
  701         .ssd_p = 0,
  702         .ssd_long = 0,
  703         .ssd_def32 = 0,
  704         .ssd_gran = 0           },
  705 /* GNULL2_SEL   1 Null Descriptor */
  706 {       .ssd_base = 0x0,
  707         .ssd_limit = 0x0,
  708         .ssd_type = 0,
  709         .ssd_dpl = 0,
  710         .ssd_p = 0,
  711         .ssd_long = 0,
  712         .ssd_def32 = 0,
  713         .ssd_gran = 0           },
  714 /* GUFS32_SEL   2 32 bit %gs Descriptor for user */
  715 {       .ssd_base = 0x0,
  716         .ssd_limit = 0xfffff,
  717         .ssd_type = SDT_MEMRWA,
  718         .ssd_dpl = SEL_UPL,
  719         .ssd_p = 1,
  720         .ssd_long = 0,
  721         .ssd_def32 = 1,
  722         .ssd_gran = 1           },
  723 /* GUGS32_SEL   3 32 bit %fs Descriptor for user */
  724 {       .ssd_base = 0x0,
  725         .ssd_limit = 0xfffff,
  726         .ssd_type = SDT_MEMRWA,
  727         .ssd_dpl = SEL_UPL,
  728         .ssd_p = 1,
  729         .ssd_long = 0,
  730         .ssd_def32 = 1,
  731         .ssd_gran = 1           },
  732 /* GCODE_SEL    4 Code Descriptor for kernel */
  733 {       .ssd_base = 0x0,
  734         .ssd_limit = 0xfffff,
  735         .ssd_type = SDT_MEMERA,
  736         .ssd_dpl = SEL_KPL,
  737         .ssd_p = 1,
  738         .ssd_long = 1,
  739         .ssd_def32 = 0,
  740         .ssd_gran = 1           },
  741 /* GDATA_SEL    5 Data Descriptor for kernel */
  742 {       .ssd_base = 0x0,
  743         .ssd_limit = 0xfffff,
  744         .ssd_type = SDT_MEMRWA,
  745         .ssd_dpl = SEL_KPL,
  746         .ssd_p = 1,
  747         .ssd_long = 1,
  748         .ssd_def32 = 0,
  749         .ssd_gran = 1           },
  750 /* GUCODE32_SEL 6 32 bit Code Descriptor for user */
  751 {       .ssd_base = 0x0,
  752         .ssd_limit = 0xfffff,
  753         .ssd_type = SDT_MEMERA,
  754         .ssd_dpl = SEL_UPL,
  755         .ssd_p = 1,
  756         .ssd_long = 0,
  757         .ssd_def32 = 1,
  758         .ssd_gran = 1           },
  759 /* GUDATA_SEL   7 32/64 bit Data Descriptor for user */
  760 {       .ssd_base = 0x0,
  761         .ssd_limit = 0xfffff,
  762         .ssd_type = SDT_MEMRWA,
  763         .ssd_dpl = SEL_UPL,
  764         .ssd_p = 1,
  765         .ssd_long = 0,
  766         .ssd_def32 = 1,
  767         .ssd_gran = 1           },
  768 /* GUCODE_SEL   8 64 bit Code Descriptor for user */
  769 {       .ssd_base = 0x0,
  770         .ssd_limit = 0xfffff,
  771         .ssd_type = SDT_MEMERA,
  772         .ssd_dpl = SEL_UPL,
  773         .ssd_p = 1,
  774         .ssd_long = 1,
  775         .ssd_def32 = 0,
  776         .ssd_gran = 1           },
  777 /* GPROC0_SEL   9 Proc 0 Tss Descriptor */
  778 {       .ssd_base = 0x0,
  779         .ssd_limit = sizeof(struct amd64tss) + IOPERM_BITMAP_SIZE - 1,
  780         .ssd_type = SDT_SYSTSS,
  781         .ssd_dpl = SEL_KPL,
  782         .ssd_p = 1,
  783         .ssd_long = 0,
  784         .ssd_def32 = 0,
  785         .ssd_gran = 0           },
  786 /* Actually, the TSS is a system descriptor which is double size */
  787 {       .ssd_base = 0x0,
  788         .ssd_limit = 0x0,
  789         .ssd_type = 0,
  790         .ssd_dpl = 0,
  791         .ssd_p = 0,
  792         .ssd_long = 0,
  793         .ssd_def32 = 0,
  794         .ssd_gran = 0           },
  795 /* GUSERLDT_SEL 11 LDT Descriptor */
  796 {       .ssd_base = 0x0,
  797         .ssd_limit = 0x0,
  798         .ssd_type = 0,
  799         .ssd_dpl = 0,
  800         .ssd_p = 0,
  801         .ssd_long = 0,
  802         .ssd_def32 = 0,
  803         .ssd_gran = 0           },
  804 /* GUSERLDT_SEL 12 LDT Descriptor, double size */
  805 {       .ssd_base = 0x0,
  806         .ssd_limit = 0x0,
  807         .ssd_type = 0,
  808         .ssd_dpl = 0,
  809         .ssd_p = 0,
  810         .ssd_long = 0,
  811         .ssd_def32 = 0,
  812         .ssd_gran = 0           },
  813 };
  814 
  815 void
  816 setidt(int idx, inthand_t *func, int typ, int dpl, int ist)
  817 {
  818         struct gate_descriptor *ip;
  819 
  820         ip = idt + idx;
  821         ip->gd_looffset = (uintptr_t)func;
  822         ip->gd_selector = GSEL(GCODE_SEL, SEL_KPL);
  823         ip->gd_ist = ist;
  824         ip->gd_xx = 0;
  825         ip->gd_type = typ;
  826         ip->gd_dpl = dpl;
  827         ip->gd_p = 1;
  828         ip->gd_hioffset = ((uintptr_t)func)>>16 ;
  829 }
  830 
  831 extern inthand_t
  832         IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
  833         IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
  834         IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
  835         IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
  836         IDTVEC(xmm), IDTVEC(dblfault),
  837         IDTVEC(div_pti), IDTVEC(bpt_pti),
  838         IDTVEC(ofl_pti), IDTVEC(bnd_pti), IDTVEC(ill_pti), IDTVEC(dna_pti),
  839         IDTVEC(fpusegm_pti), IDTVEC(tss_pti), IDTVEC(missing_pti),
  840         IDTVEC(stk_pti), IDTVEC(prot_pti), IDTVEC(page_pti),
  841         IDTVEC(rsvd_pti), IDTVEC(fpu_pti), IDTVEC(align_pti),
  842         IDTVEC(xmm_pti),
  843 #ifdef KDTRACE_HOOKS
  844         IDTVEC(dtrace_ret), IDTVEC(dtrace_ret_pti),
  845 #endif
  846 #ifdef XENHVM
  847         IDTVEC(xen_intr_upcall), IDTVEC(xen_intr_upcall_pti),
  848 #endif
  849         IDTVEC(fast_syscall), IDTVEC(fast_syscall32),
  850         IDTVEC(fast_syscall_pti);
  851 
  852 #ifdef DDB
  853 /*
  854  * Display the index and function name of any IDT entries that don't use
  855  * the default 'rsvd' entry point.
  856  */
DB_SHOW_COMMAND(idt, db_show_idt)
{
        struct gate_descriptor *ip;
        int idx;
        uintptr_t func;

        ip = idt;
        for (idx = 0; idx < NIDT && !db_pager_quit; idx++) {
                /* Reassemble the handler address from the split gate fields. */
                func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
                /* Only report vectors that were actually hooked. */
                if (func != (uintptr_t)&IDTVEC(rsvd)) {
                        db_printf("%3d\t", idx);
                        db_printsym(func, DB_STGY_PROC);
                        db_printf("\n");
                }
                ip++;
        }
}
  874 
  875 /* Show privileged registers. */
DB_SHOW_COMMAND(sysregs, db_show_sysregs)
{
        /* Matches the in-memory layout stored by SIDT/SGDT. */
        struct {
                uint16_t limit;
                uint64_t base;
        } __packed idtr, gdtr;
        uint16_t ldt, tr;

        __asm __volatile("sidt %0" : "=m" (idtr));
        db_printf("idtr\t0x%016lx/%04x\n",
            (u_long)idtr.base, (u_int)idtr.limit);
        __asm __volatile("sgdt %0" : "=m" (gdtr));
        db_printf("gdtr\t0x%016lx/%04x\n",
            (u_long)gdtr.base, (u_int)gdtr.limit);
        __asm __volatile("sldt %0" : "=r" (ldt));
        db_printf("ldtr\t0x%04x\n", ldt);
        __asm __volatile("str %0" : "=r" (tr));
        db_printf("tr\t0x%04x\n", tr);
        db_printf("cr0\t0x%016lx\n", rcr0());
        db_printf("cr2\t0x%016lx\n", rcr2());
        db_printf("cr3\t0x%016lx\n", rcr3());
        db_printf("cr4\t0x%016lx\n", rcr4());
        /* XCR0 is only readable when CR4.OSXSAVE is set. */
        if (rcr4() & CR4_XSAVE)
                db_printf("xcr0\t0x%016lx\n", rxcr(0));
        db_printf("EFER\t0x%016lx\n", rdmsr(MSR_EFER));
        /* FEATURE_CONTROL MSR exists only with VMX or SMX support. */
        if (cpu_feature2 & (CPUID2_VMX | CPUID2_SMX))
                db_printf("FEATURES_CTL\t%016lx\n",
                    rdmsr(MSR_IA32_FEATURE_CONTROL));
        db_printf("DEBUG_CTL\t0x%016lx\n", rdmsr(MSR_DEBUGCTLMSR));
        db_printf("PAT\t0x%016lx\n", rdmsr(MSR_PAT));
        db_printf("GSBASE\t0x%016lx\n", rdmsr(MSR_GSBASE));
}
  908 
/* Dump the hardware debug registers (dr4/dr5 are aliases and skipped). */
DB_SHOW_COMMAND(dbregs, db_show_dbregs)
{

        db_printf("dr0\t0x%016lx\n", rdr0());
        db_printf("dr1\t0x%016lx\n", rdr1());
        db_printf("dr2\t0x%016lx\n", rdr2());
        db_printf("dr3\t0x%016lx\n", rdr3());
        db_printf("dr6\t0x%016lx\n", rdr6());
        db_printf("dr7\t0x%016lx\n", rdr7());
}
  919 #endif
  920 
  921 void
  922 sdtossd(sd, ssd)
  923         struct user_segment_descriptor *sd;
  924         struct soft_segment_descriptor *ssd;
  925 {
  926 
  927         ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
  928         ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
  929         ssd->ssd_type  = sd->sd_type;
  930         ssd->ssd_dpl   = sd->sd_dpl;
  931         ssd->ssd_p     = sd->sd_p;
  932         ssd->ssd_long  = sd->sd_long;
  933         ssd->ssd_def32 = sd->sd_def32;
  934         ssd->ssd_gran  = sd->sd_gran;
  935 }
  936 
  937 void
  938 ssdtosd(ssd, sd)
  939         struct soft_segment_descriptor *ssd;
  940         struct user_segment_descriptor *sd;
  941 {
  942 
  943         sd->sd_lobase = (ssd->ssd_base) & 0xffffff;
  944         sd->sd_hibase = (ssd->ssd_base >> 24) & 0xff;
  945         sd->sd_lolimit = (ssd->ssd_limit) & 0xffff;
  946         sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf;
  947         sd->sd_type  = ssd->ssd_type;
  948         sd->sd_dpl   = ssd->ssd_dpl;
  949         sd->sd_p     = ssd->ssd_p;
  950         sd->sd_long  = ssd->ssd_long;
  951         sd->sd_def32 = ssd->ssd_def32;
  952         sd->sd_gran  = ssd->ssd_gran;
  953 }
  954 
  955 void
  956 ssdtosyssd(ssd, sd)
  957         struct soft_segment_descriptor *ssd;
  958         struct system_segment_descriptor *sd;
  959 {
  960 
  961         sd->sd_lobase = (ssd->ssd_base) & 0xffffff;
  962         sd->sd_hibase = (ssd->ssd_base >> 24) & 0xfffffffffful;
  963         sd->sd_lolimit = (ssd->ssd_limit) & 0xffff;
  964         sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf;
  965         sd->sd_type  = ssd->ssd_type;
  966         sd->sd_dpl   = ssd->ssd_dpl;
  967         sd->sd_p     = ssd->ssd_p;
  968         sd->sd_gran  = ssd->ssd_gran;
  969 }
  970 
  971 #if !defined(DEV_ATPIC) && defined(DEV_ISA)
  972 #include <isa/isavar.h>
  973 #include <isa/isareg.h>
  974 /*
  975  * Return a bitmap of the current interrupt requests.  This is 8259-specific
  976  * and is only suitable for use at probe time.
  977  * This is only here to pacify sio.  It is NOT FATAL if this doesn't work.
  978  * It shouldn't be here.  There should probably be an APIC centric
  979  * implementation in the apic driver code, if at all.
  980  */
  981 intrmask_t
  982 isa_irq_pending(void)
  983 {
  984         u_char irr1;
  985         u_char irr2;
  986 
  987         irr1 = inb(IO_ICU1);
  988         irr2 = inb(IO_ICU2);
  989         return ((irr2 << 8) | irr1);
  990 }
  991 #endif
  992 
  993 u_int basemem;
  994 
/*
 * Insert the range [base, base + length) into the physmap array of
 * base/bound pairs, keeping it sorted and merging with an adjacent
 * entry when the ranges abut.  Returns 1 on success (including the
 * cases where the range was merged or dropped), 0 when the array is
 * full and memory-map processing should stop.
 */
static int
add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap,
    int *physmap_idxp)
{
        int i, insert_idx, physmap_idx;

        physmap_idx = *physmap_idxp;

        if (length == 0)
                return (1);

        /*
         * Find insertion point while checking for overlap.  Start off by
         * assuming the new entry will be added to the end.
         *
         * NB: physmap_idx points to the next free slot.
         */
        insert_idx = physmap_idx;
        for (i = 0; i <= physmap_idx; i += 2) {
                if (base < physmap[i + 1]) {
                        if (base + length <= physmap[i]) {
                                insert_idx = i;
                                break;
                        }
                        /* Partial overlap with an existing entry: drop it. */
                        if (boothowto & RB_VERBOSE)
                                printf(
                    "Overlapping memory regions, ignoring second region\n");
                        return (1);
                }
        }

        /* See if we can prepend to the next entry. */
        if (insert_idx <= physmap_idx && base + length == physmap[insert_idx]) {
                physmap[insert_idx] = base;
                return (1);
        }

        /* See if we can append to the previous entry. */
        if (insert_idx > 0 && base == physmap[insert_idx - 1]) {
                physmap[insert_idx - 1] += length;
                return (1);
        }

        physmap_idx += 2;
        *physmap_idxp = physmap_idx;
        if (physmap_idx == PHYSMAP_SIZE) {
                printf(
                "Too many segments in the physical address map, giving up\n");
                return (0);
        }

        /*
         * Move the last 'N' entries down to make room for the new
         * entry if needed.
         */
        for (i = (physmap_idx - 2); i > insert_idx; i -= 2) {
                physmap[i] = physmap[i - 2];
                physmap[i + 1] = physmap[i - 1];
        }

        /* Insert the new entry. */
        physmap[insert_idx] = base;
        physmap[insert_idx + 1] = base + length;
        return (1);
}
 1060 
 1061 void
 1062 bios_add_smap_entries(struct bios_smap *smapbase, u_int32_t smapsize,
 1063                       vm_paddr_t *physmap, int *physmap_idx)
 1064 {
 1065         struct bios_smap *smap, *smapend;
 1066 
 1067         smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize);
 1068 
 1069         for (smap = smapbase; smap < smapend; smap++) {
 1070                 if (boothowto & RB_VERBOSE)
 1071                         printf("SMAP type=%02x base=%016lx len=%016lx\n",
 1072                             smap->type, smap->base, smap->length);
 1073 
 1074                 if (smap->type != SMAP_TYPE_MEMORY)
 1075                         continue;
 1076 
 1077                 if (!add_physmap_entry(smap->base, smap->length, physmap,
 1078                     physmap_idx))
 1079                         break;
 1080         }
 1081 }
 1082 
/*
 * Walk the UEFI memory map attached to the loader metadata and feed
 * every usable descriptor into the physmap array.  When booting
 * verbose, also pretty-print each descriptor's type, addresses, page
 * count, and attribute bits.
 */
static void
add_efi_map_entries(struct efi_map_header *efihdr, vm_paddr_t *physmap,
    int *physmap_idx)
{
        struct efi_md *map, *p;
        const char *type;
        size_t efisz;
        int ndesc, i;

        /* Printable names indexed by EFI memory descriptor type. */
        static const char *types[] = {
                "Reserved",
                "LoaderCode",
                "LoaderData",
                "BootServicesCode",
                "BootServicesData",
                "RuntimeServicesCode",
                "RuntimeServicesData",
                "ConventionalMemory",
                "UnusableMemory",
                "ACPIReclaimMemory",
                "ACPIMemoryNVS",
                "MemoryMappedIO",
                "MemoryMappedIOPortSpace",
                "PalCode",
                "PersistentMemory"
        };

        /*
         * Memory map data provided by UEFI via the GetMemoryMap
         * Boot Services API.
         */
        /* Descriptors start at the next 16-byte boundary after the header. */
        efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
        map = (struct efi_md *)((uint8_t *)efihdr + efisz);

        /* Guard against a malformed header before dividing. */
        if (efihdr->descriptor_size == 0)
                return;
        ndesc = efihdr->memory_size / efihdr->descriptor_size;

        if (boothowto & RB_VERBOSE)
                printf("%23s %12s %12s %8s %4s\n",
                    "Type", "Physical", "Virtual", "#Pages", "Attr");

        /*
         * The firmware's descriptor_size may be larger than
         * sizeof(struct efi_md), so step with efi_next_descriptor()
         * rather than plain pointer arithmetic.
         */
        for (i = 0, p = map; i < ndesc; i++,
            p = efi_next_descriptor(p, efihdr->descriptor_size)) {
                if (boothowto & RB_VERBOSE) {
                        if (p->md_type < nitems(types))
                                type = types[p->md_type];
                        else
                                type = "<INVALID>";
                        printf("%23s %012lx %12p %08lx ", type, p->md_phys,
                            p->md_virt, p->md_pages);
                        if (p->md_attr & EFI_MD_ATTR_UC)
                                printf("UC ");
                        if (p->md_attr & EFI_MD_ATTR_WC)
                                printf("WC ");
                        if (p->md_attr & EFI_MD_ATTR_WT)
                                printf("WT ");
                        if (p->md_attr & EFI_MD_ATTR_WB)
                                printf("WB ");
                        if (p->md_attr & EFI_MD_ATTR_UCE)
                                printf("UCE ");
                        if (p->md_attr & EFI_MD_ATTR_WP)
                                printf("WP ");
                        if (p->md_attr & EFI_MD_ATTR_RP)
                                printf("RP ");
                        if (p->md_attr & EFI_MD_ATTR_XP)
                                printf("XP ");
                        if (p->md_attr & EFI_MD_ATTR_NV)
                                printf("NV ");
                        if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE)
                                printf("MORE_RELIABLE ");
                        if (p->md_attr & EFI_MD_ATTR_RO)
                                printf("RO ");
                        if (p->md_attr & EFI_MD_ATTR_RT)
                                printf("RUNTIME");
                        printf("\n");
                }

                switch (p->md_type) {
                case EFI_MD_TYPE_CODE:
                case EFI_MD_TYPE_DATA:
                case EFI_MD_TYPE_BS_CODE:
                case EFI_MD_TYPE_BS_DATA:
                case EFI_MD_TYPE_FREE:
                        /*
                         * We're allowed to use any entry with these types.
                         */
                        break;
                default:
                        continue;
                }

                if (!add_physmap_entry(p->md_phys, (p->md_pages * PAGE_SIZE),
                    physmap, physmap_idx))
                        break;
        }
}
 1180 
 1181 static char bootmethod[16] = "";
 1182 SYSCTL_STRING(_machdep, OID_AUTO, bootmethod, CTLFLAG_RD, bootmethod, 0,
 1183     "System firmware boot method");
 1184 
/*
 * Locate the firmware memory map in the loader-supplied metadata and
 * populate physmap from it.  Prefers the UEFI map when both a UEFI map
 * and a BIOS SMAP are present, and records which firmware path was
 * used in the machdep.bootmethod sysctl.
 */
static void
native_parse_memmap(caddr_t kmdp, vm_paddr_t *physmap, int *physmap_idx)
{
        struct bios_smap *smap;
        struct efi_map_header *efihdr;
        u_int32_t size;

        /*
         * Memory map from INT 15:E820.
         *
         * subr_module.c says:
         * "Consumer may safely assume that size value precedes data."
         * ie: an int32_t immediately precedes smap.
         */

        efihdr = (struct efi_map_header *)preload_search_info(kmdp,
            MODINFO_METADATA | MODINFOMD_EFI_MAP);
        smap = (struct bios_smap *)preload_search_info(kmdp,
            MODINFO_METADATA | MODINFOMD_SMAP);
        if (efihdr == NULL && smap == NULL)
                panic("No BIOS smap or EFI map info from loader!");

        if (efihdr != NULL) {
                add_efi_map_entries(efihdr, physmap, physmap_idx);
                strlcpy(bootmethod, "UEFI", sizeof(bootmethod));
        } else {
                /* The SMAP byte count is stored just before the table. */
                size = *((u_int32_t *)smap - 1);
                bios_add_smap_entries(smap, size, physmap, physmap_idx);
                strlcpy(bootmethod, "BIOS", sizeof(bootmethod));
        }
}
 1216 
 1217 #define PAGES_PER_GB    (1024 * 1024 * 1024 / PAGE_SIZE)
 1218 
 1219 /*
 1220  * Populate the (physmap) array with base/bound pairs describing the
 1221  * available physical memory in the system, then test this memory and
 1222  * build the phys_avail array describing the actually-available memory.
 1223  *
 1224  * Total memory size may be set by the kernel environment variable
 1225  * hw.physmem or the compile-time define MAXMEM.
 1226  *
 1227  * XXX first should be vm_paddr_t.
 1228  */
static void
getmemsize(caddr_t kmdp, u_int64_t first)
{
        int i, physmap_idx, pa_indx, da_indx;
        vm_paddr_t pa, physmap[PHYSMAP_SIZE];
        u_long physmem_start, physmem_tunable, memtest;
        pt_entry_t *pte;
        quad_t dcons_addr, dcons_size;
        int page_counter;

        /*
         * Tell the physical memory allocator about pages used to store
         * the kernel and preloaded data.  See kmem_bootstrap_free().
         */
        vm_phys_add_seg((vm_paddr_t)kernphys, trunc_page(first));

        bzero(physmap, sizeof(physmap));
        physmap_idx = 0;

        init_ops.parse_memmap(kmdp, physmap, &physmap_idx);
        /*
         * parse_memmap leaves physmap_idx at the next free slot (see
         * add_physmap_entry); back it up to index the last valid pair.
         */
        physmap_idx -= 2;

        /*
         * Find the 'base memory' segment for SMP
         */
        basemem = 0;
        for (i = 0; i <= physmap_idx; i += 2) {
                if (physmap[i] <= 0xA0000) {
                        /* basemem is kept in KB. */
                        basemem = physmap[i + 1] / 1024;
                        break;
                }
        }
        if (basemem == 0 || basemem > 640) {
                if (bootverbose)
                        printf(
                "Memory map doesn't contain a basemem segment, faking it");
                basemem = 640;
        }

        /*
         * Maxmem isn't the "maximum memory", it's one larger than the
         * highest page of the physical address space.  It should be
         * called something like "Maxphyspage".  We may adjust this
         * based on ``hw.physmem'' and the results of the memory test.
         */
        Maxmem = atop(physmap[physmap_idx + 1]);

#ifdef MAXMEM
        /*
         * MAXMEM is specified in kilobytes (cf. the "%ldK" report of
         * Maxmem * 4 below); /4 converts KB to 4KB pages.
         */
        Maxmem = MAXMEM / 4;
#endif

        if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable))
                Maxmem = atop(physmem_tunable);

        /*
         * The boot memory test is disabled by default, as it takes a
         * significant amount of time on large-memory systems, and is
         * unfriendly to virtual machines as it unnecessarily touches all
         * pages.
         *
         * A general name is used as the code may be extended to support
         * additional tests beyond the current "page present" test.
         */
        memtest = 0;
        TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest);

        /*
         * Don't allow MAXMEM or hw.physmem to extend the amount of memory
         * in the system.
         */
        if (Maxmem > atop(physmap[physmap_idx + 1]))
                Maxmem = atop(physmap[physmap_idx + 1]);

        if (atop(physmap[physmap_idx + 1]) != Maxmem &&
            (boothowto & RB_VERBOSE))
                printf("Physical memory use set to %ldK\n", Maxmem * 4);

        /*
         * Make hole for "AP -> long mode" bootstrap code.  The
         * mp_bootaddress vector is only available when the kernel
         * is configured to support APs and APs for the system start
         * in real mode mode (e.g. SMP bare metal).
         */
        if (init_ops.mp_bootaddress)
                init_ops.mp_bootaddress(physmap, &physmap_idx);

        /* call pmap initialization to make new kernel address space */
        pmap_bootstrap(&first);

        /*
         * Size up each available chunk of physical memory.
         *
         * XXX Some BIOSes corrupt low 64KB between suspend and resume.
         * By default, mask off the first 16 pages unless we appear to be
         * running in a VM.
         */
        physmem_start = (vm_guest > VM_GUEST_NO ? 1 : 16) << PAGE_SHIFT;
        TUNABLE_ULONG_FETCH("hw.physmem.start", &physmem_start);
        if (physmap[0] < physmem_start) {
                if (physmem_start < PAGE_SIZE)
                        physmap[0] = PAGE_SIZE;
                else if (physmem_start >= physmap[1])
                        physmap[0] = round_page(physmap[1] - PAGE_SIZE);
                else
                        physmap[0] = round_page(physmem_start);
        }
        pa_indx = 0;
        da_indx = 1;
        phys_avail[pa_indx++] = physmap[0];
        phys_avail[pa_indx] = physmap[0];
        dump_avail[da_indx] = physmap[0];
        /* CMAP1/CADDR1 form the scratch mapping used by the memory test. */
        pte = CMAP1;

        /*
         * Get dcons buffer address
         */
        if (getenv_quad("dcons.addr", &dcons_addr) == 0 ||
            getenv_quad("dcons.size", &dcons_size) == 0)
                dcons_addr = 0;

        /*
         * physmap is in bytes, so when converting to page boundaries,
         * round up the start address and round down the end address.
         */
        page_counter = 0;
        if (memtest != 0)
                printf("Testing system memory");
        for (i = 0; i <= physmap_idx; i += 2) {
                vm_paddr_t end;

                end = ptoa((vm_paddr_t)Maxmem);
                if (physmap[i + 1] < end)
                        end = trunc_page(physmap[i + 1]);
                for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
                        int tmp, page_bad, full;
                        int *ptr = (int *)CADDR1;

                        full = FALSE;
                        /*
                         * block out kernel memory as not available.
                         */
                        if (pa >= (vm_paddr_t)kernphys && pa < first)
                                goto do_dump_avail;

                        /*
                         * block out dcons buffer
                         */
                        if (dcons_addr > 0
                            && pa >= trunc_page(dcons_addr)
                            && pa < dcons_addr + dcons_size)
                                goto do_dump_avail;

                        page_bad = FALSE;
                        if (memtest == 0)
                                goto skip_memtest;

                        /*
                         * Print a "." every GB to show we're making
                         * progress.
                         */
                        page_counter++;
                        if ((page_counter % PAGES_PER_GB) == 0)
                                printf(".");

                        /*
                         * map page into kernel: valid, read/write,non-cacheable
                         */
                        *pte = pa | PG_V | PG_RW | PG_NC_PWT | PG_NC_PCD;
                        invltlb();

                        /* Save the original word so the page is left intact. */
                        tmp = *(int *)ptr;
                        /*
                         * Test for alternating 1's and 0's
                         */
                        *(volatile int *)ptr = 0xaaaaaaaa;
                        if (*(volatile int *)ptr != 0xaaaaaaaa)
                                page_bad = TRUE;
                        /*
                         * Test for alternating 0's and 1's
                         */
                        *(volatile int *)ptr = 0x55555555;
                        if (*(volatile int *)ptr != 0x55555555)
                                page_bad = TRUE;
                        /*
                         * Test for all 1's
                         */
                        *(volatile int *)ptr = 0xffffffff;
                        if (*(volatile int *)ptr != 0xffffffff)
                                page_bad = TRUE;
                        /*
                         * Test for all 0's
                         */
                        *(volatile int *)ptr = 0x0;
                        if (*(volatile int *)ptr != 0x0)
                                page_bad = TRUE;
                        /*
                         * Restore original value.
                         */
                        *(int *)ptr = tmp;

skip_memtest:
                        /*
                         * Adjust array of valid/good pages.
                         */
                        if (page_bad == TRUE)
                                continue;
                        /*
                         * If this good page is a continuation of the
                         * previous set of good pages, then just increase
                         * the end pointer. Otherwise start a new chunk.
                         * Note that "end" points one higher than end,
                         * making the range >= start and < end.
                         * If we're also doing a speculative memory
                         * test and we at or past the end, bump up Maxmem
                         * so that we keep going. The first bad page
                         * will terminate the loop.
                         */
                        if (phys_avail[pa_indx] == pa) {
                                phys_avail[pa_indx] += PAGE_SIZE;
                        } else {
                                pa_indx++;
                                if (pa_indx == PHYS_AVAIL_ARRAY_END) {
                                        printf(
                "Too many holes in the physical address space, giving up\n");
                                        pa_indx--;
                                        full = TRUE;
                                        goto do_dump_avail;
                                }
                                phys_avail[pa_indx++] = pa;     /* start */
                                phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */
                        }
                        physmem++;
do_dump_avail:
                        /*
                         * dump_avail also covers pages excluded from
                         * phys_avail (kernel, dcons) so crash dumps can
                         * include them.
                         */
                        if (dump_avail[da_indx] == pa) {
                                dump_avail[da_indx] += PAGE_SIZE;
                        } else {
                                da_indx++;
                                if (da_indx == DUMP_AVAIL_ARRAY_END) {
                                        da_indx--;
                                        goto do_next;
                                }
                                dump_avail[da_indx++] = pa; /* start */
                                dump_avail[da_indx] = pa + PAGE_SIZE; /* end */
                        }
do_next:
                        if (full)
                                break;
                }
        }
        /* Tear down the scratch mapping used by the test loop. */
        *pte = 0;
        invltlb();
        if (memtest != 0)
                printf("\n");

        /*
         * XXX
         * The last chunk must contain at least one page plus the message
         * buffer to avoid complicating other code (message buffer address
         * calculation, etc.).
         */
        while (phys_avail[pa_indx - 1] + PAGE_SIZE +
            round_page(msgbufsize) >= phys_avail[pa_indx]) {
                physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
                phys_avail[pa_indx--] = 0;
                phys_avail[pa_indx--] = 0;
        }

        Maxmem = atop(phys_avail[pa_indx]);

        /* Trim off space for the message buffer. */
        phys_avail[pa_indx] -= round_page(msgbufsize);

        /* Map the message buffer. */
        msgbufp = (struct msgbuf *)PHYS_TO_DMAP(phys_avail[pa_indx]);
}
 1504 
/*
 * Relocate and parse the loader-provided preload metadata: fetch the
 * boot flags, the static kernel environment, the (optional) debugger
 * symbol table bounds, and the EFI system table address.  Returns the
 * metadata pointer for the kernel itself ("elf kernel" module).
 */
static caddr_t
native_parse_preload_data(u_int64_t modulep)
{
        caddr_t kmdp;
        char *envp;
#ifdef DDB
        vm_offset_t ksym_start;
        vm_offset_t ksym_end;
#endif

        /* modulep is a physical address; rebase it into KVA. */
        preload_metadata = (caddr_t)(uintptr_t)(modulep + KERNBASE);
        preload_bootstrap_relocate(KERNBASE);
        kmdp = preload_search_by_type("elf kernel");
        if (kmdp == NULL)
                kmdp = preload_search_by_type("elf64 kernel");
        boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int);
        envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *);
        if (envp != NULL)
                envp += KERNBASE;
        init_static_kenv(envp, 0);
#ifdef DDB
        ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t);
        ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t);
        db_fetch_ksymtab(ksym_start, ksym_end);
#endif
        efi_systbl_phys = MD_FETCH(kmdp, MODINFOMD_FW_HANDLE, vm_paddr_t);

        return (kmdp);
}
 1534 
/*
 * Initialize the kernel debugger framework and, when the RB_KDB boot
 * flag was given, drop into the debugger immediately.
 */
static void
amd64_kdb_init(void)
{
        kdb_init();
#ifdef KDB
        if (boothowto & RB_KDB)
                kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
#endif
}
 1544 
 1545 /* Set up the fast syscall stuff */
void
amd64_conf_fast_syscall(void)
{
        uint64_t msr;

        /* Enable the SYSCALL/SYSRET instructions (EFER.SCE). */
        msr = rdmsr(MSR_EFER) | EFER_SCE;
        wrmsr(MSR_EFER, msr);
        /*
         * 64-bit SYSCALL entry point; the PTI variant is used when
         * page-table isolation is enabled.
         */
        wrmsr(MSR_LSTAR, pti ? (u_int64_t)IDTVEC(fast_syscall_pti) :
            (u_int64_t)IDTVEC(fast_syscall));
        /* Entry point for SYSCALL issued from 32-bit compat mode. */
        wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32));
        /* STAR holds the kernel and user segment selector bases. */
        msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) |
            ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48);
        wrmsr(MSR_STAR, msr);
        /* RFLAGS bits cleared on kernel entry via SYSCALL. */
        wrmsr(MSR_SF_MASK, PSL_NT | PSL_T | PSL_I | PSL_C | PSL_D | PSL_AC);
}
 1561 
/*
 * hammer_time: amd64 machine-dependent bootstrap, called from locore
 * with paging enabled and the loader's module pointer (modulep) and the
 * first free physical address (physfree) in hand.  Sets up the CPU,
 * descriptor tables, per-CPU data, interrupt vectors, console and FPU,
 * and returns the kernel stack pointer for thread0 to locore.
 *
 * NOTE(review): the statement order below is deliberate and fragile
 * (e.g. GDT before lgdt, pcpu before any PCPU_SET, clock before
 * console); do not reorder.
 */
u_int64_t
hammer_time(u_int64_t modulep, u_int64_t physfree)
{
	caddr_t kmdp;
	int gsel_tss, x;
	struct pcpu *pc;
	struct nmi_pcpu *np;
	struct xstate_hdr *xhdr;
	u_int64_t rsp0;
	char *env;
	size_t kstack0_sz;
	int late_console;

	TSRAW(&thread0, TS_ENTER, __func__, NULL);

	/* Parse loader-provided metadata (boothowto, kenv, symbols...). */
	kmdp = init_ops.parse_preload_data(modulep);

	/* Apply a CPU microcode update, if the loader staged one. */
	physfree += ucode_load_bsp(physfree + KERNBASE);
	physfree = roundup2(physfree, PAGE_SIZE);

	identify_cpu1();
	identify_hypervisor();
	identify_cpu_fixup_bsp();
	identify_cpu2();
	initializecpucache();

	/*
	 * Check for pti, pcid, and invpcid before ifuncs are
	 * resolved, to correctly select the implementation for
	 * pmap_activate_sw_mode().
	 */
	pti = pti_get_default();
	TUNABLE_INT_FETCH("vm.pmap.pti", &pti);
	TUNABLE_INT_FETCH("vm.pmap.pcid_enabled", &pmap_pcid_enabled);
	if ((cpu_feature2 & CPUID2_PCID) != 0 && pmap_pcid_enabled) {
		invpcid_works = (cpu_stdext_feature &
		    CPUID_STDEXT_INVPCID) != 0;
	} else {
		pmap_pcid_enabled = 0;
	}

	/* Resolve kernel ifuncs now that CPU features are known. */
	link_elf_ireloc(kmdp);

	/*
	 * This may be done better later if it gets more high level
	 * components in it. If so just link td->td_proc here.
	 */
	proc_linkup0(&proc0, &thread0);

	/* Init basic tunables, hz etc */
	init_param1();

	/* Carve thread0's kernel stack out of the free physical memory. */
	thread0.td_kstack = physfree + KERNBASE;
	thread0.td_kstack_pages = kstack_pages;
	kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE;
	bzero((void *)thread0.td_kstack, kstack0_sz);
	physfree += kstack0_sz;

	/*
	 * make gdt memory segments
	 */
	for (x = 0; x < NGDT; x++) {
		/* TSS and LDT slots are system descriptors, built below. */
		if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) &&
		    x != GUSERLDT_SEL && x != (GUSERLDT_SEL) + 1)
			ssdtosd(&gdt_segs[x], &gdt[x]);
	}
	gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&common_tss[0];
	ssdtosyssd(&gdt_segs[GPROC0_SEL],
	    (struct system_segment_descriptor *)&gdt[GPROC0_SEL]);

	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
	r_gdt.rd_base =  (long) gdt;
	lgdt(&r_gdt);
	pc = &__pcpu[0];

	wrmsr(MSR_FSBASE, 0);		/* User value */
	wrmsr(MSR_GSBASE, (u_int64_t)pc);
	wrmsr(MSR_KGSBASE, 0);		/* User value while in the kernel */

	pcpu_init(pc, 0, sizeof(struct pcpu));
	dpcpu_init((void *)(physfree + KERNBASE), 0);
	physfree += DPCPU_SIZE;
	PCPU_SET(prvspace, pc);
	PCPU_SET(curthread, &thread0);
	/* Non-late cninit() and printf() can be moved up to here. */
	PCPU_SET(tssp, &common_tss[0]);
	PCPU_SET(commontssp, &common_tss[0]);
	PCPU_SET(tss, (struct system_segment_descriptor *)&gdt[GPROC0_SEL]);
	PCPU_SET(ldt, (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL]);
	PCPU_SET(fs32p, &gdt[GUFS32_SEL]);
	PCPU_SET(gs32p, &gdt[GUGS32_SEL]);

	/*
	 * Initialize mutexes.
	 *
	 * icu_lock: in order to allow an interrupt to occur in a critical
	 *           section, to set pcpu->ipending (etc...) properly, we
	 *           must be able to get the icu lock, so it can't be
	 *           under witness.
	 */
	mutex_init();
	mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS);
	mtx_init(&dt_lock, "descriptor tables", NULL, MTX_DEF);

	/* exceptions */
	for (x = 0; x < NIDT; x++)
		setidt(x, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_SYSIGT,
		    SEL_KPL, 0);
	setidt(IDT_DE, pti ? &IDTVEC(div_pti) : &IDTVEC(div), SDT_SYSIGT,
	    SEL_KPL, 0);
	/* #DB on IST4, #NMI on IST2, #DF on IST1, #MC on IST3 (see below). */
	setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 4);
	setidt(IDT_NMI, &IDTVEC(nmi),  SDT_SYSIGT, SEL_KPL, 2);
	/* Breakpoint and overflow are reachable from user mode (SEL_UPL). */
	setidt(IDT_BP, pti ? &IDTVEC(bpt_pti) : &IDTVEC(bpt), SDT_SYSIGT,
	    SEL_UPL, 0);
	setidt(IDT_OF, pti ? &IDTVEC(ofl_pti) : &IDTVEC(ofl), SDT_SYSIGT,
	    SEL_UPL, 0);
	setidt(IDT_BR, pti ? &IDTVEC(bnd_pti) : &IDTVEC(bnd), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_UD, pti ? &IDTVEC(ill_pti) : &IDTVEC(ill), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_NM, pti ? &IDTVEC(dna_pti) : &IDTVEC(dna), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_DF, &IDTVEC(dblfault), SDT_SYSIGT, SEL_KPL, 1);
	setidt(IDT_FPUGP, pti ? &IDTVEC(fpusegm_pti) : &IDTVEC(fpusegm),
	    SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_TS, pti ? &IDTVEC(tss_pti) : &IDTVEC(tss), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_NP, pti ? &IDTVEC(missing_pti) : &IDTVEC(missing),
	    SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_SS, pti ? &IDTVEC(stk_pti) : &IDTVEC(stk), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_GP, pti ? &IDTVEC(prot_pti) : &IDTVEC(prot), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_PF, pti ? &IDTVEC(page_pti) : &IDTVEC(page), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_MF, pti ? &IDTVEC(fpu_pti) : &IDTVEC(fpu), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_AC, pti ? &IDTVEC(align_pti) : &IDTVEC(align), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_MC, &IDTVEC(mchk), SDT_SYSIGT, SEL_KPL, 3);
	setidt(IDT_XF, pti ? &IDTVEC(xmm_pti) : &IDTVEC(xmm), SDT_SYSIGT,
	    SEL_KPL, 0);
#ifdef KDTRACE_HOOKS
	setidt(IDT_DTRACE_RET, pti ? &IDTVEC(dtrace_ret_pti) :
	    &IDTVEC(dtrace_ret), SDT_SYSIGT, SEL_UPL, 0);
#endif
#ifdef XENHVM
	setidt(IDT_EVTCHN, pti ? &IDTVEC(xen_intr_upcall_pti) :
	    &IDTVEC(xen_intr_upcall), SDT_SYSIGT, SEL_KPL, 0);
#endif
	r_idt.rd_limit = sizeof(idt0) - 1;
	r_idt.rd_base = (long) idt;
	lidt(&r_idt);

	/*
	 * Initialize the clock before the console so that console
	 * initialization can use DELAY().
	 */
	clock_init();

	/*
	 * Use vt(4) by default for UEFI boot (during the sc(4)/vt(4)
	 * transition).
	 * Once bootblocks have updated, we can test directly for
	 * efi_systbl != NULL here...
	 */
	if (preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_EFI_MAP)
	    != NULL)
		vty_set_preferred(VTY_VT);

	/* Speculative-execution mitigation knobs from the loader. */
	TUNABLE_INT_FETCH("hw.ibrs_disable", &hw_ibrs_disable);
	TUNABLE_INT_FETCH("hw.spec_store_bypass_disable", &hw_ssb_disable);
	TUNABLE_INT_FETCH("machdep.syscall_ret_l1d_flush",
	    &syscall_ret_l1d_flush_mode);

	finishidentcpu();	/* Final stage of CPU initialization */
	initializecpu();	/* Initialize CPU registers */

	/* doublefault stack space, runs on ist1 */
	common_tss[0].tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)];

	/*
	 * NMI stack, runs on ist2.  The pcpu pointer is stored just
	 * above the start of the ist2 stack.
	 */
	np = ((struct nmi_pcpu *) &nmi0_stack[sizeof(nmi0_stack)]) - 1;
	np->np_pcpu = (register_t) pc;
	common_tss[0].tss_ist2 = (long) np;

	/*
	 * MC# stack, runs on ist3.  The pcpu pointer is stored just
	 * above the start of the ist3 stack.
	 */
	np = ((struct nmi_pcpu *) &mce0_stack[sizeof(mce0_stack)]) - 1;
	np->np_pcpu = (register_t) pc;
	common_tss[0].tss_ist3 = (long) np;

	/*
	 * DB# stack, runs on ist4.
	 */
	np = ((struct nmi_pcpu *) &dbg0_stack[sizeof(dbg0_stack)]) - 1;
	np->np_pcpu = (register_t) pc;
	common_tss[0].tss_ist4 = (long) np;

	/* Set the IO permission bitmap (empty due to tss seg limit) */
	common_tss[0].tss_iobase = sizeof(struct amd64tss) + IOPERM_BITMAP_SIZE;

	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
	ltr(gsel_tss);

	amd64_conf_fast_syscall();

	/*
	 * Temporary forge some valid pointer to PCB, for exception
	 * handlers.  It is reinitialized properly below after FPU is
	 * set up.  Also set up td_critnest to short-cut the page
	 * fault handler.
	 */
	cpu_max_ext_state_size = sizeof(struct savefpu);
	thread0.td_pcb = get_pcb_td(&thread0);
	thread0.td_critnest = 1;

	/*
	 * The console and kdb should be initialized even earlier than here,
	 * but some console drivers don't work until after getmemsize().
	 * Default to late console initialization to support these drivers.
	 * This loses mainly printf()s in getmemsize() and early debugging.
	 */
	late_console = 1;
	TUNABLE_INT_FETCH("debug.late_console", &late_console);
	if (!late_console) {
		cninit();
		amd64_kdb_init();
	}

	getmemsize(kmdp, physfree);
	init_param2(physmem);

	/* now running on new page tables, configured,and u/iom is accessible */

#ifdef DEV_PCI
	/* This call might adjust phys_avail[]. */
	pci_early_quirks();
#endif

	if (late_console)
		cninit();

#ifdef DEV_ISA
#ifdef DEV_ATPIC
	elcr_probe();
	atpic_startup();
#else
	/* Reset and mask the atpics and leave them shut down. */
	atpic_reset();

	/*
	 * Point the ICU spurious interrupt vectors at the APIC spurious
	 * interrupt handler.
	 */
	setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0);
#endif
#else
#error "have you forgotten the isa device?";
#endif

	if (late_console)
		amd64_kdb_init();

	msgbufinit(msgbufp, msgbufsize);
	fpuinit();

	/*
	 * Set up thread0 pcb after fpuinit calculated pcb + fpu save
	 * area size.  Zero out the extended state header in fpu save
	 * area.
	 */
	thread0.td_pcb = get_pcb_td(&thread0);
	thread0.td_pcb->pcb_save = get_pcb_user_save_td(&thread0);
	bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size);
	if (use_xsave) {
		xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) +
		    1);
		xhdr->xstate_bv = xsave_mask;
	}
	/* make an initial tss so cpu can get interrupt stack on syscall! */
	rsp0 = (vm_offset_t)thread0.td_pcb;
	/* Ensure the stack is aligned to 16 bytes */
	rsp0 &= ~0xFul;
	common_tss[0].tss_rsp0 = rsp0;
	PCPU_SET(rsp0, rsp0);
	PCPU_SET(pti_rsp0, ((vm_offset_t)PCPU_PTR(pti_stack) +
	    PC_PTI_STACK_SZ * sizeof(uint64_t)) & ~0xful);
	PCPU_SET(curpcb, thread0.td_pcb);

	/* transfer to user mode */

	_ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
	_udatasel = GSEL(GUDATA_SEL, SEL_UPL);
	_ucode32sel = GSEL(GUCODE32_SEL, SEL_UPL);
	_ufssel = GSEL(GUFS32_SEL, SEL_UPL);
	_ugssel = GSEL(GUGS32_SEL, SEL_UPL);

	load_ds(_udatasel);
	load_es(_udatasel);
	load_fs(_ufssel);

	/* setup proc 0's pcb */
	thread0.td_pcb->pcb_flags = 0;
	thread0.td_frame = &proc0_tf;

	env = kern_getenv("kernelname");
	if (env != NULL)
		strlcpy(kernelname, env, sizeof(kernelname));

	cpu_probe_amdc1e();

#ifdef FDT
	x86_init_fdt();
#endif
	thread0.td_critnest = 0;

	TSEXIT();

	/* Location of kernel stack for locore */
	return ((u_int64_t)thread0.td_pcb);
}
 1890 
void
cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
{

	/*
	 * Machine-dependent per-CPU initialization hook.  Mark the ACPI
	 * processor id as "unknown"; it is filled in later when the
	 * platform enumerates CPUs.  cpuid and size are unused here.
	 */
	pcpu->pc_acpi_id = 0xffffffff;
}
 1897 
 1898 static int
 1899 smap_sysctl_handler(SYSCTL_HANDLER_ARGS)
 1900 {
 1901         struct bios_smap *smapbase;
 1902         struct bios_smap_xattr smap;
 1903         caddr_t kmdp;
 1904         uint32_t *smapattr;
 1905         int count, error, i;
 1906 
 1907         /* Retrieve the system memory map from the loader. */
 1908         kmdp = preload_search_by_type("elf kernel");
 1909         if (kmdp == NULL)
 1910                 kmdp = preload_search_by_type("elf64 kernel");
 1911         smapbase = (struct bios_smap *)preload_search_info(kmdp,
 1912             MODINFO_METADATA | MODINFOMD_SMAP);
 1913         if (smapbase == NULL)
 1914                 return (0);
 1915         smapattr = (uint32_t *)preload_search_info(kmdp,
 1916             MODINFO_METADATA | MODINFOMD_SMAP_XATTR);
 1917         count = *((uint32_t *)smapbase - 1) / sizeof(*smapbase);
 1918         error = 0;
 1919         for (i = 0; i < count; i++) {
 1920                 smap.base = smapbase[i].base;
 1921                 smap.length = smapbase[i].length;
 1922                 smap.type = smapbase[i].type;
 1923                 if (smapattr != NULL)
 1924                         smap.xattr = smapattr[i];
 1925                 else
 1926                         smap.xattr = 0;
 1927                 error = SYSCTL_OUT(req, &smap, sizeof(smap));
 1928         }
 1929         return (error);
 1930 }
 1931 SYSCTL_PROC(_machdep, OID_AUTO, smap, CTLTYPE_OPAQUE|CTLFLAG_RD, NULL, 0,
 1932     smap_sysctl_handler, "S,bios_smap_xattr", "Raw BIOS SMAP data");
 1933 
 1934 static int
 1935 efi_map_sysctl_handler(SYSCTL_HANDLER_ARGS)
 1936 {
 1937         struct efi_map_header *efihdr;
 1938         caddr_t kmdp;
 1939         uint32_t efisize;
 1940 
 1941         kmdp = preload_search_by_type("elf kernel");
 1942         if (kmdp == NULL)
 1943                 kmdp = preload_search_by_type("elf64 kernel");
 1944         efihdr = (struct efi_map_header *)preload_search_info(kmdp,
 1945             MODINFO_METADATA | MODINFOMD_EFI_MAP);
 1946         if (efihdr == NULL)
 1947                 return (0);
 1948         efisize = *((uint32_t *)efihdr - 1);
 1949         return (SYSCTL_OUT(req, efihdr, efisize));
 1950 }
 1951 SYSCTL_PROC(_machdep, OID_AUTO, efi_map, CTLTYPE_OPAQUE|CTLFLAG_RD, NULL, 0,
 1952     efi_map_sysctl_handler, "S,efi_map_header", "Raw EFI Memory Map");
 1953 
 1954 void
 1955 spinlock_enter(void)
 1956 {
 1957         struct thread *td;
 1958         register_t flags;
 1959 
 1960         td = curthread;
 1961         if (td->td_md.md_spinlock_count == 0) {
 1962                 flags = intr_disable();
 1963                 td->td_md.md_spinlock_count = 1;
 1964                 td->td_md.md_saved_flags = flags;
 1965                 critical_enter();
 1966         } else
 1967                 td->td_md.md_spinlock_count++;
 1968 }
 1969 
 1970 void
 1971 spinlock_exit(void)
 1972 {
 1973         struct thread *td;
 1974         register_t flags;
 1975 
 1976         td = curthread;
 1977         flags = td->td_md.md_saved_flags;
 1978         td->td_md.md_spinlock_count--;
 1979         if (td->td_md.md_spinlock_count == 0) {
 1980                 critical_exit();
 1981                 intr_restore(flags);
 1982         }
 1983 }
 1984 
 1985 /*
 1986  * Construct a PCB from a trapframe. This is called from kdb_trap() where
 1987  * we want to start a backtrace from the function that caused us to enter
 1988  * the debugger. We have the context in the trapframe, but base the trace
 1989  * on the PCB. The PCB doesn't have to be perfect, as long as it contains
 1990  * enough for a backtrace.
 1991  */
 1992 void
 1993 makectx(struct trapframe *tf, struct pcb *pcb)
 1994 {
 1995 
 1996         pcb->pcb_r12 = tf->tf_r12;
 1997         pcb->pcb_r13 = tf->tf_r13;
 1998         pcb->pcb_r14 = tf->tf_r14;
 1999         pcb->pcb_r15 = tf->tf_r15;
 2000         pcb->pcb_rbp = tf->tf_rbp;
 2001         pcb->pcb_rbx = tf->tf_rbx;
 2002         pcb->pcb_rip = tf->tf_rip;
 2003         pcb->pcb_rsp = tf->tf_rsp;
 2004 }
 2005 
int
ptrace_set_pc(struct thread *td, unsigned long addr)
{

	/* Redirect the thread's user-mode %rip to addr. */
	td->td_frame->tf_rip = addr;
	/*
	 * Return to user mode via full iret so the modified frame is
	 * honored instead of the fast sysret path.
	 */
	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
	return (0);
}
 2014 
 2015 int
 2016 ptrace_single_step(struct thread *td)
 2017 {
 2018 
 2019         PROC_LOCK_ASSERT(td->td_proc, MA_OWNED);
 2020         if ((td->td_frame->tf_rflags & PSL_T) == 0) {
 2021                 td->td_frame->tf_rflags |= PSL_T;
 2022                 td->td_dbgflags |= TDB_STEP;
 2023         }
 2024         return (0);
 2025 }
 2026 
int
ptrace_clear_single_step(struct thread *td)
{

	PROC_LOCK_ASSERT(td->td_proc, MA_OWNED);
	/* Disarm the CPU trap flag and forget the pending step request. */
	td->td_frame->tf_rflags &= ~PSL_T;
	td->td_dbgflags &= ~TDB_STEP;
	return (0);
}
 2036 
 2037 int
 2038 fill_regs(struct thread *td, struct reg *regs)
 2039 {
 2040         struct trapframe *tp;
 2041 
 2042         tp = td->td_frame;
 2043         return (fill_frame_regs(tp, regs));
 2044 }
 2045 
 2046 int
 2047 fill_frame_regs(struct trapframe *tp, struct reg *regs)
 2048 {
 2049 
 2050         regs->r_r15 = tp->tf_r15;
 2051         regs->r_r14 = tp->tf_r14;
 2052         regs->r_r13 = tp->tf_r13;
 2053         regs->r_r12 = tp->tf_r12;
 2054         regs->r_r11 = tp->tf_r11;
 2055         regs->r_r10 = tp->tf_r10;
 2056         regs->r_r9  = tp->tf_r9;
 2057         regs->r_r8  = tp->tf_r8;
 2058         regs->r_rdi = tp->tf_rdi;
 2059         regs->r_rsi = tp->tf_rsi;
 2060         regs->r_rbp = tp->tf_rbp;
 2061         regs->r_rbx = tp->tf_rbx;
 2062         regs->r_rdx = tp->tf_rdx;
 2063         regs->r_rcx = tp->tf_rcx;
 2064         regs->r_rax = tp->tf_rax;
 2065         regs->r_rip = tp->tf_rip;
 2066         regs->r_cs = tp->tf_cs;
 2067         regs->r_rflags = tp->tf_rflags;
 2068         regs->r_rsp = tp->tf_rsp;
 2069         regs->r_ss = tp->tf_ss;
 2070         if (tp->tf_flags & TF_HASSEGS) {
 2071                 regs->r_ds = tp->tf_ds;
 2072                 regs->r_es = tp->tf_es;
 2073                 regs->r_fs = tp->tf_fs;
 2074                 regs->r_gs = tp->tf_gs;
 2075         } else {
 2076                 regs->r_ds = 0;
 2077                 regs->r_es = 0;
 2078                 regs->r_fs = 0;
 2079                 regs->r_gs = 0;
 2080         }
 2081         regs->r_err = 0;
 2082         regs->r_trapno = 0;
 2083         return (0);
 2084 }
 2085 
int
set_regs(struct thread *td, struct reg *regs)
{
	struct trapframe *tp;
	register_t rflags;

	tp = td->td_frame;
	/*
	 * Only the low 32 bits of rflags are meaningful.  Reject changes
	 * to privileged flag bits (EFL_SECURE) and a non-user %cs
	 * (CS_SECURE) so a debugger cannot elevate the target's
	 * privilege through ptrace.
	 */
	rflags = regs->r_rflags & 0xffffffff;
	if (!EFL_SECURE(rflags, tp->tf_rflags) || !CS_SECURE(regs->r_cs))
		return (EINVAL);
	tp->tf_r15 = regs->r_r15;
	tp->tf_r14 = regs->r_r14;
	tp->tf_r13 = regs->r_r13;
	tp->tf_r12 = regs->r_r12;
	tp->tf_r11 = regs->r_r11;
	tp->tf_r10 = regs->r_r10;
	tp->tf_r9  = regs->r_r9;
	tp->tf_r8  = regs->r_r8;
	tp->tf_rdi = regs->r_rdi;
	tp->tf_rsi = regs->r_rsi;
	tp->tf_rbp = regs->r_rbp;
	tp->tf_rbx = regs->r_rbx;
	tp->tf_rdx = regs->r_rdx;
	tp->tf_rcx = regs->r_rcx;
	tp->tf_rax = regs->r_rax;
	tp->tf_rip = regs->r_rip;
	tp->tf_cs = regs->r_cs;
	tp->tf_rflags = rflags;
	tp->tf_rsp = regs->r_rsp;
	tp->tf_ss = regs->r_ss;
	/*
	 * Intentionally dead: writing the data segment selectors via
	 * ptrace is disabled pending a decision on the semantics.
	 */
	if (0) {	/* XXXKIB */
		tp->tf_ds = regs->r_ds;
		tp->tf_es = regs->r_es;
		tp->tf_fs = regs->r_fs;
		tp->tf_gs = regs->r_gs;
		tp->tf_flags = TF_HASSEGS;
	}
	/* Return via full iret so the rewritten frame takes effect. */
	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
	return (0);
}
 2126 
 2127 /* XXX check all this stuff! */
 2128 /* externalize from sv_xmm */
 2129 static void
 2130 fill_fpregs_xmm(struct savefpu *sv_xmm, struct fpreg *fpregs)
 2131 {
 2132         struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env;
 2133         struct envxmm *penv_xmm = &sv_xmm->sv_env;
 2134         int i;
 2135 
 2136         /* pcb -> fpregs */
 2137         bzero(fpregs, sizeof(*fpregs));
 2138 
 2139         /* FPU control/status */
 2140         penv_fpreg->en_cw = penv_xmm->en_cw;
 2141         penv_fpreg->en_sw = penv_xmm->en_sw;
 2142         penv_fpreg->en_tw = penv_xmm->en_tw;
 2143         penv_fpreg->en_opcode = penv_xmm->en_opcode;
 2144         penv_fpreg->en_rip = penv_xmm->en_rip;
 2145         penv_fpreg->en_rdp = penv_xmm->en_rdp;
 2146         penv_fpreg->en_mxcsr = penv_xmm->en_mxcsr;
 2147         penv_fpreg->en_mxcsr_mask = penv_xmm->en_mxcsr_mask;
 2148 
 2149         /* FPU registers */
 2150         for (i = 0; i < 8; ++i)
 2151                 bcopy(sv_xmm->sv_fp[i].fp_acc.fp_bytes, fpregs->fpr_acc[i], 10);
 2152 
 2153         /* SSE registers */
 2154         for (i = 0; i < 16; ++i)
 2155                 bcopy(sv_xmm->sv_xmm[i].xmm_bytes, fpregs->fpr_xacc[i], 16);
 2156 }
 2157 
 2158 /* internalize from fpregs into sv_xmm */
 2159 static void
 2160 set_fpregs_xmm(struct fpreg *fpregs, struct savefpu *sv_xmm)
 2161 {
 2162         struct envxmm *penv_xmm = &sv_xmm->sv_env;
 2163         struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env;
 2164         int i;
 2165 
 2166         /* fpregs -> pcb */
 2167         /* FPU control/status */
 2168         penv_xmm->en_cw = penv_fpreg->en_cw;
 2169         penv_xmm->en_sw = penv_fpreg->en_sw;
 2170         penv_xmm->en_tw = penv_fpreg->en_tw;
 2171         penv_xmm->en_opcode = penv_fpreg->en_opcode;
 2172         penv_xmm->en_rip = penv_fpreg->en_rip;
 2173         penv_xmm->en_rdp = penv_fpreg->en_rdp;
 2174         penv_xmm->en_mxcsr = penv_fpreg->en_mxcsr;
 2175         penv_xmm->en_mxcsr_mask = penv_fpreg->en_mxcsr_mask & cpu_mxcsr_mask;
 2176 
 2177         /* FPU registers */
 2178         for (i = 0; i < 8; ++i)
 2179                 bcopy(fpregs->fpr_acc[i], sv_xmm->sv_fp[i].fp_acc.fp_bytes, 10);
 2180 
 2181         /* SSE registers */
 2182         for (i = 0; i < 16; ++i)
 2183                 bcopy(fpregs->fpr_xacc[i], sv_xmm->sv_xmm[i].xmm_bytes, 16);
 2184 }
 2185 
/* externalize from td->pcb */
int
fill_fpregs(struct thread *td, struct fpreg *fpregs)
{

	/*
	 * The target must not be running: either it is curthread, or it
	 * is suspended/stopped, so the FPU state being read is stable.
	 */
	KASSERT(td == curthread || TD_IS_SUSPENDED(td) ||
	    P_SHOULDSTOP(td->td_proc),
	    ("not suspended thread %p", td));
	/* Flush any live FPU contents into the PCB save area first. */
	fpugetregs(td);
	fill_fpregs_xmm(get_pcb_user_save_td(td), fpregs);
	return (0);
}
 2198 
/* internalize to td->pcb */
int
set_fpregs(struct thread *td, struct fpreg *fpregs)
{

	/*
	 * Block context switches while the save area is rewritten so a
	 * partially-updated state is never loaded into the FPU.
	 */
	critical_enter();
	set_fpregs_xmm(fpregs, get_pcb_user_save_td(td));
	/* Mark the user FPU state as initialized from the new contents. */
	fpuuserinited(td);
	critical_exit();
	return (0);
}
 2210 
/*
 * Get machine context.
 */
int
get_mcontext(struct thread *td, mcontext_t *mcp, int flags)
{
	struct pcb *pcb;
	struct trapframe *tp;

	pcb = td->td_pcb;
	tp = td->td_frame;
	/* sigonstack() consults signal state; hold the proc lock. */
	PROC_LOCK(curthread->td_proc);
	mcp->mc_onstack = sigonstack(tp->tf_rsp);
	PROC_UNLOCK(curthread->td_proc);
	mcp->mc_r15 = tp->tf_r15;
	mcp->mc_r14 = tp->tf_r14;
	mcp->mc_r13 = tp->tf_r13;
	mcp->mc_r12 = tp->tf_r12;
	mcp->mc_r11 = tp->tf_r11;
	mcp->mc_r10 = tp->tf_r10;
	mcp->mc_r9  = tp->tf_r9;
	mcp->mc_r8  = tp->tf_r8;
	mcp->mc_rdi = tp->tf_rdi;
	mcp->mc_rsi = tp->tf_rsi;
	mcp->mc_rbp = tp->tf_rbp;
	mcp->mc_rbx = tp->tf_rbx;
	mcp->mc_rcx = tp->tf_rcx;
	mcp->mc_rflags = tp->tf_rflags;
	/*
	 * GET_MC_CLEAR_RET: present the context as if the interrupted
	 * syscall returned 0 (rax/rdx cleared, carry — the error
	 * indicator — cleared).
	 */
	if (flags & GET_MC_CLEAR_RET) {
		mcp->mc_rax = 0;
		mcp->mc_rdx = 0;
		mcp->mc_rflags &= ~PSL_C;
	} else {
		mcp->mc_rax = tp->tf_rax;
		mcp->mc_rdx = tp->tf_rdx;
	}
	mcp->mc_rip = tp->tf_rip;
	mcp->mc_cs = tp->tf_cs;
	mcp->mc_rsp = tp->tf_rsp;
	mcp->mc_ss = tp->tf_ss;
	mcp->mc_ds = tp->tf_ds;
	mcp->mc_es = tp->tf_es;
	mcp->mc_fs = tp->tf_fs;
	mcp->mc_gs = tp->tf_gs;
	mcp->mc_flags = tp->tf_flags;
	mcp->mc_len = sizeof(*mcp);
	/* Legacy FPU area only; no extended (xsave) buffer supplied. */
	get_fpcontext(td, mcp, NULL, 0);
	/* Refresh pcb_fsbase/pcb_gsbase before exporting them. */
	update_pcb_bases(pcb);
	mcp->mc_fsbase = pcb->pcb_fsbase;
	mcp->mc_gsbase = pcb->pcb_gsbase;
	mcp->mc_xfpustate = 0;
	mcp->mc_xfpustate_len = 0;
	/* Do not leak kernel stack contents through the spare fields. */
	bzero(mcp->mc_spare, sizeof(mcp->mc_spare));
	return (0);
}
 2266 
/*
 * Set machine context.
 *
 * However, we don't set any but the user modifiable flags, and we won't
 * touch the cs selector.
 */
int
set_mcontext(struct thread *td, mcontext_t *mcp)
{
	struct pcb *pcb;
	struct trapframe *tp;
	char *xfpustate;
	long rflags;
	int ret;

	pcb = td->td_pcb;
	tp = td->td_frame;
	/* Validate the structure size and flag bits before touching tp. */
	if (mcp->mc_len != sizeof(*mcp) ||
	    (mcp->mc_flags & ~_MC_FLAG_MASK) != 0)
		return (EINVAL);
	/* Accept only the user-modifiable rflags bits; keep the rest. */
	rflags = (mcp->mc_rflags & PSL_USERCHANGE) |
	    (tp->tf_rflags & ~PSL_USERCHANGE);
	if (mcp->mc_flags & _MC_HASFPXSTATE) {
		/* Bound the extended-state length before the alloca. */
		if (mcp->mc_xfpustate_len > cpu_max_ext_state_size -
		    sizeof(struct savefpu))
			return (EINVAL);
		xfpustate = __builtin_alloca(mcp->mc_xfpustate_len);
		ret = copyin((void *)mcp->mc_xfpustate, xfpustate,
		    mcp->mc_xfpustate_len);
		if (ret != 0)
			return (ret);
	} else
		xfpustate = NULL;
	/* Install FPU state first; fail before the frame is modified. */
	ret = set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len);
	if (ret != 0)
		return (ret);
	tp->tf_r15 = mcp->mc_r15;
	tp->tf_r14 = mcp->mc_r14;
	tp->tf_r13 = mcp->mc_r13;
	tp->tf_r12 = mcp->mc_r12;
	tp->tf_r11 = mcp->mc_r11;
	tp->tf_r10 = mcp->mc_r10;
	tp->tf_r9  = mcp->mc_r9;
	tp->tf_r8  = mcp->mc_r8;
	tp->tf_rdi = mcp->mc_rdi;
	tp->tf_rsi = mcp->mc_rsi;
	tp->tf_rbp = mcp->mc_rbp;
	tp->tf_rbx = mcp->mc_rbx;
	tp->tf_rdx = mcp->mc_rdx;
	tp->tf_rcx = mcp->mc_rcx;
	tp->tf_rax = mcp->mc_rax;
	tp->tf_rip = mcp->mc_rip;
	tp->tf_rflags = rflags;
	tp->tf_rsp = mcp->mc_rsp;
	tp->tf_ss = mcp->mc_ss;
	tp->tf_flags = mcp->mc_flags;
	if (tp->tf_flags & TF_HASSEGS) {
		tp->tf_ds = mcp->mc_ds;
		tp->tf_es = mcp->mc_es;
		tp->tf_fs = mcp->mc_fs;
		tp->tf_gs = mcp->mc_gs;
	}
	/* Return via full iret so the rewritten frame takes effect. */
	set_pcb_flags(pcb, PCB_FULL_IRET);
	if (mcp->mc_flags & _MC_HASBASES) {
		pcb->pcb_fsbase = mcp->mc_fsbase;
		pcb->pcb_gsbase = mcp->mc_gsbase;
	}
	return (0);
}
 2336 
 2337 static void
 2338 get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave,
 2339     size_t xfpusave_len)
 2340 {
 2341         size_t max_len, len;
 2342 
 2343         mcp->mc_ownedfp = fpugetregs(td);
 2344         bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0],
 2345             sizeof(mcp->mc_fpstate));
 2346         mcp->mc_fpformat = fpuformat();
 2347         if (!use_xsave || xfpusave_len == 0)
 2348                 return;
 2349         max_len = cpu_max_ext_state_size - sizeof(struct savefpu);
 2350         len = xfpusave_len;
 2351         if (len > max_len) {
 2352                 len = max_len;
 2353                 bzero(xfpusave + max_len, len - max_len);
 2354         }
 2355         mcp->mc_flags |= _MC_HASFPXSTATE;
 2356         mcp->mc_xfpustate_len = len;
 2357         bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len);
 2358 }
 2359 
 2360 static int
 2361 set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate,
 2362     size_t xfpustate_len)
 2363 {
 2364         int error;
 2365 
 2366         if (mcp->mc_fpformat == _MC_FPFMT_NODEV)
 2367                 return (0);
 2368         else if (mcp->mc_fpformat != _MC_FPFMT_XMM)
 2369                 return (EINVAL);
 2370         else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) {
 2371                 /* We don't care what state is left in the FPU or PCB. */
 2372                 fpstate_drop(td);
 2373                 error = 0;
 2374         } else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU ||
 2375             mcp->mc_ownedfp == _MC_FPOWNED_PCB) {
 2376                 error = fpusetregs(td, (struct savefpu *)&mcp->mc_fpstate,
 2377                     xfpustate, xfpustate_len);
 2378         } else
 2379                 return (EINVAL);
 2380         return (error);
 2381 }
 2382 
/*
 * Discard td's pending user FPU state so the next FPU use starts from
 * a clean, freshly-initialized state.
 */
void
fpstate_drop(struct thread *td)
{

	KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu"));
	/* Prevent a context switch between the ownership test and drop. */
	critical_enter();
	if (PCPU_GET(fpcurthread) == td)
		fpudrop();
	/*
	 * XXX force a full drop of the fpu.  The above only drops it if we
	 * owned it.
	 *
	 * XXX I don't much like fpugetuserregs()'s semantics of doing a full
	 * drop.  Dropping only to the pcb matches fnsave's behaviour.
	 * We only need to drop to !PCB_INITDONE in sendsig().  But
	 * sendsig() is the only caller of fpugetuserregs()... perhaps we just
	 * have too many layers.
	 */
	clear_pcb_flags(curthread->td_pcb,
	    PCB_FPUINITDONE | PCB_USERFPUINITDONE);
	critical_exit();
}
 2405 
 2406 int
 2407 fill_dbregs(struct thread *td, struct dbreg *dbregs)
 2408 {
 2409         struct pcb *pcb;
 2410 
 2411         if (td == NULL) {
 2412                 dbregs->dr[0] = rdr0();
 2413                 dbregs->dr[1] = rdr1();
 2414                 dbregs->dr[2] = rdr2();
 2415                 dbregs->dr[3] = rdr3();
 2416                 dbregs->dr[6] = rdr6();
 2417                 dbregs->dr[7] = rdr7();
 2418         } else {
 2419                 pcb = td->td_pcb;
 2420                 dbregs->dr[0] = pcb->pcb_dr0;
 2421                 dbregs->dr[1] = pcb->pcb_dr1;
 2422                 dbregs->dr[2] = pcb->pcb_dr2;
 2423                 dbregs->dr[3] = pcb->pcb_dr3;
 2424                 dbregs->dr[6] = pcb->pcb_dr6;
 2425                 dbregs->dr[7] = pcb->pcb_dr7;
 2426         }
 2427         dbregs->dr[4] = 0;
 2428         dbregs->dr[5] = 0;
 2429         dbregs->dr[8] = 0;
 2430         dbregs->dr[9] = 0;
 2431         dbregs->dr[10] = 0;
 2432         dbregs->dr[11] = 0;
 2433         dbregs->dr[12] = 0;
 2434         dbregs->dr[13] = 0;
 2435         dbregs->dr[14] = 0;
 2436         dbregs->dr[15] = 0;
 2437         return (0);
 2438 }
 2439 
 2440 int
 2441 set_dbregs(struct thread *td, struct dbreg *dbregs)
 2442 {
 2443         struct pcb *pcb;
 2444         int i;
 2445 
 2446         if (td == NULL) {
 2447                 load_dr0(dbregs->dr[0]);
 2448                 load_dr1(dbregs->dr[1]);
 2449                 load_dr2(dbregs->dr[2]);
 2450                 load_dr3(dbregs->dr[3]);
 2451                 load_dr6(dbregs->dr[6]);
 2452                 load_dr7(dbregs->dr[7]);
 2453         } else {
 2454                 /*
 2455                  * Don't let an illegal value for dr7 get set.  Specifically,
 2456                  * check for undefined settings.  Setting these bit patterns
 2457                  * result in undefined behaviour and can lead to an unexpected
 2458                  * TRCTRAP or a general protection fault right here.
 2459                  * Upper bits of dr6 and dr7 must not be set
 2460                  */
 2461                 for (i = 0; i < 4; i++) {
 2462                         if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02)
 2463                                 return (EINVAL);
 2464                         if (td->td_frame->tf_cs == _ucode32sel &&
 2465                             DBREG_DR7_LEN(dbregs->dr[7], i) == DBREG_DR7_LEN_8)
 2466                                 return (EINVAL);
 2467                 }
 2468                 if ((dbregs->dr[6] & 0xffffffff00000000ul) != 0 ||
 2469                     (dbregs->dr[7] & 0xffffffff00000000ul) != 0)
 2470                         return (EINVAL);
 2471 
 2472                 pcb = td->td_pcb;
 2473 
 2474                 /*
 2475                  * Don't let a process set a breakpoint that is not within the
 2476                  * process's address space.  If a process could do this, it
 2477                  * could halt the system by setting a breakpoint in the kernel
 2478                  * (if ddb was enabled).  Thus, we need to check to make sure
 2479                  * that no breakpoints are being enabled for addresses outside
 2480                  * process's address space.
 2481                  *
 2482                  * XXX - what about when the watched area of the user's
 2483                  * address space is written into from within the kernel
 2484                  * ... wouldn't that still cause a breakpoint to be generated
 2485                  * from within kernel mode?
 2486                  */
 2487 
 2488                 if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) {
 2489                         /* dr0 is enabled */
 2490                         if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS)
 2491                                 return (EINVAL);
 2492                 }
 2493                 if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) {
 2494                         /* dr1 is enabled */
 2495                         if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS)
 2496                                 return (EINVAL);
 2497                 }
 2498                 if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) {
 2499                         /* dr2 is enabled */
 2500                         if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS)
 2501                                 return (EINVAL);
 2502                 }
 2503                 if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) {
 2504                         /* dr3 is enabled */
 2505                         if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS)
 2506                                 return (EINVAL);
 2507                 }
 2508 
 2509                 pcb->pcb_dr0 = dbregs->dr[0];
 2510                 pcb->pcb_dr1 = dbregs->dr[1];
 2511                 pcb->pcb_dr2 = dbregs->dr[2];
 2512                 pcb->pcb_dr3 = dbregs->dr[3];
 2513                 pcb->pcb_dr6 = dbregs->dr[6];
 2514                 pcb->pcb_dr7 = dbregs->dr[7];
 2515 
 2516                 set_pcb_flags(pcb, PCB_DBREGS);
 2517         }
 2518 
 2519         return (0);
 2520 }
 2521 
/*
 * Clear all hardware debug registers on the current CPU.  dr7 is
 * cleared first so every breakpoint is disabled before its address
 * register is zeroed.
 */
void
reset_dbregs(void)
{

        load_dr7(0);    /* Turn off the control bits first */
        load_dr0(0);
        load_dr1(0);
        load_dr2(0);
        load_dr3(0);
        load_dr6(0);
}
 2533 
 2534 /*
 2535  * Return > 0 if a hardware breakpoint has been hit, and the
 2536  * breakpoint was in user space.  Return 0, otherwise.
 2537  */
 2538 int
 2539 user_dbreg_trap(register_t dr6)
 2540 {
 2541         u_int64_t dr7;
 2542         u_int64_t bp;       /* breakpoint bits extracted from dr6 */
 2543         int nbp;            /* number of breakpoints that triggered */
 2544         caddr_t addr[4];    /* breakpoint addresses */
 2545         int i;
 2546 
 2547         bp = dr6 & DBREG_DR6_BMASK;
 2548         if (bp == 0) {
 2549                 /*
 2550                  * None of the breakpoint bits are set meaning this
 2551                  * trap was not caused by any of the debug registers
 2552                  */
 2553                 return 0;
 2554         }
 2555 
 2556         dr7 = rdr7();
 2557         if ((dr7 & 0x000000ff) == 0) {
 2558                 /*
 2559                  * all GE and LE bits in the dr7 register are zero,
 2560                  * thus the trap couldn't have been caused by the
 2561                  * hardware debug registers
 2562                  */
 2563                 return 0;
 2564         }
 2565 
 2566         nbp = 0;
 2567 
 2568         /*
 2569          * at least one of the breakpoints were hit, check to see
 2570          * which ones and if any of them are user space addresses
 2571          */
 2572 
 2573         if (bp & 0x01) {
 2574                 addr[nbp++] = (caddr_t)rdr0();
 2575         }
 2576         if (bp & 0x02) {
 2577                 addr[nbp++] = (caddr_t)rdr1();
 2578         }
 2579         if (bp & 0x04) {
 2580                 addr[nbp++] = (caddr_t)rdr2();
 2581         }
 2582         if (bp & 0x08) {
 2583                 addr[nbp++] = (caddr_t)rdr3();
 2584         }
 2585 
 2586         for (i = 0; i < nbp; i++) {
 2587                 if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) {
 2588                         /*
 2589                          * addr[i] is in user space
 2590                          */
 2591                         return nbp;
 2592                 }
 2593         }
 2594 
 2595         /*
 2596          * None of the breakpoints are in user space.
 2597          */
 2598         return 0;
 2599 }
 2600 
 2601 /*
 2602  * The pcb_flags is only modified by current thread, or by other threads
 2603  * when current thread is stopped.  However, current thread may change it
 2604  * from the interrupt context in cpu_switch(), or in the trap handler.
 2605  * When we read-modify-write pcb_flags from C sources, compiler may generate
 2606  * code that is not atomic regarding the interrupt handler.  If a trap or
 2607  * interrupt happens and any flag is modified from the handler, it can be
 2608  * clobbered with the cached value later.  Therefore, we implement setting
 2609  * and clearing flags with single-instruction functions, which do not race
 2610  * with possible modification of the flags from the trap or interrupt context,
 2611  * because traps and interrupts are executed only on instruction boundary.
 2612  */
 2613 void
 2614 set_pcb_flags_raw(struct pcb *pcb, const u_int flags)
 2615 {
 2616 
 2617         __asm __volatile("orl %1,%0"
 2618             : "=m" (pcb->pcb_flags) : "ir" (flags), "m" (pcb->pcb_flags)
 2619             : "cc", "memory");
 2620 
 2621 }
 2622 
 2623 /*
 2624  * The support for RDFSBASE, WRFSBASE and similar instructions for %gs
 2625  * base requires that kernel saves MSR_FSBASE and MSR_{K,}GSBASE into
 2626  * pcb if user space modified the bases.  We must save on the context
 2627  * switch or if the return to usermode happens through the doreti.
 2628  *
 2629  * Tracking of both events is performed by the pcb flag PCB_FULL_IRET,
 2630  * which have a consequence that the base MSRs must be saved each time
 2631  * the PCB_FULL_IRET flag is set.  We disable interrupts to sync with
 2632  * context switches.
 2633  */
 2634 static void
 2635 set_pcb_flags_fsgsbase(struct pcb *pcb, const u_int flags)
 2636 {
 2637         register_t r;
 2638 
 2639         if (curpcb == pcb &&
 2640             (flags & PCB_FULL_IRET) != 0 &&
 2641             (pcb->pcb_flags & PCB_FULL_IRET) == 0) {
 2642                 r = intr_disable();
 2643                 if ((pcb->pcb_flags & PCB_FULL_IRET) == 0) {
 2644                         if (rfs() == _ufssel)
 2645                                 pcb->pcb_fsbase = rdfsbase();
 2646                         if (rgs() == _ugssel)
 2647                                 pcb->pcb_gsbase = rdmsr(MSR_KGSBASE);
 2648                 }
 2649                 set_pcb_flags_raw(pcb, flags);
 2650                 intr_restore(r);
 2651         } else {
 2652                 set_pcb_flags_raw(pcb, flags);
 2653         }
 2654 }
 2655 
 2656 DEFINE_IFUNC(, void, set_pcb_flags, (struct pcb *, const u_int), static)
 2657 {
 2658 
 2659         return ((cpu_stdext_feature & CPUID_STDEXT_FSGSBASE) != 0 ?
 2660             set_pcb_flags_fsgsbase : set_pcb_flags_raw);
 2661 }
 2662 
/*
 * Clear pcb_flags bits with a single "andl" instruction, so the
 * read-modify-write cannot race with a trap or interrupt handler
 * that also modifies the flags (interrupts only occur on instruction
 * boundaries).
 */
void
clear_pcb_flags(struct pcb *pcb, const u_int flags)
{

        __asm __volatile("andl %1,%0"
            : "=m" (pcb->pcb_flags) : "ir" (~flags), "m" (pcb->pcb_flags)
            : "cc", "memory");
}
 2671 
#ifdef KDB

/*
 * Provide inb() and outb() as functions.  They are normally only available as
 * inline functions, thus cannot be called from the debugger.
 */

/* silence compiler warnings */
u_char inb_(u_short);
void outb_(u_short, u_char);

/* Out-of-line wrapper so the debugger can read an I/O port. */
u_char
inb_(u_short port)
{
        return inb(port);
}

/* Out-of-line wrapper so the debugger can write an I/O port. */
void
outb_(u_short port, u_char data)
{
        outb(port, data);
}

#endif /* KDB */
 2696 
 2697 #undef memset
 2698 #undef memmove
 2699 #undef memcpy
 2700 
 2701 void    *memset_std(void *buf, int c, size_t len);
 2702 void    *memset_erms(void *buf, int c, size_t len);
 2703 DEFINE_IFUNC(, void *, memset, (void *, int, size_t), static)
 2704 {
 2705 
 2706         return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ?
 2707             memset_erms : memset_std);
 2708 }
 2709 
 2710 void    *memmove_std(void * _Nonnull dst, const void * _Nonnull src,
 2711             size_t len);
 2712 void    *memmove_erms(void * _Nonnull dst, const void * _Nonnull src,
 2713             size_t len);
 2714 DEFINE_IFUNC(, void *, memmove, (void * _Nonnull, const void * _Nonnull,
 2715     size_t), static)
 2716 {
 2717 
 2718         return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ?
 2719             memmove_erms : memmove_std);
 2720 }
 2721 
 2722 void    *memcpy_std(void * _Nonnull dst, const void * _Nonnull src,
 2723             size_t len);
 2724 void    *memcpy_erms(void * _Nonnull dst, const void * _Nonnull src,
 2725             size_t len);
 2726 DEFINE_IFUNC(, void *, memcpy, (void * _Nonnull, const void * _Nonnull,size_t),
 2727     static)
 2728 {
 2729 
 2730         return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ?
 2731             memcpy_erms : memcpy_std);
 2732 }
 2733 
 2734 void    pagezero_std(void *addr);
 2735 void    pagezero_erms(void *addr);
 2736 DEFINE_IFUNC(, void , pagezero, (void *), static)
 2737 {
 2738 
 2739         return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ?
 2740             pagezero_erms : pagezero_std);
 2741 }

Cache object: b25f8d4c88c0f10f0678f2acdec90236


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.