The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/amd64/amd64/machdep.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-4-Clause
    3  *
    4  * Copyright (c) 2003 Peter Wemm.
    5  * Copyright (c) 1992 Terrence R. Lambert.
    6  * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
    7  * All rights reserved.
    8  *
    9  * This code is derived from software contributed to Berkeley by
   10  * William Jolitz.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice, this list of conditions and the following disclaimer.
   17  * 2. Redistributions in binary form must reproduce the above copyright
   18  *    notice, this list of conditions and the following disclaimer in the
   19  *    documentation and/or other materials provided with the distribution.
   20  * 3. All advertising materials mentioning features or use of this software
   21  *    must display the following acknowledgement:
   22  *      This product includes software developed by the University of
   23  *      California, Berkeley and its contributors.
   24  * 4. Neither the name of the University nor the names of its contributors
   25  *    may be used to endorse or promote products derived from this software
   26  *    without specific prior written permission.
   27  *
   28  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   38  * SUCH DAMAGE.
   39  *
   40  *      from: @(#)machdep.c     7.4 (Berkeley) 6/3/91
   41  */
   42 
   43 #include <sys/cdefs.h>
   44 __FBSDID("$FreeBSD: head/sys/amd64/amd64/machdep.c 341442 2018-12-03 20:54:17Z markj $");
   45 
   46 #include "opt_atpic.h"
   47 #include "opt_cpu.h"
   48 #include "opt_ddb.h"
   49 #include "opt_inet.h"
   50 #include "opt_isa.h"
   51 #include "opt_kstack_pages.h"
   52 #include "opt_maxmem.h"
   53 #include "opt_mp_watchdog.h"
   54 #include "opt_pci.h"
   55 #include "opt_platform.h"
   56 #include "opt_sched.h"
   57 
   58 #include <sys/param.h>
   59 #include <sys/proc.h>
   60 #include <sys/systm.h>
   61 #include <sys/bio.h>
   62 #include <sys/buf.h>
   63 #include <sys/bus.h>
   64 #include <sys/callout.h>
   65 #include <sys/cons.h>
   66 #include <sys/cpu.h>
   67 #include <sys/efi.h>
   68 #include <sys/eventhandler.h>
   69 #include <sys/exec.h>
   70 #include <sys/imgact.h>
   71 #include <sys/kdb.h>
   72 #include <sys/kernel.h>
   73 #include <sys/ktr.h>
   74 #include <sys/linker.h>
   75 #include <sys/lock.h>
   76 #include <sys/malloc.h>
   77 #include <sys/memrange.h>
   78 #include <sys/msgbuf.h>
   79 #include <sys/mutex.h>
   80 #include <sys/pcpu.h>
   81 #include <sys/ptrace.h>
   82 #include <sys/reboot.h>
   83 #include <sys/rwlock.h>
   84 #include <sys/sched.h>
   85 #include <sys/signalvar.h>
   86 #ifdef SMP
   87 #include <sys/smp.h>
   88 #endif
   89 #include <sys/syscallsubr.h>
   90 #include <sys/sysctl.h>
   91 #include <sys/sysent.h>
   92 #include <sys/sysproto.h>
   93 #include <sys/ucontext.h>
   94 #include <sys/vmmeter.h>
   95 
   96 #include <vm/vm.h>
   97 #include <vm/vm_extern.h>
   98 #include <vm/vm_kern.h>
   99 #include <vm/vm_page.h>
  100 #include <vm/vm_map.h>
  101 #include <vm/vm_object.h>
  102 #include <vm/vm_pager.h>
  103 #include <vm/vm_param.h>
  104 #include <vm/vm_phys.h>
  105 
  106 #ifdef DDB
  107 #ifndef KDB
  108 #error KDB must be enabled in order for DDB to work!
  109 #endif
  110 #include <ddb/ddb.h>
  111 #include <ddb/db_sym.h>
  112 #endif
  113 
  114 #include <net/netisr.h>
  115 
  116 #include <machine/clock.h>
  117 #include <machine/cpu.h>
  118 #include <machine/cputypes.h>
  119 #include <machine/frame.h>
  120 #include <machine/intr_machdep.h>
  121 #include <x86/mca.h>
  122 #include <machine/md_var.h>
  123 #include <machine/metadata.h>
  124 #include <machine/mp_watchdog.h>
  125 #include <machine/pc/bios.h>
  126 #include <machine/pcb.h>
  127 #include <machine/proc.h>
  128 #include <machine/reg.h>
  129 #include <machine/sigframe.h>
  130 #include <machine/specialreg.h>
  131 #include <machine/trap.h>
  132 #include <machine/tss.h>
  133 #include <x86/ucode.h>
  134 #include <x86/ifunc.h>
  135 #ifdef SMP
  136 #include <machine/smp.h>
  137 #endif
  138 #ifdef FDT
  139 #include <x86/fdt.h>
  140 #endif
  141 
  142 #ifdef DEV_ATPIC
  143 #include <x86/isa/icu.h>
  144 #else
  145 #include <x86/apicvar.h>
  146 #endif
  147 
  148 #include <isa/isareg.h>
  149 #include <isa/rtc.h>
  150 #include <x86/init.h>
  151 
/* Sanity check for __curthread() */
CTASSERT(offsetof(struct pcpu, pc_curthread) == 0);

/*
 * The PTI trampoline stack needs enough space for a hardware trapframe and a
 * couple of scratch registers, as well as the trapframe left behind after an
 * iret fault.
 */
CTASSERT(PC_PTI_STACK_SZ * sizeof(register_t) >= 2 * sizeof(struct pti_frame) -
    offsetof(struct pti_frame, pti_rip));

/* Early machine-dependent boot entry; presumably implemented in the locore
 * startup path — confirm (definition is not in this file). */
extern u_int64_t hammer_time(u_int64_t, u_int64_t);

/* A %cs selector is acceptable for return-to-user iff it is user privilege. */
#define CS_SECURE(cs)           (ISPL(cs) == SEL_UPL)
/* An rflags value is acceptable iff it differs from the current one only in
 * user-changeable bits (PSL_USERCHANGE). */
#define EFL_SECURE(ef, oef)     ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)

static void cpu_startup(void *);
/* FPU extended-state save/restore helpers used by sendsig()/sys_sigreturn(). */
static void get_fpcontext(struct thread *td, mcontext_t *mcp,
    char *xfpusave, size_t xfpusave_len);
static int  set_fpcontext(struct thread *td, mcontext_t *mcp,
    char *xfpustate, size_t xfpustate_len);
/* Run cpu_startup() once during boot at SI_SUB_CPU. */
SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);

/* Preload data parse function */
static caddr_t native_parse_preload_data(u_int64_t);

/* Native function to fetch and parse the e820 map */
static void native_parse_memmap(caddr_t, vm_paddr_t *, int *);
  180 
/*
 * Default init_ops implementation, wired to the native (bare-metal) boot
 * path: loader-metadata preload parsing, i8254-based early clock and delay,
 * and e820-based memory-map parsing.  NOTE(review): presumably overridden by
 * paravirtualized platforms that install their own init_ops — confirm.
 */
struct init_ops init_ops = {
        .parse_preload_data =   native_parse_preload_data,
        .early_clock_source_init =      i8254_init,
        .early_delay =                  i8254_delay,
        .parse_memmap =                 native_parse_memmap,
#ifdef SMP
        .mp_bootaddress =               mp_bootaddress,
        .start_all_aps =                native_start_all_aps,
#endif
#ifdef DEV_PCI
        .msi_init =                     msi_init,
#endif
};
  195 
/*
 * Physical address of the EFI System Table. Stashed from the metadata hints
 * passed into the kernel and used by the EFI code to call runtime services.
 * NOTE(review): presumably remains 0 when the system was not booted via
 * EFI — confirm against the loader metadata parsing.
 */
vm_paddr_t efi_systbl_phys;
  202 /* Intel ICH registers */
  203 #define ICH_PMBASE      0x400
  204 #define ICH_SMI_EN      ICH_PMBASE + 0x30
  205 
/* User segment selectors; presumably initialized during boot — confirm in
 * the GDT setup code. */
int     _udatasel, _ucodesel, _ucode32sel, _ufssel, _ugssel;

/* Nonzero while booting; NOTE(review): presumably cleared once the system
 * is up — confirm where it is reset. */
int cold = 1;

long Maxmem = 0;        /* physical memory size in pages (ptoa(Maxmem) is used as a byte count in cpu_startup()) */
long realmem = 0;       /* real memory size in pages (set from atop(memsize) in cpu_startup()) */

/*
 * The number of PHYSMAP entries must be one less than the number of
 * PHYSSEG entries because the PHYSMAP entry that spans the largest
 * physical address that is accessible by ISA DMA is split into two
 * PHYSSEG entries.
 */
#define PHYSMAP_SIZE    (2 * (VM_PHYSSEG_MAX - 1))

/* Usable/dumpable physical address ranges, stored as (start, end) pairs
 * and terminated by a pair of zeroes (see cpu_startup()'s walk). */
vm_paddr_t phys_avail[PHYSMAP_SIZE + 2];
vm_paddr_t dump_avail[PHYSMAP_SIZE + 2];

/* must be 2 less so 0 0 can signal end of chunks */
#define PHYS_AVAIL_ARRAY_END (nitems(phys_avail) - 2)
#define DUMP_AVAIL_ARRAY_END (nitems(dump_avail) - 2)

/* Kernel VA submap layout; filled in by vm_ksubmap_init() in cpu_startup(). */
struct kva_md_info kmi;

static struct trapframe proc0_tf;       /* initial trap frame for proc0 — TODO confirm usage */
struct region_descriptor r_gdt, r_idt;  /* descriptors presumably used for lgdt/lidt — confirm */

struct pcpu __pcpu[MAXCPU];             /* per-CPU data areas */

struct mtx icu_lock;                    /* interrupt-controller lock */

struct mem_range_softc mem_range_softc;

struct mtx dt_lock;     /* lock for GDT and LDT */

/* Hypervisor resume hook; NOTE(review): presumably NULL unless a VMM is
 * loaded — confirm against the vmm module. */
void (*vmm_resume_p)(void);
  242 
  243 static void
  244 cpu_startup(dummy)
  245         void *dummy;
  246 {
  247         uintmax_t memsize;
  248         char *sysenv;
  249 
  250         /*
  251          * On MacBooks, we need to disallow the legacy USB circuit to
  252          * generate an SMI# because this can cause several problems,
  253          * namely: incorrect CPU frequency detection and failure to
  254          * start the APs.
  255          * We do this by disabling a bit in the SMI_EN (SMI Control and
  256          * Enable register) of the Intel ICH LPC Interface Bridge. 
  257          */
  258         sysenv = kern_getenv("smbios.system.product");
  259         if (sysenv != NULL) {
  260                 if (strncmp(sysenv, "MacBook1,1", 10) == 0 ||
  261                     strncmp(sysenv, "MacBook3,1", 10) == 0 ||
  262                     strncmp(sysenv, "MacBook4,1", 10) == 0 ||
  263                     strncmp(sysenv, "MacBookPro1,1", 13) == 0 ||
  264                     strncmp(sysenv, "MacBookPro1,2", 13) == 0 ||
  265                     strncmp(sysenv, "MacBookPro3,1", 13) == 0 ||
  266                     strncmp(sysenv, "MacBookPro4,1", 13) == 0 ||
  267                     strncmp(sysenv, "Macmini1,1", 10) == 0) {
  268                         if (bootverbose)
  269                                 printf("Disabling LEGACY_USB_EN bit on "
  270                                     "Intel ICH.\n");
  271                         outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8);
  272                 }
  273                 freeenv(sysenv);
  274         }
  275 
  276         /*
  277          * Good {morning,afternoon,evening,night}.
  278          */
  279         startrtclock();
  280         printcpuinfo();
  281 
  282         /*
  283          * Display physical memory if SMBIOS reports reasonable amount.
  284          */
  285         memsize = 0;
  286         sysenv = kern_getenv("smbios.memory.enabled");
  287         if (sysenv != NULL) {
  288                 memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10;
  289                 freeenv(sysenv);
  290         }
  291         if (memsize < ptoa((uintmax_t)vm_free_count()))
  292                 memsize = ptoa((uintmax_t)Maxmem);
  293         printf("real memory  = %ju (%ju MB)\n", memsize, memsize >> 20);
  294         realmem = atop(memsize);
  295 
  296         /*
  297          * Display any holes after the first chunk of extended memory.
  298          */
  299         if (bootverbose) {
  300                 int indx;
  301 
  302                 printf("Physical memory chunk(s):\n");
  303                 for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
  304                         vm_paddr_t size;
  305 
  306                         size = phys_avail[indx + 1] - phys_avail[indx];
  307                         printf(
  308                             "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n",
  309                             (uintmax_t)phys_avail[indx],
  310                             (uintmax_t)phys_avail[indx + 1] - 1,
  311                             (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
  312                 }
  313         }
  314 
  315         vm_ksubmap_init(&kmi);
  316 
  317         printf("avail memory = %ju (%ju MB)\n",
  318             ptoa((uintmax_t)vm_free_count()),
  319             ptoa((uintmax_t)vm_free_count()) / 1048576);
  320 #ifdef DEV_PCI
  321         if (bootverbose && intel_graphics_stolen_base != 0)
  322                 printf("intel stolen mem: base %#jx size %ju MB\n",
  323                     (uintmax_t)intel_graphics_stolen_base,
  324                     (uintmax_t)intel_graphics_stolen_size / 1024 / 1024);
  325 #endif
  326 
  327         /*
  328          * Set up buffers, so they can be used to read disk labels.
  329          */
  330         bufinit();
  331         vm_pager_bufferinit();
  332 
  333         cpu_setregs();
  334 }
  335 
/*
 * Send an interrupt to process.
 *
 * Stack is set up to allow sigcode stored
 * at top to call routine, followed by call
 * to sigreturn routine below.  After sigreturn
 * resets the signal mask, the stack, and the
 * frame pointer, it returns to the user
 * specified pc, psl.
 *
 * Entered with the proc lock and the sigacts mutex held (asserted below);
 * both are dropped around the copyout() to user space and reacquired
 * before returning.
 */
void
sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
        struct sigframe sf, *sfp;
        struct pcb *pcb;
        struct proc *p;
        struct thread *td;
        struct sigacts *psp;
        char *sp;
        struct trapframe *regs;
        char *xfpusave;
        size_t xfpusave_len;
        int sig;
        int oonstack;

        td = curthread;
        pcb = td->td_pcb;
        p = td->td_proc;
        PROC_LOCK_ASSERT(p, MA_OWNED);
        sig = ksi->ksi_signo;
        psp = p->p_sigacts;
        mtx_assert(&psp->ps_mtx, MA_OWNED);
        regs = td->td_frame;
        oonstack = sigonstack(regs->tf_rsp);

        /* Reserve kernel scratch space for extended (XSAVE) FPU state. */
        if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) {
                xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu);
                xfpusave = __builtin_alloca(xfpusave_len);
        } else {
                xfpusave_len = 0;
                xfpusave = NULL;
        }

        /* Save user context. */
        bzero(&sf, sizeof(sf));
        sf.sf_uc.uc_sigmask = *mask;
        sf.sf_uc.uc_stack = td->td_sigstk;
        sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
            ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
        sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
        /* The trapframe is copied wholesale starting at mc_rdi. */
        bcopy(regs, &sf.sf_uc.uc_mcontext.mc_rdi, sizeof(*regs));
        sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
        get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len);
        fpstate_drop(td);
        update_pcb_bases(pcb);
        sf.sf_uc.uc_mcontext.mc_fsbase = pcb->pcb_fsbase;
        sf.sf_uc.uc_mcontext.mc_gsbase = pcb->pcb_gsbase;
        bzero(sf.sf_uc.uc_mcontext.mc_spare,
            sizeof(sf.sf_uc.uc_mcontext.mc_spare));

        /* Allocate space for the signal handler context. */
        if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
            SIGISMEMBER(psp->ps_sigonstack, sig)) {
                /* Deliver on the alternate signal stack. */
                sp = (char *)td->td_sigstk.ss_sp + td->td_sigstk.ss_size;
#if defined(COMPAT_43)
                td->td_sigstk.ss_flags |= SS_ONSTACK;
#endif
        } else
                /* 128 bytes skips over the amd64 ABI red zone. */
                sp = (char *)regs->tf_rsp - 128;
        if (xfpusave != NULL) {
                sp -= xfpusave_len;
                /* XSAVE area requires 64-byte alignment. */
                sp = (char *)((unsigned long)sp & ~0x3Ful);
                sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp;
        }
        sp -= sizeof(struct sigframe);
        /* Align to 16 bytes. */
        sfp = (struct sigframe *)((unsigned long)sp & ~0xFul);

        /* Build the argument list for the signal handler. */
        regs->tf_rdi = sig;                     /* arg 1 in %rdi */
        regs->tf_rdx = (register_t)&sfp->sf_uc; /* arg 3 in %rdx */
        bzero(&sf.sf_si, sizeof(sf.sf_si));
        if (SIGISMEMBER(psp->ps_siginfo, sig)) {
                /* Signal handler installed with SA_SIGINFO. */
                regs->tf_rsi = (register_t)&sfp->sf_si; /* arg 2 in %rsi */
                sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;

                /* Fill in POSIX parts */
                sf.sf_si = ksi->ksi_info;
                sf.sf_si.si_signo = sig; /* maybe a translated signal */
                regs->tf_rcx = (register_t)ksi->ksi_addr; /* arg 4 in %rcx */
        } else {
                /* Old FreeBSD-style arguments. */
                regs->tf_rsi = ksi->ksi_code;   /* arg 2 in %rsi */
                regs->tf_rcx = (register_t)ksi->ksi_addr; /* arg 4 in %rcx */
                sf.sf_ahu.sf_handler = catcher;
        }
        /* Drop both locks across the faultable copyout. */
        mtx_unlock(&psp->ps_mtx);
        PROC_UNLOCK(p);

        /*
         * Copy the sigframe out to the user's stack.
         */
        if (copyout(&sf, sfp, sizeof(*sfp)) != 0 ||
            (xfpusave != NULL && copyout(xfpusave,
            (void *)sf.sf_uc.uc_mcontext.mc_xfpustate, xfpusave_len)
            != 0)) {
#ifdef DEBUG
                printf("process %ld has trashed its stack\n", (long)p->p_pid);
#endif
                PROC_LOCK(p);
                /* Unwritable stack: kill the process (presumably does not
                 * return — confirm sigexit() semantics). */
                sigexit(td, SIGILL);
        }

        /* Redirect the trapframe at the signal trampoline. */
        regs->tf_rsp = (long)sfp;
        regs->tf_rip = p->p_sysent->sv_sigcode_base;
        regs->tf_rflags &= ~(PSL_T | PSL_D);
        regs->tf_cs = _ucodesel;
        regs->tf_ds = _udatasel;
        regs->tf_ss = _udatasel;
        regs->tf_es = _udatasel;
        regs->tf_fs = _ufssel;
        regs->tf_gs = _ugssel;
        regs->tf_flags = TF_HASSEGS;
        PROC_LOCK(p);
        mtx_lock(&psp->ps_mtx);
}
  463 
  464 /*
  465  * System call to cleanup state after a signal
  466  * has been taken.  Reset signal mask and
  467  * stack state from context left by sendsig (above).
  468  * Return to previous pc and psl as specified by
  469  * context left by sendsig. Check carefully to
  470  * make sure that the user has not modified the
  471  * state to gain improper privileges.
  472  *
  473  * MPSAFE
  474  */
  475 int
  476 sys_sigreturn(td, uap)
  477         struct thread *td;
  478         struct sigreturn_args /* {
  479                 const struct __ucontext *sigcntxp;
  480         } */ *uap;
  481 {
  482         ucontext_t uc;
  483         struct pcb *pcb;
  484         struct proc *p;
  485         struct trapframe *regs;
  486         ucontext_t *ucp;
  487         char *xfpustate;
  488         size_t xfpustate_len;
  489         long rflags;
  490         int cs, error, ret;
  491         ksiginfo_t ksi;
  492 
  493         pcb = td->td_pcb;
  494         p = td->td_proc;
  495 
  496         error = copyin(uap->sigcntxp, &uc, sizeof(uc));
  497         if (error != 0) {
  498                 uprintf("pid %d (%s): sigreturn copyin failed\n",
  499                     p->p_pid, td->td_name);
  500                 return (error);
  501         }
  502         ucp = &uc;
  503         if ((ucp->uc_mcontext.mc_flags & ~_MC_FLAG_MASK) != 0) {
  504                 uprintf("pid %d (%s): sigreturn mc_flags %x\n", p->p_pid,
  505                     td->td_name, ucp->uc_mcontext.mc_flags);
  506                 return (EINVAL);
  507         }
  508         regs = td->td_frame;
  509         rflags = ucp->uc_mcontext.mc_rflags;
  510         /*
  511          * Don't allow users to change privileged or reserved flags.
  512          */
  513         if (!EFL_SECURE(rflags, regs->tf_rflags)) {
  514                 uprintf("pid %d (%s): sigreturn rflags = 0x%lx\n", p->p_pid,
  515                     td->td_name, rflags);
  516                 return (EINVAL);
  517         }
  518 
  519         /*
  520          * Don't allow users to load a valid privileged %cs.  Let the
  521          * hardware check for invalid selectors, excess privilege in
  522          * other selectors, invalid %eip's and invalid %esp's.
  523          */
  524         cs = ucp->uc_mcontext.mc_cs;
  525         if (!CS_SECURE(cs)) {
  526                 uprintf("pid %d (%s): sigreturn cs = 0x%x\n", p->p_pid,
  527                     td->td_name, cs);
  528                 ksiginfo_init_trap(&ksi);
  529                 ksi.ksi_signo = SIGBUS;
  530                 ksi.ksi_code = BUS_OBJERR;
  531                 ksi.ksi_trapno = T_PROTFLT;
  532                 ksi.ksi_addr = (void *)regs->tf_rip;
  533                 trapsignal(td, &ksi);
  534                 return (EINVAL);
  535         }
  536 
  537         if ((uc.uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) {
  538                 xfpustate_len = uc.uc_mcontext.mc_xfpustate_len;
  539                 if (xfpustate_len > cpu_max_ext_state_size -
  540                     sizeof(struct savefpu)) {
  541                         uprintf("pid %d (%s): sigreturn xfpusave_len = 0x%zx\n",
  542                             p->p_pid, td->td_name, xfpustate_len);
  543                         return (EINVAL);
  544                 }
  545                 xfpustate = __builtin_alloca(xfpustate_len);
  546                 error = copyin((const void *)uc.uc_mcontext.mc_xfpustate,
  547                     xfpustate, xfpustate_len);
  548                 if (error != 0) {
  549                         uprintf(
  550         "pid %d (%s): sigreturn copying xfpustate failed\n",
  551                             p->p_pid, td->td_name);
  552                         return (error);
  553                 }
  554         } else {
  555                 xfpustate = NULL;
  556                 xfpustate_len = 0;
  557         }
  558         ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate, xfpustate_len);
  559         if (ret != 0) {
  560                 uprintf("pid %d (%s): sigreturn set_fpcontext err %d\n",
  561                     p->p_pid, td->td_name, ret);
  562                 return (ret);
  563         }
  564         bcopy(&ucp->uc_mcontext.mc_rdi, regs, sizeof(*regs));
  565         update_pcb_bases(pcb);
  566         pcb->pcb_fsbase = ucp->uc_mcontext.mc_fsbase;
  567         pcb->pcb_gsbase = ucp->uc_mcontext.mc_gsbase;
  568 
  569 #if defined(COMPAT_43)
  570         if (ucp->uc_mcontext.mc_onstack & 1)
  571                 td->td_sigstk.ss_flags |= SS_ONSTACK;
  572         else
  573                 td->td_sigstk.ss_flags &= ~SS_ONSTACK;
  574 #endif
  575 
  576         kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0);
  577         return (EJUSTRETURN);
  578 }
  579 
  580 #ifdef COMPAT_FREEBSD4
  581 int
  582 freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap)
  583 {
  584  
  585         return sys_sigreturn(td, (struct sigreturn_args *)uap);
  586 }
  587 #endif
  588 
/*
 * Reset registers to default values on exec: free any per-process LDT,
 * clear segment bases and FPU control word, build a clean trapframe
 * pointing at the image entry point, and reset debug registers and FPU
 * state so nothing leaks from the previous image.
 */
void
exec_setregs(struct thread *td, struct image_params *imgp, u_long stack)
{
        struct trapframe *regs;
        struct pcb *pcb;
        register_t saved_rflags;

        regs = td->td_frame;
        pcb = td->td_pcb;

        /* Discard any LDT inherited from the pre-exec image. */
        if (td->td_proc->p_md.md_ldt != NULL)
                user_ldt_free(td);

        update_pcb_bases(pcb);
        pcb->pcb_fsbase = 0;
        pcb->pcb_gsbase = 0;
        clear_pcb_flags(pcb, PCB_32BIT);
        pcb->pcb_initial_fpucw = __INITIAL_FPUCW__;

        /* Preserve only the trace flag across exec (for debuggers). */
        saved_rflags = regs->tf_rflags & PSL_T;
        bzero((char *)regs, sizeof(struct trapframe));
        regs->tf_rip = imgp->entry_addr;
        /*
         * Leaves %rsp == 8 (mod 16), i.e. the layout after a call
         * instruction; presumably to satisfy the amd64 ABI entry
         * alignment — confirm against the ABI supplement.
         */
        regs->tf_rsp = ((stack - 8) & ~0xFul) + 8;
        regs->tf_rdi = stack;           /* argv */
        regs->tf_rflags = PSL_USER | saved_rflags;
        regs->tf_ss = _udatasel;
        regs->tf_cs = _ucodesel;
        regs->tf_ds = _udatasel;
        regs->tf_es = _udatasel;
        regs->tf_fs = _ufssel;
        regs->tf_gs = _ugssel;
        regs->tf_flags = TF_HASSEGS;

        /*
         * Reset the hardware debug registers if they were in use.
         * They won't have any meaning for the newly exec'd process.
         */
        if (pcb->pcb_flags & PCB_DBREGS) {
                pcb->pcb_dr0 = 0;
                pcb->pcb_dr1 = 0;
                pcb->pcb_dr2 = 0;
                pcb->pcb_dr3 = 0;
                pcb->pcb_dr6 = 0;
                pcb->pcb_dr7 = 0;
                if (pcb == curpcb) {
                        /*
                         * Clear the debug registers on the running
                         * CPU, otherwise they will end up affecting
                         * the next process we switch to.
                         */
                        reset_dbregs();
                }
                clear_pcb_flags(pcb, PCB_DBREGS);
        }

        /*
         * Drop the FP state if we hold it, so that the process gets a
         * clean FP state if it uses the FPU again.
         */
        fpstate_drop(td);
}
  653 
  654 void
  655 cpu_setregs(void)
  656 {
  657         register_t cr0;
  658 
  659         cr0 = rcr0();
  660         /*
  661          * CR0_MP, CR0_NE and CR0_TS are also set by npx_probe() for the
  662          * BSP.  See the comments there about why we set them.
  663          */
  664         cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM;
  665         load_cr0(cr0);
  666 }
  667 
  668 /*
  669  * Initialize amd64 and configure to run kernel
  670  */
  671 
  672 /*
  673  * Initialize segments & interrupt table
  674  */
  675 
struct user_segment_descriptor gdt[NGDT * MAXCPU];/* global descriptor tables */
static struct gate_descriptor idt0[NIDT];
struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */

/*
 * Dedicated stacks for stack-sensitive exceptions (double fault, machine
 * check, NMI, debug); presumably wired to IST entries in the TSS — confirm
 * where common_tss is populated.
 */
static char dblfault_stack[PAGE_SIZE] __aligned(16);
static char mce0_stack[PAGE_SIZE] __aligned(16);
static char nmi0_stack[PAGE_SIZE] __aligned(16);
static char dbg0_stack[PAGE_SIZE] __aligned(16);
/* NOTE(review): code elsewhere appears to rely on struct nmi_pcpu being
 * exactly 16 bytes — confirm against the NMI handler. */
CTASSERT(sizeof(struct nmi_pcpu) == 16);

/* Hardware task-state segments, one per CPU. */
struct amd64tss common_tss[MAXCPU];
  687 
/*
 * Software prototypes -- in more palatable form.
 *
 * Keep GUFS32, GUGS32, GUCODE32 and GUDATA at the same
 * slots as corresponding segments for i386 kernel.
 *
 * NOTE(review): the GUFS32/GUGS32 entry comments name the opposite segment
 * register (%gs for GUFS32, %fs for GUGS32); presumably intentional from
 * the 32-bit compat layout — confirm before "fixing".
 */
struct soft_segment_descriptor gdt_segs[] = {
/* GNULL_SEL    0 Null Descriptor */
{       .ssd_base = 0x0,
        .ssd_limit = 0x0,
        .ssd_type = 0,
        .ssd_dpl = 0,
        .ssd_p = 0,
        .ssd_long = 0,
        .ssd_def32 = 0,
        .ssd_gran = 0           },
/* GNULL2_SEL   1 Null Descriptor */
{       .ssd_base = 0x0,
        .ssd_limit = 0x0,
        .ssd_type = 0,
        .ssd_dpl = 0,
        .ssd_p = 0,
        .ssd_long = 0,
        .ssd_def32 = 0,
        .ssd_gran = 0           },
/* GUFS32_SEL   2 32 bit %gs Descriptor for user */
{       .ssd_base = 0x0,
        .ssd_limit = 0xfffff,
        .ssd_type = SDT_MEMRWA,
        .ssd_dpl = SEL_UPL,
        .ssd_p = 1,
        .ssd_long = 0,
        .ssd_def32 = 1,
        .ssd_gran = 1           },
/* GUGS32_SEL   3 32 bit %fs Descriptor for user */
{       .ssd_base = 0x0,
        .ssd_limit = 0xfffff,
        .ssd_type = SDT_MEMRWA,
        .ssd_dpl = SEL_UPL,
        .ssd_p = 1,
        .ssd_long = 0,
        .ssd_def32 = 1,
        .ssd_gran = 1           },
/* GCODE_SEL    4 Code Descriptor for kernel */
{       .ssd_base = 0x0,
        .ssd_limit = 0xfffff,
        .ssd_type = SDT_MEMERA,
        .ssd_dpl = SEL_KPL,
        .ssd_p = 1,
        .ssd_long = 1,
        .ssd_def32 = 0,
        .ssd_gran = 1           },
/* GDATA_SEL    5 Data Descriptor for kernel */
{       .ssd_base = 0x0,
        .ssd_limit = 0xfffff,
        .ssd_type = SDT_MEMRWA,
        .ssd_dpl = SEL_KPL,
        .ssd_p = 1,
        .ssd_long = 1,
        .ssd_def32 = 0,
        .ssd_gran = 1           },
/* GUCODE32_SEL 6 32 bit Code Descriptor for user */
{       .ssd_base = 0x0,
        .ssd_limit = 0xfffff,
        .ssd_type = SDT_MEMERA,
        .ssd_dpl = SEL_UPL,
        .ssd_p = 1,
        .ssd_long = 0,
        .ssd_def32 = 1,
        .ssd_gran = 1           },
/* GUDATA_SEL   7 32/64 bit Data Descriptor for user */
{       .ssd_base = 0x0,
        .ssd_limit = 0xfffff,
        .ssd_type = SDT_MEMRWA,
        .ssd_dpl = SEL_UPL,
        .ssd_p = 1,
        .ssd_long = 0,
        .ssd_def32 = 1,
        .ssd_gran = 1           },
/* GUCODE_SEL   8 64 bit Code Descriptor for user */
{       .ssd_base = 0x0,
        .ssd_limit = 0xfffff,
        .ssd_type = SDT_MEMERA,
        .ssd_dpl = SEL_UPL,
        .ssd_p = 1,
        .ssd_long = 1,
        .ssd_def32 = 0,
        .ssd_gran = 1           },
/* GPROC0_SEL   9 Proc 0 Tss Descriptor */
{       .ssd_base = 0x0,
        .ssd_limit = sizeof(struct amd64tss) + IOPERM_BITMAP_SIZE - 1,
        .ssd_type = SDT_SYSTSS,
        .ssd_dpl = SEL_KPL,
        .ssd_p = 1,
        .ssd_long = 0,
        .ssd_def32 = 0,
        .ssd_gran = 0           },
/* Actually, the TSS is a system descriptor which is double size */
{       .ssd_base = 0x0,
        .ssd_limit = 0x0,
        .ssd_type = 0,
        .ssd_dpl = 0,
        .ssd_p = 0,
        .ssd_long = 0,
        .ssd_def32 = 0,
        .ssd_gran = 0           },
/* GUSERLDT_SEL 11 LDT Descriptor */
{       .ssd_base = 0x0,
        .ssd_limit = 0x0,
        .ssd_type = 0,
        .ssd_dpl = 0,
        .ssd_p = 0,
        .ssd_long = 0,
        .ssd_def32 = 0,
        .ssd_gran = 0           },
/* GUSERLDT_SEL 12 LDT Descriptor, double size */
{       .ssd_base = 0x0,
        .ssd_limit = 0x0,
        .ssd_type = 0,
        .ssd_dpl = 0,
        .ssd_p = 0,
        .ssd_long = 0,
        .ssd_def32 = 0,
        .ssd_gran = 0           },
};
  813 
  814 void
  815 setidt(int idx, inthand_t *func, int typ, int dpl, int ist)
  816 {
  817         struct gate_descriptor *ip;
  818 
  819         ip = idt + idx;
  820         ip->gd_looffset = (uintptr_t)func;
  821         ip->gd_selector = GSEL(GCODE_SEL, SEL_KPL);
  822         ip->gd_ist = ist;
  823         ip->gd_xx = 0;
  824         ip->gd_type = typ;
  825         ip->gd_dpl = dpl;
  826         ip->gd_p = 1;
  827         ip->gd_hioffset = ((uintptr_t)func)>>16 ;
  828 }
  829 
/*
 * Low-level trap/interrupt entry points, defined in assembler.  The
 * "_pti" variants are the alternate entries installed when page-table
 * isolation is enabled (see the use of `pti' in amd64_conf_fast_syscall()),
 * and fast_syscall/fast_syscall32 are the SYSCALL MSR targets.
 */
extern inthand_t
	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
	IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
	IDTVEC(xmm), IDTVEC(dblfault),
	IDTVEC(div_pti), IDTVEC(bpt_pti),
	IDTVEC(ofl_pti), IDTVEC(bnd_pti), IDTVEC(ill_pti), IDTVEC(dna_pti),
	IDTVEC(fpusegm_pti), IDTVEC(tss_pti), IDTVEC(missing_pti),
	IDTVEC(stk_pti), IDTVEC(prot_pti), IDTVEC(page_pti),
	IDTVEC(rsvd_pti), IDTVEC(fpu_pti), IDTVEC(align_pti),
	IDTVEC(xmm_pti),
#ifdef KDTRACE_HOOKS
	IDTVEC(dtrace_ret), IDTVEC(dtrace_ret_pti),
#endif
#ifdef XENHVM
	IDTVEC(xen_intr_upcall), IDTVEC(xen_intr_upcall_pti),
#endif
	IDTVEC(fast_syscall), IDTVEC(fast_syscall32),
	IDTVEC(fast_syscall_pti);
  850 
  851 #ifdef DDB
  852 /*
  853  * Display the index and function name of any IDT entries that don't use
  854  * the default 'rsvd' entry point.
  855  */
  856 DB_SHOW_COMMAND(idt, db_show_idt)
  857 {
  858         struct gate_descriptor *ip;
  859         int idx;
  860         uintptr_t func;
  861 
  862         ip = idt;
  863         for (idx = 0; idx < NIDT && !db_pager_quit; idx++) {
  864                 func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
  865                 if (func != (uintptr_t)&IDTVEC(rsvd)) {
  866                         db_printf("%3d\t", idx);
  867                         db_printsym(func, DB_STGY_PROC);
  868                         db_printf("\n");
  869                 }
  870                 ip++;
  871         }
  872 }
  873 
/* Show privileged registers. */
DB_SHOW_COMMAND(sysregs, db_show_sysregs)
{
	/*
	 * Layout of the memory operand written by SIDT/SGDT: a 16-bit
	 * limit followed immediately by the 64-bit base, hence __packed.
	 */
	struct {
		uint16_t limit;
		uint64_t base;
	} __packed idtr, gdtr;
	uint16_t ldt, tr;

	__asm __volatile("sidt %0" : "=m" (idtr));
	db_printf("idtr\t0x%016lx/%04x\n",
	    (u_long)idtr.base, (u_int)idtr.limit);
	__asm __volatile("sgdt %0" : "=m" (gdtr));
	db_printf("gdtr\t0x%016lx/%04x\n",
	    (u_long)gdtr.base, (u_int)gdtr.limit);
	__asm __volatile("sldt %0" : "=r" (ldt));
	db_printf("ldtr\t0x%04x\n", ldt);
	__asm __volatile("str %0" : "=r" (tr));
	db_printf("tr\t0x%04x\n", tr);
	db_printf("cr0\t0x%016lx\n", rcr0());
	db_printf("cr2\t0x%016lx\n", rcr2());
	db_printf("cr3\t0x%016lx\n", rcr3());
	db_printf("cr4\t0x%016lx\n", rcr4());
	/* XCR0 only exists (and rxcr only traps safely) when XSAVE is on. */
	if (rcr4() & CR4_XSAVE)
		db_printf("xcr0\t0x%016lx\n", rxcr(0));
	db_printf("EFER\t0x%016lx\n", rdmsr(MSR_EFER));
	/* IA32_FEATURE_CONTROL is only architectural with VMX or SMX. */
	if (cpu_feature2 & (CPUID2_VMX | CPUID2_SMX))
		db_printf("FEATURES_CTL\t%016lx\n",
		    rdmsr(MSR_IA32_FEATURE_CONTROL));
	db_printf("DEBUG_CTL\t0x%016lx\n", rdmsr(MSR_DEBUGCTLMSR));
	db_printf("PAT\t0x%016lx\n", rdmsr(MSR_PAT));
	db_printf("GSBASE\t0x%016lx\n", rdmsr(MSR_GSBASE));
}
  907 
/* Show the hardware debug registers (dr0-dr3 breakpoints, dr6 status, dr7 control). */
DB_SHOW_COMMAND(dbregs, db_show_dbregs)
{

	db_printf("dr0\t0x%016lx\n", rdr0());
	db_printf("dr1\t0x%016lx\n", rdr1());
	db_printf("dr2\t0x%016lx\n", rdr2());
	db_printf("dr3\t0x%016lx\n", rdr3());
	db_printf("dr6\t0x%016lx\n", rdr6());
	db_printf("dr7\t0x%016lx\n", rdr7());
}
  918 #endif
  919 
  920 void
  921 sdtossd(sd, ssd)
  922         struct user_segment_descriptor *sd;
  923         struct soft_segment_descriptor *ssd;
  924 {
  925 
  926         ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
  927         ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
  928         ssd->ssd_type  = sd->sd_type;
  929         ssd->ssd_dpl   = sd->sd_dpl;
  930         ssd->ssd_p     = sd->sd_p;
  931         ssd->ssd_long  = sd->sd_long;
  932         ssd->ssd_def32 = sd->sd_def32;
  933         ssd->ssd_gran  = sd->sd_gran;
  934 }
  935 
  936 void
  937 ssdtosd(ssd, sd)
  938         struct soft_segment_descriptor *ssd;
  939         struct user_segment_descriptor *sd;
  940 {
  941 
  942         sd->sd_lobase = (ssd->ssd_base) & 0xffffff;
  943         sd->sd_hibase = (ssd->ssd_base >> 24) & 0xff;
  944         sd->sd_lolimit = (ssd->ssd_limit) & 0xffff;
  945         sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf;
  946         sd->sd_type  = ssd->ssd_type;
  947         sd->sd_dpl   = ssd->ssd_dpl;
  948         sd->sd_p     = ssd->ssd_p;
  949         sd->sd_long  = ssd->ssd_long;
  950         sd->sd_def32 = ssd->ssd_def32;
  951         sd->sd_gran  = ssd->ssd_gran;
  952 }
  953 
  954 void
  955 ssdtosyssd(ssd, sd)
  956         struct soft_segment_descriptor *ssd;
  957         struct system_segment_descriptor *sd;
  958 {
  959 
  960         sd->sd_lobase = (ssd->ssd_base) & 0xffffff;
  961         sd->sd_hibase = (ssd->ssd_base >> 24) & 0xfffffffffful;
  962         sd->sd_lolimit = (ssd->ssd_limit) & 0xffff;
  963         sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf;
  964         sd->sd_type  = ssd->ssd_type;
  965         sd->sd_dpl   = ssd->ssd_dpl;
  966         sd->sd_p     = ssd->ssd_p;
  967         sd->sd_gran  = ssd->ssd_gran;
  968 }
  969 
  970 #if !defined(DEV_ATPIC) && defined(DEV_ISA)
  971 #include <isa/isavar.h>
  972 #include <isa/isareg.h>
  973 /*
  974  * Return a bitmap of the current interrupt requests.  This is 8259-specific
  975  * and is only suitable for use at probe time.
  976  * This is only here to pacify sio.  It is NOT FATAL if this doesn't work.
  977  * It shouldn't be here.  There should probably be an APIC centric
  978  * implementation in the apic driver code, if at all.
  979  */
  980 intrmask_t
  981 isa_irq_pending(void)
  982 {
  983         u_char irr1;
  984         u_char irr2;
  985 
  986         irr1 = inb(IO_ICU1);
  987         irr2 = inb(IO_ICU2);
  988         return ((irr2 << 8) | irr1);
  989 }
  990 #endif
  991 
/* Size in KB of the "base memory" segment below 0xA0000 (see getmemsize()). */
u_int basemem;
  993 
/*
 * Insert the region [base, base + length) into the sorted physmap array
 * of base/bound pairs, merging with an adjacent entry when the region
 * abuts it.  Overlapping regions are ignored.  Returns 1 on success
 * (including the ignore cases) and 0 when the array is full.
 */
static int
add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap,
    int *physmap_idxp)
{
	int i, insert_idx, physmap_idx;

	physmap_idx = *physmap_idxp;

	/* An empty region contributes nothing. */
	if (length == 0)
		return (1);

	/*
	 * Find insertion point while checking for overlap.  Start off by
	 * assuming the new entry will be added to the end.
	 *
	 * NB: physmap_idx points to the next free slot.
	 */
	insert_idx = physmap_idx;
	for (i = 0; i <= physmap_idx; i += 2) {
		if (base < physmap[i + 1]) {
			if (base + length <= physmap[i]) {
				insert_idx = i;
				break;
			}
			if (boothowto & RB_VERBOSE)
				printf(
		    "Overlapping memory regions, ignoring second region\n");
			return (1);
		}
	}

	/* See if we can prepend to the next entry. */
	if (insert_idx <= physmap_idx && base + length == physmap[insert_idx]) {
		physmap[insert_idx] = base;
		return (1);
	}

	/* See if we can append to the previous entry. */
	if (insert_idx > 0 && base == physmap[insert_idx - 1]) {
		physmap[insert_idx - 1] += length;
		return (1);
	}

	physmap_idx += 2;
	*physmap_idxp = physmap_idx;
	if (physmap_idx == PHYSMAP_SIZE) {
		printf(
		"Too many segments in the physical address map, giving up\n");
		return (0);
	}

	/*
	 * Move the last 'N' entries down to make room for the new
	 * entry if needed.
	 */
	for (i = (physmap_idx - 2); i > insert_idx; i -= 2) {
		physmap[i] = physmap[i - 2];
		physmap[i + 1] = physmap[i - 1];
	}

	/* Insert the new entry. */
	physmap[insert_idx] = base;
	physmap[insert_idx + 1] = base + length;
	return (1);
}
 1059 
 1060 void
 1061 bios_add_smap_entries(struct bios_smap *smapbase, u_int32_t smapsize,
 1062                       vm_paddr_t *physmap, int *physmap_idx)
 1063 {
 1064         struct bios_smap *smap, *smapend;
 1065 
 1066         smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize);
 1067 
 1068         for (smap = smapbase; smap < smapend; smap++) {
 1069                 if (boothowto & RB_VERBOSE)
 1070                         printf("SMAP type=%02x base=%016lx len=%016lx\n",
 1071                             smap->type, smap->base, smap->length);
 1072 
 1073                 if (smap->type != SMAP_TYPE_MEMORY)
 1074                         continue;
 1075 
 1076                 if (!add_physmap_entry(smap->base, smap->length, physmap,
 1077                     physmap_idx))
 1078                         break;
 1079         }
 1080 }
 1081 
/*
 * Walk the UEFI memory map provided by the loader and add every range
 * usable as normal RAM to the physmap array.  With RB_VERBOSE, also
 * pretty-print every descriptor and its attribute flags.
 */
static void
add_efi_map_entries(struct efi_map_header *efihdr, vm_paddr_t *physmap,
    int *physmap_idx)
{
	struct efi_md *map, *p;
	const char *type;
	size_t efisz;
	int ndesc, i;

	/* Human-readable names indexed by EFI memory descriptor type. */
	static const char *types[] = {
		"Reserved",
		"LoaderCode",
		"LoaderData",
		"BootServicesCode",
		"BootServicesData",
		"RuntimeServicesCode",
		"RuntimeServicesData",
		"ConventionalMemory",
		"UnusableMemory",
		"ACPIReclaimMemory",
		"ACPIMemoryNVS",
		"MemoryMappedIO",
		"MemoryMappedIOPortSpace",
		"PalCode",
		"PersistentMemory"
	};

	/*
	 * Memory map data provided by UEFI via the GetMemoryMap
	 * Boot Services API.
	 */
	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
	map = (struct efi_md *)((uint8_t *)efihdr + efisz);

	/* Guard against a malformed header before dividing by it. */
	if (efihdr->descriptor_size == 0)
		return;
	ndesc = efihdr->memory_size / efihdr->descriptor_size;

	if (boothowto & RB_VERBOSE)
		printf("%23s %12s %12s %8s %4s\n",
		    "Type", "Physical", "Virtual", "#Pages", "Attr");

	for (i = 0, p = map; i < ndesc; i++,
	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
		if (boothowto & RB_VERBOSE) {
			if (p->md_type < nitems(types))
				type = types[p->md_type];
			else
				type = "<INVALID>";
			printf("%23s %012lx %12p %08lx ", type, p->md_phys,
			    p->md_virt, p->md_pages);
			if (p->md_attr & EFI_MD_ATTR_UC)
				printf("UC ");
			if (p->md_attr & EFI_MD_ATTR_WC)
				printf("WC ");
			if (p->md_attr & EFI_MD_ATTR_WT)
				printf("WT ");
			if (p->md_attr & EFI_MD_ATTR_WB)
				printf("WB ");
			if (p->md_attr & EFI_MD_ATTR_UCE)
				printf("UCE ");
			if (p->md_attr & EFI_MD_ATTR_WP)
				printf("WP ");
			if (p->md_attr & EFI_MD_ATTR_RP)
				printf("RP ");
			if (p->md_attr & EFI_MD_ATTR_XP)
				printf("XP ");
			if (p->md_attr & EFI_MD_ATTR_NV)
				printf("NV ");
			if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE)
				printf("MORE_RELIABLE ");
			if (p->md_attr & EFI_MD_ATTR_RO)
				printf("RO ");
			if (p->md_attr & EFI_MD_ATTR_RT)
				printf("RUNTIME");
			printf("\n");
		}

		switch (p->md_type) {
		case EFI_MD_TYPE_CODE:
		case EFI_MD_TYPE_DATA:
		case EFI_MD_TYPE_BS_CODE:
		case EFI_MD_TYPE_BS_DATA:
		case EFI_MD_TYPE_FREE:
			/*
			 * We're allowed to use any entry with these types.
			 */
			break;
		default:
			/* Everything else (MMIO, runtime services, ...) is not RAM. */
			continue;
		}

		if (!add_physmap_entry(p->md_phys, (p->md_pages * PAGE_SIZE),
		    physmap, physmap_idx))
			break;
	}
}
 1179 
 1180 static char bootmethod[16] = "";
 1181 SYSCTL_STRING(_machdep, OID_AUTO, bootmethod, CTLFLAG_RD, bootmethod, 0,
 1182     "System firmware boot method");
 1183 
 1184 static void
 1185 native_parse_memmap(caddr_t kmdp, vm_paddr_t *physmap, int *physmap_idx)
 1186 {
 1187         struct bios_smap *smap;
 1188         struct efi_map_header *efihdr;
 1189         u_int32_t size;
 1190 
 1191         /*
 1192          * Memory map from INT 15:E820.
 1193          *
 1194          * subr_module.c says:
 1195          * "Consumer may safely assume that size value precedes data."
 1196          * ie: an int32_t immediately precedes smap.
 1197          */
 1198 
 1199         efihdr = (struct efi_map_header *)preload_search_info(kmdp,
 1200             MODINFO_METADATA | MODINFOMD_EFI_MAP);
 1201         smap = (struct bios_smap *)preload_search_info(kmdp,
 1202             MODINFO_METADATA | MODINFOMD_SMAP);
 1203         if (efihdr == NULL && smap == NULL)
 1204                 panic("No BIOS smap or EFI map info from loader!");
 1205 
 1206         if (efihdr != NULL) {
 1207                 add_efi_map_entries(efihdr, physmap, physmap_idx);
 1208                 strlcpy(bootmethod, "UEFI", sizeof(bootmethod));
 1209         } else {
 1210                 size = *((u_int32_t *)smap - 1);
 1211                 bios_add_smap_entries(smap, size, physmap, physmap_idx);
 1212                 strlcpy(bootmethod, "BIOS", sizeof(bootmethod));
 1213         }
 1214 }
 1215 
 1216 #define PAGES_PER_GB    (1024 * 1024 * 1024 / PAGE_SIZE)
 1217 
 1218 /*
 1219  * Populate the (physmap) array with base/bound pairs describing the
 1220  * available physical memory in the system, then test this memory and
 1221  * build the phys_avail array describing the actually-available memory.
 1222  *
 1223  * Total memory size may be set by the kernel environment variable
 1224  * hw.physmem or the compile-time define MAXMEM.
 1225  *
 1226  * XXX first should be vm_paddr_t.
 1227  */
static void
getmemsize(caddr_t kmdp, u_int64_t first)
{
	int i, physmap_idx, pa_indx, da_indx;
	vm_paddr_t pa, physmap[PHYSMAP_SIZE];
	u_long physmem_start, physmem_tunable, memtest;
	pt_entry_t *pte;
	quad_t dcons_addr, dcons_size;
	int page_counter;

	/*
	 * Tell the physical memory allocator about pages used to store
	 * the kernel and preloaded data.  See kmem_bootstrap_free().
	 */
	vm_phys_add_seg((vm_paddr_t)kernphys, trunc_page(first));

	bzero(physmap, sizeof(physmap));
	physmap_idx = 0;

	init_ops.parse_memmap(kmdp, physmap, &physmap_idx);
	/*
	 * parse_memmap leaves physmap_idx pointing at the next free slot;
	 * step back so it indexes the base of the last valid pair.
	 */
	physmap_idx -= 2;

	/*
	 * Find the 'base memory' segment for SMP
	 */
	basemem = 0;
	for (i = 0; i <= physmap_idx; i += 2) {
		if (physmap[i] <= 0xA0000) {
			basemem = physmap[i + 1] / 1024;
			break;
		}
	}
	if (basemem == 0 || basemem > 640) {
		if (bootverbose)
			printf(
		"Memory map doesn't contain a basemem segment, faking it");
		basemem = 640;
	}

	/*
	 * Maxmem isn't the "maximum memory", it's one larger than the
	 * highest page of the physical address space.  It should be
	 * called something like "Maxphyspage".  We may adjust this
	 * based on ``hw.physmem'' and the results of the memory test.
	 */
	Maxmem = atop(physmap[physmap_idx + 1]);

#ifdef MAXMEM
	/* MAXMEM is in KB; /4 converts to pages (cf. "Maxmem * 4" below). */
	Maxmem = MAXMEM / 4;
#endif

	if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable))
		Maxmem = atop(physmem_tunable);

	/*
	 * The boot memory test is disabled by default, as it takes a
	 * significant amount of time on large-memory systems, and is
	 * unfriendly to virtual machines as it unnecessarily touches all
	 * pages.
	 *
	 * A general name is used as the code may be extended to support
	 * additional tests beyond the current "page present" test.
	 */
	memtest = 0;
	TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest);

	/*
	 * Don't allow MAXMEM or hw.physmem to extend the amount of memory
	 * in the system.
	 */
	if (Maxmem > atop(physmap[physmap_idx + 1]))
		Maxmem = atop(physmap[physmap_idx + 1]);

	if (atop(physmap[physmap_idx + 1]) != Maxmem &&
	    (boothowto & RB_VERBOSE))
		printf("Physical memory use set to %ldK\n", Maxmem * 4);

	/*
	 * Make hole for "AP -> long mode" bootstrap code.  The
	 * mp_bootaddress vector is only available when the kernel
	 * is configured to support APs and APs for the system start
	 * in real mode mode (e.g. SMP bare metal).
	 */
	if (init_ops.mp_bootaddress)
		init_ops.mp_bootaddress(physmap, &physmap_idx);

	/* call pmap initialization to make new kernel address space */
	pmap_bootstrap(&first);

	/*
	 * Size up each available chunk of physical memory.
	 *
	 * XXX Some BIOSes corrupt low 64KB between suspend and resume.
	 * By default, mask off the first 16 pages unless we appear to be
	 * running in a VM.
	 */
	physmem_start = (vm_guest > VM_GUEST_NO ? 1 : 16) << PAGE_SHIFT;
	TUNABLE_ULONG_FETCH("hw.physmem.start", &physmem_start);
	if (physmap[0] < physmem_start) {
		if (physmem_start < PAGE_SIZE)
			physmap[0] = PAGE_SIZE;
		else if (physmem_start >= physmap[1])
			physmap[0] = round_page(physmap[1] - PAGE_SIZE);
		else
			physmap[0] = round_page(physmem_start);
	}
	pa_indx = 0;
	da_indx = 1;
	phys_avail[pa_indx++] = physmap[0];
	phys_avail[pa_indx] = physmap[0];
	dump_avail[da_indx] = physmap[0];
	/* pte for the temporary mapping (via CADDR1) used by the page test. */
	pte = CMAP1;

	/*
	 * Get dcons buffer address
	 */
	if (getenv_quad("dcons.addr", &dcons_addr) == 0 ||
	    getenv_quad("dcons.size", &dcons_size) == 0)
		dcons_addr = 0;

	/*
	 * physmap is in bytes, so when converting to page boundaries,
	 * round up the start address and round down the end address.
	 */
	page_counter = 0;
	if (memtest != 0)
		printf("Testing system memory");
	for (i = 0; i <= physmap_idx; i += 2) {
		vm_paddr_t end;

		end = ptoa((vm_paddr_t)Maxmem);
		if (physmap[i + 1] < end)
			end = trunc_page(physmap[i + 1]);
		for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
			int tmp, page_bad, full;
			int *ptr = (int *)CADDR1;

			full = FALSE;
			/*
			 * block out kernel memory as not available.
			 */
			if (pa >= (vm_paddr_t)kernphys && pa < first)
				goto do_dump_avail;

			/*
			 * block out dcons buffer
			 */
			if (dcons_addr > 0
			    && pa >= trunc_page(dcons_addr)
			    && pa < dcons_addr + dcons_size)
				goto do_dump_avail;

			page_bad = FALSE;
			if (memtest == 0)
				goto skip_memtest;

			/*
			 * Print a "." every GB to show we're making
			 * progress.
			 */
			page_counter++;
			if ((page_counter % PAGES_PER_GB) == 0)
				printf(".");

			/*
			 * map page into kernel: valid, read/write,non-cacheable
			 */
			*pte = pa | PG_V | PG_RW | PG_NC_PWT | PG_NC_PCD;
			invltlb();

			/* Preserve the page's contents across the test. */
			tmp = *(int *)ptr;
			/*
			 * Test for alternating 1's and 0's
			 */
			*(volatile int *)ptr = 0xaaaaaaaa;
			if (*(volatile int *)ptr != 0xaaaaaaaa)
				page_bad = TRUE;
			/*
			 * Test for alternating 0's and 1's
			 */
			*(volatile int *)ptr = 0x55555555;
			if (*(volatile int *)ptr != 0x55555555)
				page_bad = TRUE;
			/*
			 * Test for all 1's
			 */
			*(volatile int *)ptr = 0xffffffff;
			if (*(volatile int *)ptr != 0xffffffff)
				page_bad = TRUE;
			/*
			 * Test for all 0's
			 */
			*(volatile int *)ptr = 0x0;
			if (*(volatile int *)ptr != 0x0)
				page_bad = TRUE;
			/*
			 * Restore original value.
			 */
			*(int *)ptr = tmp;

skip_memtest:
			/*
			 * Adjust array of valid/good pages.
			 */
			if (page_bad == TRUE)
				continue;
			/*
			 * If this good page is a continuation of the
			 * previous set of good pages, then just increase
			 * the end pointer. Otherwise start a new chunk.
			 * Note that "end" points one higher than end,
			 * making the range >= start and < end.
			 * If we're also doing a speculative memory
			 * test and we at or past the end, bump up Maxmem
			 * so that we keep going. The first bad page
			 * will terminate the loop.
			 */
			if (phys_avail[pa_indx] == pa) {
				phys_avail[pa_indx] += PAGE_SIZE;
			} else {
				pa_indx++;
				if (pa_indx == PHYS_AVAIL_ARRAY_END) {
					printf(
		"Too many holes in the physical address space, giving up\n");
					pa_indx--;
					full = TRUE;
					goto do_dump_avail;
				}
				phys_avail[pa_indx++] = pa;	/* start */
				phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */
			}
			physmem++;
do_dump_avail:
			/* dump_avail also includes kernel/dcons pages. */
			if (dump_avail[da_indx] == pa) {
				dump_avail[da_indx] += PAGE_SIZE;
			} else {
				da_indx++;
				if (da_indx == DUMP_AVAIL_ARRAY_END) {
					da_indx--;
					goto do_next;
				}
				dump_avail[da_indx++] = pa; /* start */
				dump_avail[da_indx] = pa + PAGE_SIZE; /* end */
			}
do_next:
			if (full)
				break;
		}
	}
	/* Tear down the temporary test mapping. */
	*pte = 0;
	invltlb();
	if (memtest != 0)
		printf("\n");

	/*
	 * XXX
	 * The last chunk must contain at least one page plus the message
	 * buffer to avoid complicating other code (message buffer address
	 * calculation, etc.).
	 */
	while (phys_avail[pa_indx - 1] + PAGE_SIZE +
	    round_page(msgbufsize) >= phys_avail[pa_indx]) {
		physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
		phys_avail[pa_indx--] = 0;
		phys_avail[pa_indx--] = 0;
	}

	Maxmem = atop(phys_avail[pa_indx]);

	/* Trim off space for the message buffer. */
	phys_avail[pa_indx] -= round_page(msgbufsize);

	/* Map the message buffer. */
	msgbufp = (struct msgbuf *)PHYS_TO_DMAP(phys_avail[pa_indx]);
}
 1503 
/*
 * Locate the module metadata handed over by the boot loader, initialize
 * the static kernel environment (and, with DDB, the kernel symbol table)
 * from it, and return the kernel's metadata pointer.
 */
static caddr_t
native_parse_preload_data(u_int64_t modulep)
{
	caddr_t kmdp;
	char *envp;
#ifdef DDB
	vm_offset_t ksym_start;
	vm_offset_t ksym_end;
#endif

	/* The loader passes load-time addresses; bias by KERNBASE into KVA. */
	preload_metadata = (caddr_t)(uintptr_t)(modulep + KERNBASE);
	preload_bootstrap_relocate(KERNBASE);
	kmdp = preload_search_by_type("elf kernel");
	if (kmdp == NULL)
		kmdp = preload_search_by_type("elf64 kernel");
	boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int);
	envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *);
	if (envp != NULL)
		envp += KERNBASE;	/* same KERNBASE bias as above */
	init_static_kenv(envp, 0);
#ifdef DDB
	ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t);
	ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t);
	db_fetch_ksymtab(ksym_start, ksym_end);
#endif
	efi_systbl_phys = MD_FETCH(kmdp, MODINFOMD_FW_HANDLE, vm_paddr_t);

	return (kmdp);
}
 1533 
 1534 static void
 1535 amd64_kdb_init(void)
 1536 {
 1537         kdb_init();
 1538 #ifdef KDB
 1539         if (boothowto & RB_KDB)
 1540                 kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
 1541 #endif
 1542 }
 1543 
 1544 /* Set up the fast syscall stuff */
 1545 void
 1546 amd64_conf_fast_syscall(void)
 1547 {
 1548         uint64_t msr;
 1549 
 1550         msr = rdmsr(MSR_EFER) | EFER_SCE;
 1551         wrmsr(MSR_EFER, msr);
 1552         wrmsr(MSR_LSTAR, pti ? (u_int64_t)IDTVEC(fast_syscall_pti) :
 1553             (u_int64_t)IDTVEC(fast_syscall));
 1554         wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32));
 1555         msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) |
 1556             ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48);
 1557         wrmsr(MSR_STAR, msr);
 1558         wrmsr(MSR_SF_MASK, PSL_NT | PSL_T | PSL_I | PSL_C | PSL_D | PSL_AC);
 1559 }
 1560 
/*
 * hammer_time() is the amd64 machine-dependent kernel entry point,
 * called from locore with the physical address of the loader metadata
 * (modulep) and the first free physical address (physfree).  It brings
 * the BSP from "just entered long mode" to a state where mi_startup()
 * can run: loader metadata parsing, CPU identification, GDT/IDT/TSS
 * setup, pcpu and thread0 bootstrap, console/debugger bring-up and FPU
 * initialization.  Returns the address locore loads into %rsp
 * (thread0's PCB, i.e. the top of the initial kernel stack).
 */
u_int64_t
hammer_time(u_int64_t modulep, u_int64_t physfree)
{
	caddr_t kmdp;
	int gsel_tss, x;
	struct pcpu *pc;
	struct nmi_pcpu *np;
	struct xstate_hdr *xhdr;
	u_int64_t rsp0;
	char *env;
	size_t kstack0_sz;
	int late_console;

	TSRAW(&thread0, TS_ENTER, __func__, NULL);

	/* Parse loader-supplied metadata (env, symbols, memory maps, ...). */
	kmdp = init_ops.parse_preload_data(modulep);

	/*
	 * Apply a loader-provided microcode update to the BSP; it may
	 * consume memory starting at physfree, hence the adjustment.
	 */
	physfree += ucode_load_bsp(physfree + KERNBASE);
	physfree = roundup2(physfree, PAGE_SIZE);

	identify_cpu1();
	identify_hypervisor();
	identify_cpu_fixup_bsp();
	identify_cpu2();
	initializecpucache();

	/*
	 * Check for pti, pcid, and invpcid before ifuncs are
	 * resolved, to correctly select the implementation for
	 * pmap_activate_sw_mode().
	 */
	pti = pti_get_default();
	TUNABLE_INT_FETCH("vm.pmap.pti", &pti);
	TUNABLE_INT_FETCH("vm.pmap.pcid_enabled", &pmap_pcid_enabled);
	if ((cpu_feature2 & CPUID2_PCID) != 0 && pmap_pcid_enabled) {
		invpcid_works = (cpu_stdext_feature &
		    CPUID_STDEXT_INVPCID) != 0;
	} else {
		pmap_pcid_enabled = 0;
	}

	link_elf_ireloc(kmdp);

	/*
	 * This may be done better later if it gets more high level
	 * components in it. If so just link td->td_proc here.
	 */
	proc_linkup0(&proc0, &thread0);

	/* Init basic tunables, hz etc */
	init_param1();

	/* Carve thread0's kernel stack out of the memory after physfree. */
	thread0.td_kstack = physfree + KERNBASE;
	thread0.td_kstack_pages = kstack_pages;
	kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE;
	bzero((void *)thread0.td_kstack, kstack0_sz);
	physfree += kstack0_sz;

	/*
	 * Make GDT memory segments.  The TSS and user LDT slots are
	 * system descriptors (two GDT entries each) and are handled
	 * separately below.
	 */
	for (x = 0; x < NGDT; x++) {
		if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) &&
		    x != GUSERLDT_SEL && x != (GUSERLDT_SEL) + 1)
			ssdtosd(&gdt_segs[x], &gdt[x]);
	}
	gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&common_tss[0];
	ssdtosyssd(&gdt_segs[GPROC0_SEL],
	    (struct system_segment_descriptor *)&gdt[GPROC0_SEL]);

	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
	r_gdt.rd_base =  (long) gdt;
	lgdt(&r_gdt);
	pc = &__pcpu[0];

	/* %gs must point at the BSP's pcpu area while in the kernel. */
	wrmsr(MSR_FSBASE, 0);		/* User value */
	wrmsr(MSR_GSBASE, (u_int64_t)pc);
	wrmsr(MSR_KGSBASE, 0);		/* User value while in the kernel */

	pcpu_init(pc, 0, sizeof(struct pcpu));
	dpcpu_init((void *)(physfree + KERNBASE), 0);
	physfree += DPCPU_SIZE;
	PCPU_SET(prvspace, pc);
	PCPU_SET(curthread, &thread0);
	/* Non-late cninit() and printf() can be moved up to here. */
	PCPU_SET(tssp, &common_tss[0]);
	PCPU_SET(commontssp, &common_tss[0]);
	PCPU_SET(tss, (struct system_segment_descriptor *)&gdt[GPROC0_SEL]);
	PCPU_SET(ldt, (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL]);
	PCPU_SET(fs32p, &gdt[GUFS32_SEL]);
	PCPU_SET(gs32p, &gdt[GUGS32_SEL]);

	/*
	 * Initialize mutexes.
	 *
	 * icu_lock: in order to allow an interrupt to occur in a critical
	 *           section, to set pcpu->ipending (etc...) properly, we
	 *           must be able to get the icu lock, so it can't be
	 *           under witness.
	 */
	mutex_init();
	mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS);
	mtx_init(&dt_lock, "descriptor tables", NULL, MTX_DEF);

	/*
	 * Exceptions: fill the whole IDT with the reserved-vector
	 * handler first, then install the real gates.  The pti
	 * variants enter through the PTI trampoline.
	 */
	for (x = 0; x < NIDT; x++)
		setidt(x, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_SYSIGT,
		    SEL_KPL, 0);
	setidt(IDT_DE, pti ? &IDTVEC(div_pti) : &IDTVEC(div), SDT_SYSIGT,
	    SEL_KPL, 0);
	/* DB#, NMI, DF# and MC# run on dedicated IST stacks (4, 2, 1, 3). */
	setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 4);
	setidt(IDT_NMI, &IDTVEC(nmi),  SDT_SYSIGT, SEL_KPL, 2);
	/* BP# and OF# are reachable from user mode (SEL_UPL). */
	setidt(IDT_BP, pti ? &IDTVEC(bpt_pti) : &IDTVEC(bpt), SDT_SYSIGT,
	    SEL_UPL, 0);
	setidt(IDT_OF, pti ? &IDTVEC(ofl_pti) : &IDTVEC(ofl), SDT_SYSIGT,
	    SEL_UPL, 0);
	setidt(IDT_BR, pti ? &IDTVEC(bnd_pti) : &IDTVEC(bnd), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_UD, pti ? &IDTVEC(ill_pti) : &IDTVEC(ill), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_NM, pti ? &IDTVEC(dna_pti) : &IDTVEC(dna), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_DF, &IDTVEC(dblfault), SDT_SYSIGT, SEL_KPL, 1);
	setidt(IDT_FPUGP, pti ? &IDTVEC(fpusegm_pti) : &IDTVEC(fpusegm),
	    SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_TS, pti ? &IDTVEC(tss_pti) : &IDTVEC(tss), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_NP, pti ? &IDTVEC(missing_pti) : &IDTVEC(missing),
	    SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_SS, pti ? &IDTVEC(stk_pti) : &IDTVEC(stk), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_GP, pti ? &IDTVEC(prot_pti) : &IDTVEC(prot), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_PF, pti ? &IDTVEC(page_pti) : &IDTVEC(page), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_MF, pti ? &IDTVEC(fpu_pti) : &IDTVEC(fpu), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_AC, pti ? &IDTVEC(align_pti) : &IDTVEC(align), SDT_SYSIGT,
	    SEL_KPL, 0);
	setidt(IDT_MC, &IDTVEC(mchk), SDT_SYSIGT, SEL_KPL, 3);
	setidt(IDT_XF, pti ? &IDTVEC(xmm_pti) : &IDTVEC(xmm), SDT_SYSIGT,
	    SEL_KPL, 0);
#ifdef KDTRACE_HOOKS
	setidt(IDT_DTRACE_RET, pti ? &IDTVEC(dtrace_ret_pti) :
	    &IDTVEC(dtrace_ret), SDT_SYSIGT, SEL_UPL, 0);
#endif
#ifdef XENHVM
	setidt(IDT_EVTCHN, pti ? &IDTVEC(xen_intr_upcall_pti) :
	    &IDTVEC(xen_intr_upcall), SDT_SYSIGT, SEL_KPL, 0);
#endif
	r_idt.rd_limit = sizeof(idt0) - 1;
	r_idt.rd_base = (long) idt;
	lidt(&r_idt);

	/*
	 * Initialize the clock before the console so that console
	 * initialization can use DELAY().
	 */
	clock_init();

	/*
	 * Use vt(4) by default for UEFI boot (during the sc(4)/vt(4)
	 * transition).
	 * Once bootblocks have updated, we can test directly for
	 * efi_systbl != NULL here...
	 */
	if (preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_EFI_MAP)
	    != NULL)
		vty_set_preferred(VTY_VT);

	/* Speculative-execution mitigation tunables. */
	TUNABLE_INT_FETCH("hw.ibrs_disable", &hw_ibrs_disable);
	TUNABLE_INT_FETCH("hw.spec_store_bypass_disable", &hw_ssb_disable);
	TUNABLE_INT_FETCH("machdep.syscall_ret_l1d_flush",
	    &syscall_ret_l1d_flush_mode);

	finishidentcpu();	/* Final stage of CPU initialization */
	initializecpu();	/* Initialize CPU registers */

	/* doublefault stack space, runs on ist1 */
	common_tss[0].tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)];

	/*
	 * NMI stack, runs on ist2.  The pcpu pointer is stored just
	 * above the start of the ist2 stack.
	 */
	np = ((struct nmi_pcpu *) &nmi0_stack[sizeof(nmi0_stack)]) - 1;
	np->np_pcpu = (register_t) pc;
	common_tss[0].tss_ist2 = (long) np;

	/*
	 * MC# stack, runs on ist3.  The pcpu pointer is stored just
	 * above the start of the ist3 stack.
	 */
	np = ((struct nmi_pcpu *) &mce0_stack[sizeof(mce0_stack)]) - 1;
	np->np_pcpu = (register_t) pc;
	common_tss[0].tss_ist3 = (long) np;

	/*
	 * DB# stack, runs on ist4.
	 */
	np = ((struct nmi_pcpu *) &dbg0_stack[sizeof(dbg0_stack)]) - 1;
	np->np_pcpu = (register_t) pc;
	common_tss[0].tss_ist4 = (long) np;

	/* Set the IO permission bitmap (empty due to tss seg limit) */
	common_tss[0].tss_iobase = sizeof(struct amd64tss) + IOPERM_BITMAP_SIZE;

	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
	ltr(gsel_tss);

	amd64_conf_fast_syscall();

	/*
	 * Temporary forge some valid pointer to PCB, for exception
	 * handlers.  It is reinitialized properly below after FPU is
	 * set up.  Also set up td_critnest to short-cut the page
	 * fault handler.
	 */
	cpu_max_ext_state_size = sizeof(struct savefpu);
	thread0.td_pcb = get_pcb_td(&thread0);
	thread0.td_critnest = 1;

	/*
	 * The console and kdb should be initialized even earlier than here,
	 * but some console drivers don't work until after getmemsize().
	 * Default to late console initialization to support these drivers.
	 * This loses mainly printf()s in getmemsize() and early debugging.
	 */
	late_console = 1;
	TUNABLE_INT_FETCH("debug.late_console", &late_console);
	if (!late_console) {
		cninit();
		amd64_kdb_init();
	}

	getmemsize(kmdp, physfree);
	init_param2(physmem);

	/* Now running on new page tables, configured, and u/iom is accessible. */

#ifdef DEV_PCI
	/* This call might adjust phys_avail[]. */
	pci_early_quirks();
#endif

	if (late_console)
		cninit();

#ifdef DEV_ISA
#ifdef DEV_ATPIC
	elcr_probe();
	atpic_startup();
#else
	/* Reset and mask the atpics and leave them shut down. */
	atpic_reset();

	/*
	 * Point the ICU spurious interrupt vectors at the APIC spurious
	 * interrupt handler.
	 */
	setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0);
#endif
#else
#error "have you forgotten the isa device?";
#endif

	if (late_console)
		amd64_kdb_init();

	msgbufinit(msgbufp, msgbufsize);
	fpuinit();

	/*
	 * Set up thread0 pcb after fpuinit calculated pcb + fpu save
	 * area size.  Zero out the extended state header in fpu save
	 * area.
	 */
	thread0.td_pcb = get_pcb_td(&thread0);
	thread0.td_pcb->pcb_save = get_pcb_user_save_td(&thread0);
	bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size);
	if (use_xsave) {
		xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) +
		    1);
		xhdr->xstate_bv = xsave_mask;
	}
	/* make an initial tss so cpu can get interrupt stack on syscall! */
	rsp0 = (vm_offset_t)thread0.td_pcb;
	/* Ensure the stack is aligned to 16 bytes */
	rsp0 &= ~0xFul;
	common_tss[0].tss_rsp0 = rsp0;
	PCPU_SET(rsp0, rsp0);
	PCPU_SET(pti_rsp0, ((vm_offset_t)PCPU_PTR(pti_stack) +
	    PC_PTI_STACK_SZ * sizeof(uint64_t)) & ~0xful);
	PCPU_SET(curpcb, thread0.td_pcb);

	/* Selectors used when transferring to user mode. */

	_ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
	_udatasel = GSEL(GUDATA_SEL, SEL_UPL);
	_ucode32sel = GSEL(GUCODE32_SEL, SEL_UPL);
	_ufssel = GSEL(GUFS32_SEL, SEL_UPL);
	_ugssel = GSEL(GUGS32_SEL, SEL_UPL);

	load_ds(_udatasel);
	load_es(_udatasel);
	load_fs(_ufssel);

	/* setup proc 0's pcb */
	thread0.td_pcb->pcb_flags = 0;
	thread0.td_frame = &proc0_tf;

	env = kern_getenv("kernelname");
	if (env != NULL)
		strlcpy(kernelname, env, sizeof(kernelname));

	cpu_probe_amdc1e();

#ifdef FDT
	x86_init_fdt();
#endif
	thread0.td_critnest = 0;

	TSEXIT();

	/* Location of kernel stack for locore */
	return ((u_int64_t)thread0.td_pcb);
}
 1889 
/*
 * Machine-dependent per-CPU data initialization hook, called from
 * pcpu_init().  The ACPI CPU id is not known at this point, so mark it
 * as invalid; it is presumably filled in later by the ACPI/MADT code --
 * confirm against the callers that consume pc_acpi_id.
 */
void
cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
{

	pcpu->pc_acpi_id = 0xffffffff;
}
 1896 
 1897 static int
 1898 smap_sysctl_handler(SYSCTL_HANDLER_ARGS)
 1899 {
 1900         struct bios_smap *smapbase;
 1901         struct bios_smap_xattr smap;
 1902         caddr_t kmdp;
 1903         uint32_t *smapattr;
 1904         int count, error, i;
 1905 
 1906         /* Retrieve the system memory map from the loader. */
 1907         kmdp = preload_search_by_type("elf kernel");
 1908         if (kmdp == NULL)
 1909                 kmdp = preload_search_by_type("elf64 kernel");
 1910         smapbase = (struct bios_smap *)preload_search_info(kmdp,
 1911             MODINFO_METADATA | MODINFOMD_SMAP);
 1912         if (smapbase == NULL)
 1913                 return (0);
 1914         smapattr = (uint32_t *)preload_search_info(kmdp,
 1915             MODINFO_METADATA | MODINFOMD_SMAP_XATTR);
 1916         count = *((uint32_t *)smapbase - 1) / sizeof(*smapbase);
 1917         error = 0;
 1918         for (i = 0; i < count; i++) {
 1919                 smap.base = smapbase[i].base;
 1920                 smap.length = smapbase[i].length;
 1921                 smap.type = smapbase[i].type;
 1922                 if (smapattr != NULL)
 1923                         smap.xattr = smapattr[i];
 1924                 else
 1925                         smap.xattr = 0;
 1926                 error = SYSCTL_OUT(req, &smap, sizeof(smap));
 1927         }
 1928         return (error);
 1929 }
 1930 SYSCTL_PROC(_machdep, OID_AUTO, smap, CTLTYPE_OPAQUE|CTLFLAG_RD, NULL, 0,
 1931     smap_sysctl_handler, "S,bios_smap_xattr", "Raw BIOS SMAP data");
 1932 
 1933 static int
 1934 efi_map_sysctl_handler(SYSCTL_HANDLER_ARGS)
 1935 {
 1936         struct efi_map_header *efihdr;
 1937         caddr_t kmdp;
 1938         uint32_t efisize;
 1939 
 1940         kmdp = preload_search_by_type("elf kernel");
 1941         if (kmdp == NULL)
 1942                 kmdp = preload_search_by_type("elf64 kernel");
 1943         efihdr = (struct efi_map_header *)preload_search_info(kmdp,
 1944             MODINFO_METADATA | MODINFOMD_EFI_MAP);
 1945         if (efihdr == NULL)
 1946                 return (0);
 1947         efisize = *((uint32_t *)efihdr - 1);
 1948         return (SYSCTL_OUT(req, efihdr, efisize));
 1949 }
 1950 SYSCTL_PROC(_machdep, OID_AUTO, efi_map, CTLTYPE_OPAQUE|CTLFLAG_RD, NULL, 0,
 1951     efi_map_sysctl_handler, "S,efi_map_header", "Raw EFI Memory Map");
 1952 
 1953 void
 1954 spinlock_enter(void)
 1955 {
 1956         struct thread *td;
 1957         register_t flags;
 1958 
 1959         td = curthread;
 1960         if (td->td_md.md_spinlock_count == 0) {
 1961                 flags = intr_disable();
 1962                 td->td_md.md_spinlock_count = 1;
 1963                 td->td_md.md_saved_flags = flags;
 1964                 critical_enter();
 1965         } else
 1966                 td->td_md.md_spinlock_count++;
 1967 }
 1968 
 1969 void
 1970 spinlock_exit(void)
 1971 {
 1972         struct thread *td;
 1973         register_t flags;
 1974 
 1975         td = curthread;
 1976         flags = td->td_md.md_saved_flags;
 1977         td->td_md.md_spinlock_count--;
 1978         if (td->td_md.md_spinlock_count == 0) {
 1979                 critical_exit();
 1980                 intr_restore(flags);
 1981         }
 1982 }
 1983 
 1984 /*
 1985  * Construct a PCB from a trapframe. This is called from kdb_trap() where
 1986  * we want to start a backtrace from the function that caused us to enter
 1987  * the debugger. We have the context in the trapframe, but base the trace
 1988  * on the PCB. The PCB doesn't have to be perfect, as long as it contains
 1989  * enough for a backtrace.
 1990  */
 1991 void
 1992 makectx(struct trapframe *tf, struct pcb *pcb)
 1993 {
 1994 
 1995         pcb->pcb_r12 = tf->tf_r12;
 1996         pcb->pcb_r13 = tf->tf_r13;
 1997         pcb->pcb_r14 = tf->tf_r14;
 1998         pcb->pcb_r15 = tf->tf_r15;
 1999         pcb->pcb_rbp = tf->tf_rbp;
 2000         pcb->pcb_rbx = tf->tf_rbx;
 2001         pcb->pcb_rip = tf->tf_rip;
 2002         pcb->pcb_rsp = tf->tf_rsp;
 2003 }
 2004 
 2005 int
 2006 ptrace_set_pc(struct thread *td, unsigned long addr)
 2007 {
 2008 
 2009         td->td_frame->tf_rip = addr;
 2010         set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
 2011         return (0);
 2012 }
 2013 
 2014 int
 2015 ptrace_single_step(struct thread *td)
 2016 {
 2017 
 2018         PROC_LOCK_ASSERT(td->td_proc, MA_OWNED);
 2019         if ((td->td_frame->tf_rflags & PSL_T) == 0) {
 2020                 td->td_frame->tf_rflags |= PSL_T;
 2021                 td->td_dbgflags |= TDB_STEP;
 2022         }
 2023         return (0);
 2024 }
 2025 
 2026 int
 2027 ptrace_clear_single_step(struct thread *td)
 2028 {
 2029 
 2030         PROC_LOCK_ASSERT(td->td_proc, MA_OWNED);
 2031         td->td_frame->tf_rflags &= ~PSL_T;
 2032         td->td_dbgflags &= ~TDB_STEP;
 2033         return (0);
 2034 }
 2035 
 2036 int
 2037 fill_regs(struct thread *td, struct reg *regs)
 2038 {
 2039         struct trapframe *tp;
 2040 
 2041         tp = td->td_frame;
 2042         return (fill_frame_regs(tp, regs));
 2043 }
 2044 
 2045 int
 2046 fill_frame_regs(struct trapframe *tp, struct reg *regs)
 2047 {
 2048 
 2049         regs->r_r15 = tp->tf_r15;
 2050         regs->r_r14 = tp->tf_r14;
 2051         regs->r_r13 = tp->tf_r13;
 2052         regs->r_r12 = tp->tf_r12;
 2053         regs->r_r11 = tp->tf_r11;
 2054         regs->r_r10 = tp->tf_r10;
 2055         regs->r_r9  = tp->tf_r9;
 2056         regs->r_r8  = tp->tf_r8;
 2057         regs->r_rdi = tp->tf_rdi;
 2058         regs->r_rsi = tp->tf_rsi;
 2059         regs->r_rbp = tp->tf_rbp;
 2060         regs->r_rbx = tp->tf_rbx;
 2061         regs->r_rdx = tp->tf_rdx;
 2062         regs->r_rcx = tp->tf_rcx;
 2063         regs->r_rax = tp->tf_rax;
 2064         regs->r_rip = tp->tf_rip;
 2065         regs->r_cs = tp->tf_cs;
 2066         regs->r_rflags = tp->tf_rflags;
 2067         regs->r_rsp = tp->tf_rsp;
 2068         regs->r_ss = tp->tf_ss;
 2069         if (tp->tf_flags & TF_HASSEGS) {
 2070                 regs->r_ds = tp->tf_ds;
 2071                 regs->r_es = tp->tf_es;
 2072                 regs->r_fs = tp->tf_fs;
 2073                 regs->r_gs = tp->tf_gs;
 2074         } else {
 2075                 regs->r_ds = 0;
 2076                 regs->r_es = 0;
 2077                 regs->r_fs = 0;
 2078                 regs->r_gs = 0;
 2079         }
 2080         regs->r_err = 0;
 2081         regs->r_trapno = 0;
 2082         return (0);
 2083 }
 2084 
/*
 * Install a full general-purpose register set into a thread's trapframe
 * (the ptrace PT_SETREGS path).  The user-supplied rflags and %cs are
 * validated first (EFL_SECURE / CS_SECURE) so a debugger cannot grant
 * the target privileged flag bits or a kernel code selector; EINVAL is
 * returned on a failed check and nothing is modified.
 */
int
set_regs(struct thread *td, struct reg *regs)
{
	struct trapframe *tp;
	register_t rflags;

	tp = td->td_frame;
	/* Only the low 32 bits of rflags carry architecturally defined flags. */
	rflags = regs->r_rflags & 0xffffffff;
	if (!EFL_SECURE(rflags, tp->tf_rflags) || !CS_SECURE(regs->r_cs))
		return (EINVAL);
	tp->tf_r15 = regs->r_r15;
	tp->tf_r14 = regs->r_r14;
	tp->tf_r13 = regs->r_r13;
	tp->tf_r12 = regs->r_r12;
	tp->tf_r11 = regs->r_r11;
	tp->tf_r10 = regs->r_r10;
	tp->tf_r9  = regs->r_r9;
	tp->tf_r8  = regs->r_r8;
	tp->tf_rdi = regs->r_rdi;
	tp->tf_rsi = regs->r_rsi;
	tp->tf_rbp = regs->r_rbp;
	tp->tf_rbx = regs->r_rbx;
	tp->tf_rdx = regs->r_rdx;
	tp->tf_rcx = regs->r_rcx;
	tp->tf_rax = regs->r_rax;
	tp->tf_rip = regs->r_rip;
	tp->tf_cs = regs->r_cs;
	tp->tf_rflags = rflags;
	tp->tf_rsp = regs->r_rsp;
	tp->tf_ss = regs->r_ss;
	/* Segment register updates are intentionally disabled (see XXXKIB). */
	if (0) {	/* XXXKIB */
		tp->tf_ds = regs->r_ds;
		tp->tf_es = regs->r_es;
		tp->tf_fs = regs->r_fs;
		tp->tf_gs = regs->r_gs;
		tp->tf_flags = TF_HASSEGS;
	}
	/* Force a full iret so the new register values are loaded. */
	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
	return (0);
}
 2125 
 2126 /* XXX check all this stuff! */
 2127 /* externalize from sv_xmm */
 2128 static void
 2129 fill_fpregs_xmm(struct savefpu *sv_xmm, struct fpreg *fpregs)
 2130 {
 2131         struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env;
 2132         struct envxmm *penv_xmm = &sv_xmm->sv_env;
 2133         int i;
 2134 
 2135         /* pcb -> fpregs */
 2136         bzero(fpregs, sizeof(*fpregs));
 2137 
 2138         /* FPU control/status */
 2139         penv_fpreg->en_cw = penv_xmm->en_cw;
 2140         penv_fpreg->en_sw = penv_xmm->en_sw;
 2141         penv_fpreg->en_tw = penv_xmm->en_tw;
 2142         penv_fpreg->en_opcode = penv_xmm->en_opcode;
 2143         penv_fpreg->en_rip = penv_xmm->en_rip;
 2144         penv_fpreg->en_rdp = penv_xmm->en_rdp;
 2145         penv_fpreg->en_mxcsr = penv_xmm->en_mxcsr;
 2146         penv_fpreg->en_mxcsr_mask = penv_xmm->en_mxcsr_mask;
 2147 
 2148         /* FPU registers */
 2149         for (i = 0; i < 8; ++i)
 2150                 bcopy(sv_xmm->sv_fp[i].fp_acc.fp_bytes, fpregs->fpr_acc[i], 10);
 2151 
 2152         /* SSE registers */
 2153         for (i = 0; i < 16; ++i)
 2154                 bcopy(sv_xmm->sv_xmm[i].xmm_bytes, fpregs->fpr_xacc[i], 16);
 2155 }
 2156 
 2157 /* internalize from fpregs into sv_xmm */
 2158 static void
 2159 set_fpregs_xmm(struct fpreg *fpregs, struct savefpu *sv_xmm)
 2160 {
 2161         struct envxmm *penv_xmm = &sv_xmm->sv_env;
 2162         struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env;
 2163         int i;
 2164 
 2165         /* fpregs -> pcb */
 2166         /* FPU control/status */
 2167         penv_xmm->en_cw = penv_fpreg->en_cw;
 2168         penv_xmm->en_sw = penv_fpreg->en_sw;
 2169         penv_xmm->en_tw = penv_fpreg->en_tw;
 2170         penv_xmm->en_opcode = penv_fpreg->en_opcode;
 2171         penv_xmm->en_rip = penv_fpreg->en_rip;
 2172         penv_xmm->en_rdp = penv_fpreg->en_rdp;
 2173         penv_xmm->en_mxcsr = penv_fpreg->en_mxcsr;
 2174         penv_xmm->en_mxcsr_mask = penv_fpreg->en_mxcsr_mask & cpu_mxcsr_mask;
 2175 
 2176         /* FPU registers */
 2177         for (i = 0; i < 8; ++i)
 2178                 bcopy(fpregs->fpr_acc[i], sv_xmm->sv_fp[i].fp_acc.fp_bytes, 10);
 2179 
 2180         /* SSE registers */
 2181         for (i = 0; i < 16; ++i)
 2182                 bcopy(fpregs->fpr_xacc[i], sv_xmm->sv_xmm[i].xmm_bytes, 16);
 2183 }
 2184 
 2185 /* externalize from td->pcb */
 2186 int
 2187 fill_fpregs(struct thread *td, struct fpreg *fpregs)
 2188 {
 2189 
 2190         KASSERT(td == curthread || TD_IS_SUSPENDED(td) ||
 2191             P_SHOULDSTOP(td->td_proc),
 2192             ("not suspended thread %p", td));
 2193         fpugetregs(td);
 2194         fill_fpregs_xmm(get_pcb_user_save_td(td), fpregs);
 2195         return (0);
 2196 }
 2197 
 2198 /* internalize to td->pcb */
 2199 int
 2200 set_fpregs(struct thread *td, struct fpreg *fpregs)
 2201 {
 2202 
 2203         critical_enter();
 2204         set_fpregs_xmm(fpregs, get_pcb_user_save_td(td));
 2205         fpuuserinited(td);
 2206         critical_exit();
 2207         return (0);
 2208 }
 2209 
/*
 * Get machine context.
 *
 * Externalize the current register state of a thread into the mcontext
 * used by signal delivery and getcontext(2).  The process lock is taken
 * briefly because sigonstack() consults per-process signal stack state.
 * With GET_MC_CLEAR_RET the syscall return registers (%rax, %rdx) and
 * the carry flag are pre-cleared so a resumed context observes a
 * successful zero return.  Always returns 0.
 */
int
get_mcontext(struct thread *td, mcontext_t *mcp, int flags)
{
	struct pcb *pcb;
	struct trapframe *tp;

	pcb = td->td_pcb;
	tp = td->td_frame;
	PROC_LOCK(curthread->td_proc);
	mcp->mc_onstack = sigonstack(tp->tf_rsp);
	PROC_UNLOCK(curthread->td_proc);
	mcp->mc_r15 = tp->tf_r15;
	mcp->mc_r14 = tp->tf_r14;
	mcp->mc_r13 = tp->tf_r13;
	mcp->mc_r12 = tp->tf_r12;
	mcp->mc_r11 = tp->tf_r11;
	mcp->mc_r10 = tp->tf_r10;
	mcp->mc_r9  = tp->tf_r9;
	mcp->mc_r8  = tp->tf_r8;
	mcp->mc_rdi = tp->tf_rdi;
	mcp->mc_rsi = tp->tf_rsi;
	mcp->mc_rbp = tp->tf_rbp;
	mcp->mc_rbx = tp->tf_rbx;
	mcp->mc_rcx = tp->tf_rcx;
	mcp->mc_rflags = tp->tf_rflags;
	if (flags & GET_MC_CLEAR_RET) {
		mcp->mc_rax = 0;
		mcp->mc_rdx = 0;
		mcp->mc_rflags &= ~PSL_C;
	} else {
		mcp->mc_rax = tp->tf_rax;
		mcp->mc_rdx = tp->tf_rdx;
	}
	mcp->mc_rip = tp->tf_rip;
	mcp->mc_cs = tp->tf_cs;
	mcp->mc_rsp = tp->tf_rsp;
	mcp->mc_ss = tp->tf_ss;
	mcp->mc_ds = tp->tf_ds;
	mcp->mc_es = tp->tf_es;
	mcp->mc_fs = tp->tf_fs;
	mcp->mc_gs = tp->tf_gs;
	mcp->mc_flags = tp->tf_flags;
	mcp->mc_len = sizeof(*mcp);
	/* FPU/extended state; no xfpusave buffer is requested here. */
	get_fpcontext(td, mcp, NULL, 0);
	update_pcb_bases(pcb);
	mcp->mc_fsbase = pcb->pcb_fsbase;
	mcp->mc_gsbase = pcb->pcb_gsbase;
	mcp->mc_xfpustate = 0;
	mcp->mc_xfpustate_len = 0;
	bzero(mcp->mc_spare, sizeof(mcp->mc_spare));
	return (0);
}
 2265 
/*
 * Set machine context.
 *
 * However, we don't set any but the user modifiable flags, and we won't
 * touch the cs selector.
 *
 * The context length and flag bits are validated up front; rflags is
 * rebuilt so only PSL_USERCHANGE bits can be altered by the caller.
 * Extended FPU state, when present, is copied in from user space before
 * any register is modified so a copyin failure leaves the thread state
 * untouched.  Returns 0 on success, EINVAL on malformed input, or the
 * error from copyin()/set_fpcontext().
 */
int
set_mcontext(struct thread *td, mcontext_t *mcp)
{
	struct pcb *pcb;
	struct trapframe *tp;
	char *xfpustate;
	long rflags;
	int ret;

	pcb = td->td_pcb;
	tp = td->td_frame;
	if (mcp->mc_len != sizeof(*mcp) ||
	    (mcp->mc_flags & ~_MC_FLAG_MASK) != 0)
		return (EINVAL);
	rflags = (mcp->mc_rflags & PSL_USERCHANGE) |
	    (tp->tf_rflags & ~PSL_USERCHANGE);
	if (mcp->mc_flags & _MC_HASFPXSTATE) {
		/* Bound the alloca below before trusting the length. */
		if (mcp->mc_xfpustate_len > cpu_max_ext_state_size -
		    sizeof(struct savefpu))
			return (EINVAL);
		xfpustate = __builtin_alloca(mcp->mc_xfpustate_len);
		ret = copyin((void *)mcp->mc_xfpustate, xfpustate,
		    mcp->mc_xfpustate_len);
		if (ret != 0)
			return (ret);
	} else
		xfpustate = NULL;
	ret = set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len);
	if (ret != 0)
		return (ret);
	tp->tf_r15 = mcp->mc_r15;
	tp->tf_r14 = mcp->mc_r14;
	tp->tf_r13 = mcp->mc_r13;
	tp->tf_r12 = mcp->mc_r12;
	tp->tf_r11 = mcp->mc_r11;
	tp->tf_r10 = mcp->mc_r10;
	tp->tf_r9  = mcp->mc_r9;
	tp->tf_r8  = mcp->mc_r8;
	tp->tf_rdi = mcp->mc_rdi;
	tp->tf_rsi = mcp->mc_rsi;
	tp->tf_rbp = mcp->mc_rbp;
	tp->tf_rbx = mcp->mc_rbx;
	tp->tf_rdx = mcp->mc_rdx;
	tp->tf_rcx = mcp->mc_rcx;
	tp->tf_rax = mcp->mc_rax;
	tp->tf_rip = mcp->mc_rip;
	tp->tf_rflags = rflags;
	tp->tf_rsp = mcp->mc_rsp;
	tp->tf_ss = mcp->mc_ss;
	tp->tf_flags = mcp->mc_flags;
	if (tp->tf_flags & TF_HASSEGS) {
		tp->tf_ds = mcp->mc_ds;
		tp->tf_es = mcp->mc_es;
		tp->tf_fs = mcp->mc_fs;
		tp->tf_gs = mcp->mc_gs;
	}
	/* Force a full iret so the new register values are loaded. */
	set_pcb_flags(pcb, PCB_FULL_IRET);
	if (mcp->mc_flags & _MC_HASBASES) {
		pcb->pcb_fsbase = mcp->mc_fsbase;
		pcb->pcb_gsbase = mcp->mc_gsbase;
	}
	return (0);
}
 2335 
 2336 static void
 2337 get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave,
 2338     size_t xfpusave_len)
 2339 {
 2340         size_t max_len, len;
 2341 
 2342         mcp->mc_ownedfp = fpugetregs(td);
 2343         bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0],
 2344             sizeof(mcp->mc_fpstate));
 2345         mcp->mc_fpformat = fpuformat();
 2346         if (!use_xsave || xfpusave_len == 0)
 2347                 return;
 2348         max_len = cpu_max_ext_state_size - sizeof(struct savefpu);
 2349         len = xfpusave_len;
 2350         if (len > max_len) {
 2351                 len = max_len;
 2352                 bzero(xfpusave + max_len, len - max_len);
 2353         }
 2354         mcp->mc_flags |= _MC_HASFPXSTATE;
 2355         mcp->mc_xfpustate_len = len;
 2356         bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len);
 2357 }
 2358 
 2359 static int
 2360 set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate,
 2361     size_t xfpustate_len)
 2362 {
 2363         int error;
 2364 
 2365         if (mcp->mc_fpformat == _MC_FPFMT_NODEV)
 2366                 return (0);
 2367         else if (mcp->mc_fpformat != _MC_FPFMT_XMM)
 2368                 return (EINVAL);
 2369         else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) {
 2370                 /* We don't care what state is left in the FPU or PCB. */
 2371                 fpstate_drop(td);
 2372                 error = 0;
 2373         } else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU ||
 2374             mcp->mc_ownedfp == _MC_FPOWNED_PCB) {
 2375                 error = fpusetregs(td, (struct savefpu *)&mcp->mc_fpstate,
 2376                     xfpustate, xfpustate_len);
 2377         } else
 2378                 return (EINVAL);
 2379         return (error);
 2380 }
 2381 
/*
 * Relinquish the current user FPU state of a thread: drop the live
 * registers if this CPU owns them and clear the PCB "initialized"
 * flags so the next FPU use starts from a clean state.  Must be called
 * with the thread owning a user (not kernel) FPU context.
 */
void
fpstate_drop(struct thread *td)
{

	KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu"));
	critical_enter();
	if (PCPU_GET(fpcurthread) == td)
		fpudrop();
	/*
	 * XXX force a full drop of the fpu.  The above only drops it if we
	 * owned it.
	 *
	 * XXX I don't much like fpugetuserregs()'s semantics of doing a full
	 * drop.  Dropping only to the pcb matches fnsave's behaviour.
	 * We only need to drop to !PCB_INITDONE in sendsig().  But
	 * sendsig() is the only caller of fpugetuserregs()... perhaps we just
	 * have too many layers.
	 */
	clear_pcb_flags(curthread->td_pcb,
	    PCB_FPUINITDONE | PCB_USERFPUINITDONE);
	critical_exit();
}
 2404 
 2405 int
 2406 fill_dbregs(struct thread *td, struct dbreg *dbregs)
 2407 {
 2408         struct pcb *pcb;
 2409 
 2410         if (td == NULL) {
 2411                 dbregs->dr[0] = rdr0();
 2412                 dbregs->dr[1] = rdr1();
 2413                 dbregs->dr[2] = rdr2();
 2414                 dbregs->dr[3] = rdr3();
 2415                 dbregs->dr[6] = rdr6();
 2416                 dbregs->dr[7] = rdr7();
 2417         } else {
 2418                 pcb = td->td_pcb;
 2419                 dbregs->dr[0] = pcb->pcb_dr0;
 2420                 dbregs->dr[1] = pcb->pcb_dr1;
 2421                 dbregs->dr[2] = pcb->pcb_dr2;
 2422                 dbregs->dr[3] = pcb->pcb_dr3;
 2423                 dbregs->dr[6] = pcb->pcb_dr6;
 2424                 dbregs->dr[7] = pcb->pcb_dr7;
 2425         }
 2426         dbregs->dr[4] = 0;
 2427         dbregs->dr[5] = 0;
 2428         dbregs->dr[8] = 0;
 2429         dbregs->dr[9] = 0;
 2430         dbregs->dr[10] = 0;
 2431         dbregs->dr[11] = 0;
 2432         dbregs->dr[12] = 0;
 2433         dbregs->dr[13] = 0;
 2434         dbregs->dr[14] = 0;
 2435         dbregs->dr[15] = 0;
 2436         return (0);
 2437 }
 2438 
 2439 int
 2440 set_dbregs(struct thread *td, struct dbreg *dbregs)
 2441 {
 2442         struct pcb *pcb;
 2443         int i;
 2444 
 2445         if (td == NULL) {
 2446                 load_dr0(dbregs->dr[0]);
 2447                 load_dr1(dbregs->dr[1]);
 2448                 load_dr2(dbregs->dr[2]);
 2449                 load_dr3(dbregs->dr[3]);
 2450                 load_dr6(dbregs->dr[6]);
 2451                 load_dr7(dbregs->dr[7]);
 2452         } else {
 2453                 /*
 2454                  * Don't let an illegal value for dr7 get set.  Specifically,
 2455                  * check for undefined settings.  Setting these bit patterns
 2456                  * result in undefined behaviour and can lead to an unexpected
 2457                  * TRCTRAP or a general protection fault right here.
 2458                  * Upper bits of dr6 and dr7 must not be set
 2459                  */
 2460                 for (i = 0; i < 4; i++) {
 2461                         if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02)
 2462                                 return (EINVAL);
 2463                         if (td->td_frame->tf_cs == _ucode32sel &&
 2464                             DBREG_DR7_LEN(dbregs->dr[7], i) == DBREG_DR7_LEN_8)
 2465                                 return (EINVAL);
 2466                 }
 2467                 if ((dbregs->dr[6] & 0xffffffff00000000ul) != 0 ||
 2468                     (dbregs->dr[7] & 0xffffffff00000000ul) != 0)
 2469                         return (EINVAL);
 2470 
 2471                 pcb = td->td_pcb;
 2472 
 2473                 /*
 2474                  * Don't let a process set a breakpoint that is not within the
 2475                  * process's address space.  If a process could do this, it
 2476                  * could halt the system by setting a breakpoint in the kernel
 2477                  * (if ddb was enabled).  Thus, we need to check to make sure
 2478                  * that no breakpoints are being enabled for addresses outside
 2479                  * process's address space.
 2480                  *
 2481                  * XXX - what about when the watched area of the user's
 2482                  * address space is written into from within the kernel
 2483                  * ... wouldn't that still cause a breakpoint to be generated
 2484                  * from within kernel mode?
 2485                  */
 2486 
 2487                 if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) {
 2488                         /* dr0 is enabled */
 2489                         if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS)
 2490                                 return (EINVAL);
 2491                 }
 2492                 if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) {
 2493                         /* dr1 is enabled */
 2494                         if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS)
 2495                                 return (EINVAL);
 2496                 }
 2497                 if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) {
 2498                         /* dr2 is enabled */
 2499                         if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS)
 2500                                 return (EINVAL);
 2501                 }
 2502                 if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) {
 2503                         /* dr3 is enabled */
 2504                         if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS)
 2505                                 return (EINVAL);
 2506                 }
 2507 
 2508                 pcb->pcb_dr0 = dbregs->dr[0];
 2509                 pcb->pcb_dr1 = dbregs->dr[1];
 2510                 pcb->pcb_dr2 = dbregs->dr[2];
 2511                 pcb->pcb_dr3 = dbregs->dr[3];
 2512                 pcb->pcb_dr6 = dbregs->dr[6];
 2513                 pcb->pcb_dr7 = dbregs->dr[7];
 2514 
 2515                 set_pcb_flags(pcb, PCB_DBREGS);
 2516         }
 2517 
 2518         return (0);
 2519 }
 2520 
/*
 * Clear all hardware debug registers on the current CPU, disabling the
 * breakpoints before zeroing the address registers.
 */
void
reset_dbregs(void)
{

	load_dr7(0);	/* Turn off the control bits first */
	load_dr0(0);
	load_dr1(0);
	load_dr2(0);
	load_dr3(0);
	load_dr6(0);
}
 2532 
 2533 /*
 2534  * Return > 0 if a hardware breakpoint has been hit, and the
 2535  * breakpoint was in user space.  Return 0, otherwise.
 2536  */
 2537 int
 2538 user_dbreg_trap(register_t dr6)
 2539 {
 2540         u_int64_t dr7;
 2541         u_int64_t bp;       /* breakpoint bits extracted from dr6 */
 2542         int nbp;            /* number of breakpoints that triggered */
 2543         caddr_t addr[4];    /* breakpoint addresses */
 2544         int i;
 2545 
 2546         bp = dr6 & DBREG_DR6_BMASK;
 2547         if (bp == 0) {
 2548                 /*
 2549                  * None of the breakpoint bits are set meaning this
 2550                  * trap was not caused by any of the debug registers
 2551                  */
 2552                 return 0;
 2553         }
 2554 
 2555         dr7 = rdr7();
 2556         if ((dr7 & 0x000000ff) == 0) {
 2557                 /*
 2558                  * all GE and LE bits in the dr7 register are zero,
 2559                  * thus the trap couldn't have been caused by the
 2560                  * hardware debug registers
 2561                  */
 2562                 return 0;
 2563         }
 2564 
 2565         nbp = 0;
 2566 
 2567         /*
 2568          * at least one of the breakpoints were hit, check to see
 2569          * which ones and if any of them are user space addresses
 2570          */
 2571 
 2572         if (bp & 0x01) {
 2573                 addr[nbp++] = (caddr_t)rdr0();
 2574         }
 2575         if (bp & 0x02) {
 2576                 addr[nbp++] = (caddr_t)rdr1();
 2577         }
 2578         if (bp & 0x04) {
 2579                 addr[nbp++] = (caddr_t)rdr2();
 2580         }
 2581         if (bp & 0x08) {
 2582                 addr[nbp++] = (caddr_t)rdr3();
 2583         }
 2584 
 2585         for (i = 0; i < nbp; i++) {
 2586                 if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) {
 2587                         /*
 2588                          * addr[i] is in user space
 2589                          */
 2590                         return nbp;
 2591                 }
 2592         }
 2593 
 2594         /*
 2595          * None of the breakpoints are in user space.
 2596          */
 2597         return 0;
 2598 }
 2599 
 2600 /*
 2601  * The pcb_flags is only modified by current thread, or by other threads
 2602  * when current thread is stopped.  However, current thread may change it
 2603  * from the interrupt context in cpu_switch(), or in the trap handler.
 2604  * When we read-modify-write pcb_flags from C sources, compiler may generate
 2605  * code that is not atomic regarding the interrupt handler.  If a trap or
 2606  * interrupt happens and any flag is modified from the handler, it can be
 2607  * clobbered with the cached value later.  Therefore, we implement setting
 2608  * and clearing flags with single-instruction functions, which do not race
 2609  * with possible modification of the flags from the trap or interrupt context,
 2610  * because traps and interrupts are executed only on instruction boundary.
 2611  */
 2612 void
 2613 set_pcb_flags_raw(struct pcb *pcb, const u_int flags)
 2614 {
 2615 
 2616         __asm __volatile("orl %1,%0"
 2617             : "=m" (pcb->pcb_flags) : "ir" (flags), "m" (pcb->pcb_flags)
 2618             : "cc", "memory");
 2619 
 2620 }
 2621 
 2622 /*
 2623  * The support for RDFSBASE, WRFSBASE and similar instructions for %gs
 2624  * base requires that kernel saves MSR_FSBASE and MSR_{K,}GSBASE into
 2625  * pcb if user space modified the bases.  We must save on the context
 2626  * switch or if the return to usermode happens through the doreti.
 2627  *
 2628  * Tracking of both events is performed by the pcb flag PCB_FULL_IRET,
 2629  * which have a consequence that the base MSRs must be saved each time
 2630  * the PCB_FULL_IRET flag is set.  We disable interrupts to sync with
 2631  * context switches.
 2632  */
 2633 static void
 2634 set_pcb_flags_fsgsbase(struct pcb *pcb, const u_int flags)
 2635 {
 2636         register_t r;
 2637 
 2638         if (curpcb == pcb &&
 2639             (flags & PCB_FULL_IRET) != 0 &&
 2640             (pcb->pcb_flags & PCB_FULL_IRET) == 0) {
 2641                 r = intr_disable();
 2642                 if ((pcb->pcb_flags & PCB_FULL_IRET) == 0) {
 2643                         if (rfs() == _ufssel)
 2644                                 pcb->pcb_fsbase = rdfsbase();
 2645                         if (rgs() == _ugssel)
 2646                                 pcb->pcb_gsbase = rdmsr(MSR_KGSBASE);
 2647                 }
 2648                 set_pcb_flags_raw(pcb, flags);
 2649                 intr_restore(r);
 2650         } else {
 2651                 set_pcb_flags_raw(pcb, flags);
 2652         }
 2653 }
 2654 
 2655 DEFINE_IFUNC(, void, set_pcb_flags, (struct pcb *, const u_int), static)
 2656 {
 2657 
 2658         return ((cpu_stdext_feature & CPUID_STDEXT_FSGSBASE) != 0 ?
 2659             set_pcb_flags_fsgsbase : set_pcb_flags_raw);
 2660 }
 2661 
/*
 * Clear 'flags' in pcb->pcb_flags with one "andl" instruction so the
 * read-modify-write cannot be split by a trap or interrupt (see the
 * comment above set_pcb_flags_raw()).
 */
void
clear_pcb_flags(struct pcb *pcb, const u_int flags)
{

	__asm __volatile("andl %1,%0"
	    : "=m" (pcb->pcb_flags) : "ir" (~flags), "m" (pcb->pcb_flags)
	    : "cc", "memory");
}
 2670 
 2671 #ifdef KDB
 2672 
 2673 /*
 2674  * Provide inb() and outb() as functions.  They are normally only available as
 2675  * inline functions, thus cannot be called from the debugger.
 2676  */
 2677 
 2678 /* silence compiler warnings */
 2679 u_char inb_(u_short);
 2680 void outb_(u_short, u_char);
 2681 
 2682 u_char
 2683 inb_(u_short port)
 2684 {
 2685         return inb(port);
 2686 }
 2687 
 2688 void
 2689 outb_(u_short port, u_char data)
 2690 {
 2691         outb(port, data);
 2692 }
 2693 
 2694 #endif /* KDB */
 2695 
 2696 #undef memset
 2697 #undef memmove
 2698 #undef memcpy
 2699 
 2700 void    *memset_std(void *buf, int c, size_t len);
 2701 void    *memset_erms(void *buf, int c, size_t len);
 2702 DEFINE_IFUNC(, void *, memset, (void *, int, size_t), static)
 2703 {
 2704 
 2705         return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ?
 2706             memset_erms : memset_std);
 2707 }
 2708 
 2709 void    *memmove_std(void * _Nonnull dst, const void * _Nonnull src,
 2710             size_t len);
 2711 void    *memmove_erms(void * _Nonnull dst, const void * _Nonnull src,
 2712             size_t len);
 2713 DEFINE_IFUNC(, void *, memmove, (void * _Nonnull, const void * _Nonnull,
 2714     size_t), static)
 2715 {
 2716 
 2717         return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ?
 2718             memmove_erms : memmove_std);
 2719 }
 2720 
 2721 void    *memcpy_std(void * _Nonnull dst, const void * _Nonnull src,
 2722             size_t len);
 2723 void    *memcpy_erms(void * _Nonnull dst, const void * _Nonnull src,
 2724             size_t len);
 2725 DEFINE_IFUNC(, void *, memcpy, (void * _Nonnull, const void * _Nonnull,size_t),
 2726     static)
 2727 {
 2728 
 2729         return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ?
 2730             memcpy_erms : memcpy_std);
 2731 }
 2732 
 2733 void    pagezero_std(void *addr);
 2734 void    pagezero_erms(void *addr);
 2735 DEFINE_IFUNC(, void , pagezero, (void *), static)
 2736 {
 2737 
 2738         return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ?
 2739             pagezero_erms : pagezero_std);
 2740 }

Cache object: d2a7034d5824baaf8c285f19ca662e32


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.