FreeBSD/Linux Kernel Cross Reference
sys/amd64/amd64/machdep.c

    1 /*-
    2  * SPDX-License-Identifier: BSD-4-Clause
    3  *
    4  * Copyright (c) 2003 Peter Wemm.
    5  * Copyright (c) 1992 Terrence R. Lambert.
    6  * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
    7  * All rights reserved.
    8  *
    9  * This code is derived from software contributed to Berkeley by
   10  * William Jolitz.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice, this list of conditions and the following disclaimer.
   17  * 2. Redistributions in binary form must reproduce the above copyright
   18  *    notice, this list of conditions and the following disclaimer in the
   19  *    documentation and/or other materials provided with the distribution.
   20  * 3. All advertising materials mentioning features or use of this software
   21  *    must display the following acknowledgement:
   22  *      This product includes software developed by the University of
   23  *      California, Berkeley and its contributors.
   24  * 4. Neither the name of the University nor the names of its contributors
   25  *    may be used to endorse or promote products derived from this software
   26  *    without specific prior written permission.
   27  *
   28  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   38  * SUCH DAMAGE.
   39  *
   40  *      from: @(#)machdep.c     7.4 (Berkeley) 6/3/91
   41  */
   42 
   43 #include <sys/cdefs.h>
   44 __FBSDID("$FreeBSD: stable/12/sys/amd64/amd64/machdep.c 354856 2019-11-19 15:32:15Z kib $");
   45 
   46 #include "opt_atpic.h"
   47 #include "opt_cpu.h"
   48 #include "opt_ddb.h"
   49 #include "opt_inet.h"
   50 #include "opt_isa.h"
   51 #include "opt_kstack_pages.h"
   52 #include "opt_maxmem.h"
   53 #include "opt_mp_watchdog.h"
   54 #include "opt_pci.h"
   55 #include "opt_platform.h"
   56 #include "opt_sched.h"
   57 
   58 #include <sys/param.h>
   59 #include <sys/proc.h>
   60 #include <sys/systm.h>
   61 #include <sys/bio.h>
   62 #include <sys/buf.h>
   63 #include <sys/bus.h>
   64 #include <sys/callout.h>
   65 #include <sys/cons.h>
   66 #include <sys/cpu.h>
   67 #include <sys/efi.h>
   68 #include <sys/eventhandler.h>
   69 #include <sys/exec.h>
   70 #include <sys/imgact.h>
   71 #include <sys/kdb.h>
   72 #include <sys/kernel.h>
   73 #include <sys/ktr.h>
   74 #include <sys/linker.h>
   75 #include <sys/lock.h>
   76 #include <sys/malloc.h>
   77 #include <sys/memrange.h>
   78 #include <sys/msgbuf.h>
   79 #include <sys/mutex.h>
   80 #include <sys/pcpu.h>
   81 #include <sys/ptrace.h>
   82 #include <sys/reboot.h>
   83 #include <sys/rwlock.h>
   84 #include <sys/sched.h>
   85 #include <sys/signalvar.h>
   86 #ifdef SMP
   87 #include <sys/smp.h>
   88 #endif
   89 #include <sys/syscallsubr.h>
   90 #include <sys/sysctl.h>
   91 #include <sys/sysent.h>
   92 #include <sys/sysproto.h>
   93 #include <sys/ucontext.h>
   94 #include <sys/vmmeter.h>
   95 
   96 #include <vm/vm.h>
   97 #include <vm/vm_extern.h>
   98 #include <vm/vm_kern.h>
   99 #include <vm/vm_page.h>
  100 #include <vm/vm_map.h>
  101 #include <vm/vm_object.h>
  102 #include <vm/vm_pager.h>
  103 #include <vm/vm_param.h>
  104 #include <vm/vm_phys.h>
  105 
  106 #ifdef DDB
  107 #ifndef KDB
  108 #error KDB must be enabled in order for DDB to work!
  109 #endif
  110 #include <ddb/ddb.h>
  111 #include <ddb/db_sym.h>
  112 #endif
  113 
  114 #include <net/netisr.h>
  115 
  116 #include <machine/clock.h>
  117 #include <machine/cpu.h>
  118 #include <machine/cputypes.h>
  119 #include <machine/frame.h>
  120 #include <machine/intr_machdep.h>
  121 #include <x86/mca.h>
  122 #include <machine/md_var.h>
  123 #include <machine/metadata.h>
  124 #include <machine/mp_watchdog.h>
  125 #include <machine/pc/bios.h>
  126 #include <machine/pcb.h>
  127 #include <machine/proc.h>
  128 #include <machine/reg.h>
  129 #include <machine/sigframe.h>
  130 #include <machine/specialreg.h>
  131 #include <machine/trap.h>
  132 #include <machine/tss.h>
  133 #include <x86/ucode.h>
  134 #include <x86/ifunc.h>
  135 #ifdef SMP
  136 #include <machine/smp.h>
  137 #endif
  138 #ifdef FDT
  139 #include <x86/fdt.h>
  140 #endif
  141 
  142 #ifdef DEV_ATPIC
  143 #include <x86/isa/icu.h>
  144 #else
  145 #include <x86/apicvar.h>
  146 #endif
  147 
  148 #include <isa/isareg.h>
  149 #include <isa/rtc.h>
  150 #include <x86/init.h>
  151 
  152 /* Sanity check for __curthread() */
  153 CTASSERT(offsetof(struct pcpu, pc_curthread) == 0);
  154 
  155 /*
  156  * The PTI trampoline stack needs enough space for a hardware trapframe and a
  157  * couple of scratch registers, as well as the trapframe left behind after an
  158  * iret fault.
  159  */
  160 CTASSERT(PC_PTI_STACK_SZ * sizeof(register_t) >= 2 * sizeof(struct pti_frame) -
  161     offsetof(struct pti_frame, pti_rip));
  162 
  163 extern u_int64_t hammer_time(u_int64_t, u_int64_t);
  164 
  165 #define CS_SECURE(cs)           (ISPL(cs) == SEL_UPL)
  166 #define EFL_SECURE(ef, oef)     ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
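       /*
        * A minimal sketch of what the two checks above accept, assuming the
        * PSL_* definitions from <machine/psl.h>: EFL_SECURE() passes a new
        * %rflags value only when every bit that differs from the old value
        * lies within PSL_USERCHANGE, and CS_SECURE() passes only selectors
        * with user (ring 3) privilege.
        */
       #if 0   /* illustrative sketch only */
       static void
       efl_secure_example(struct trapframe *regs)
       {
               register_t oldfl, newfl;

               oldfl = regs->tf_rflags;
               newfl = oldfl | PSL_D;          /* direction flag: user-changeable */
               KASSERT(EFL_SECURE(newfl, oldfl), ("PSL_D change should pass"));
               newfl = oldfl ^ PSL_IOPL;       /* IOPL: privileged */
               KASSERT(!EFL_SECURE(newfl, oldfl), ("IOPL change must be refused"));
       }
       #endif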
  167 
  168 static void cpu_startup(void *);
  169 static void get_fpcontext(struct thread *td, mcontext_t *mcp,
  170     char *xfpusave, size_t xfpusave_len);
  171 static int  set_fpcontext(struct thread *td, mcontext_t *mcp,
  172     char *xfpustate, size_t xfpustate_len);
  173 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
  174 
  175 /* Preload data parse function */
  176 static caddr_t native_parse_preload_data(u_int64_t);
  177 
  178 /* Native function to fetch and parse the e820 map */
  179 static void native_parse_memmap(caddr_t, vm_paddr_t *, int *);
  180 
  181 /* Default init_ops implementation. */
  182 struct init_ops init_ops = {
  183         .parse_preload_data =   native_parse_preload_data,
  184         .early_clock_source_init =      i8254_init,
  185         .early_delay =                  i8254_delay,
  186         .parse_memmap =                 native_parse_memmap,
  187 #ifdef SMP
  188         .mp_bootaddress =               mp_bootaddress,
  189         .start_all_aps =                native_start_all_aps,
  190 #endif
  191 #ifdef DEV_PCI
  192         .msi_init =                     msi_init,
  193 #endif
  194 };
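       /*
        * init_ops makes the earliest machine-dependent boot steps pluggable.
        * A hedged sketch of how a platform that does not boot through the
        * native BIOS/UEFI path might substitute its own table before
        * hammer_time() runs (hypothetical names, modeled loosely on the Xen
        * PVH support in sys/x86/xen/pv.c):
        */
       #if 0   /* illustrative sketch only */
       static struct init_ops example_pv_init_ops = {
               .parse_preload_data =           example_pv_parse_preload_data,
               .early_clock_source_init =      example_pv_clock_init,
               .early_delay =                  example_pv_delay,
               .parse_memmap =                 example_pv_parse_memmap,
       };
       /* The platform entry point would then do: init_ops = example_pv_init_ops; */
       #endif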
  195 
  196 /*
  197  * Physical address of the EFI System Table. Stashed from the metadata hints
  198  * passed into the kernel and used by the EFI code to call runtime services.
  199  */
  200 vm_paddr_t efi_systbl_phys;
  201 
  202 /* Intel ICH registers */
  203 #define ICH_PMBASE      0x400
   204 #define ICH_SMI_EN      (ICH_PMBASE + 0x30)
  205 
  206 int     _udatasel, _ucodesel, _ucode32sel, _ufssel, _ugssel;
  207 
  208 int cold = 1;
  209 
  210 long Maxmem = 0;
  211 long realmem = 0;
  212 
  213 /*
  214  * The number of PHYSMAP entries must be one less than the number of
  215  * PHYSSEG entries because the PHYSMAP entry that spans the largest
  216  * physical address that is accessible by ISA DMA is split into two
  217  * PHYSSEG entries.
  218  */
  219 #define PHYSMAP_SIZE    (2 * (VM_PHYSSEG_MAX - 1))
  220 
  221 vm_paddr_t phys_avail[PHYSMAP_SIZE + 2];
  222 vm_paddr_t dump_avail[PHYSMAP_SIZE + 2];
  223 
   224 /* must be 2 less so that a 0/0 pair can signal the end of the chunks */
  225 #define PHYS_AVAIL_ARRAY_END (nitems(phys_avail) - 2)
  226 #define DUMP_AVAIL_ARRAY_END (nitems(dump_avail) - 2)
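       /*
        * phys_avail[] and dump_avail[] hold [start, end) base/bound pairs of
        * physical addresses; a 0/0 pair terminates the list.  A hypothetical
        * layout for illustration:
        *
        *      phys_avail[0] = 0x0000000000010000      start of first chunk
        *      phys_avail[1] = 0x000000000009f000      end of first chunk
        *      phys_avail[2] = 0x0000000000100000      start of second chunk
        *      phys_avail[3] = 0x00000000bffe0000      end of second chunk
        *      phys_avail[4] = phys_avail[5] = 0       terminator
        */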
  227 
  228 struct kva_md_info kmi;
  229 
  230 static struct trapframe proc0_tf;
  231 struct region_descriptor r_idt;
  232 
  233 struct pcpu *__pcpu;
  234 struct pcpu temp_bsp_pcpu;
  235 
  236 struct mtx icu_lock;
  237 
  238 struct mem_range_softc mem_range_softc;
  239 
  240 struct mtx dt_lock;     /* lock for GDT and LDT */
  241 
  242 void (*vmm_resume_p)(void);
  243 
   244 static void
   245 cpu_startup(void *dummy)
   246 {
  248         uintmax_t memsize;
  249         char *sysenv;
  250 
   251         /*
   252          * On MacBooks, we need to prevent the legacy USB circuit from
   253          * generating an SMI#, because this can cause several problems,
   254          * namely: incorrect CPU frequency detection and failure to
   255          * start the APs.
   256          * We do this by clearing a bit in the SMI_EN (SMI Control and
   257          * Enable) register of the Intel ICH LPC Interface Bridge.
   258          */
  259         sysenv = kern_getenv("smbios.system.product");
  260         if (sysenv != NULL) {
  261                 if (strncmp(sysenv, "MacBook1,1", 10) == 0 ||
  262                     strncmp(sysenv, "MacBook3,1", 10) == 0 ||
  263                     strncmp(sysenv, "MacBook4,1", 10) == 0 ||
  264                     strncmp(sysenv, "MacBookPro1,1", 13) == 0 ||
  265                     strncmp(sysenv, "MacBookPro1,2", 13) == 0 ||
  266                     strncmp(sysenv, "MacBookPro3,1", 13) == 0 ||
  267                     strncmp(sysenv, "MacBookPro4,1", 13) == 0 ||
  268                     strncmp(sysenv, "Macmini1,1", 10) == 0) {
  269                         if (bootverbose)
  270                                 printf("Disabling LEGACY_USB_EN bit on "
  271                                     "Intel ICH.\n");
  272                         outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8);
  273                 }
  274                 freeenv(sysenv);
  275         }
  276 
  277         /*
  278          * Good {morning,afternoon,evening,night}.
  279          */
  280         startrtclock();
  281         printcpuinfo();
  282 
   283         /*
   284          * Display physical memory if SMBIOS reports a reasonable amount.
   285          */
  286         memsize = 0;
  287         sysenv = kern_getenv("smbios.memory.enabled");
  288         if (sysenv != NULL) {
  289                 memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10;
  290                 freeenv(sysenv);
  291         }
  292         if (memsize < ptoa((uintmax_t)vm_free_count()))
  293                 memsize = ptoa((uintmax_t)Maxmem);
  294         printf("real memory  = %ju (%ju MB)\n", memsize, memsize >> 20);
  295         realmem = atop(memsize);
  296 
  297         /*
  298          * Display any holes after the first chunk of extended memory.
  299          */
  300         if (bootverbose) {
  301                 int indx;
  302 
  303                 printf("Physical memory chunk(s):\n");
  304                 for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
  305                         vm_paddr_t size;
  306 
  307                         size = phys_avail[indx + 1] - phys_avail[indx];
  308                         printf(
  309                             "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n",
  310                             (uintmax_t)phys_avail[indx],
  311                             (uintmax_t)phys_avail[indx + 1] - 1,
  312                             (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
  313                 }
  314         }
  315 
  316         vm_ksubmap_init(&kmi);
  317 
  318         printf("avail memory = %ju (%ju MB)\n",
  319             ptoa((uintmax_t)vm_free_count()),
  320             ptoa((uintmax_t)vm_free_count()) / 1048576);
  321 #ifdef DEV_PCI
  322         if (bootverbose && intel_graphics_stolen_base != 0)
  323                 printf("intel stolen mem: base %#jx size %ju MB\n",
  324                     (uintmax_t)intel_graphics_stolen_base,
  325                     (uintmax_t)intel_graphics_stolen_size / 1024 / 1024);
  326 #endif
  327 
  328         /*
  329          * Set up buffers, so they can be used to read disk labels.
  330          */
  331         bufinit();
  332         vm_pager_bufferinit();
  333 
  334         cpu_setregs();
  335 }
  336 
   337 /*
   338  * Send an interrupt to a process.
   339  *
   340  * The stack is set up so that the sigcode stored at its top can
   341  * call the handler routine, followed by a call to the sigreturn
   342  * routine below.  After sigreturn resets the signal mask, the
   343  * stack, and the frame pointer, it returns to the user-specified
   344  * pc and psl.
   345  */
  347 void
  348 sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
  349 {
  350         struct sigframe sf, *sfp;
  351         struct pcb *pcb;
  352         struct proc *p;
  353         struct thread *td;
  354         struct sigacts *psp;
  355         char *sp;
  356         struct trapframe *regs;
  357         char *xfpusave;
  358         size_t xfpusave_len;
  359         int sig;
  360         int oonstack;
  361 
  362         td = curthread;
  363         pcb = td->td_pcb;
  364         p = td->td_proc;
  365         PROC_LOCK_ASSERT(p, MA_OWNED);
  366         sig = ksi->ksi_signo;
  367         psp = p->p_sigacts;
  368         mtx_assert(&psp->ps_mtx, MA_OWNED);
  369         regs = td->td_frame;
  370         oonstack = sigonstack(regs->tf_rsp);
  371 
  372         if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) {
  373                 xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu);
  374                 xfpusave = __builtin_alloca(xfpusave_len);
  375         } else {
  376                 xfpusave_len = 0;
  377                 xfpusave = NULL;
  378         }
  379 
  380         /* Save user context. */
  381         bzero(&sf, sizeof(sf));
  382         sf.sf_uc.uc_sigmask = *mask;
  383         sf.sf_uc.uc_stack = td->td_sigstk;
  384         sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
  385             ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
  386         sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
  387         bcopy(regs, &sf.sf_uc.uc_mcontext.mc_rdi, sizeof(*regs));
  388         sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
  389         get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len);
  390         fpstate_drop(td);
  391         update_pcb_bases(pcb);
  392         sf.sf_uc.uc_mcontext.mc_fsbase = pcb->pcb_fsbase;
  393         sf.sf_uc.uc_mcontext.mc_gsbase = pcb->pcb_gsbase;
  394         bzero(sf.sf_uc.uc_mcontext.mc_spare,
  395             sizeof(sf.sf_uc.uc_mcontext.mc_spare));
  396         bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__));
  397 
  398         /* Allocate space for the signal handler context. */
  399         if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
  400             SIGISMEMBER(psp->ps_sigonstack, sig)) {
  401                 sp = (char *)td->td_sigstk.ss_sp + td->td_sigstk.ss_size;
  402 #if defined(COMPAT_43)
  403                 td->td_sigstk.ss_flags |= SS_ONSTACK;
  404 #endif
  405         } else
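                       /*
                        * Step over the 128-byte red zone the amd64 ABI
                        * reserves below the user %rsp.
                        */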
  406                 sp = (char *)regs->tf_rsp - 128;
  407         if (xfpusave != NULL) {
  408                 sp -= xfpusave_len;
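                       /* The XSAVE area must be 64-byte aligned. */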
  409                 sp = (char *)((unsigned long)sp & ~0x3Ful);
  410                 sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp;
  411         }
  412         sp -= sizeof(struct sigframe);
  413         /* Align to 16 bytes. */
  414         sfp = (struct sigframe *)((unsigned long)sp & ~0xFul);
  415 
  416         /* Build the argument list for the signal handler. */
  417         regs->tf_rdi = sig;                     /* arg 1 in %rdi */
  418         regs->tf_rdx = (register_t)&sfp->sf_uc; /* arg 3 in %rdx */
  419         bzero(&sf.sf_si, sizeof(sf.sf_si));
  420         if (SIGISMEMBER(psp->ps_siginfo, sig)) {
  421                 /* Signal handler installed with SA_SIGINFO. */
  422                 regs->tf_rsi = (register_t)&sfp->sf_si; /* arg 2 in %rsi */
  423                 sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;
  424 
  425                 /* Fill in POSIX parts */
  426                 sf.sf_si = ksi->ksi_info;
  427                 sf.sf_si.si_signo = sig; /* maybe a translated signal */
  428                 regs->tf_rcx = (register_t)ksi->ksi_addr; /* arg 4 in %rcx */
  429         } else {
  430                 /* Old FreeBSD-style arguments. */
  431                 regs->tf_rsi = ksi->ksi_code;   /* arg 2 in %rsi */
  432                 regs->tf_rcx = (register_t)ksi->ksi_addr; /* arg 4 in %rcx */
  433                 sf.sf_ahu.sf_handler = catcher;
  434         }
  435         mtx_unlock(&psp->ps_mtx);
  436         PROC_UNLOCK(p);
  437 
  438         /*
  439          * Copy the sigframe out to the user's stack.
  440          */
  441         if (copyout(&sf, sfp, sizeof(*sfp)) != 0 ||
  442             (xfpusave != NULL && copyout(xfpusave,
  443             (void *)sf.sf_uc.uc_mcontext.mc_xfpustate, xfpusave_len)
  444             != 0)) {
  445 #ifdef DEBUG
  446                 printf("process %ld has trashed its stack\n", (long)p->p_pid);
  447 #endif
  448                 PROC_LOCK(p);
  449                 sigexit(td, SIGILL);
  450         }
  451 
  452         regs->tf_rsp = (long)sfp;
  453         regs->tf_rip = p->p_sysent->sv_sigcode_base;
  454         regs->tf_rflags &= ~(PSL_T | PSL_D);
  455         regs->tf_cs = _ucodesel;
  456         regs->tf_ds = _udatasel;
  457         regs->tf_ss = _udatasel;
  458         regs->tf_es = _udatasel;
  459         regs->tf_fs = _ufssel;
  460         regs->tf_gs = _ugssel;
  461         regs->tf_flags = TF_HASSEGS;
  462         PROC_LOCK(p);
  463         mtx_lock(&psp->ps_mtx);
  464 }
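       /*
        * A minimal userland counterpart sketch (hypothetical program, not
        * part of this file) showing the three handler arguments sendsig()
        * above loads into %rdi, %rsi and %rdx:
        */
       #if 0   /* illustrative sketch only */
       #include <signal.h>
       #include <stdio.h>
       #include <string.h>
       #include <ucontext.h>

       static void
       handler(int sig, siginfo_t *si, void *ucp)      /* %rdi, %rsi, %rdx */
       {
               ucontext_t *uc = ucp;

               printf("sig %d code %d rip %#lx\n", sig, si->si_code,
                   (unsigned long)uc->uc_mcontext.mc_rip);
       }

       int
       main(void)
       {
               struct sigaction sa;

               memset(&sa, 0, sizeof(sa));
               sa.sa_sigaction = handler;
               sa.sa_flags = SA_SIGINFO;
               sigaction(SIGUSR1, &sa, NULL);
               raise(SIGUSR1);
               return (0);
       }
       #endif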
  465 
   466 /*
   467  * System call to clean up state after a signal
   468  * has been taken.  Reset the signal mask and
   469  * stack state from the context left by sendsig
   470  * (above).  Return to the previous pc and psl as
   471  * specified by that context.  Check carefully to
   472  * make sure that the user has not modified the
   473  * state to gain improper privileges.
   474  *
   475  * MPSAFE
   476  */
   477 int
   478 sys_sigreturn(struct thread *td,
   479     struct sigreturn_args /* {
   480             const struct __ucontext *sigcntxp;
   481     } */ *uap)
   483 {
  484         ucontext_t uc;
  485         struct pcb *pcb;
  486         struct proc *p;
  487         struct trapframe *regs;
  488         ucontext_t *ucp;
  489         char *xfpustate;
  490         size_t xfpustate_len;
  491         long rflags;
  492         int cs, error, ret;
  493         ksiginfo_t ksi;
  494 
  495         pcb = td->td_pcb;
  496         p = td->td_proc;
  497 
  498         error = copyin(uap->sigcntxp, &uc, sizeof(uc));
  499         if (error != 0) {
  500                 uprintf("pid %d (%s): sigreturn copyin failed\n",
  501                     p->p_pid, td->td_name);
  502                 return (error);
  503         }
  504         ucp = &uc;
  505         if ((ucp->uc_mcontext.mc_flags & ~_MC_FLAG_MASK) != 0) {
  506                 uprintf("pid %d (%s): sigreturn mc_flags %x\n", p->p_pid,
  507                     td->td_name, ucp->uc_mcontext.mc_flags);
  508                 return (EINVAL);
  509         }
  510         regs = td->td_frame;
  511         rflags = ucp->uc_mcontext.mc_rflags;
  512         /*
  513          * Don't allow users to change privileged or reserved flags.
  514          */
  515         if (!EFL_SECURE(rflags, regs->tf_rflags)) {
  516                 uprintf("pid %d (%s): sigreturn rflags = 0x%lx\n", p->p_pid,
  517                     td->td_name, rflags);
  518                 return (EINVAL);
  519         }
  520 
  521         /*
  522          * Don't allow users to load a valid privileged %cs.  Let the
  523          * hardware check for invalid selectors, excess privilege in
  524          * other selectors, invalid %eip's and invalid %esp's.
  525          */
  526         cs = ucp->uc_mcontext.mc_cs;
  527         if (!CS_SECURE(cs)) {
  528                 uprintf("pid %d (%s): sigreturn cs = 0x%x\n", p->p_pid,
  529                     td->td_name, cs);
  530                 ksiginfo_init_trap(&ksi);
  531                 ksi.ksi_signo = SIGBUS;
  532                 ksi.ksi_code = BUS_OBJERR;
  533                 ksi.ksi_trapno = T_PROTFLT;
  534                 ksi.ksi_addr = (void *)regs->tf_rip;
  535                 trapsignal(td, &ksi);
  536                 return (EINVAL);
  537         }
  538 
  539         if ((uc.uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) {
  540                 xfpustate_len = uc.uc_mcontext.mc_xfpustate_len;
  541                 if (xfpustate_len > cpu_max_ext_state_size -
  542                     sizeof(struct savefpu)) {
  543                         uprintf("pid %d (%s): sigreturn xfpusave_len = 0x%zx\n",
  544                             p->p_pid, td->td_name, xfpustate_len);
  545                         return (EINVAL);
  546                 }
  547                 xfpustate = __builtin_alloca(xfpustate_len);
  548                 error = copyin((const void *)uc.uc_mcontext.mc_xfpustate,
  549                     xfpustate, xfpustate_len);
  550                 if (error != 0) {
  551                         uprintf(
  552         "pid %d (%s): sigreturn copying xfpustate failed\n",
  553                             p->p_pid, td->td_name);
  554                         return (error);
  555                 }
  556         } else {
  557                 xfpustate = NULL;
  558                 xfpustate_len = 0;
  559         }
  560         ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate, xfpustate_len);
  561         if (ret != 0) {
  562                 uprintf("pid %d (%s): sigreturn set_fpcontext err %d\n",
  563                     p->p_pid, td->td_name, ret);
  564                 return (ret);
  565         }
  566         bcopy(&ucp->uc_mcontext.mc_rdi, regs, sizeof(*regs));
  567         update_pcb_bases(pcb);
  568         pcb->pcb_fsbase = ucp->uc_mcontext.mc_fsbase;
  569         pcb->pcb_gsbase = ucp->uc_mcontext.mc_gsbase;
  570 
  571 #if defined(COMPAT_43)
  572         if (ucp->uc_mcontext.mc_onstack & 1)
  573                 td->td_sigstk.ss_flags |= SS_ONSTACK;
  574         else
  575                 td->td_sigstk.ss_flags &= ~SS_ONSTACK;
  576 #endif
  577 
  578         kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0);
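               /*
                * EJUSTRETURN tells the syscall return path not to modify the
                * trapframe we just restored.
                */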
  579         return (EJUSTRETURN);
  580 }
  581 
  582 #ifdef COMPAT_FREEBSD4
  583 int
  584 freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap)
  585 {
   586 
   587         return (sys_sigreturn(td, (struct sigreturn_args *)uap));
  588 }
  589 #endif
  590 
  591 /*
  592  * Reset registers to default values on exec.
  593  */
  594 void
  595 exec_setregs(struct thread *td, struct image_params *imgp, u_long stack)
  596 {
  597         struct trapframe *regs;
  598         struct pcb *pcb;
  599         register_t saved_rflags;
  600 
  601         regs = td->td_frame;
  602         pcb = td->td_pcb;
  603 
  604         if (td->td_proc->p_md.md_ldt != NULL)
  605                 user_ldt_free(td);
  606 
  607         update_pcb_bases(pcb);
  608         pcb->pcb_fsbase = 0;
  609         pcb->pcb_gsbase = 0;
  610         clear_pcb_flags(pcb, PCB_32BIT);
  611         pcb->pcb_initial_fpucw = __INITIAL_FPUCW__;
  612 
  613         saved_rflags = regs->tf_rflags & PSL_T;
  614         bzero((char *)regs, sizeof(struct trapframe));
  615         regs->tf_rip = imgp->entry_addr;
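               /*
                * Align the stack so that (%rsp + 8) is a multiple of 16,
                * i.e. %rsp is congruent to 8 mod 16, the alignment a
                * function body sees after a call has pushed its return
                * address.
                */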
  616         regs->tf_rsp = ((stack - 8) & ~0xFul) + 8;
  617         regs->tf_rdi = stack;           /* argv */
  618         regs->tf_rflags = PSL_USER | saved_rflags;
  619         regs->tf_ss = _udatasel;
  620         regs->tf_cs = _ucodesel;
  621         regs->tf_ds = _udatasel;
  622         regs->tf_es = _udatasel;
  623         regs->tf_fs = _ufssel;
  624         regs->tf_gs = _ugssel;
  625         regs->tf_flags = TF_HASSEGS;
  626 
  627         /*
  628          * Reset the hardware debug registers if they were in use.
  629          * They won't have any meaning for the newly exec'd process.
  630          */
  631         if (pcb->pcb_flags & PCB_DBREGS) {
  632                 pcb->pcb_dr0 = 0;
  633                 pcb->pcb_dr1 = 0;
  634                 pcb->pcb_dr2 = 0;
  635                 pcb->pcb_dr3 = 0;
  636                 pcb->pcb_dr6 = 0;
  637                 pcb->pcb_dr7 = 0;
  638                 if (pcb == curpcb) {
  639                         /*
  640                          * Clear the debug registers on the running
  641                          * CPU, otherwise they will end up affecting
  642                          * the next process we switch to.
  643                          */
  644                         reset_dbregs();
  645                 }
  646                 clear_pcb_flags(pcb, PCB_DBREGS);
  647         }
  648 
  649         /*
  650          * Drop the FP state if we hold it, so that the process gets a
  651          * clean FP state if it uses the FPU again.
  652          */
  653         fpstate_drop(td);
  654 }
  655 
  656 void
  657 cpu_setregs(void)
  658 {
  659         register_t cr0;
  660 
  661         cr0 = rcr0();
  662         /*
  663          * CR0_MP, CR0_NE and CR0_TS are also set by npx_probe() for the
  664          * BSP.  See the comments there about why we set them.
  665          */
  666         cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM;
  667         load_cr0(cr0);
  668 }
  669 
  670 /*
  671  * Initialize amd64 and configure to run kernel
  672  */
  673 
  674 /*
  675  * Initialize segments & interrupt table
  676  */
  677 
  678 struct user_segment_descriptor gdt[NGDT * MAXCPU];/* global descriptor tables */
  679 static struct gate_descriptor idt0[NIDT];
  680 struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */
  681 
  682 static char dblfault_stack[PAGE_SIZE] __aligned(16);
  683 static char mce0_stack[PAGE_SIZE] __aligned(16);
  684 static char nmi0_stack[PAGE_SIZE] __aligned(16);
  685 static char dbg0_stack[PAGE_SIZE] __aligned(16);
  686 CTASSERT(sizeof(struct nmi_pcpu) == 16);
  687 
  688 struct amd64tss common_tss[MAXCPU];
  689 
  690 /*
  691  * Software prototypes -- in more palatable form.
  692  *
  693  * Keep GUFS32, GUGS32, GUCODE32 and GUDATA at the same
  694  * slots as corresponding segments for i386 kernel.
  695  */
  696 struct soft_segment_descriptor gdt_segs[] = {
  697 /* GNULL_SEL    0 Null Descriptor */
  698 {       .ssd_base = 0x0,
  699         .ssd_limit = 0x0,
  700         .ssd_type = 0,
  701         .ssd_dpl = 0,
  702         .ssd_p = 0,
  703         .ssd_long = 0,
  704         .ssd_def32 = 0,
  705         .ssd_gran = 0           },
  706 /* GNULL2_SEL   1 Null Descriptor */
  707 {       .ssd_base = 0x0,
  708         .ssd_limit = 0x0,
  709         .ssd_type = 0,
  710         .ssd_dpl = 0,
  711         .ssd_p = 0,
  712         .ssd_long = 0,
  713         .ssd_def32 = 0,
  714         .ssd_gran = 0           },
  715 /* GUFS32_SEL   2 32 bit %gs Descriptor for user */
  716 {       .ssd_base = 0x0,
  717         .ssd_limit = 0xfffff,
  718         .ssd_type = SDT_MEMRWA,
  719         .ssd_dpl = SEL_UPL,
  720         .ssd_p = 1,
  721         .ssd_long = 0,
  722         .ssd_def32 = 1,
  723         .ssd_gran = 1           },
  724 /* GUGS32_SEL   3 32 bit %fs Descriptor for user */
  725 {       .ssd_base = 0x0,
  726         .ssd_limit = 0xfffff,
  727         .ssd_type = SDT_MEMRWA,
  728         .ssd_dpl = SEL_UPL,
  729         .ssd_p = 1,
  730         .ssd_long = 0,
  731         .ssd_def32 = 1,
  732         .ssd_gran = 1           },
  733 /* GCODE_SEL    4 Code Descriptor for kernel */
  734 {       .ssd_base = 0x0,
  735         .ssd_limit = 0xfffff,
  736         .ssd_type = SDT_MEMERA,
  737         .ssd_dpl = SEL_KPL,
  738         .ssd_p = 1,
  739         .ssd_long = 1,
  740         .ssd_def32 = 0,
  741         .ssd_gran = 1           },
  742 /* GDATA_SEL    5 Data Descriptor for kernel */
  743 {       .ssd_base = 0x0,
  744         .ssd_limit = 0xfffff,
  745         .ssd_type = SDT_MEMRWA,
  746         .ssd_dpl = SEL_KPL,
  747         .ssd_p = 1,
  748         .ssd_long = 1,
  749         .ssd_def32 = 0,
  750         .ssd_gran = 1           },
  751 /* GUCODE32_SEL 6 32 bit Code Descriptor for user */
  752 {       .ssd_base = 0x0,
  753         .ssd_limit = 0xfffff,
  754         .ssd_type = SDT_MEMERA,
  755         .ssd_dpl = SEL_UPL,
  756         .ssd_p = 1,
  757         .ssd_long = 0,
  758         .ssd_def32 = 1,
  759         .ssd_gran = 1           },
  760 /* GUDATA_SEL   7 32/64 bit Data Descriptor for user */
  761 {       .ssd_base = 0x0,
  762         .ssd_limit = 0xfffff,
  763         .ssd_type = SDT_MEMRWA,
  764         .ssd_dpl = SEL_UPL,
  765         .ssd_p = 1,
  766         .ssd_long = 0,
  767         .ssd_def32 = 1,
  768         .ssd_gran = 1           },
  769 /* GUCODE_SEL   8 64 bit Code Descriptor for user */
  770 {       .ssd_base = 0x0,
  771         .ssd_limit = 0xfffff,
  772         .ssd_type = SDT_MEMERA,
  773         .ssd_dpl = SEL_UPL,
  774         .ssd_p = 1,
  775         .ssd_long = 1,
  776         .ssd_def32 = 0,
  777         .ssd_gran = 1           },
  778 /* GPROC0_SEL   9 Proc 0 Tss Descriptor */
  779 {       .ssd_base = 0x0,
  780         .ssd_limit = sizeof(struct amd64tss) + IOPERM_BITMAP_SIZE - 1,
  781         .ssd_type = SDT_SYSTSS,
  782         .ssd_dpl = SEL_KPL,
  783         .ssd_p = 1,
  784         .ssd_long = 0,
  785         .ssd_def32 = 0,
  786         .ssd_gran = 0           },
  787 /* Actually, the TSS is a system descriptor which is double size */
  788 {       .ssd_base = 0x0,
  789         .ssd_limit = 0x0,
  790         .ssd_type = 0,
  791         .ssd_dpl = 0,
  792         .ssd_p = 0,
  793         .ssd_long = 0,
  794         .ssd_def32 = 0,
  795         .ssd_gran = 0           },
  796 /* GUSERLDT_SEL 11 LDT Descriptor */
  797 {       .ssd_base = 0x0,
  798         .ssd_limit = 0x0,
  799         .ssd_type = 0,
  800         .ssd_dpl = 0,
  801         .ssd_p = 0,
  802         .ssd_long = 0,
  803         .ssd_def32 = 0,
  804         .ssd_gran = 0           },
  805 /* GUSERLDT_SEL 12 LDT Descriptor, double size */
  806 {       .ssd_base = 0x0,
  807         .ssd_limit = 0x0,
  808         .ssd_type = 0,
  809         .ssd_dpl = 0,
  810         .ssd_p = 0,
  811         .ssd_long = 0,
  812         .ssd_def32 = 0,
  813         .ssd_gran = 0           },
  814 };
  815 _Static_assert(nitems(gdt_segs) == NGDT, "Stale NGDT");
  816 
  817 void
  818 setidt(int idx, inthand_t *func, int typ, int dpl, int ist)
  819 {
  820         struct gate_descriptor *ip;
  821 
  822         ip = idt + idx;
  823         ip->gd_looffset = (uintptr_t)func;
  824         ip->gd_selector = GSEL(GCODE_SEL, SEL_KPL);
  825         ip->gd_ist = ist;
  826         ip->gd_xx = 0;
  827         ip->gd_type = typ;
  828         ip->gd_dpl = dpl;
  829         ip->gd_p = 1;
   830         ip->gd_hioffset = ((uintptr_t)func) >> 16;
  831 }
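       /*
        * A hedged usage sketch: during early boot the IDT is populated with
        * calls along these lines (vector constants from <machine/segments.h>,
        * handlers from the IDTVEC list below):
        */
       #if 0   /* illustrative sketch only */
               setidt(IDT_DE, &IDTVEC(div), SDT_SYSIGT, SEL_KPL, 0);
               setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 2); /* IST entry 2 */
               setidt(IDT_BP, &IDTVEC(bpt), SDT_SYSIGT, SEL_UPL, 0); /* usable from ring 3 */
       #endif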
  832 
  833 extern inthand_t
  834         IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
  835         IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
  836         IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
  837         IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
  838         IDTVEC(xmm), IDTVEC(dblfault),
  839         IDTVEC(div_pti), IDTVEC(bpt_pti),
  840         IDTVEC(ofl_pti), IDTVEC(bnd_pti), IDTVEC(ill_pti), IDTVEC(dna_pti),
  841         IDTVEC(fpusegm_pti), IDTVEC(tss_pti), IDTVEC(missing_pti),
  842         IDTVEC(stk_pti), IDTVEC(prot_pti), IDTVEC(page_pti),
  843         IDTVEC(rsvd_pti), IDTVEC(fpu_pti), IDTVEC(align_pti),
  844         IDTVEC(xmm_pti),
  845 #ifdef KDTRACE_HOOKS
  846         IDTVEC(dtrace_ret), IDTVEC(dtrace_ret_pti),
  847 #endif
  848 #ifdef XENHVM
  849         IDTVEC(xen_intr_upcall), IDTVEC(xen_intr_upcall_pti),
  850 #endif
  851         IDTVEC(fast_syscall), IDTVEC(fast_syscall32),
  852         IDTVEC(fast_syscall_pti);
  853 
  854 #ifdef DDB
  855 /*
  856  * Display the index and function name of any IDT entries that don't use
  857  * the default 'rsvd' entry point.
  858  */
  859 DB_SHOW_COMMAND(idt, db_show_idt)
  860 {
  861         struct gate_descriptor *ip;
  862         int idx;
  863         uintptr_t func;
  864 
  865         ip = idt;
  866         for (idx = 0; idx < NIDT && !db_pager_quit; idx++) {
  867                 func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
  868                 if (func != (uintptr_t)&IDTVEC(rsvd)) {
  869                         db_printf("%3d\t", idx);
  870                         db_printsym(func, DB_STGY_PROC);
  871                         db_printf("\n");
  872                 }
  873                 ip++;
  874         }
  875 }
  876 
  877 /* Show privileged registers. */
  878 DB_SHOW_COMMAND(sysregs, db_show_sysregs)
  879 {
  880         struct {
  881                 uint16_t limit;
  882                 uint64_t base;
  883         } __packed idtr, gdtr;
  884         uint16_t ldt, tr;
  885 
  886         __asm __volatile("sidt %0" : "=m" (idtr));
  887         db_printf("idtr\t0x%016lx/%04x\n",
  888             (u_long)idtr.base, (u_int)idtr.limit);
  889         __asm __volatile("sgdt %0" : "=m" (gdtr));
  890         db_printf("gdtr\t0x%016lx/%04x\n",
  891             (u_long)gdtr.base, (u_int)gdtr.limit);
  892         __asm __volatile("sldt %0" : "=r" (ldt));
  893         db_printf("ldtr\t0x%04x\n", ldt);
  894         __asm __volatile("str %0" : "=r" (tr));
  895         db_printf("tr\t0x%04x\n", tr);
  896         db_printf("cr0\t0x%016lx\n", rcr0());
  897         db_printf("cr2\t0x%016lx\n", rcr2());
  898         db_printf("cr3\t0x%016lx\n", rcr3());
  899         db_printf("cr4\t0x%016lx\n", rcr4());
  900         if (rcr4() & CR4_XSAVE)
  901                 db_printf("xcr0\t0x%016lx\n", rxcr(0));
  902         db_printf("EFER\t0x%016lx\n", rdmsr(MSR_EFER));
  903         if (cpu_feature2 & (CPUID2_VMX | CPUID2_SMX))
  904                 db_printf("FEATURES_CTL\t%016lx\n",
  905                     rdmsr(MSR_IA32_FEATURE_CONTROL));
  906         db_printf("DEBUG_CTL\t0x%016lx\n", rdmsr(MSR_DEBUGCTLMSR));
  907         db_printf("PAT\t0x%016lx\n", rdmsr(MSR_PAT));
  908         db_printf("GSBASE\t0x%016lx\n", rdmsr(MSR_GSBASE));
  909 }
  910 
  911 DB_SHOW_COMMAND(dbregs, db_show_dbregs)
  912 {
  913 
  914         db_printf("dr0\t0x%016lx\n", rdr0());
  915         db_printf("dr1\t0x%016lx\n", rdr1());
  916         db_printf("dr2\t0x%016lx\n", rdr2());
  917         db_printf("dr3\t0x%016lx\n", rdr3());
  918         db_printf("dr6\t0x%016lx\n", rdr6());
  919         db_printf("dr7\t0x%016lx\n", rdr7());   
  920 }
  921 #endif
  922 
   923 void
   924 sdtossd(struct user_segment_descriptor *sd,
   925     struct soft_segment_descriptor *ssd)
   926 {
  928 
  929         ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
  930         ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
  931         ssd->ssd_type  = sd->sd_type;
  932         ssd->ssd_dpl   = sd->sd_dpl;
  933         ssd->ssd_p     = sd->sd_p;
  934         ssd->ssd_long  = sd->sd_long;
  935         ssd->ssd_def32 = sd->sd_def32;
  936         ssd->ssd_gran  = sd->sd_gran;
  937 }
  938 
   939 void
   940 ssdtosd(struct soft_segment_descriptor *ssd,
   941     struct user_segment_descriptor *sd)
   942 {
  944 
  945         sd->sd_lobase = (ssd->ssd_base) & 0xffffff;
  946         sd->sd_hibase = (ssd->ssd_base >> 24) & 0xff;
  947         sd->sd_lolimit = (ssd->ssd_limit) & 0xffff;
  948         sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf;
  949         sd->sd_type  = ssd->ssd_type;
  950         sd->sd_dpl   = ssd->ssd_dpl;
  951         sd->sd_p     = ssd->ssd_p;
  952         sd->sd_long  = ssd->ssd_long;
  953         sd->sd_def32 = ssd->ssd_def32;
  954         sd->sd_gran  = ssd->ssd_gran;
  955 }
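       /*
        * For user segment descriptors the two conversions above are inverses
        * (the unused AVL bit excepted); an illustrative round-trip sketch:
        */
       #if 0   /* illustrative sketch only */
       static void
       descriptor_roundtrip_example(void)
       {
               struct user_segment_descriptor sd2;
               struct soft_segment_descriptor ssd;

               bzero(&sd2, sizeof(sd2));
               sdtossd(&gdt[GUDATA_SEL], &ssd);
               ssdtosd(&ssd, &sd2);
               KASSERT(sd2.sd_type == gdt[GUDATA_SEL].sd_type &&
                   sd2.sd_lobase == gdt[GUDATA_SEL].sd_lobase,
                   ("descriptor round-trip mismatch"));
       }
       #endif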
  956 
   957 void
   958 ssdtosyssd(struct soft_segment_descriptor *ssd,
   959     struct system_segment_descriptor *sd)
   960 {
  962 
  963         sd->sd_lobase = (ssd->ssd_base) & 0xffffff;
  964         sd->sd_hibase = (ssd->ssd_base >> 24) & 0xfffffffffful;
  965         sd->sd_lolimit = (ssd->ssd_limit) & 0xffff;
  966         sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf;
  967         sd->sd_type  = ssd->ssd_type;
  968         sd->sd_dpl   = ssd->ssd_dpl;
  969         sd->sd_p     = ssd->ssd_p;
  970         sd->sd_gran  = ssd->ssd_gran;
  971 }
  972 
  973 #if !defined(DEV_ATPIC) && defined(DEV_ISA)
  974 #include <isa/isavar.h>
  975 #include <isa/isareg.h>
  976 /*
  977  * Return a bitmap of the current interrupt requests.  This is 8259-specific
  978  * and is only suitable for use at probe time.
  979  * This is only here to pacify sio.  It is NOT FATAL if this doesn't work.
  980  * It shouldn't be here.  There should probably be an APIC centric
  981  * implementation in the apic driver code, if at all.
  982  */
  983 intrmask_t
  984 isa_irq_pending(void)
  985 {
  986         u_char irr1;
  987         u_char irr2;
  988 
  989         irr1 = inb(IO_ICU1);
  990         irr2 = inb(IO_ICU2);
  991         return ((irr2 << 8) | irr1);
  992 }
  993 #endif
  994 
  995 u_int basemem;
  996 
  997 static int
  998 add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap,
  999     int *physmap_idxp)
 1000 {
 1001         int i, insert_idx, physmap_idx;
 1002 
 1003         physmap_idx = *physmap_idxp;
 1004 
 1005         if (length == 0)
 1006                 return (1);
 1007 
 1008         /*
 1009          * Find insertion point while checking for overlap.  Start off by
 1010          * assuming the new entry will be added to the end.
 1011          *
 1012          * NB: physmap_idx points to the next free slot.
 1013          */
 1014         insert_idx = physmap_idx;
 1015         for (i = 0; i <= physmap_idx; i += 2) {
 1016                 if (base < physmap[i + 1]) {
 1017                         if (base + length <= physmap[i]) {
 1018                                 insert_idx = i;
 1019                                 break;
 1020                         }
 1021                         if (boothowto & RB_VERBOSE)
 1022                                 printf(
 1023                     "Overlapping memory regions, ignoring second region\n");
 1024                         return (1);
 1025                 }
 1026         }
 1027 
 1028         /* See if we can prepend to the next entry. */
 1029         if (insert_idx <= physmap_idx && base + length == physmap[insert_idx]) {
 1030                 physmap[insert_idx] = base;
 1031                 return (1);
 1032         }
 1033 
 1034         /* See if we can append to the previous entry. */
 1035         if (insert_idx > 0 && base == physmap[insert_idx - 1]) {
 1036                 physmap[insert_idx - 1] += length;
 1037                 return (1);
 1038         }
 1039 
 1040         physmap_idx += 2;
 1041         *physmap_idxp = physmap_idx;
 1042         if (physmap_idx == PHYSMAP_SIZE) {
 1043                 printf(
 1044                 "Too many segments in the physical address map, giving up\n");
 1045                 return (0);
 1046         }
 1047 
 1048         /*
 1049          * Move the last 'N' entries down to make room for the new
 1050          * entry if needed.
 1051          */
 1052         for (i = (physmap_idx - 2); i > insert_idx; i -= 2) {
 1053                 physmap[i] = physmap[i - 2];
 1054                 physmap[i + 1] = physmap[i - 1];
 1055         }
 1056 
 1057         /* Insert the new entry. */
 1058         physmap[insert_idx] = base;
 1059         physmap[insert_idx + 1] = base + length;
 1060         return (1);
 1061 }
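       /*
        * An illustrative sketch of the merge behavior above, with
        * hypothetical addresses: adjacent regions coalesce, while overlapping
        * regions are dropped with a warning.
        */
       #if 0   /* illustrative sketch only */
       static void
       physmap_merge_example(void)
       {
               vm_paddr_t map[PHYSMAP_SIZE];
               int idx;

               idx = 0;
               bzero(map, sizeof(map));
               add_physmap_entry(0x100000, 0x100000, map, &idx);
               /* map[0..1] = { 0x100000, 0x200000 } */
               add_physmap_entry(0x200000, 0x100000, map, &idx);
               /* coalesced with the previous entry: { 0x100000, 0x300000 } */
               add_physmap_entry(0x180000, 0x10000, map, &idx);
               /* overlaps the first region: ignored */
       }
       #endif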
 1062 
 1063 void
 1064 bios_add_smap_entries(struct bios_smap *smapbase, u_int32_t smapsize,
 1065                       vm_paddr_t *physmap, int *physmap_idx)
 1066 {
 1067         struct bios_smap *smap, *smapend;
 1068 
 1069         smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize);
 1070 
 1071         for (smap = smapbase; smap < smapend; smap++) {
 1072                 if (boothowto & RB_VERBOSE)
 1073                         printf("SMAP type=%02x base=%016lx len=%016lx\n",
 1074                             smap->type, smap->base, smap->length);
 1075 
 1076                 if (smap->type != SMAP_TYPE_MEMORY)
 1077                         continue;
 1078 
 1079                 if (!add_physmap_entry(smap->base, smap->length, physmap,
 1080                     physmap_idx))
 1081                         break;
 1082         }
 1083 }
 1084 
 1085 static void
 1086 add_efi_map_entries(struct efi_map_header *efihdr, vm_paddr_t *physmap,
 1087     int *physmap_idx)
 1088 {
 1089         struct efi_md *map, *p;
 1090         const char *type;
 1091         size_t efisz;
 1092         int ndesc, i;
 1093 
 1094         static const char *types[] = {
 1095                 "Reserved",
 1096                 "LoaderCode",
 1097                 "LoaderData",
 1098                 "BootServicesCode",
 1099                 "BootServicesData",
 1100                 "RuntimeServicesCode",
 1101                 "RuntimeServicesData",
 1102                 "ConventionalMemory",
 1103                 "UnusableMemory",
 1104                 "ACPIReclaimMemory",
 1105                 "ACPIMemoryNVS",
 1106                 "MemoryMappedIO",
 1107                 "MemoryMappedIOPortSpace",
 1108                 "PalCode",
 1109                 "PersistentMemory"
 1110         };
 1111 
 1112         /*
 1113          * Memory map data provided by UEFI via the GetMemoryMap
 1114          * Boot Services API.
 1115          */
 1116         efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
 1117         map = (struct efi_md *)((uint8_t *)efihdr + efisz);
 1118 
 1119         if (efihdr->descriptor_size == 0)
 1120                 return;
 1121         ndesc = efihdr->memory_size / efihdr->descriptor_size;
 1122 
 1123         if (boothowto & RB_VERBOSE)
 1124                 printf("%23s %12s %12s %8s %4s\n",
 1125                     "Type", "Physical", "Virtual", "#Pages", "Attr");
 1126 
 1127         for (i = 0, p = map; i < ndesc; i++,
 1128             p = efi_next_descriptor(p, efihdr->descriptor_size)) {
 1129                 if (boothowto & RB_VERBOSE) {
 1130                         if (p->md_type < nitems(types))
 1131                                 type = types[p->md_type];
 1132                         else
 1133                                 type = "<INVALID>";
 1134                         printf("%23s %012lx %12p %08lx ", type, p->md_phys,
 1135                             p->md_virt, p->md_pages);
 1136                         if (p->md_attr & EFI_MD_ATTR_UC)
 1137                                 printf("UC ");
 1138                         if (p->md_attr & EFI_MD_ATTR_WC)
 1139                                 printf("WC ");
 1140                         if (p->md_attr & EFI_MD_ATTR_WT)
 1141                                 printf("WT ");
 1142                         if (p->md_attr & EFI_MD_ATTR_WB)
 1143                                 printf("WB ");
 1144                         if (p->md_attr & EFI_MD_ATTR_UCE)
 1145                                 printf("UCE ");
 1146                         if (p->md_attr & EFI_MD_ATTR_WP)
 1147                                 printf("WP ");
 1148                         if (p->md_attr & EFI_MD_ATTR_RP)
 1149                                 printf("RP ");
 1150                         if (p->md_attr & EFI_MD_ATTR_XP)
 1151                                 printf("XP ");
 1152                         if (p->md_attr & EFI_MD_ATTR_NV)
 1153                                 printf("NV ");
 1154                         if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE)
 1155                                 printf("MORE_RELIABLE ");
 1156                         if (p->md_attr & EFI_MD_ATTR_RO)
 1157                                 printf("RO ");
 1158                         if (p->md_attr & EFI_MD_ATTR_RT)
 1159                                 printf("RUNTIME");
 1160                         printf("\n");
 1161                 }
 1162 
 1163                 switch (p->md_type) {
 1164                 case EFI_MD_TYPE_CODE:
 1165                 case EFI_MD_TYPE_DATA:
 1166                 case EFI_MD_TYPE_BS_CODE:
 1167                 case EFI_MD_TYPE_BS_DATA:
 1168                 case EFI_MD_TYPE_FREE:
 1169                         /*
 1170                          * We're allowed to use any entry with these types.
 1171                          */
 1172                         break;
 1173                 default:
 1174                         continue;
 1175                 }
 1176 
 1177                 if (!add_physmap_entry(p->md_phys, (p->md_pages * PAGE_SIZE),
 1178                     physmap, physmap_idx))
 1179                         break;
 1180         }
 1181 }
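       /*
        * Note that the loop above strides by efihdr->descriptor_size rather
        * than sizeof(struct efi_md): the firmware may use a larger descriptor
        * than the one this kernel was built against.  efi_next_descriptor()
        * (sys/sys/efi.h) is essentially byte-pointer arithmetic of the form
        *
        *      (struct efi_md *)((uint8_t *)current + descriptor_size)
        */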
 1182 
 1183 static char bootmethod[16] = "";
 1184 SYSCTL_STRING(_machdep, OID_AUTO, bootmethod, CTLFLAG_RD, bootmethod, 0,
 1185     "System firmware boot method");
 1186 
 1187 static void
 1188 native_parse_memmap(caddr_t kmdp, vm_paddr_t *physmap, int *physmap_idx)
 1189 {
 1190         struct bios_smap *smap;
 1191         struct efi_map_header *efihdr;
 1192         u_int32_t size;
 1193 
 1194         /*
 1195          * Memory map from INT 15:E820.
 1196          *
 1197          * subr_module.c says:
 1198          * "Consumer may safely assume that size value precedes data."
  1199          * i.e., an int32_t immediately precedes smap.
 1200          */
 1201 
 1202         efihdr = (struct efi_map_header *)preload_search_info(kmdp,
 1203             MODINFO_METADATA | MODINFOMD_EFI_MAP);
 1204         smap = (struct bios_smap *)preload_search_info(kmdp,
 1205             MODINFO_METADATA | MODINFOMD_SMAP);
 1206         if (efihdr == NULL && smap == NULL)
 1207                 panic("No BIOS smap or EFI map info from loader!");
 1208 
 1209         if (efihdr != NULL) {
 1210                 add_efi_map_entries(efihdr, physmap, physmap_idx);
 1211                 strlcpy(bootmethod, "UEFI", sizeof(bootmethod));
 1212         } else {
 1213                 size = *((u_int32_t *)smap - 1);
 1214                 bios_add_smap_entries(smap, size, physmap, physmap_idx);
 1215                 strlcpy(bootmethod, "BIOS", sizeof(bootmethod));
 1216         }
 1217 }
 1218 
 1219 #define PAGES_PER_GB    (1024 * 1024 * 1024 / PAGE_SIZE)
 1220 
 1221 /*
 1222  * Populate the (physmap) array with base/bound pairs describing the
 1223  * available physical memory in the system, then test this memory and
 1224  * build the phys_avail array describing the actually-available memory.
 1225  *
 1226  * Total memory size may be set by the kernel environment variable
 1227  * hw.physmem or the compile-time define MAXMEM.
 1228  *
 1229  * XXX first should be vm_paddr_t.
 1230  */
 1231 static void
 1232 getmemsize(caddr_t kmdp, u_int64_t first)
 1233 {
 1234         int i, physmap_idx, pa_indx, da_indx;
 1235         vm_paddr_t pa, physmap[PHYSMAP_SIZE];
 1236         u_long physmem_start, physmem_tunable, memtest;
 1237         pt_entry_t *pte;
 1238         quad_t dcons_addr, dcons_size;
 1239         int page_counter;
 1240 
 1241         /*
 1242          * Tell the physical memory allocator about pages used to store
 1243          * the kernel and preloaded data.  See kmem_bootstrap_free().
 1244          */
 1245         vm_phys_add_seg((vm_paddr_t)kernphys, trunc_page(first));
 1246 
 1247         bzero(physmap, sizeof(physmap));
 1248         physmap_idx = 0;
 1249 
 1250         init_ops.parse_memmap(kmdp, physmap, &physmap_idx);
 1251         physmap_idx -= 2;
 1252 
 1253         /*
 1254          * Find the 'base memory' segment for SMP
 1255          */
 1256         basemem = 0;
 1257         for (i = 0; i <= physmap_idx; i += 2) {
 1258                 if (physmap[i] <= 0xA0000) {
 1259                         basemem = physmap[i + 1] / 1024;
 1260                         break;
 1261                 }
 1262         }
 1263         if (basemem == 0 || basemem > 640) {
 1264                 if (bootverbose)
 1265                         printf(
  1266                 "Memory map doesn't contain a basemem segment, faking it\n");
 1267                 basemem = 640;
 1268         }
 1269 
 1270         /*
 1271          * Maxmem isn't the "maximum memory", it's one larger than the
 1272          * highest page of the physical address space.  It should be
 1273          * called something like "Maxphyspage".  We may adjust this
 1274          * based on ``hw.physmem'' and the results of the memory test.
 1275          */
 1276         Maxmem = atop(physmap[physmap_idx + 1]);
 1277 
 1278 #ifdef MAXMEM
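        /* The MAXMEM option is given in kilobytes; convert to 4 KB pages. */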
 1279         Maxmem = MAXMEM / 4;
 1280 #endif
 1281 
 1282         if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable))
 1283                 Maxmem = atop(physmem_tunable);
 1284 
 1285         /*
 1286          * The boot memory test is disabled by default, as it takes a
 1287          * significant amount of time on large-memory systems, and is
 1288          * unfriendly to virtual machines as it unnecessarily touches all
 1289          * pages.
 1290          *
 1291          * A general name is used as the code may be extended to support
 1292          * additional tests beyond the current "page present" test.
 1293          */
 1294         memtest = 0;
 1295         TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest);
 1296 
 1297         /*
 1298          * Don't allow MAXMEM or hw.physmem to extend the amount of memory
 1299          * in the system.
 1300          */
 1301         if (Maxmem > atop(physmap[physmap_idx + 1]))
 1302                 Maxmem = atop(physmap[physmap_idx + 1]);
 1303 
 1304         if (atop(physmap[physmap_idx + 1]) != Maxmem &&
 1305             (boothowto & RB_VERBOSE))
 1306                 printf("Physical memory use set to %ldK\n", Maxmem * 4);
 1307 
  1308         /*
  1309          * Make a hole for the "AP -> long mode" bootstrap code.  The
  1310          * mp_bootaddress vector is only available when the kernel
  1311          * is configured to support APs, and only when the APs start
  1312          * in real mode (e.g. SMP bare metal).
  1313          */
 1314         if (init_ops.mp_bootaddress)
 1315                 init_ops.mp_bootaddress(physmap, &physmap_idx);
 1316 
 1317         /* call pmap initialization to make new kernel address space */
 1318         pmap_bootstrap(&first);
 1319 
 1320         /*
 1321          * Size up each available chunk of physical memory.
 1322          *
 1323          * XXX Some BIOSes corrupt low 64KB between suspend and resume.
 1324          * By default, mask off the first 16 pages unless we appear to be
 1325          * running in a VM.
 1326          */
 1327         physmem_start = (vm_guest > VM_GUEST_NO ? 1 : 16) << PAGE_SHIFT;
 1328         TUNABLE_ULONG_FETCH("hw.physmem.start", &physmem_start);
 1329         if (physmap[0] < physmem_start) {
 1330                 if (physmem_start < PAGE_SIZE)
 1331                         physmap[0] = PAGE_SIZE;
 1332                 else if (physmem_start >= physmap[1])
 1333                         physmap[0] = round_page(physmap[1] - PAGE_SIZE);
 1334                 else
 1335                         physmap[0] = round_page(physmem_start);
 1336         }
 1337         pa_indx = 0;
 1338         da_indx = 1;
 1339         phys_avail[pa_indx++] = physmap[0];
 1340         phys_avail[pa_indx] = physmap[0];
 1341         dump_avail[da_indx] = physmap[0];
 1342         pte = CMAP1;
 1343 
 1344         /*
 1345          * Get dcons buffer address
 1346          */
 1347         if (getenv_quad("dcons.addr", &dcons_addr) == 0 ||
 1348             getenv_quad("dcons.size", &dcons_size) == 0)
 1349                 dcons_addr = 0;
 1350 
 1351         /*
 1352          * physmap is in bytes, so when converting to page boundaries,
 1353          * round up the start address and round down the end address.
 1354          */
 1355         page_counter = 0;
 1356         if (memtest != 0)
 1357                 printf("Testing system memory");
 1358         for (i = 0; i <= physmap_idx; i += 2) {
 1359                 vm_paddr_t end;
 1360 
 1361                 end = ptoa((vm_paddr_t)Maxmem);
 1362                 if (physmap[i + 1] < end)
 1363                         end = trunc_page(physmap[i + 1]);
 1364                 for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
 1365                         int tmp, page_bad, full;
 1366                         int *ptr = (int *)CADDR1;
 1367 
 1368                         full = FALSE;
 1369                         /*
 1370                          * block out kernel memory as not available.
 1371                          */
 1372                         if (pa >= (vm_paddr_t)kernphys && pa < first)
 1373                                 goto do_dump_avail;
 1374 
 1375                         /*
 1376                          * block out dcons buffer
 1377                          */
 1378                         if (dcons_addr > 0
 1379                             && pa >= trunc_page(dcons_addr)
 1380                             && pa < dcons_addr + dcons_size)
 1381                                 goto do_dump_avail;
 1382 
 1383                         page_bad = FALSE;
 1384                         if (memtest == 0)
 1385                                 goto skip_memtest;
 1386 
 1387                         /*
 1388                          * Print a "." every GB to show we're making
 1389                          * progress.
 1390                          */
 1391                         page_counter++;
 1392                         if ((page_counter % PAGES_PER_GB) == 0)
 1393                                 printf(".");
 1394 
 1395                         /*
  1396                          * map page into kernel: valid, read/write, non-cacheable
 1397                          */
 1398                         *pte = pa | PG_V | PG_RW | PG_NC_PWT | PG_NC_PCD;
 1399                         invltlb();
 1400 
 1401                         tmp = *(int *)ptr;
 1402                         /*
 1403                          * Test for alternating 1's and 0's
 1404                          */
 1405                         *(volatile int *)ptr = 0xaaaaaaaa;
 1406                         if (*(volatile int *)ptr != 0xaaaaaaaa)
 1407                                 page_bad = TRUE;
 1408                         /*
 1409                          * Test for alternating 0's and 1's
 1410                          */
 1411                         *(volatile int *)ptr = 0x55555555;
 1412                         if (*(volatile int *)ptr != 0x55555555)
 1413                                 page_bad = TRUE;
 1414                         /*
 1415                          * Test for all 1's
 1416                          */
 1417                         *(volatile int *)ptr = 0xffffffff;
 1418                         if (*(volatile int *)ptr != 0xffffffff)
 1419                                 page_bad = TRUE;
 1420                         /*
 1421                          * Test for all 0's
 1422                          */
 1423                         *(volatile int *)ptr = 0x0;
 1424                         if (*(volatile int *)ptr != 0x0)
 1425                                 page_bad = TRUE;
 1426                         /*
 1427                          * Restore original value.
 1428                          */
 1429                         *(int *)ptr = tmp;
 1430 
 1431 skip_memtest:
 1432                         /*
 1433                          * Adjust array of valid/good pages.
 1434                          */
 1435                         if (page_bad == TRUE)
 1436                                 continue;
 1437                         /*
 1438                          * If this good page is a continuation of the
 1439                          * previous set of good pages, then just increase
 1440                          * the end pointer. Otherwise start a new chunk.
 1441                          * Note that "end" points one page past the last
 1442                          * page of the chunk, i.e. the range is >= start
 1443                          * and < end.  If we're also doing a speculative
 1444                          * memory test and we're at or past the end, bump
 1445                          * up Maxmem so that we keep going.  The first
 1446                          * bad page will terminate the loop.
 1447                          */
 1448                         if (phys_avail[pa_indx] == pa) {
 1449                                 phys_avail[pa_indx] += PAGE_SIZE;
 1450                         } else {
 1451                                 pa_indx++;
 1452                                 if (pa_indx == PHYS_AVAIL_ARRAY_END) {
 1453                                         printf(
 1454                 "Too many holes in the physical address space, giving up\n");
 1455                                         pa_indx--;
 1456                                         full = TRUE;
 1457                                         goto do_dump_avail;
 1458                                 }
 1459                                 phys_avail[pa_indx++] = pa;     /* start */
 1460                                 phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */
 1461                         }
 1462                         physmem++;
 1463 do_dump_avail:
 1464                         if (dump_avail[da_indx] == pa) {
 1465                                 dump_avail[da_indx] += PAGE_SIZE;
 1466                         } else {
 1467                                 da_indx++;
 1468                                 if (da_indx == DUMP_AVAIL_ARRAY_END) {
 1469                                         da_indx--;
 1470                                         goto do_next;
 1471                                 }
 1472                                 dump_avail[da_indx++] = pa; /* start */
 1473                                 dump_avail[da_indx] = pa + PAGE_SIZE; /* end */
 1474                         }
 1475 do_next:
 1476                         if (full)
 1477                                 break;
 1478                 }
 1479         }
 1480         *pte = 0;
 1481         invltlb();
 1482         if (memtest != 0)
 1483                 printf("\n");
 1484 
 1485         /*
 1486          * XXX
 1487          * The last chunk must contain at least one page plus the message
 1488          * buffer to avoid complicating other code (message buffer address
 1489          * calculation, etc.).
 1490          */
 1491         while (phys_avail[pa_indx - 1] + PAGE_SIZE +
 1492             round_page(msgbufsize) >= phys_avail[pa_indx]) {
 1493                 physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
 1494                 phys_avail[pa_indx--] = 0;
 1495                 phys_avail[pa_indx--] = 0;
 1496         }
 1497 
 1498         Maxmem = atop(phys_avail[pa_indx]);
 1499 
 1500         /* Trim off space for the message buffer. */
 1501         phys_avail[pa_indx] -= round_page(msgbufsize);
 1502 
 1503         /* Map the message buffer. */
 1504         msgbufp = (struct msgbuf *)PHYS_TO_DMAP(phys_avail[pa_indx]);
 1505 }
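
      /*
       * A sketch of the result, with illustrative addresses only: after the
       * scan, phys_avail[] holds (start, end) byte-address pairs of usable
       * RAM, e.g.
       *
       *      phys_avail[0] = 0x0000000000010000   (start of chunk 0)
       *      phys_avail[1] = 0x000000000009f000   (end of chunk 0)
       *      phys_avail[2] = 0x0000000000200000   (start of chunk 1)
       *      phys_avail[3] = 0x00000000bfe00000   (end of chunk 1, less msgbuf)
       *
       * dump_avail[] has the same layout but also covers the kernel image
       * and the dcons buffer, which are excluded from phys_avail[] above.
       */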
 1506 
 1507 static caddr_t
 1508 native_parse_preload_data(u_int64_t modulep)
 1509 {
 1510         caddr_t kmdp;
 1511         char *envp;
 1512 #ifdef DDB
 1513         vm_offset_t ksym_start;
 1514         vm_offset_t ksym_end;
 1515 #endif
 1516 
 1517         preload_metadata = (caddr_t)(uintptr_t)(modulep + KERNBASE);
 1518         preload_bootstrap_relocate(KERNBASE);
 1519         kmdp = preload_search_by_type("elf kernel");
 1520         if (kmdp == NULL)
 1521                 kmdp = preload_search_by_type("elf64 kernel");
 1522         boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int);
 1523         envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *);
 1524         if (envp != NULL)
 1525                 envp += KERNBASE;
 1526         init_static_kenv(envp, 0);
 1527 #ifdef DDB
 1528         ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t);
 1529         ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t);
 1530         db_fetch_ksymtab(ksym_start, ksym_end);
 1531 #endif
 1532         efi_systbl_phys = MD_FETCH(kmdp, MODINFOMD_FW_HANDLE, vm_paddr_t);
 1533 
 1534         return (kmdp);
 1535 }
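
      /*
       * For reference: modulep as passed by the loader is a physical
       * address, so adding KERNBASE yields its early kernel-virtual
       * mapping; preload_bootstrap_relocate() then adjusts the pointers
       * embedded in the metadata by the same offset so that
       * preload_search_by_type() and MD_FETCH() work on virtual addresses.
       */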
 1536 
 1537 static void
 1538 amd64_kdb_init(void)
 1539 {
 1540         kdb_init();
 1541 #ifdef KDB
 1542         if (boothowto & RB_KDB)
 1543                 kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
 1544 #endif
 1545 }
 1546 
 1547 /* Set up the fast syscall stuff */
 1548 void
 1549 amd64_conf_fast_syscall(void)
 1550 {
 1551         uint64_t msr;
 1552 
 1553         msr = rdmsr(MSR_EFER) | EFER_SCE;
 1554         wrmsr(MSR_EFER, msr);
 1555         wrmsr(MSR_LSTAR, pti ? (u_int64_t)IDTVEC(fast_syscall_pti) :
 1556             (u_int64_t)IDTVEC(fast_syscall));
 1557         wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32));
 1558         msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) |
 1559             ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48);
 1560         wrmsr(MSR_STAR, msr);
 1561         wrmsr(MSR_SF_MASK, PSL_NT | PSL_T | PSL_I | PSL_C | PSL_D | PSL_AC);
 1562 }
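
      /*
       * For reference, the MSR_STAR layout programmed above: bits 47:32
       * hold the kernel %cs loaded by SYSCALL (with %ss implied at +8),
       * and bits 63:48 hold the selector base from which SYSRET derives
       * the user %cs/%ss, which is why GUCODE32_SEL, GUDATA_SEL and
       * GUCODE_SEL must be consecutive in the GDT.
       */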
 1563 
 1564 void
 1565 amd64_bsp_pcpu_init1(struct pcpu *pc)
 1566 {
 1567 
 1568         PCPU_SET(prvspace, pc);
 1569         PCPU_SET(curthread, &thread0);
 1570         PCPU_SET(tssp, &common_tss[0]);
 1571         PCPU_SET(commontssp, &common_tss[0]);
 1572         PCPU_SET(tss, (struct system_segment_descriptor *)&gdt[GPROC0_SEL]);
 1573         PCPU_SET(ldt, (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL]);
 1574         PCPU_SET(fs32p, &gdt[GUFS32_SEL]);
 1575         PCPU_SET(gs32p, &gdt[GUGS32_SEL]);
 1576 }
 1577 
 1578 void
 1579 amd64_bsp_pcpu_init2(uint64_t rsp0)
 1580 {
 1581 
 1582         PCPU_SET(rsp0, rsp0);
 1583         PCPU_SET(pti_rsp0, ((vm_offset_t)PCPU_PTR(pti_stack) +
 1584             PC_PTI_STACK_SZ * sizeof(uint64_t)) & ~0xful);
 1585         PCPU_SET(curpcb, thread0.td_pcb);
 1586 }
 1587 
 1588 void
 1589 amd64_bsp_ist_init(struct pcpu *pc)
 1590 {
 1591         struct nmi_pcpu *np;
 1592 
 1593         /* doublefault stack space, runs on ist1 */
 1594         common_tss[0].tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)];
 1595 
 1596         /*
 1597          * NMI stack, runs on ist2.  The pcpu pointer is stored just
 1598          * above the start of the ist2 stack.
 1599          */
 1600         np = ((struct nmi_pcpu *)&nmi0_stack[sizeof(nmi0_stack)]) - 1;
 1601         np->np_pcpu = (register_t)pc;
 1602         common_tss[0].tss_ist2 = (long)np;
 1603 
 1604         /*
 1605          * MC# stack, runs on ist3.  The pcpu pointer is stored just
 1606          * above the start of the ist3 stack.
 1607          */
 1608         np = ((struct nmi_pcpu *)&mce0_stack[sizeof(mce0_stack)]) - 1;
 1609         np->np_pcpu = (register_t)pc;
 1610         common_tss[0].tss_ist3 = (long)np;
 1611 
 1612         /*
 1613          * DB# stack, runs on ist4.
 1614          */
 1615         np = ((struct nmi_pcpu *)&dbg0_stack[sizeof(dbg0_stack)]) - 1;
 1616         np->np_pcpu = (register_t)pc;
 1617         common_tss[0].tss_ist4 = (long)np;
 1618 }
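
      /*
       * Layout of each special stack set up above, top down (a sketch):
       *
       *      +--------------------+ <- top of nmi0_stack/mce0_stack/dbg0_stack
       *      | struct nmi_pcpu    | <- np; tss_ist2/3/4 point here
       *      +--------------------+
       *      | exception stack,   |
       *      | grows downward     |
       *      +--------------------+
       *
       * The handler reloads its per-CPU data pointer from np->np_pcpu
       * because %gs cannot be trusted on NMI, MC# or DB# entry.
       */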
 1619 
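      /*
       * hammer_time() is the 64-bit kernel's C entry point: locore passes
       * the loader's module pointer and the first free physical address,
       * and the value returned below becomes the initial stack pointer
       * before mi_startup() runs.
       */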
 1620 u_int64_t
 1621 hammer_time(u_int64_t modulep, u_int64_t physfree)
 1622 {
 1623         caddr_t kmdp;
 1624         int gsel_tss, x;
 1625         struct pcpu *pc;
 1626         struct xstate_hdr *xhdr;
 1627         u_int64_t rsp0;
 1628         char *env;
 1629         struct region_descriptor r_gdt;
 1630         size_t kstack0_sz;
 1631         int late_console;
 1632 
 1633         TSRAW(&thread0, TS_ENTER, __func__, NULL);
 1634 
 1635         kmdp = init_ops.parse_preload_data(modulep);
 1636 
 1637         physfree += ucode_load_bsp(physfree + KERNBASE);
 1638         physfree = roundup2(physfree, PAGE_SIZE);
 1639 
 1640         identify_cpu1();
 1641         identify_hypervisor();
 1642         identify_cpu_fixup_bsp();
 1643         identify_cpu2();
 1644         initializecpucache();
 1645 
 1646         /*
 1647          * Check for pti, pcid, and invpcid before ifuncs are
 1648          * resolved, to correctly select the implementation for
 1649          * pmap_activate_sw_mode().
 1650          */
 1651         pti = pti_get_default();
 1652         TUNABLE_INT_FETCH("vm.pmap.pti", &pti);
 1653         TUNABLE_INT_FETCH("vm.pmap.pcid_enabled", &pmap_pcid_enabled);
 1654         if ((cpu_feature2 & CPUID2_PCID) != 0 && pmap_pcid_enabled) {
 1655                 invpcid_works = (cpu_stdext_feature &
 1656                     CPUID_STDEXT_INVPCID) != 0;
 1657         } else {
 1658                 pmap_pcid_enabled = 0;
 1659         }
 1660 
 1661         link_elf_ireloc(kmdp);
 1662 
 1663         /*
 1664          * This may be done better later if it gets more high-level
 1665          * components in it.  If so, just link td->td_proc here.
 1666          */
 1667         proc_linkup0(&proc0, &thread0);
 1668 
 1669         /* Init basic tunables, hz etc */
 1670         init_param1();
 1671 
 1672         thread0.td_kstack = physfree + KERNBASE;
 1673         thread0.td_kstack_pages = kstack_pages;
 1674         kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE;
 1675         bzero((void *)thread0.td_kstack, kstack0_sz);
 1676         physfree += kstack0_sz;
 1677 
 1678         /*
 1679          * Initialize enough of thread0 for delayed invalidation to
 1680          * work very early.  Rely on thread0.td_base_pri
 1681          * zero-initialization; it is reset to PVM at proc0_init().
 1682          */
 1683         pmap_thread_init_invl_gen(&thread0);
 1684 
 1685         /*
 1686          * make gdt memory segments
 1687          */
 1688         for (x = 0; x < NGDT; x++) {
 1689                 if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) &&
 1690                     x != GUSERLDT_SEL && x != (GUSERLDT_SEL + 1))
 1691                         ssdtosd(&gdt_segs[x], &gdt[x]);
 1692         }
 1693         gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&common_tss[0];
 1694         ssdtosyssd(&gdt_segs[GPROC0_SEL],
 1695             (struct system_segment_descriptor *)&gdt[GPROC0_SEL]);
 1696 
 1697         r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
 1698         r_gdt.rd_base = (long)gdt;
 1699         lgdt(&r_gdt);
 1700         pc = &temp_bsp_pcpu;
 1701 
 1702         wrmsr(MSR_FSBASE, 0);           /* User value */
 1703         wrmsr(MSR_GSBASE, (u_int64_t)pc);
 1704         wrmsr(MSR_KGSBASE, 0);          /* User value while in the kernel */
 1705 
 1706         pcpu_init(pc, 0, sizeof(struct pcpu));
 1707         dpcpu_init((void *)(physfree + KERNBASE), 0);
 1708         physfree += DPCPU_SIZE;
 1709         amd64_bsp_pcpu_init1(pc);
 1710         /* Non-late cninit() and printf() can be moved up to here. */
 1711 
 1712         /*
 1713          * Initialize mutexes.
 1714          *
 1715          * icu_lock: in order to allow an interrupt to occur in a critical
 1716          *           section, to set pcpu->ipending (etc...) properly, we
 1717          *           must be able to get the icu lock, so it can't be
 1718          *           under witness.
 1719          */
 1720         mutex_init();
 1721         mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS);
 1722         mtx_init(&dt_lock, "descriptor tables", NULL, MTX_DEF);
 1723 
 1724         /* exceptions */
 1725         for (x = 0; x < NIDT; x++)
 1726                 setidt(x, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_SYSIGT,
 1727                     SEL_KPL, 0);
 1728         setidt(IDT_DE, pti ? &IDTVEC(div_pti) : &IDTVEC(div), SDT_SYSIGT,
 1729             SEL_KPL, 0);
 1730         setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 4);
 1731         setidt(IDT_NMI, &IDTVEC(nmi),  SDT_SYSIGT, SEL_KPL, 2);
 1732         setidt(IDT_BP, pti ? &IDTVEC(bpt_pti) : &IDTVEC(bpt), SDT_SYSIGT,
 1733             SEL_UPL, 0);
 1734         setidt(IDT_OF, pti ? &IDTVEC(ofl_pti) : &IDTVEC(ofl), SDT_SYSIGT,
 1735             SEL_UPL, 0);
 1736         setidt(IDT_BR, pti ? &IDTVEC(bnd_pti) : &IDTVEC(bnd), SDT_SYSIGT,
 1737             SEL_KPL, 0);
 1738         setidt(IDT_UD, pti ? &IDTVEC(ill_pti) : &IDTVEC(ill), SDT_SYSIGT,
 1739             SEL_KPL, 0);
 1740         setidt(IDT_NM, pti ? &IDTVEC(dna_pti) : &IDTVEC(dna), SDT_SYSIGT,
 1741             SEL_KPL, 0);
 1742         setidt(IDT_DF, &IDTVEC(dblfault), SDT_SYSIGT, SEL_KPL, 1);
 1743         setidt(IDT_FPUGP, pti ? &IDTVEC(fpusegm_pti) : &IDTVEC(fpusegm),
 1744             SDT_SYSIGT, SEL_KPL, 0);
 1745         setidt(IDT_TS, pti ? &IDTVEC(tss_pti) : &IDTVEC(tss), SDT_SYSIGT,
 1746             SEL_KPL, 0);
 1747         setidt(IDT_NP, pti ? &IDTVEC(missing_pti) : &IDTVEC(missing),
 1748             SDT_SYSIGT, SEL_KPL, 0);
 1749         setidt(IDT_SS, pti ? &IDTVEC(stk_pti) : &IDTVEC(stk), SDT_SYSIGT,
 1750             SEL_KPL, 0);
 1751         setidt(IDT_GP, pti ? &IDTVEC(prot_pti) : &IDTVEC(prot), SDT_SYSIGT,
 1752             SEL_KPL, 0);
 1753         setidt(IDT_PF, pti ? &IDTVEC(page_pti) : &IDTVEC(page), SDT_SYSIGT,
 1754             SEL_KPL, 0);
 1755         setidt(IDT_MF, pti ? &IDTVEC(fpu_pti) : &IDTVEC(fpu), SDT_SYSIGT,
 1756             SEL_KPL, 0);
 1757         setidt(IDT_AC, pti ? &IDTVEC(align_pti) : &IDTVEC(align), SDT_SYSIGT,
 1758             SEL_KPL, 0);
 1759         setidt(IDT_MC, &IDTVEC(mchk), SDT_SYSIGT, SEL_KPL, 3);
 1760         setidt(IDT_XF, pti ? &IDTVEC(xmm_pti) : &IDTVEC(xmm), SDT_SYSIGT,
 1761             SEL_KPL, 0);
 1762 #ifdef KDTRACE_HOOKS
 1763         setidt(IDT_DTRACE_RET, pti ? &IDTVEC(dtrace_ret_pti) :
 1764             &IDTVEC(dtrace_ret), SDT_SYSIGT, SEL_UPL, 0);
 1765 #endif
 1766 #ifdef XENHVM
 1767         setidt(IDT_EVTCHN, pti ? &IDTVEC(xen_intr_upcall_pti) :
 1768             &IDTVEC(xen_intr_upcall), SDT_SYSIGT, SEL_KPL, 0);
 1769 #endif
 1770         r_idt.rd_limit = sizeof(idt0) - 1;
 1771         r_idt.rd_base = (long)idt;
 1772         lidt(&r_idt);
 1773 
 1774         /*
 1775          * Initialize the clock before the console so that console
 1776          * initialization can use DELAY().
 1777          */
 1778         clock_init();
 1779 
 1780         /*
 1781          * Use vt(4) by default for UEFI boot (during the sc(4)/vt(4)
 1782          * transition).
 1783          * Once the bootblocks have been updated, we can test directly for
 1784          * efi_systbl != NULL here...
 1785          */
 1786         if (preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_EFI_MAP)
 1787             != NULL)
 1788                 vty_set_preferred(VTY_VT);
 1789 
 1790         TUNABLE_INT_FETCH("hw.ibrs_disable", &hw_ibrs_disable);
 1791         TUNABLE_INT_FETCH("hw.spec_store_bypass_disable", &hw_ssb_disable);
 1792         TUNABLE_INT_FETCH("machdep.syscall_ret_l1d_flush",
 1793             &syscall_ret_l1d_flush_mode);
 1794         TUNABLE_INT_FETCH("hw.mds_disable", &hw_mds_disable);
 1795         TUNABLE_INT_FETCH("machdep.mitigations.taa.enable", &x86_taa_enable);
 1796 
 1797         finishidentcpu();       /* Final stage of CPU initialization */
 1798         initializecpu();        /* Initialize CPU registers */
 1799 
 1800         amd64_bsp_ist_init(pc);
 1801
 1802         /* Set the IO permission bitmap (empty due to tss seg limit) */
 1803         common_tss[0].tss_iobase = sizeof(struct amd64tss) + IOPERM_BITMAP_SIZE;
 1804 
 1805         gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
 1806         ltr(gsel_tss);
 1807 
 1808         amd64_conf_fast_syscall();
 1809 
 1810         /*
 1811          * We initialize the PCB pointer early so that exception
 1812          * handlers will work.  Also set up td_critnest to short-cut
 1813          * the page fault handler.
 1814          */
 1815         cpu_max_ext_state_size = sizeof(struct savefpu);
 1816         set_top_of_stack_td(&thread0);
 1817         thread0.td_pcb = get_pcb_td(&thread0);
 1818         thread0.td_critnest = 1;
 1819 
 1820         /*
 1821          * The console and kdb should be initialized even earlier than here,
 1822          * but some console drivers don't work until after getmemsize().
 1823          * Default to late console initialization to support these drivers.
 1824          * This loses mainly printf()s in getmemsize() and early debugging.
 1825          */
 1826         late_console = 1;
 1827         TUNABLE_INT_FETCH("debug.late_console", &late_console);
 1828         if (!late_console) {
 1829                 cninit();
 1830                 amd64_kdb_init();
 1831         }
 1832 
 1833         getmemsize(kmdp, physfree);
 1834         init_param2(physmem);
 1835 
 1836         /* now running on new page tables, configured, and u/iom is accessible */
 1837 
 1838 #ifdef DEV_PCI
 1839         /* This call might adjust phys_avail[]. */
 1840         pci_early_quirks();
 1841 #endif
 1842 
 1843         if (late_console)
 1844                 cninit();
 1845 
 1846 #ifdef DEV_ISA
 1847 #ifdef DEV_ATPIC
 1848         elcr_probe();
 1849         atpic_startup();
 1850 #else
 1851         /* Reset and mask the atpics and leave them shut down. */
 1852         atpic_reset();
 1853 
 1854         /*
 1855          * Point the ICU spurious interrupt vectors at the APIC spurious
 1856          * interrupt handler.
 1857          */
 1858         setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0);
 1859         setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0);
 1860 #endif
 1861 #else
 1862 #error "have you forgotten the isa device?"
 1863 #endif
 1864 
 1865         if (late_console)
 1866                 amd64_kdb_init();
 1867 
 1868         msgbufinit(msgbufp, msgbufsize);
 1869         fpuinit();
 1870 
 1871         /*
 1872          * Set up the thread0 pcb save area after fpuinit() has calculated
 1873          * the fpu save area size.  Zero out the extended state header in
 1874          * the fpu save area.
 1875          */
 1876         thread0.td_pcb->pcb_save = get_pcb_user_save_td(&thread0);
 1877         bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size);
 1878         if (use_xsave) {
 1879                 xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) +
 1880                     1);
 1881                 xhdr->xstate_bv = xsave_mask;
 1882         }
 1883         /* Make an initial tss so the cpu can get an interrupt stack on syscall. */
 1884         rsp0 = thread0.td_md.md_stack_base;
 1885         /* Ensure the stack is aligned to 16 bytes */
 1886         rsp0 &= ~0xFul;
 1887         common_tss[0].tss_rsp0 = rsp0;
 1888         amd64_bsp_pcpu_init2(rsp0);
 1889 
 1890         /* transfer to user mode */
 1891 
 1892         _ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
 1893         _udatasel = GSEL(GUDATA_SEL, SEL_UPL);
 1894         _ucode32sel = GSEL(GUCODE32_SEL, SEL_UPL);
 1895         _ufssel = GSEL(GUFS32_SEL, SEL_UPL);
 1896         _ugssel = GSEL(GUGS32_SEL, SEL_UPL);
 1897 
 1898         load_ds(_udatasel);
 1899         load_es(_udatasel);
 1900         load_fs(_ufssel);
 1901 
 1902         /* setup proc 0's pcb */
 1903         thread0.td_pcb->pcb_flags = 0;
 1904         thread0.td_frame = &proc0_tf;
 1905 
 1906         env = kern_getenv("kernelname");
 1907         if (env != NULL)
 1908                 strlcpy(kernelname, env, sizeof(kernelname));
 1909 
 1910         cpu_probe_amdc1e();
 1911 
 1912 #ifdef FDT
 1913         x86_init_fdt();
 1914 #endif
 1915         thread0.td_critnest = 0;
 1916 
 1917         TSEXIT();
 1918 
 1919         /* Location of kernel stack for locore */
 1920         return (thread0.td_md.md_stack_base);
 1921 }
 1922 
 1923 void
 1924 cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
 1925 {
 1926 
 1927         pcpu->pc_acpi_id = 0xffffffff;
 1928 }
 1929 
 1930 static int
 1931 smap_sysctl_handler(SYSCTL_HANDLER_ARGS)
 1932 {
 1933         struct bios_smap *smapbase;
 1934         struct bios_smap_xattr smap;
 1935         caddr_t kmdp;
 1936         uint32_t *smapattr;
 1937         int count, error, i;
 1938 
 1939         /* Retrieve the system memory map from the loader. */
 1940         kmdp = preload_search_by_type("elf kernel");
 1941         if (kmdp == NULL)
 1942                 kmdp = preload_search_by_type("elf64 kernel");
 1943         smapbase = (struct bios_smap *)preload_search_info(kmdp,
 1944             MODINFO_METADATA | MODINFOMD_SMAP);
 1945         if (smapbase == NULL)
 1946                 return (0);
 1947         smapattr = (uint32_t *)preload_search_info(kmdp,
 1948             MODINFO_METADATA | MODINFOMD_SMAP_XATTR);
 1949         count = *((uint32_t *)smapbase - 1) / sizeof(*smapbase);
 1950         error = 0;
 1951         for (i = 0; i < count; i++) {
 1952                 smap.base = smapbase[i].base;
 1953                 smap.length = smapbase[i].length;
 1954                 smap.type = smapbase[i].type;
 1955                 if (smapattr != NULL)
 1956                         smap.xattr = smapattr[i];
 1957                 else
 1958                         smap.xattr = 0;
 1959                 error = SYSCTL_OUT(req, &smap, sizeof(smap));
 1960         }
 1961         return (error);
 1962 }
 1963 SYSCTL_PROC(_machdep, OID_AUTO, smap, CTLTYPE_OPAQUE|CTLFLAG_RD, NULL, 0,
 1964     smap_sysctl_handler, "S,bios_smap_xattr", "Raw BIOS SMAP data");
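
      /*
       * preload_search_info() returns a pointer to the metadata payload;
       * the 32-bit word immediately before it holds the payload size.
       * That is why the element count is recovered above with
       * *((uint32_t *)smapbase - 1), and the same idiom is used for the
       * EFI map below.
       */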
 1965 
 1966 static int
 1967 efi_map_sysctl_handler(SYSCTL_HANDLER_ARGS)
 1968 {
 1969         struct efi_map_header *efihdr;
 1970         caddr_t kmdp;
 1971         uint32_t efisize;
 1972 
 1973         kmdp = preload_search_by_type("elf kernel");
 1974         if (kmdp == NULL)
 1975                 kmdp = preload_search_by_type("elf64 kernel");
 1976         efihdr = (struct efi_map_header *)preload_search_info(kmdp,
 1977             MODINFO_METADATA | MODINFOMD_EFI_MAP);
 1978         if (efihdr == NULL)
 1979                 return (0);
 1980         efisize = *((uint32_t *)efihdr - 1);
 1981         return (SYSCTL_OUT(req, efihdr, efisize));
 1982 }
 1983 SYSCTL_PROC(_machdep, OID_AUTO, efi_map, CTLTYPE_OPAQUE|CTLFLAG_RD, NULL, 0,
 1984     efi_map_sysctl_handler, "S,efi_map_header", "Raw EFI Memory Map");
 1985 
 1986 void
 1987 spinlock_enter(void)
 1988 {
 1989         struct thread *td;
 1990         register_t flags;
 1991 
 1992         td = curthread;
 1993         if (td->td_md.md_spinlock_count == 0) {
 1994                 flags = intr_disable();
 1995                 td->td_md.md_spinlock_count = 1;
 1996                 td->td_md.md_saved_flags = flags;
 1997                 critical_enter();
 1998         } else
 1999                 td->td_md.md_spinlock_count++;
 2000 }
 2001 
 2002 void
 2003 spinlock_exit(void)
 2004 {
 2005         struct thread *td;
 2006         register_t flags;
 2007 
 2008         td = curthread;
 2009         flags = td->td_md.md_saved_flags;
 2010         td->td_md.md_spinlock_count--;
 2011         if (td->td_md.md_spinlock_count == 0) {
 2012                 critical_exit();
 2013                 intr_restore(flags);
 2014         }
 2015 }
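
      /*
       * spinlock_enter()/spinlock_exit() nest: interrupts are disabled and
       * a critical section entered on the outermost enter, and the saved
       * %rflags are restored only by the matching outermost exit.  Spin
       * mutexes use this pair internally, so code such as (illustrative;
       * sc_mtx is a made-up spin mutex, not one defined in this file):
       *
       *      mtx_lock_spin(&sc_mtx);
       *      ... touch state shared with an interrupt handler ...
       *      mtx_unlock_spin(&sc_mtx);
       *
       * runs with interrupts disabled on the local CPU.
       */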
 2016 
 2017 /*
 2018  * Construct a PCB from a trapframe. This is called from kdb_trap() where
 2019  * we want to start a backtrace from the function that caused us to enter
 2020  * the debugger. We have the context in the trapframe, but base the trace
 2021  * on the PCB. The PCB doesn't have to be perfect, as long as it contains
 2022  * enough for a backtrace.
 2023  */
 2024 void
 2025 makectx(struct trapframe *tf, struct pcb *pcb)
 2026 {
 2027 
 2028         pcb->pcb_r12 = tf->tf_r12;
 2029         pcb->pcb_r13 = tf->tf_r13;
 2030         pcb->pcb_r14 = tf->tf_r14;
 2031         pcb->pcb_r15 = tf->tf_r15;
 2032         pcb->pcb_rbp = tf->tf_rbp;
 2033         pcb->pcb_rbx = tf->tf_rbx;
 2034         pcb->pcb_rip = tf->tf_rip;
 2035         pcb->pcb_rsp = tf->tf_rsp;
 2036 }
 2037 
 2038 int
 2039 ptrace_set_pc(struct thread *td, unsigned long addr)
 2040 {
 2041 
 2042         td->td_frame->tf_rip = addr;
 2043         set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
 2044         return (0);
 2045 }
 2046 
 2047 int
 2048 ptrace_single_step(struct thread *td)
 2049 {
 2050 
 2051         PROC_LOCK_ASSERT(td->td_proc, MA_OWNED);
 2052         if ((td->td_frame->tf_rflags & PSL_T) == 0) {
 2053                 td->td_frame->tf_rflags |= PSL_T;
 2054                 td->td_dbgflags |= TDB_STEP;
 2055         }
 2056         return (0);
 2057 }
 2058 
 2059 int
 2060 ptrace_clear_single_step(struct thread *td)
 2061 {
 2062 
 2063         PROC_LOCK_ASSERT(td->td_proc, MA_OWNED);
 2064         td->td_frame->tf_rflags &= ~PSL_T;
 2065         td->td_dbgflags &= ~TDB_STEP;
 2066         return (0);
 2067 }
 2068 
 2069 int
 2070 fill_regs(struct thread *td, struct reg *regs)
 2071 {
 2072         struct trapframe *tp;
 2073 
 2074         tp = td->td_frame;
 2075         return (fill_frame_regs(tp, regs));
 2076 }
 2077 
 2078 int
 2079 fill_frame_regs(struct trapframe *tp, struct reg *regs)
 2080 {
 2081 
 2082         regs->r_r15 = tp->tf_r15;
 2083         regs->r_r14 = tp->tf_r14;
 2084         regs->r_r13 = tp->tf_r13;
 2085         regs->r_r12 = tp->tf_r12;
 2086         regs->r_r11 = tp->tf_r11;
 2087         regs->r_r10 = tp->tf_r10;
 2088         regs->r_r9  = tp->tf_r9;
 2089         regs->r_r8  = tp->tf_r8;
 2090         regs->r_rdi = tp->tf_rdi;
 2091         regs->r_rsi = tp->tf_rsi;
 2092         regs->r_rbp = tp->tf_rbp;
 2093         regs->r_rbx = tp->tf_rbx;
 2094         regs->r_rdx = tp->tf_rdx;
 2095         regs->r_rcx = tp->tf_rcx;
 2096         regs->r_rax = tp->tf_rax;
 2097         regs->r_rip = tp->tf_rip;
 2098         regs->r_cs = tp->tf_cs;
 2099         regs->r_rflags = tp->tf_rflags;
 2100         regs->r_rsp = tp->tf_rsp;
 2101         regs->r_ss = tp->tf_ss;
 2102         if (tp->tf_flags & TF_HASSEGS) {
 2103                 regs->r_ds = tp->tf_ds;
 2104                 regs->r_es = tp->tf_es;
 2105                 regs->r_fs = tp->tf_fs;
 2106                 regs->r_gs = tp->tf_gs;
 2107         } else {
 2108                 regs->r_ds = 0;
 2109                 regs->r_es = 0;
 2110                 regs->r_fs = 0;
 2111                 regs->r_gs = 0;
 2112         }
 2113         regs->r_err = 0;
 2114         regs->r_trapno = 0;
 2115         return (0);
 2116 }
 2117 
 2118 int
 2119 set_regs(struct thread *td, struct reg *regs)
 2120 {
 2121         struct trapframe *tp;
 2122         register_t rflags;
 2123 
 2124         tp = td->td_frame;
 2125         rflags = regs->r_rflags & 0xffffffff;
 2126         if (!EFL_SECURE(rflags, tp->tf_rflags) || !CS_SECURE(regs->r_cs))
 2127                 return (EINVAL);
 2128         tp->tf_r15 = regs->r_r15;
 2129         tp->tf_r14 = regs->r_r14;
 2130         tp->tf_r13 = regs->r_r13;
 2131         tp->tf_r12 = regs->r_r12;
 2132         tp->tf_r11 = regs->r_r11;
 2133         tp->tf_r10 = regs->r_r10;
 2134         tp->tf_r9  = regs->r_r9;
 2135         tp->tf_r8  = regs->r_r8;
 2136         tp->tf_rdi = regs->r_rdi;
 2137         tp->tf_rsi = regs->r_rsi;
 2138         tp->tf_rbp = regs->r_rbp;
 2139         tp->tf_rbx = regs->r_rbx;
 2140         tp->tf_rdx = regs->r_rdx;
 2141         tp->tf_rcx = regs->r_rcx;
 2142         tp->tf_rax = regs->r_rax;
 2143         tp->tf_rip = regs->r_rip;
 2144         tp->tf_cs = regs->r_cs;
 2145         tp->tf_rflags = rflags;
 2146         tp->tf_rsp = regs->r_rsp;
 2147         tp->tf_ss = regs->r_ss;
 2148         if (0) {        /* XXXKIB */
 2149                 tp->tf_ds = regs->r_ds;
 2150                 tp->tf_es = regs->r_es;
 2151                 tp->tf_fs = regs->r_fs;
 2152                 tp->tf_gs = regs->r_gs;
 2153                 tp->tf_flags = TF_HASSEGS;
 2154         }
 2155         set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
 2156         return (0);
 2157 }
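
      /*
       * The EFL_SECURE() check above rejects changes to privileged %rflags
       * bits outside PSL_USERCHANGE (e.g. IOPL), and CS_SECURE() rejects
       * non-user code selectors, so a tracing process cannot use
       * PT_SETREGS to escalate the privileges of the traced thread.
       */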
 2158 
 2159 /* XXX check all this stuff! */
 2160 /* externalize from sv_xmm */
 2161 static void
 2162 fill_fpregs_xmm(struct savefpu *sv_xmm, struct fpreg *fpregs)
 2163 {
 2164         struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env;
 2165         struct envxmm *penv_xmm = &sv_xmm->sv_env;
 2166         int i;
 2167 
 2168         /* pcb -> fpregs */
 2169         bzero(fpregs, sizeof(*fpregs));
 2170 
 2171         /* FPU control/status */
 2172         penv_fpreg->en_cw = penv_xmm->en_cw;
 2173         penv_fpreg->en_sw = penv_xmm->en_sw;
 2174         penv_fpreg->en_tw = penv_xmm->en_tw;
 2175         penv_fpreg->en_opcode = penv_xmm->en_opcode;
 2176         penv_fpreg->en_rip = penv_xmm->en_rip;
 2177         penv_fpreg->en_rdp = penv_xmm->en_rdp;
 2178         penv_fpreg->en_mxcsr = penv_xmm->en_mxcsr;
 2179         penv_fpreg->en_mxcsr_mask = penv_xmm->en_mxcsr_mask;
 2180 
 2181         /* FPU registers */
 2182         for (i = 0; i < 8; ++i)
 2183                 bcopy(sv_xmm->sv_fp[i].fp_acc.fp_bytes, fpregs->fpr_acc[i], 10);
 2184 
 2185         /* SSE registers */
 2186         for (i = 0; i < 16; ++i)
 2187                 bcopy(sv_xmm->sv_xmm[i].xmm_bytes, fpregs->fpr_xacc[i], 16);
 2188 }
 2189 
 2190 /* internalize from fpregs into sv_xmm */
 2191 static void
 2192 set_fpregs_xmm(struct fpreg *fpregs, struct savefpu *sv_xmm)
 2193 {
 2194         struct envxmm *penv_xmm = &sv_xmm->sv_env;
 2195         struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env;
 2196         int i;
 2197 
 2198         /* fpregs -> pcb */
 2199         /* FPU control/status */
 2200         penv_xmm->en_cw = penv_fpreg->en_cw;
 2201         penv_xmm->en_sw = penv_fpreg->en_sw;
 2202         penv_xmm->en_tw = penv_fpreg->en_tw;
 2203         penv_xmm->en_opcode = penv_fpreg->en_opcode;
 2204         penv_xmm->en_rip = penv_fpreg->en_rip;
 2205         penv_xmm->en_rdp = penv_fpreg->en_rdp;
 2206         penv_xmm->en_mxcsr = penv_fpreg->en_mxcsr;
 2207         penv_xmm->en_mxcsr_mask = penv_fpreg->en_mxcsr_mask & cpu_mxcsr_mask;
 2208 
 2209         /* FPU registers */
 2210         for (i = 0; i < 8; ++i)
 2211                 bcopy(fpregs->fpr_acc[i], sv_xmm->sv_fp[i].fp_acc.fp_bytes, 10);
 2212 
 2213         /* SSE registers */
 2214         for (i = 0; i < 16; ++i)
 2215                 bcopy(fpregs->fpr_xacc[i], sv_xmm->sv_xmm[i].xmm_bytes, 16);
 2216 }
 2217 
 2218 /* externalize from td->pcb */
 2219 int
 2220 fill_fpregs(struct thread *td, struct fpreg *fpregs)
 2221 {
 2222 
 2223         KASSERT(td == curthread || TD_IS_SUSPENDED(td) ||
 2224             P_SHOULDSTOP(td->td_proc),
 2225             ("not suspended thread %p", td));
 2226         fpugetregs(td);
 2227         fill_fpregs_xmm(get_pcb_user_save_td(td), fpregs);
 2228         return (0);
 2229 }
 2230 
 2231 /* internalize to td->pcb */
 2232 int
 2233 set_fpregs(struct thread *td, struct fpreg *fpregs)
 2234 {
 2235 
 2236         critical_enter();
 2237         set_fpregs_xmm(fpregs, get_pcb_user_save_td(td));
 2238         fpuuserinited(td);
 2239         critical_exit();
 2240         return (0);
 2241 }
 2242 
 2243 /*
 2244  * Get machine context.
 2245  */
 2246 int
 2247 get_mcontext(struct thread *td, mcontext_t *mcp, int flags)
 2248 {
 2249         struct pcb *pcb;
 2250         struct trapframe *tp;
 2251 
 2252         pcb = td->td_pcb;
 2253         tp = td->td_frame;
 2254         PROC_LOCK(curthread->td_proc);
 2255         mcp->mc_onstack = sigonstack(tp->tf_rsp);
 2256         PROC_UNLOCK(curthread->td_proc);
 2257         mcp->mc_r15 = tp->tf_r15;
 2258         mcp->mc_r14 = tp->tf_r14;
 2259         mcp->mc_r13 = tp->tf_r13;
 2260         mcp->mc_r12 = tp->tf_r12;
 2261         mcp->mc_r11 = tp->tf_r11;
 2262         mcp->mc_r10 = tp->tf_r10;
 2263         mcp->mc_r9  = tp->tf_r9;
 2264         mcp->mc_r8  = tp->tf_r8;
 2265         mcp->mc_rdi = tp->tf_rdi;
 2266         mcp->mc_rsi = tp->tf_rsi;
 2267         mcp->mc_rbp = tp->tf_rbp;
 2268         mcp->mc_rbx = tp->tf_rbx;
 2269         mcp->mc_rcx = tp->tf_rcx;
 2270         mcp->mc_rflags = tp->tf_rflags;
 2271         if (flags & GET_MC_CLEAR_RET) {
 2272                 mcp->mc_rax = 0;
 2273                 mcp->mc_rdx = 0;
 2274                 mcp->mc_rflags &= ~PSL_C;
 2275         } else {
 2276                 mcp->mc_rax = tp->tf_rax;
 2277                 mcp->mc_rdx = tp->tf_rdx;
 2278         }
 2279         mcp->mc_rip = tp->tf_rip;
 2280         mcp->mc_cs = tp->tf_cs;
 2281         mcp->mc_rsp = tp->tf_rsp;
 2282         mcp->mc_ss = tp->tf_ss;
 2283         mcp->mc_ds = tp->tf_ds;
 2284         mcp->mc_es = tp->tf_es;
 2285         mcp->mc_fs = tp->tf_fs;
 2286         mcp->mc_gs = tp->tf_gs;
 2287         mcp->mc_flags = tp->tf_flags;
 2288         mcp->mc_len = sizeof(*mcp);
 2289         get_fpcontext(td, mcp, NULL, 0);
 2290         update_pcb_bases(pcb);
 2291         mcp->mc_fsbase = pcb->pcb_fsbase;
 2292         mcp->mc_gsbase = pcb->pcb_gsbase;
 2293         mcp->mc_xfpustate = 0;
 2294         mcp->mc_xfpustate_len = 0;
 2295         bzero(mcp->mc_spare, sizeof(mcp->mc_spare));
 2296         return (0);
 2297 }
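
      /*
       * GET_MC_CLEAR_RET zeroes %rax/%rdx and PSL_C in the saved context
       * so that, when the context is later resumed, it observes the
       * return values of a successful syscall; getcontext(2) relies on
       * this to make the resumed call appear to return 0.
       */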
 2298 
 2299 /*
 2300  * Set machine context.
 2301  *
 2302  * Note that we don't set any but the user-modifiable flags, and we won't
 2303  * touch the cs selector.
 2304  */
 2305 int
 2306 set_mcontext(struct thread *td, mcontext_t *mcp)
 2307 {
 2308         struct pcb *pcb;
 2309         struct trapframe *tp;
 2310         char *xfpustate;
 2311         long rflags;
 2312         int ret;
 2313 
 2314         pcb = td->td_pcb;
 2315         tp = td->td_frame;
 2316         if (mcp->mc_len != sizeof(*mcp) ||
 2317             (mcp->mc_flags & ~_MC_FLAG_MASK) != 0)
 2318                 return (EINVAL);
 2319         rflags = (mcp->mc_rflags & PSL_USERCHANGE) |
 2320             (tp->tf_rflags & ~PSL_USERCHANGE);
 2321         if (mcp->mc_flags & _MC_HASFPXSTATE) {
 2322                 if (mcp->mc_xfpustate_len > cpu_max_ext_state_size -
 2323                     sizeof(struct savefpu))
 2324                         return (EINVAL);
 2325                 xfpustate = __builtin_alloca(mcp->mc_xfpustate_len);
 2326                 ret = copyin((void *)mcp->mc_xfpustate, xfpustate,
 2327                     mcp->mc_xfpustate_len);
 2328                 if (ret != 0)
 2329                         return (ret);
 2330         } else
 2331                 xfpustate = NULL;
 2332         ret = set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len);
 2333         if (ret != 0)
 2334                 return (ret);
 2335         tp->tf_r15 = mcp->mc_r15;
 2336         tp->tf_r14 = mcp->mc_r14;
 2337         tp->tf_r13 = mcp->mc_r13;
 2338         tp->tf_r12 = mcp->mc_r12;
 2339         tp->tf_r11 = mcp->mc_r11;
 2340         tp->tf_r10 = mcp->mc_r10;
 2341         tp->tf_r9  = mcp->mc_r9;
 2342         tp->tf_r8  = mcp->mc_r8;
 2343         tp->tf_rdi = mcp->mc_rdi;
 2344         tp->tf_rsi = mcp->mc_rsi;
 2345         tp->tf_rbp = mcp->mc_rbp;
 2346         tp->tf_rbx = mcp->mc_rbx;
 2347         tp->tf_rdx = mcp->mc_rdx;
 2348         tp->tf_rcx = mcp->mc_rcx;
 2349         tp->tf_rax = mcp->mc_rax;
 2350         tp->tf_rip = mcp->mc_rip;
 2351         tp->tf_rflags = rflags;
 2352         tp->tf_rsp = mcp->mc_rsp;
 2353         tp->tf_ss = mcp->mc_ss;
 2354         tp->tf_flags = mcp->mc_flags;
 2355         if (tp->tf_flags & TF_HASSEGS) {
 2356                 tp->tf_ds = mcp->mc_ds;
 2357                 tp->tf_es = mcp->mc_es;
 2358                 tp->tf_fs = mcp->mc_fs;
 2359                 tp->tf_gs = mcp->mc_gs;
 2360         }
 2361         set_pcb_flags(pcb, PCB_FULL_IRET);
 2362         if (mcp->mc_flags & _MC_HASBASES) {
 2363                 pcb->pcb_fsbase = mcp->mc_fsbase;
 2364                 pcb->pcb_gsbase = mcp->mc_gsbase;
 2365         }
 2366         return (0);
 2367 }
 2368 
 2369 static void
 2370 get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave,
 2371     size_t xfpusave_len)
 2372 {
 2373         size_t max_len, len;
 2374 
 2375         mcp->mc_ownedfp = fpugetregs(td);
 2376         bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0],
 2377             sizeof(mcp->mc_fpstate));
 2378         mcp->mc_fpformat = fpuformat();
 2379         if (!use_xsave || xfpusave_len == 0)
 2380                 return;
 2381         max_len = cpu_max_ext_state_size - sizeof(struct savefpu);
 2382         len = xfpusave_len;
 2383         if (len > max_len) {
 2384                 bzero(xfpusave + max_len, len - max_len);
 2385                 len = max_len;
 2386         }
 2387         mcp->mc_flags |= _MC_HASFPXSTATE;
 2388         mcp->mc_xfpustate_len = len;
 2389         bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len);
 2390 }
 2391 
 2392 static int
 2393 set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate,
 2394     size_t xfpustate_len)
 2395 {
 2396         int error;
 2397 
 2398         if (mcp->mc_fpformat == _MC_FPFMT_NODEV)
 2399                 return (0);
 2400         else if (mcp->mc_fpformat != _MC_FPFMT_XMM)
 2401                 return (EINVAL);
 2402         else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) {
 2403                 /* We don't care what state is left in the FPU or PCB. */
 2404                 fpstate_drop(td);
 2405                 error = 0;
 2406         } else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU ||
 2407             mcp->mc_ownedfp == _MC_FPOWNED_PCB) {
 2408                 error = fpusetregs(td, (struct savefpu *)&mcp->mc_fpstate,
 2409                     xfpustate, xfpustate_len);
 2410         } else
 2411                 return (EINVAL);
 2412         return (error);
 2413 }
 2414 
 2415 void
 2416 fpstate_drop(struct thread *td)
 2417 {
 2418 
 2419         KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu"));
 2420         critical_enter();
 2421         if (PCPU_GET(fpcurthread) == td)
 2422                 fpudrop();
 2423         /*
 2424          * XXX force a full drop of the fpu.  The above only drops it if we
 2425          * owned it.
 2426          *
 2427          * XXX I don't much like fpugetuserregs()'s semantics of doing a full
 2428          * drop.  Dropping only to the pcb matches fnsave's behaviour.
 2429          * We only need to drop to !PCB_INITDONE in sendsig().  But
 2430          * sendsig() is the only caller of fpugetuserregs()... perhaps we just
 2431          * have too many layers.
 2432          */
 2433         clear_pcb_flags(curthread->td_pcb,
 2434             PCB_FPUINITDONE | PCB_USERFPUINITDONE);
 2435         critical_exit();
 2436 }
 2437 
 2438 int
 2439 fill_dbregs(struct thread *td, struct dbreg *dbregs)
 2440 {
 2441         struct pcb *pcb;
 2442 
 2443         if (td == NULL) {
 2444                 dbregs->dr[0] = rdr0();
 2445                 dbregs->dr[1] = rdr1();
 2446                 dbregs->dr[2] = rdr2();
 2447                 dbregs->dr[3] = rdr3();
 2448                 dbregs->dr[6] = rdr6();
 2449                 dbregs->dr[7] = rdr7();
 2450         } else {
 2451                 pcb = td->td_pcb;
 2452                 dbregs->dr[0] = pcb->pcb_dr0;
 2453                 dbregs->dr[1] = pcb->pcb_dr1;
 2454                 dbregs->dr[2] = pcb->pcb_dr2;
 2455                 dbregs->dr[3] = pcb->pcb_dr3;
 2456                 dbregs->dr[6] = pcb->pcb_dr6;
 2457                 dbregs->dr[7] = pcb->pcb_dr7;
 2458         }
 2459         dbregs->dr[4] = 0;
 2460         dbregs->dr[5] = 0;
 2461         dbregs->dr[8] = 0;
 2462         dbregs->dr[9] = 0;
 2463         dbregs->dr[10] = 0;
 2464         dbregs->dr[11] = 0;
 2465         dbregs->dr[12] = 0;
 2466         dbregs->dr[13] = 0;
 2467         dbregs->dr[14] = 0;
 2468         dbregs->dr[15] = 0;
 2469         return (0);
 2470 }
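
      /*
       * dr4 and dr5 are legacy aliases of dr6/dr7 (only while CR4.DE is
       * clear) and dr8-dr15 do not exist on amd64, so all of them are
       * deliberately reported as zero above.
       */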
 2471 
 2472 int
 2473 set_dbregs(struct thread *td, struct dbreg *dbregs)
 2474 {
 2475         struct pcb *pcb;
 2476         int i;
 2477 
 2478         if (td == NULL) {
 2479                 load_dr0(dbregs->dr[0]);
 2480                 load_dr1(dbregs->dr[1]);
 2481                 load_dr2(dbregs->dr[2]);
 2482                 load_dr3(dbregs->dr[3]);
 2483                 load_dr6(dbregs->dr[6]);
 2484                 load_dr7(dbregs->dr[7]);
 2485         } else {
 2486                 /*
 2487                  * Don't let an illegal value for dr7 get set.  Specifically,
 2488                  * check for undefined settings.  Setting these bit patterns
 2489                  * results in undefined behaviour and can lead to an unexpected
 2490                  * TRCTRAP or a general protection fault right here.  The
 2491                  * upper 32 bits of dr6 and dr7 must not be set.
 2492                  */
 2493                 for (i = 0; i < 4; i++) {
 2494                         if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02)
 2495                                 return (EINVAL);
 2496                         if (td->td_frame->tf_cs == _ucode32sel &&
 2497                             DBREG_DR7_LEN(dbregs->dr[7], i) == DBREG_DR7_LEN_8)
 2498                                 return (EINVAL);
 2499                 }
 2500                 if ((dbregs->dr[6] & 0xffffffff00000000ul) != 0 ||
 2501                     (dbregs->dr[7] & 0xffffffff00000000ul) != 0)
 2502                         return (EINVAL);
 2503 
 2504                 pcb = td->td_pcb;
 2505 
 2506                 /*
 2507                  * Don't let a process set a breakpoint that is not within the
 2508                  * process's address space.  If a process could do this, it
 2509                  * could halt the system by setting a breakpoint in the kernel
 2510                  * (if ddb was enabled).  Thus, we need to check to make sure
 2511                  * that no breakpoints are being enabled for addresses outside
 2512                  * the process's address space.
 2513                  *
 2514                  * XXX - what about when the watched area of the user's
 2515                  * address space is written into from within the kernel
 2516                  * ... wouldn't that still cause a breakpoint to be generated
 2517                  * from within kernel mode?
 2518                  */
 2519 
 2520                 if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) {
 2521                         /* dr0 is enabled */
 2522                         if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS)
 2523                                 return (EINVAL);
 2524                 }
 2525                 if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) {
 2526                         /* dr1 is enabled */
 2527                         if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS)
 2528                                 return (EINVAL);
 2529                 }
 2530                 if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) {
 2531                         /* dr2 is enabled */
 2532                         if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS)
 2533                                 return (EINVAL);
 2534                 }
 2535                 if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) {
 2536                         /* dr3 is enabled */
 2537                         if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS)
 2538                                 return (EINVAL);
 2539                 }
 2540 
 2541                 pcb->pcb_dr0 = dbregs->dr[0];
 2542                 pcb->pcb_dr1 = dbregs->dr[1];
 2543                 pcb->pcb_dr2 = dbregs->dr[2];
 2544                 pcb->pcb_dr3 = dbregs->dr[3];
 2545                 pcb->pcb_dr6 = dbregs->dr[6];
 2546                 pcb->pcb_dr7 = dbregs->dr[7];
 2547 
 2548                 set_pcb_flags(pcb, PCB_DBREGS);
 2549         }
 2550 
 2551         return (0);
 2552 }
 2553 
 2554 void
 2555 reset_dbregs(void)
 2556 {
 2557 
 2558         load_dr7(0);    /* Turn off the control bits first */
 2559         load_dr0(0);
 2560         load_dr1(0);
 2561         load_dr2(0);
 2562         load_dr3(0);
 2563         load_dr6(0);
 2564 }
 2565 
 2566 /*
 2567  * Return > 0 if a hardware breakpoint has been hit, and the
 2568  * breakpoint was in user space.  Return 0 otherwise.
 2569  */
 2570 int
 2571 user_dbreg_trap(register_t dr6)
 2572 {
 2573         u_int64_t dr7;
 2574         u_int64_t bp;       /* breakpoint bits extracted from dr6 */
 2575         int nbp;            /* number of breakpoints that triggered */
 2576         caddr_t addr[4];    /* breakpoint addresses */
 2577         int i;
 2578 
 2579         bp = dr6 & DBREG_DR6_BMASK;
 2580         if (bp == 0) {
 2581                 /*
 2582                  * None of the breakpoint bits are set, meaning this
 2583                  * trap was not caused by any of the debug registers.
 2584                  */
 2585                 return (0);
 2586         }
 2587 
 2588         dr7 = rdr7();
 2589         if ((dr7 & 0x000000ff) == 0) {
 2590                 /*
 2591                  * All of the local and global enable bits in dr7 are
 2592                  * zero, thus the trap couldn't have been caused by the
 2593                  * hardware debug registers.
 2594                  */
 2595                 return (0);
 2596         }
 2597 
 2598         nbp = 0;
 2599 
 2600         /*
 2601          * At least one of the breakpoints was hit; check to see
 2602          * which ones, and whether any of them are user-space addresses.
 2603          */
 2604 
 2605         if (bp & 0x01) {
 2606                 addr[nbp++] = (caddr_t)rdr0();
 2607         }
 2608         if (bp & 0x02) {
 2609                 addr[nbp++] = (caddr_t)rdr1();
 2610         }
 2611         if (bp & 0x04) {
 2612                 addr[nbp++] = (caddr_t)rdr2();
 2613         }
 2614         if (bp & 0x08) {
 2615                 addr[nbp++] = (caddr_t)rdr3();
 2616         }
 2617 
 2618         for (i = 0; i < nbp; i++) {
 2619                 if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) {
 2620                         /*
 2621                          * addr[i] is in user space
 2622                          */
 2623                         return (nbp);
 2624                 }
 2625         }
 2626 
 2627         /*
 2628          * None of the breakpoints are in user space.
 2629          */
 2630         return (0);
 2631 }
 2632 
 2633 /*
 2634  * pcb_flags is only modified by the current thread, or by other threads
 2635  * when the current thread is stopped.  However, the current thread may
 2636  * change it from interrupt context in cpu_switch(), or in the trap
 2637  * handler.  When we read-modify-write pcb_flags from C sources, the
 2638  * compiler may generate code that is not atomic with respect to the
 2639  * interrupt handler.  If a trap or interrupt happens and any flag is
 2640  * modified from the handler, it can be clobbered with the cached value
 2641  * later.  Therefore, we implement setting and clearing of the flags
 2642  * with single-instruction functions, which cannot race with the trap
 2643  * or interrupt handlers, since those run only on instruction boundaries.
 2644  */
 2645 void
 2646 set_pcb_flags_raw(struct pcb *pcb, const u_int flags)
 2647 {
 2648 
 2649         __asm __volatile("orl %1,%0"
 2650             : "=m" (pcb->pcb_flags) : "ir" (flags), "m" (pcb->pcb_flags)
 2651             : "cc", "memory");
 2652 
 2653 }
 2654 
 2655 /*
 2656  * Support for the RDFSBASE, WRFSBASE and similar instructions for the
 2657  * %fs and %gs bases requires that the kernel save MSR_FSBASE and
 2658  * MSR_{K,}GSBASE into the pcb if user space modified the bases.  We
 2659  * must save them on a context switch or when the return to usermode
 2660  * happens through doreti.
 2661  *
 2662  * Both events are tracked by the pcb flag PCB_FULL_IRET, which has the
 2663  * consequence that the base MSRs must be saved each time the flag is
 2664  * set.  We disable interrupts to sync with context switches.
 2665  */
 2666 static void
 2667 set_pcb_flags_fsgsbase(struct pcb *pcb, const u_int flags)
 2668 {
 2669         register_t r;
 2670 
 2671         if (curpcb == pcb &&
 2672             (flags & PCB_FULL_IRET) != 0 &&
 2673             (pcb->pcb_flags & PCB_FULL_IRET) == 0) {
 2674                 r = intr_disable();
 2675                 if ((pcb->pcb_flags & PCB_FULL_IRET) == 0) {
 2676                         if (rfs() == _ufssel)
 2677                                 pcb->pcb_fsbase = rdfsbase();
 2678                         if (rgs() == _ugssel)
 2679                                 pcb->pcb_gsbase = rdmsr(MSR_KGSBASE);
 2680                 }
 2681                 set_pcb_flags_raw(pcb, flags);
 2682                 intr_restore(r);
 2683         } else {
 2684                 set_pcb_flags_raw(pcb, flags);
 2685         }
 2686 }
 2687 
 2688 DEFINE_IFUNC(, void, set_pcb_flags, (struct pcb *, const u_int), static)
 2689 {
 2690 
 2691         return ((cpu_stdext_feature & CPUID_STDEXT_FSGSBASE) != 0 ?
 2692             set_pcb_flags_fsgsbase : set_pcb_flags_raw);
 2693 }
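
      /*
       * set_pcb_flags is an ifunc: the resolver above is run once, during
       * early boot relocation (see link_elf_ireloc() in hammer_time()),
       * so the FSGSBASE-aware variant is selected without a per-call
       * branch.
       */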
 2694 
 2695 void
 2696 clear_pcb_flags(struct pcb *pcb, const u_int flags)
 2697 {
 2698 
 2699         __asm __volatile("andl %1,%0"
 2700             : "=m" (pcb->pcb_flags) : "ir" (~flags), "m" (pcb->pcb_flags)
 2701             : "cc", "memory");
 2702 }
 2703 
 2704 #ifdef KDB
 2705 
 2706 /*
 2707  * Provide inb() and outb() as functions.  They are normally only available as
 2708  * inline functions, and thus cannot be called from the debugger.
 2709  */
 2710 
 2711 /* silence compiler warnings */
 2712 u_char inb_(u_short);
 2713 void outb_(u_short, u_char);
 2714 
 2715 u_char
 2716 inb_(u_short port)
 2717 {
 2718         return inb(port);
 2719 }
 2720 
 2721 void
 2722 outb_(u_short port, u_char data)
 2723 {
 2724         outb(port, data);
 2725 }
 2726 
 2727 #endif /* KDB */
 2728 
 2729 #undef memset
 2730 #undef memmove
 2731 #undef memcpy
 2732 
 2733 void    *memset_std(void *buf, int c, size_t len);
 2734 void    *memset_erms(void *buf, int c, size_t len);
 2735 DEFINE_IFUNC(, void *, memset, (void *, int, size_t), static)
 2736 {
 2737 
 2738         return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ?
 2739             memset_erms : memset_std);
 2740 }
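
      /*
       * CPUID_STDEXT_ERMS ("Enhanced REP MOVSB/STOSB") advertises that the
       * microcoded string instructions are at least as fast as an unrolled
       * copy loop, so the *_erms variants here and in the memmove, memcpy
       * and pagezero ifuncs below are simple rep stosb/movsb
       * implementations.
       */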
 2741 
 2742 void    *memmove_std(void * _Nonnull dst, const void * _Nonnull src,
 2743             size_t len);
 2744 void    *memmove_erms(void * _Nonnull dst, const void * _Nonnull src,
 2745             size_t len);
 2746 DEFINE_IFUNC(, void *, memmove, (void * _Nonnull, const void * _Nonnull,
 2747     size_t), static)
 2748 {
 2749 
 2750         return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ?
 2751             memmove_erms : memmove_std);
 2752 }
 2753 
 2754 void    *memcpy_std(void * _Nonnull dst, const void * _Nonnull src,
 2755             size_t len);
 2756 void    *memcpy_erms(void * _Nonnull dst, const void * _Nonnull src,
 2757             size_t len);
 2758 DEFINE_IFUNC(, void *, memcpy, (void * _Nonnull, const void * _Nonnull,size_t),
 2759     static)
 2760 {
 2761 
 2762         return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ?
 2763             memcpy_erms : memcpy_std);
 2764 }
 2765 
 2766 void    pagezero_std(void *addr);
 2767 void    pagezero_erms(void *addr);
 2768 DEFINE_IFUNC(, void , pagezero, (void *), static)
 2769 {
 2770 
 2771         return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ?
 2772             pagezero_erms : pagezero_std);
 2773 }
