FreeBSD/Linux Kernel Cross Reference
sys/amd64/amd64/exception.S


    1 /*-
    2  * Copyright (c) 1989, 1990 William F. Jolitz.
    3  * Copyright (c) 1990 The Regents of the University of California.
    4  * Copyright (c) 2007-2018 The FreeBSD Foundation
    5  * All rights reserved.
    6  *
    7  * Portions of this software were developed by A. Joseph Koshy under
    8  * sponsorship from the FreeBSD Foundation and Google, Inc.
    9  *
   10  * Portions of this software were developed by
   11  * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from
   12  * the FreeBSD Foundation.
   13  *
   14  * Redistribution and use in source and binary forms, with or without
   15  * modification, are permitted provided that the following conditions
   16  * are met:
   17  * 1. Redistributions of source code must retain the above copyright
   18  *    notice, this list of conditions and the following disclaimer.
   19  * 2. Redistributions in binary form must reproduce the above copyright
   20  *    notice, this list of conditions and the following disclaimer in the
   21  *    documentation and/or other materials provided with the distribution.
   22  * 3. Neither the name of the University nor the names of its contributors
   23  *    may be used to endorse or promote products derived from this software
   24  *    without specific prior written permission.
   25  *
   26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   36  * SUCH DAMAGE.
   37  *
   38  * $FreeBSD$
   39  */
   40 
   41 #include "opt_atpic.h"
   42 #include "opt_hwpmc_hooks.h"
   43 
   44 #include "assym.inc"
   45 
   46 #include <machine/psl.h>
   47 #include <machine/asmacros.h>
   48 #include <machine/trap.h>
   49 #include <machine/specialreg.h>
   50 #include <machine/pmap.h>
   51 
   52 #ifdef KDTRACE_HOOKS
   53         .bss
   54         .globl  dtrace_invop_jump_addr
   55         .align  8
   56         .type   dtrace_invop_jump_addr,@object
   57         .size   dtrace_invop_jump_addr,8
   58 dtrace_invop_jump_addr:
   59         .zero   8
   60         .globl  dtrace_invop_calltrap_addr
   61         .align  8
   62         .type   dtrace_invop_calltrap_addr,@object
   63         .size   dtrace_invop_calltrap_addr,8
   64 dtrace_invop_calltrap_addr:
   65         .zero   8
   66 #endif
   67         .text
   68 #ifdef HWPMC_HOOKS
   69         ENTRY(start_exceptions)
   70 #endif
   71 
   72 /*****************************************************************************/
   73 /* Trap handling                                                             */
   74 /*****************************************************************************/
   75 /*
   76  * Trap and fault vector routines.
   77  *
   78  * All traps are 'interrupt gates', SDT_SYSIGT.  An interrupt gate pushes
   79  * state on the stack but also disables interrupts.  This is important for
    80  * our use of the swapgs instruction.  We cannot be interrupted
   81  * until the GS.base value is correct.  For most traps, we automatically
   82  * then enable interrupts if the interrupted context had them enabled.
   83  * This is equivalent to the i386 port's use of SDT_SYS386TGT.
   84  *
   85  * The cpu will push a certain amount of state onto the kernel stack for
   86  * the current process.  See amd64/include/frame.h.
    87  * The current RFLAGS (the status register, which includes the interrupt
    88  * disable state prior to the trap), the code segment register, and the
    89  * return instruction pointer are pushed by the cpu.  The cpu will also
    90  * push an 'error' code for certain traps.  We push a dummy error code
    91  * for those traps where the cpu doesn't, in order to maintain a
    92  * consistent frame.  We also push a contrived 'trap number'.
   93  *
   94  * The CPU does not push the general registers, so we must do that, and we
   95  * must restore them prior to calling 'iret'.  The CPU adjusts %cs and %ss
    96  * but does not mess with %ds, %es, %gs or %fs.  We swap the %gs base
    97  * for kernel mode operation shortly, without changing the selector that
    98  * is loaded.  Since supervisor long mode works with any selectors loaded
    99  * into segment registers other than %cs, which makes them mostly unused
   100  * in long mode, and the kernel does not reference %fs, leave them alone.
   101  * The segment registers are reloaded on return to usermode.
  102  */
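/*
 * For orientation, a sketch of struct trapframe, which defines the TF_*
 * offsets generated into assym.inc.  The layout shown here is approximate
 * and paraphrased; amd64/include/frame.h is authoritative.
 *
 *	struct trapframe {
 *		register_t tf_rdi, tf_rsi, tf_rdx, tf_rcx, tf_r8, tf_r9;
 *		register_t tf_rax, tf_rbx, tf_rbp, tf_r10, tf_r11;
 *		register_t tf_r12, tf_r13, tf_r14, tf_r15;	(saved by software)
 *		uint32_t   tf_trapno;				(contrived trap number)
 *		uint16_t   tf_fs, tf_gs;
 *		register_t tf_addr;				(e.g. %cr2 for page faults)
 *		uint32_t   tf_flags;				(TF_HASSEGS etc.)
 *		uint16_t   tf_es, tf_ds;
 *		register_t tf_err;				(pushed by the CPU, or a dummy)
 *		register_t tf_rip, tf_cs, tf_rflags, tf_rsp, tf_ss;  (pushed by the CPU)
 *	};
 *
 * This is why 'subq $TF_RIP,%rsp' reserves room for everything below the
 * hardware-pushed %rip when no error code is supplied, while
 * 'subq $TF_ERR,%rsp' is used when the CPU has already pushed one.
 */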
  103 
  104 /* Traps that we leave interrupts disabled for. */
  105         .macro  TRAP_NOEN       l, trapno
  106         PTI_ENTRY       \l,\l\()_pti_k,\l\()_pti_u
  107 \l\()_pti_k:
  108         subq    $TF_RIP,%rsp
  109         movl    $\trapno,TF_TRAPNO(%rsp)
  110         movq    $0,TF_ADDR(%rsp)
  111         movq    $0,TF_ERR(%rsp)
  112         jmp     alltraps_noen_k
  113 \l\()_pti_u:
  114         subq    $TF_RIP,%rsp
  115         movl    $\trapno,TF_TRAPNO(%rsp)
  116         movq    $0,TF_ADDR(%rsp)
  117         movq    $0,TF_ERR(%rsp)
  118         jmp     alltraps_noen_u
  119         
  120         .globl  X\l
  121         .type   X\l,@function
  122 X\l:
  123         subq    $TF_RIP,%rsp
  124         movl    $\trapno,TF_TRAPNO(%rsp)
  125         movq    $0,TF_ADDR(%rsp)
  126         movq    $0,TF_ERR(%rsp)
  127         testb   $SEL_RPL_MASK,TF_CS(%rsp)
  128         jz      alltraps_noen_k
  129         swapgs
  130         lfence
  131         jmp     alltraps_noen_u
  132         .endm
  133 
  134         TRAP_NOEN       bpt, T_BPTFLT
  135 #ifdef KDTRACE_HOOKS
  136         TRAP_NOEN       dtrace_ret, T_DTRACE_RET
  137 #endif
  138 
   139 /* Regular traps; the cpu does not supply tf_err for these. */
  140         .macro  TRAP    l, trapno
  141         PTI_ENTRY       \l,\l\()_pti_k,\l\()_pti_u
  142 \l\()_pti_k:
  143         subq    $TF_RIP,%rsp
  144         movl    $\trapno,TF_TRAPNO(%rsp)
  145         movq    $0,TF_ADDR(%rsp)
  146         movq    $0,TF_ERR(%rsp)
  147         jmp     alltraps_k
  148 \l\()_pti_u:
  149         subq    $TF_RIP,%rsp
  150         movl    $\trapno,TF_TRAPNO(%rsp)
  151         movq    $0,TF_ADDR(%rsp)
  152         movq    $0,TF_ERR(%rsp)
  153         jmp     alltraps_u
  154 
  155         .globl  X\l
  156         .type   X\l,@function
  157 X\l:
  158         subq    $TF_RIP,%rsp
  159         movl    $\trapno,TF_TRAPNO(%rsp)
  160         movq    $0,TF_ADDR(%rsp)
  161         movq    $0,TF_ERR(%rsp)
  162         testb   $SEL_RPL_MASK,TF_CS(%rsp)
  163         jz      alltraps_k
  164         swapgs
  165         lfence
  166         jmp     alltraps_u
  167         .endm
  168 
  169         TRAP    div, T_DIVIDE
  170         TRAP    ofl, T_OFLOW
  171         TRAP    bnd, T_BOUND
  172         TRAP    ill, T_PRIVINFLT
  173         TRAP    dna, T_DNA
  174         TRAP    fpusegm, T_FPOPFLT
  175         TRAP    rsvd, T_RESERVED
  176         TRAP    fpu, T_ARITHTRAP
  177         TRAP    xmm, T_XMMFLT
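/*
 * For readers unfamiliar with the gas macro syntax above, a sketch of what
 * one invocation expands to (ignoring the PTI entry points): the invocation
 * 'TRAP div, T_DIVIDE' produces roughly
 *
 *	.globl	Xdiv
 *	.type	Xdiv,@function
 *	Xdiv:
 *		subq	$TF_RIP,%rsp
 *		movl	$T_DIVIDE,TF_TRAPNO(%rsp)
 *		movq	$0,TF_ADDR(%rsp)
 *		movq	$0,TF_ERR(%rsp)
 *		testb	$SEL_RPL_MASK,TF_CS(%rsp)
 *		jz	alltraps_k
 *		swapgs
 *		lfence
 *		jmp	alltraps_u
 *
 * i.e. '\l' is replaced by the label argument and '\()' merely terminates
 * the argument name so it can be concatenated with a suffix.
 */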
  178 
   179 /* This group of traps has tf_err already pushed by the cpu. */
  180         .macro  TRAP_ERR        l, trapno
  181         PTI_ENTRY       \l,\l\()_pti_k,\l\()_pti_u,has_err=1
  182 \l\()_pti_k:
  183         subq    $TF_ERR,%rsp
  184         movl    $\trapno,TF_TRAPNO(%rsp)
  185         movq    $0,TF_ADDR(%rsp)
  186         jmp     alltraps_k
  187 \l\()_pti_u:
  188         subq    $TF_ERR,%rsp
  189         movl    $\trapno,TF_TRAPNO(%rsp)
  190         movq    $0,TF_ADDR(%rsp)
  191         jmp     alltraps_u
  192         .globl  X\l
  193         .type   X\l,@function
  194 X\l:
  195         subq    $TF_ERR,%rsp
  196         movl    $\trapno,TF_TRAPNO(%rsp)
  197         movq    $0,TF_ADDR(%rsp)
  198         testb   $SEL_RPL_MASK,TF_CS(%rsp)
  199         jz      alltraps_k
  200         swapgs
  201         lfence
  202         jmp     alltraps_u
  203         .endm
  204 
  205         TRAP_ERR        tss, T_TSSFLT
  206         TRAP_ERR        align, T_ALIGNFLT
  207 
  208         /*
  209          * alltraps_u/k entry points.
   210          * SWAPGS must already have been performed by the prologue,
  211          * if this is the first time in the kernel from userland.
  212          * Reenable interrupts if they were enabled before the trap.
  213          * This approximates SDT_SYS386TGT on the i386 port.
  214          */
  215         SUPERALIGN_TEXT
  216         .globl  alltraps_u
  217         .type   alltraps_u,@function
  218 alltraps_u:
  219         movq    %rdi,TF_RDI(%rsp)
  220         movq    %rdx,TF_RDX(%rsp)
  221         movq    %rax,TF_RAX(%rsp)
  222         movq    %rcx,TF_RCX(%rsp)
  223         movq    PCPU(CURPCB),%rdi
  224         andl    $~PCB_FULL_IRET,PCB_FLAGS(%rdi)
  225         call    handle_ibrs_entry
  226         jmp     alltraps_save_segs
  227         SUPERALIGN_TEXT
  228         .globl  alltraps_k
  229         .type   alltraps_k,@function
  230 alltraps_k:
  231         lfence
  232         movq    %rdi,TF_RDI(%rsp)
  233         movq    %rdx,TF_RDX(%rsp)
  234         movq    %rax,TF_RAX(%rsp)
  235         movq    %rcx,TF_RCX(%rsp)
  236 alltraps_save_segs:
  237         SAVE_SEGS
  238         testl   $PSL_I,TF_RFLAGS(%rsp)
  239         jz      alltraps_pushregs_no_rax
  240         sti
  241 alltraps_pushregs_no_rax:
  242         movq    %rsi,TF_RSI(%rsp)
  243         movq    %r8,TF_R8(%rsp)
  244         movq    %r9,TF_R9(%rsp)
  245         movq    %rbx,TF_RBX(%rsp)
  246         movq    %rbp,TF_RBP(%rsp)
  247         movq    %r10,TF_R10(%rsp)
  248         movq    %r11,TF_R11(%rsp)
  249         movq    %r12,TF_R12(%rsp)
  250         movq    %r13,TF_R13(%rsp)
  251         movq    %r14,TF_R14(%rsp)
  252         movq    %r15,TF_R15(%rsp)
  253         movl    $TF_HASSEGS,TF_FLAGS(%rsp)
  254         pushfq
  255         andq    $~(PSL_D | PSL_AC),(%rsp)
  256         popfq
  257 #ifdef KDTRACE_HOOKS
  258         /*
  259          * DTrace Function Boundary Trace (fbt) probes are triggered
  260          * by int3 (0xcc) which causes the #BP (T_BPTFLT) breakpoint
  261          * interrupt. For all other trap types, just handle them in
  262          * the usual way.
  263          */
  264         testb   $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
  265         jnz     calltrap                /* ignore userland traps */
  266         cmpl    $T_BPTFLT,TF_TRAPNO(%rsp)
  267         jne     calltrap
  268 
  269         /* Check if there is no DTrace hook registered. */
  270         cmpq    $0,dtrace_invop_jump_addr
  271         je      calltrap
  272 
  273         /*
  274          * Set our jump address for the jump back in the event that
  275          * the breakpoint wasn't caused by DTrace at all.
  276          */
  277         movq    $calltrap,dtrace_invop_calltrap_addr(%rip)
  278 
  279         /* Jump to the code hooked in by DTrace. */
  280         jmpq    *dtrace_invop_jump_addr
  281 #endif
  282         .globl  calltrap
  283         .type   calltrap,@function
  284 calltrap:
  285         KMSAN_ENTER
  286         movq    %rsp, %rdi
  287         call    trap_check
  288         KMSAN_LEAVE
  289         jmp     doreti                  /* Handle any pending ASTs */
  290 
  291         /*
  292          * alltraps_noen_u/k entry points.
   293          * Again, SWAPGS must already have been performed by the prologue, if needed.
  294          * Unlike alltraps above, we want to leave the interrupts disabled.
  295          * This corresponds to SDT_SYS386IGT on the i386 port.
  296          */
  297         SUPERALIGN_TEXT
  298         .globl  alltraps_noen_u
  299         .type   alltraps_noen_u,@function
  300 alltraps_noen_u:
  301         movq    %rdi,TF_RDI(%rsp)
  302         movq    PCPU(CURPCB),%rdi
  303         andl    $~PCB_FULL_IRET,PCB_FLAGS(%rdi)
  304         jmp     alltraps_noen_save_segs
  305         SUPERALIGN_TEXT
  306         .globl  alltraps_noen_k
  307         .type   alltraps_noen_k,@function
  308 alltraps_noen_k:
  309         lfence
  310         movq    %rdi,TF_RDI(%rsp)
  311 alltraps_noen_save_segs:
  312         SAVE_SEGS
  313         movq    %rdx,TF_RDX(%rsp)
  314         movq    %rax,TF_RAX(%rsp)
  315         movq    %rcx,TF_RCX(%rsp)
  316         testb   $SEL_RPL_MASK,TF_CS(%rsp)
  317         jz      alltraps_pushregs_no_rax
  318         call    handle_ibrs_entry
  319         jmp     alltraps_pushregs_no_rax
  320 
  321 IDTVEC(dblfault)
  322         subq    $TF_ERR,%rsp
  323         movl    $T_DOUBLEFLT,TF_TRAPNO(%rsp)
  324         movq    $0,TF_ADDR(%rsp)
  325         movq    $0,TF_ERR(%rsp)
  326         movq    %rdi,TF_RDI(%rsp)
  327         movq    %rsi,TF_RSI(%rsp)
  328         movq    %rdx,TF_RDX(%rsp)
  329         movq    %rcx,TF_RCX(%rsp)
  330         movq    %r8,TF_R8(%rsp)
  331         movq    %r9,TF_R9(%rsp)
  332         movq    %rax,TF_RAX(%rsp)
  333         movq    %rbx,TF_RBX(%rsp)
  334         movq    %rbp,TF_RBP(%rsp)
  335         movq    %r10,TF_R10(%rsp)
  336         movq    %r11,TF_R11(%rsp)
  337         movq    %r12,TF_R12(%rsp)
  338         movq    %r13,TF_R13(%rsp)
  339         movq    %r14,TF_R14(%rsp)
  340         movq    %r15,TF_R15(%rsp)
  341         SAVE_SEGS
  342         movl    $TF_HASSEGS,TF_FLAGS(%rsp)
  343         pushfq
  344         andq    $~(PSL_D | PSL_AC),(%rsp)
  345         popfq
  346         movq    TF_SIZE(%rsp),%rdx
  347         movl    %edx,%eax
  348         shrq    $32,%rdx
  349         movl    $MSR_GSBASE,%ecx
  350         wrmsr
  351         movq    %cr3,%rax
  352         movq    %rax,PCPU(SAVED_UCR3)
  353         movq    PCPU(KCR3),%rax
  354         cmpq    $~0,%rax
  355         je      2f
  356         movq    %rax,%cr3
  357 2:      KMSAN_ENTER
  358         movq    %rsp,%rdi
  359         call    dblfault_handler
  360         KMSAN_LEAVE
  361 3:      hlt
  362         jmp     3b
  363 
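/*
 * A note on the '$~0' comparisons against PCPU(KCR3), PCPU(UCR3) and
 * PCPU(SAVED_UCR3) seen above and below: ~0 is the PMAP_NO_CR3 sentinel
 * (see amd64/include/pmap.h), meaning there is no separate PTI page table
 * to switch to, so the %cr3 reload is skipped.  This is a hedged summary;
 * the pmap headers are authoritative.
 */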
  364         ALIGN_TEXT
  365 IDTVEC(page_pti)
  366         testb   $SEL_RPL_MASK,PTI_CS-PTI_ERR(%rsp)
  367         jz      page_k
  368         swapgs
  369         lfence
  370         pushq   %rax
  371         movq    %cr3,%rax
  372         movq    %rax,PCPU(SAVED_UCR3)
  373         cmpq    $~0,PCPU(UCR3)
  374         jne     1f
  375         popq    %rax
  376         jmp     page_u
  377 1:      pushq   %rdx
  378         PTI_UUENTRY has_err=1
  379         jmp     page_u
  380         ALIGN_TEXT
  381 IDTVEC(page)
  382         testb   $SEL_RPL_MASK,TF_CS-TF_ERR(%rsp) /* Did we come from kernel? */
  383         jnz     page_u_swapgs           /* already running with kernel GS.base */
  384 page_k:
  385         lfence
  386         subq    $TF_ERR,%rsp
  387         movq    %rdi,TF_RDI(%rsp)       /* free up GP registers */
  388         movq    %rax,TF_RAX(%rsp)
  389         movq    %rdx,TF_RDX(%rsp)
  390         movq    %rcx,TF_RCX(%rsp)
  391         jmp     page_cr2
  392         ALIGN_TEXT
  393 page_u_swapgs:
  394         swapgs
  395         lfence
  396 page_u:
  397         subq    $TF_ERR,%rsp
  398         movq    %rdi,TF_RDI(%rsp)
  399         movq    %rax,TF_RAX(%rsp)
  400         movq    %rdx,TF_RDX(%rsp)
  401         movq    %rcx,TF_RCX(%rsp)
  402         movq    PCPU(CURPCB),%rdi
  403         andl    $~PCB_FULL_IRET,PCB_FLAGS(%rdi)
  404         movq    PCPU(SAVED_UCR3),%rax
  405         movq    %rax,PCB_SAVED_UCR3(%rdi)
  406         call    handle_ibrs_entry
  407 page_cr2:
  408         movq    %cr2,%rdi               /* preserve %cr2 before ..  */
  409         movq    %rdi,TF_ADDR(%rsp)      /* enabling interrupts. */
  410         SAVE_SEGS
  411         movl    $T_PAGEFLT,TF_TRAPNO(%rsp)
  412         testl   $PSL_I,TF_RFLAGS(%rsp)
  413         jz      alltraps_pushregs_no_rax
  414         sti
  415         jmp     alltraps_pushregs_no_rax
  416 
  417         /*
  418          * We have to special-case this one.  If we get a trap in doreti() at
  419          * the iretq stage, we'll reenter with the wrong gs state.  We'll have
   420          * to do the swapgs specially in this case, even coming from the kernel.
  421          * XXX linux has a trap handler for their equivalent of load_gs().
  422          *
   423          * On the stack, we have the hardware interrupt frame for the return
   424          * to usermode (which faulted) and another frame, with an error code,
   425          * for the fault itself.  For PTI, copy both frames to the main thread stack.
  426          * Handle the potential 16-byte alignment adjustment incurred
  427          * during the second fault by copying both frames independently
  428          * while unwinding the stack in between.
  429          */
  430         .macro PROTF_ENTRY name,trapno
  431 \name\()_pti_doreti:
  432         swapgs
  433         lfence
  434         cmpq    $~0,PCPU(UCR3)
  435         je      1f
  436         pushq   %rax
  437         pushq   %rdx
  438         movq    PCPU(KCR3),%rax
  439         movq    %rax,%cr3
  440         movq    PCPU(RSP0),%rax
  441         subq    $2*PTI_SIZE-3*8,%rax /* no err, %rax, %rdx in faulted frame */
  442         MOVE_STACKS     (PTI_SIZE / 8)
  443         addq    $PTI_SIZE,%rax
  444         movq    PTI_RSP(%rsp),%rsp
  445         MOVE_STACKS     (PTI_SIZE / 8 - 3)
  446         subq    $PTI_SIZE,%rax
  447         movq    %rax,%rsp
  448         popq    %rdx
  449         popq    %rax
  450 1:      swapgs
  451         jmp     X\name
  452 IDTVEC(\name\()_pti)
  453         cmpq    $doreti_iret,PTI_RIP-2*8(%rsp)
  454         je      \name\()_pti_doreti
  455         testb   $SEL_RPL_MASK,PTI_CS-2*8(%rsp) /* %rax, %rdx not yet pushed */
  456         jz      X\name          /* lfence is not needed until %gs: use */
  457         PTI_UENTRY has_err=1
  458         swapgs  /* fence provided by PTI_UENTRY */
  459 IDTVEC(\name)
  460         subq    $TF_ERR,%rsp
  461         movl    $\trapno,TF_TRAPNO(%rsp)
  462         jmp     prot_addrf
  463         .endm
  464 
  465         PROTF_ENTRY     missing, T_SEGNPFLT
  466         PROTF_ENTRY     stk, T_STKFLT
  467         PROTF_ENTRY     prot, T_PROTFLT
  468 
  469 prot_addrf:
  470         movq    $0,TF_ADDR(%rsp)
  471         movq    %rdi,TF_RDI(%rsp)       /* free up a GP register */
  472         movq    %rax,TF_RAX(%rsp)
  473         movq    %rdx,TF_RDX(%rsp)
  474         movq    %rcx,TF_RCX(%rsp)
  475         movw    %fs,TF_FS(%rsp)
  476         movw    %gs,TF_GS(%rsp)
  477         leaq    doreti_iret(%rip),%rdi
  478         cmpq    %rdi,TF_RIP(%rsp)
  479         je      5f                      /* kernel but with user gsbase!! */
  480         testb   $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
  481         jz      6f                      /* already running with kernel GS.base */
  482         testb   $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
  483         jz      2f
  484         cmpw    $KUF32SEL,TF_FS(%rsp)
  485         jne     1f
  486         rdfsbase %rax
  487 1:      cmpw    $KUG32SEL,TF_GS(%rsp)
  488         jne     2f
  489         rdgsbase %rdx
  490 2:      swapgs
  491         lfence
  492         movq    PCPU(CURPCB),%rdi
  493         testb   $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
  494         jz      4f
  495         cmpw    $KUF32SEL,TF_FS(%rsp)
  496         jne     3f
  497         movq    %rax,PCB_FSBASE(%rdi)
  498 3:      cmpw    $KUG32SEL,TF_GS(%rsp)
  499         jne     4f
  500         movq    %rdx,PCB_GSBASE(%rdi)
  501         orl     $PCB_FULL_IRET,PCB_FLAGS(%rdi)  /* full iret from user #gp */
  502 4:      call    handle_ibrs_entry
  503         movw    %es,TF_ES(%rsp)
  504         movw    %ds,TF_DS(%rsp)
  505         testl   $PSL_I,TF_RFLAGS(%rsp)
  506         jz      alltraps_pushregs_no_rax
  507         sti
  508         jmp     alltraps_pushregs_no_rax
  509 
  510 5:      swapgs
  511 6:      lfence
  512         movq    PCPU(CURPCB),%rdi
  513         jmp     4b
  514 
  515 /*
  516  * Fast syscall entry point.  We enter here with just our new %cs/%ss set,
   517  * and the new privilege level.  We are still running on the old user stack
  518  * pointer.  We have to juggle a few things around to find our stack etc.
  519  * swapgs gives us access to our PCPU space only.
  520  *
   521  * We do not support invoking this with custom segment registers loaded,
  522  * esp. %cs, %ss, %fs, %gs, e.g. using entries from an LDT.
  523  */
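/*
 * A reminder of the register convention this entry code relies on (the
 * standard FreeBSD/System V amd64 syscall ABI): the 'syscall' instruction
 * loads the return %rip into %rcx and the caller's %rflags into %r11, the
 * syscall number arrives in %rax, and the arguments arrive in %rdi, %rsi,
 * %rdx, %r10, %r8 and %r9 (%r10 stands in for %rcx, which the instruction
 * clobbers).  That is why %r11 is stored to TF_RFLAGS, %rcx to TF_RIP and
 * %r10 to TF_RCX below.  A minimal userland illustration (not part of this
 * file; SYS_getpid comes from <sys/syscall.h>):
 *
 *	#include <sys/syscall.h>
 *	long num = SYS_getpid, ret;
 *	__asm__ volatile("syscall"
 *	    : "=a"(ret) : "a"(num) : "rcx", "r11", "memory");
 */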
  524         SUPERALIGN_TEXT
  525 IDTVEC(fast_syscall_pti)
  526         swapgs
  527         cmpq    $~0,PCPU(UCR3)
  528         je      fast_syscall_common
  529         movq    %rax,PCPU(SCRATCH_RAX)
  530         movq    PCPU(KCR3),%rax
  531         movq    %rax,%cr3
  532         movq    PCPU(SCRATCH_RAX),%rax
  533         jmp     fast_syscall_common
  534         SUPERALIGN_TEXT
  535 IDTVEC(fast_syscall)
  536         swapgs
  537 fast_syscall_common:
  538         movq    %rsp,PCPU(SCRATCH_RSP)
  539         movq    PCPU(RSP0),%rsp
  540         /* Now emulate a trapframe. Make the 8 byte alignment odd for call. */
  541         subq    $TF_SIZE,%rsp
  542         /* defer TF_RSP till we have a spare register */
  543         movq    %r11,TF_RFLAGS(%rsp)
  544         movq    %rcx,TF_RIP(%rsp)       /* %rcx original value is in %r10 */
  545         movq    PCPU(SCRATCH_RSP),%r11  /* %r11 already saved */
  546         movq    %r11,TF_RSP(%rsp)       /* user stack pointer */
  547         /*
  548          * Save a few arg registers early to free them for use in
  549          * handle_ibrs_entry().  %r10 is especially tricky.  It is not an
  550          * arg register, but it holds the arg register %rcx.  Profiling
  551          * preserves %rcx, but may clobber %r10.  Profiling may also
   552          * clobber %r11, but %r11 (original %rflags) has been saved.
  553          */
  554         movq    %rax,TF_RAX(%rsp)       /* syscall number */
  555         movq    %rdx,TF_RDX(%rsp)       /* arg 3 */
  556         movq    %r10,TF_RCX(%rsp)       /* arg 4 */
  557         SAVE_SEGS
  558         call    handle_ibrs_entry
  559         movq    PCPU(CURPCB),%r11
  560         andl    $~PCB_FULL_IRET,PCB_FLAGS(%r11)
  561         sti
  562         movq    $KUDSEL,TF_SS(%rsp)
  563         movq    $KUCSEL,TF_CS(%rsp)
  564         movq    $2,TF_ERR(%rsp)
  565         movq    %rdi,TF_RDI(%rsp)       /* arg 1 */
  566         movq    %rsi,TF_RSI(%rsp)       /* arg 2 */
  567         movq    %r8,TF_R8(%rsp)         /* arg 5 */
  568         movq    %r9,TF_R9(%rsp)         /* arg 6 */
  569         movq    %rbx,TF_RBX(%rsp)       /* C preserved */
  570         movq    %rbp,TF_RBP(%rsp)       /* C preserved */
  571         movq    %r12,TF_R12(%rsp)       /* C preserved */
  572         movq    %r13,TF_R13(%rsp)       /* C preserved */
  573         movq    %r14,TF_R14(%rsp)       /* C preserved */
  574         movq    %r15,TF_R15(%rsp)       /* C preserved */
  575         movl    $TF_HASSEGS,TF_FLAGS(%rsp)
  576         movq    PCPU(CURTHREAD),%rdi
  577         movq    %rsp,TD_FRAME(%rdi)
  578         movl    TF_RFLAGS(%rsp),%esi
  579         andl    $PSL_T,%esi
  580         call    amd64_syscall
  581 1:      movq    PCPU(CURPCB),%rax
  582         /* Disable interrupts before testing PCB_FULL_IRET. */
  583         cli
  584         testl   $PCB_FULL_IRET,PCB_FLAGS(%rax)
  585         jnz     4f
  586         /* Check for and handle AST's on return to userland. */
  587         movq    PCPU(CURTHREAD),%rax
  588         cmpl    $0,TD_AST(%rax)
  589         jne     3f
  590         call    handle_ibrs_exit
  591         callq   *mds_handler
  592         /* Restore preserved registers. */
  593         movq    TF_RDI(%rsp),%rdi       /* bonus; preserve arg 1 */
  594         movq    TF_RSI(%rsp),%rsi       /* bonus: preserve arg 2 */
  595         movq    TF_RDX(%rsp),%rdx       /* return value 2 */
  596         movq    TF_RAX(%rsp),%rax       /* return value 1 */
  597         movq    TF_RFLAGS(%rsp),%r11    /* original %rflags */
  598         movq    TF_RIP(%rsp),%rcx       /* original %rip */
  599         movq    TF_RSP(%rsp),%rsp       /* user stack pointer */
  600         xorl    %r8d,%r8d               /* zero the rest of GPRs */
  601         xorl    %r10d,%r10d
  602         cmpq    $~0,PCPU(UCR3)
  603         je      2f
  604         movq    PCPU(UCR3),%r9
  605         andq    PCPU(UCR3_LOAD_MASK),%r9
  606         movq    %r9,%cr3
  607 2:      xorl    %r9d,%r9d
  608         movq    $PMAP_UCR3_NOMASK,PCPU(UCR3_LOAD_MASK)
  609         swapgs
  610         sysretq
  611 
  612 3:      /* AST scheduled. */
  613         sti
  614         movq    %rsp,%rdi
  615         call    ast
  616         jmp     1b
  617 
  618 4:      /* Requested full context restore, use doreti for that. */
  619         jmp     doreti
  620 
  621 /*
  622  * Here for CYA insurance, in case a "syscall" instruction gets
   623  * issued from 32-bit compatibility mode.  MSR_CSTAR has to point
  624  * to *something* if EFER_SCE is enabled.
  625  */
  626 IDTVEC(fast_syscall32)
  627         sysret
  628 
  629 /*
   630  * The DB# handler is very similar to the NMI handler, because 'mov/pop %ss'
   631  * delays generation of the exception until the next instruction is executed,
   632  * which might be a kernel entry.  So we must execute the handler
   633  * on an IST stack and be ready for a non-kernel GSBASE.
  634  */
  635 IDTVEC(dbg)
  636         subq    $TF_RIP,%rsp
  637         movl    $(T_TRCTRAP),TF_TRAPNO(%rsp)
  638         movq    $0,TF_ADDR(%rsp)
  639         movq    $0,TF_ERR(%rsp)
  640         movq    %rdi,TF_RDI(%rsp)
  641         movq    %rsi,TF_RSI(%rsp)
  642         movq    %rdx,TF_RDX(%rsp)
  643         movq    %rcx,TF_RCX(%rsp)
  644         movq    %r8,TF_R8(%rsp)
  645         movq    %r9,TF_R9(%rsp)
  646         movq    %rax,TF_RAX(%rsp)
  647         movq    %rbx,TF_RBX(%rsp)
  648         movq    %rbp,TF_RBP(%rsp)
  649         movq    %r10,TF_R10(%rsp)
  650         movq    %r11,TF_R11(%rsp)
  651         movq    %r12,TF_R12(%rsp)
  652         movq    %r13,TF_R13(%rsp)
  653         movq    %r14,TF_R14(%rsp)
  654         movq    %r15,TF_R15(%rsp)
  655         SAVE_SEGS
  656         movl    $TF_HASSEGS,TF_FLAGS(%rsp)
  657         pushfq
  658         andq    $~(PSL_D | PSL_AC),(%rsp)
  659         popfq
  660         testb   $SEL_RPL_MASK,TF_CS(%rsp)
  661         jnz     dbg_fromuserspace
  662         lfence
  663         /*
  664          * We've interrupted the kernel.  See comment in NMI handler about
   665          * register use.
  666          */
  667         movq    %cr2,%r15
  668         movl    $MSR_GSBASE,%ecx
  669         rdmsr
  670         movq    %rax,%r12
  671         shlq    $32,%rdx
  672         orq     %rdx,%r12
  673         /* Retrieve and load the canonical value for GS.base. */
  674         movq    TF_SIZE(%rsp),%rdx
  675         movl    %edx,%eax
  676         shrq    $32,%rdx
  677         wrmsr
  678         movq    %cr3,%r13
  679         movq    PCPU(KCR3),%rax
  680         cmpq    $~0,%rax
  681         je      1f
  682         movq    %rax,%cr3
  683 1:      testl   $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
  684         je      2f
  685         movl    $MSR_IA32_SPEC_CTRL,%ecx
  686         rdmsr
  687         movl    %eax,%r14d
  688         call    handle_ibrs_entry
  689 2:      movq    %rsp,%rdi
  690         call    trap
  691         testl   $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
  692         je      3f
  693         movl    %r14d,%eax
  694         xorl    %edx,%edx
  695         movl    $MSR_IA32_SPEC_CTRL,%ecx
  696         wrmsr
  697         /*
  698          * Put back the preserved MSR_GSBASE value.
  699          */
  700 3:      movl    $MSR_GSBASE,%ecx
  701         movq    %r12,%rdx
  702         movl    %edx,%eax
  703         shrq    $32,%rdx
  704         wrmsr
  705         movq    %r13,%cr3
  706         movq    %r15,%cr2
  707         RESTORE_REGS
  708         addq    $TF_RIP,%rsp
  709         jmp     doreti_iret
  710 dbg_fromuserspace:
  711         /*
  712          * Switch to kernel GSBASE and kernel page table, and copy frame
  713          * from the IST stack to the normal kernel stack, since trap()
  714          * re-enables interrupts, and since we might trap on DB# while
  715          * in trap().
  716          */
  717         swapgs
  718         lfence
  719         movq    PCPU(KCR3),%rax
  720         cmpq    $~0,%rax
  721         je      1f
  722         movq    %rax,%cr3
  723 1:      movq    PCPU(RSP0),%rax
  724         movl    $TF_SIZE,%ecx
  725         subq    %rcx,%rax
  726         movq    %rax,%rdi
  727         movq    %rsp,%rsi
  728         rep;movsb
  729         movq    %rax,%rsp
  730         call    handle_ibrs_entry
  731         movq    PCPU(CURPCB),%rdi
  732         orl     $PCB_FULL_IRET,PCB_FLAGS(%rdi)
  733         testb   $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
  734         jz      3f
  735         cmpw    $KUF32SEL,TF_FS(%rsp)
  736         jne     2f
  737         rdfsbase %rax
  738         movq    %rax,PCB_FSBASE(%rdi)
  739 2:      cmpw    $KUG32SEL,TF_GS(%rsp)
  740         jne     3f
  741         movl    $MSR_KGSBASE,%ecx
  742         rdmsr
  743         shlq    $32,%rdx
  744         orq     %rdx,%rax
  745         movq    %rax,PCB_GSBASE(%rdi)
  746 3:      jmp     calltrap
  747 
  748 /*
  749  * NMI handling is special.
  750  *
  751  * First, NMIs do not respect the state of the processor's RFLAGS.IF
  752  * bit.  The NMI handler may be entered at any time, including when
  753  * the processor is in a critical section with RFLAGS.IF == 0.
  754  * The processor's GS.base value could be invalid on entry to the
  755  * handler.
  756  *
  757  * Second, the processor treats NMIs specially, blocking further NMIs
  758  * until an 'iretq' instruction is executed.  We thus need to execute
  759  * the NMI handler with interrupts disabled, to prevent a nested interrupt
  760  * from executing an 'iretq' instruction and inadvertently taking the
  761  * processor out of NMI mode.
  762  *
  763  * Third, the NMI handler runs on its own stack (tss_ist2). The canonical
  764  * GS.base value for the processor is stored just above the bottom of its
  765  * NMI stack.  For NMIs taken from kernel mode, the current value in
  766  * the processor's GS.base is saved at entry to C-preserved register %r12,
  767  * the canonical value for GS.base is then loaded into the processor, and
  768  * the saved value is restored at exit time.  For NMIs taken from user mode,
   769  * the cheaper 'SWAPGS' instruction is used for swapping GS.base.
  770  */
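/*
 * A note on the idiom used below (and in the DB# and MC# handlers) to load
 * the canonical GS.base: the 64-bit value stored just above the trapframe
 * is read with 'movq TF_SIZE(%rsp),%rdx' and then split, because wrmsr
 * takes its operand in %edx:%eax:
 *
 *	movl	%edx,%eax		low 32 bits into %eax
 *	shrq	$32,%rdx		high 32 bits into %edx
 *	movl	$MSR_GSBASE,%ecx	MSR selector in %ecx
 *	wrmsr				write %edx:%eax to the MSR
 *
 * rdmsr is the mirror image: it returns the value split across %edx:%eax,
 * which is why the handlers reassemble it with 'shlq $32,%rdx; orq %rdx,...'.
 */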
  771 
  772 IDTVEC(nmi)
  773         subq    $TF_RIP,%rsp
  774         movl    $(T_NMI),TF_TRAPNO(%rsp)
  775         movq    $0,TF_ADDR(%rsp)
  776         movq    $0,TF_ERR(%rsp)
  777         movq    %rdi,TF_RDI(%rsp)
  778         movq    %rsi,TF_RSI(%rsp)
  779         movq    %rdx,TF_RDX(%rsp)
  780         movq    %rcx,TF_RCX(%rsp)
  781         movq    %r8,TF_R8(%rsp)
  782         movq    %r9,TF_R9(%rsp)
  783         movq    %rax,TF_RAX(%rsp)
  784         movq    %rbx,TF_RBX(%rsp)
  785         movq    %rbp,TF_RBP(%rsp)
  786         movq    %r10,TF_R10(%rsp)
  787         movq    %r11,TF_R11(%rsp)
  788         movq    %r12,TF_R12(%rsp)
  789         movq    %r13,TF_R13(%rsp)
  790         movq    %r14,TF_R14(%rsp)
  791         movq    %r15,TF_R15(%rsp)
  792         SAVE_SEGS
  793         movl    $TF_HASSEGS,TF_FLAGS(%rsp)
  794         pushfq
  795         andq    $~(PSL_D | PSL_AC),(%rsp)
  796         popfq
  797         xorl    %ebx,%ebx
  798         testb   $SEL_RPL_MASK,TF_CS(%rsp)
  799         jnz     nmi_fromuserspace
  800         /*
  801          * We've interrupted the kernel.  Preserve in callee-saved regs:
  802          * GS.base in %r12,
  803          * %cr3 in %r13,
   804          * possibly the lower half of MSR_IA32_SPEC_CTRL in %r14d,
  805          * %cr2 in %r15.
  806          */
  807         lfence
  808         movq    %cr2,%r15
  809         movl    $MSR_GSBASE,%ecx
  810         rdmsr
  811         movq    %rax,%r12
  812         shlq    $32,%rdx
  813         orq     %rdx,%r12
  814         /* Retrieve and load the canonical value for GS.base. */
  815         movq    TF_SIZE(%rsp),%rdx
  816         movl    %edx,%eax
  817         shrq    $32,%rdx
  818         wrmsr
  819         movq    %cr3,%r13
  820         movq    PCPU(KCR3),%rax
  821         cmpq    $~0,%rax
  822         je      1f
  823         movq    %rax,%cr3
  824 1:      testl   $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
  825         je      nmi_calltrap
  826         movl    $MSR_IA32_SPEC_CTRL,%ecx
  827         rdmsr
  828         movl    %eax,%r14d
  829         call    handle_ibrs_entry
  830         jmp     nmi_calltrap
  831 nmi_fromuserspace:
  832         incl    %ebx
  833         swapgs
  834         lfence
  835         movq    %cr3,%r13
  836         movq    PCPU(KCR3),%rax
  837         cmpq    $~0,%rax
  838         je      1f
  839         movq    %rax,%cr3
  840 1:      call    handle_ibrs_entry
  841         movq    PCPU(CURPCB),%rdi
  842         testq   %rdi,%rdi
  843         jz      3f
  844         orl     $PCB_FULL_IRET,PCB_FLAGS(%rdi)
  845         testb   $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
  846         jz      3f
  847         cmpw    $KUF32SEL,TF_FS(%rsp)
  848         jne     2f
  849         rdfsbase %rax
  850         movq    %rax,PCB_FSBASE(%rdi)
  851 2:      cmpw    $KUG32SEL,TF_GS(%rsp)
  852         jne     3f
  853         movl    $MSR_KGSBASE,%ecx
  854         rdmsr
  855         shlq    $32,%rdx
  856         orq     %rdx,%rax
  857         movq    %rax,PCB_GSBASE(%rdi)
  858 3:
  859 /* Note: this label is also used by ddb and gdb: */
  860 nmi_calltrap:
  861         KMSAN_ENTER
  862         movq    %rsp,%rdi
  863         call    trap
  864         KMSAN_LEAVE
  865 #ifdef HWPMC_HOOKS
  866         /*
  867          * Capture a userspace callchain if needed.
  868          *
  869          * - Check if the current trap was from user mode.
  870          * - Check if the current thread is valid.
  871          * - Check if the thread requires a user call chain to be
  872          *   captured.
  873          *
  874          * We are still in NMI mode at this point.
  875          */
  876         testl   %ebx,%ebx
  877         jz      nocallchain     /* not from userspace */
  878         movq    PCPU(CURTHREAD),%rax
  879         orq     %rax,%rax       /* curthread present? */
  880         jz      nocallchain
  881         /*
  882          * Move execution to the regular kernel stack, because we
   883          * are committed to returning through doreti.
  884          */
  885         movq    %rsp,%rsi       /* source stack pointer */
  886         movq    $TF_SIZE,%rcx
  887         movq    PCPU(RSP0),%rdx
  888         subq    %rcx,%rdx
  889         movq    %rdx,%rdi       /* destination stack pointer */
  890         shrq    $3,%rcx         /* trap frame size in long words */
  891         pushfq
  892         andq    $~(PSL_D | PSL_AC),(%rsp)
  893         popfq
  894         rep
  895         movsq                   /* copy trapframe */
  896         movq    %rdx,%rsp       /* we are on the regular kstack */
  897 
  898         testl   $TDP_CALLCHAIN,TD_PFLAGS(%rax) /* flagged for capture? */
  899         jz      nocallchain
  900         /*
  901          * A user callchain is to be captured, so:
  902          * - Take the processor out of "NMI" mode by faking an "iret",
  903          *   to allow for nested NMI interrupts.
  904          * - Enable interrupts, so that copyin() can work.
  905          */
  906         movl    %ss,%eax
  907         pushq   %rax            /* tf_ss */
  908         pushq   %rdx            /* tf_rsp (on kernel stack) */
  909         pushfq                  /* tf_rflags */
  910         movl    %cs,%eax
  911         pushq   %rax            /* tf_cs */
  912         pushq   $outofnmi       /* tf_rip */
  913         iretq
  914 outofnmi:
  915         /*
  916          * At this point the processor has exited NMI mode and is running
  917          * with interrupts turned off on the normal kernel stack.
  918          *
  919          * If a pending NMI gets recognized at or after this point, it
  920          * will cause a kernel callchain to be traced.
  921          *
  922          * We turn interrupts back on, and call the user callchain capture hook.
  923          */
  924         movq    pmc_hook,%rax
  925         orq     %rax,%rax
  926         jz      nocallchain
  927         movq    PCPU(CURTHREAD),%rdi            /* thread */
  928         movq    $PMC_FN_USER_CALLCHAIN,%rsi     /* command */
  929         movq    %rsp,%rdx                       /* frame */
  930         sti
  931         call    *%rax
  932         cli
  933 nocallchain:
  934 #endif
  935         testl   %ebx,%ebx       /* %ebx != 0 => return to userland */
  936         jnz     doreti_exit
  937         /*
  938          * Restore speculation control MSR, if preserved.
  939          */
  940         testl   $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
  941         je      1f
  942         movl    %r14d,%eax
  943         xorl    %edx,%edx
  944         movl    $MSR_IA32_SPEC_CTRL,%ecx
  945         wrmsr
  946         /*
  947          * Put back the preserved MSR_GSBASE value.
  948          */
  949 1:      movl    $MSR_GSBASE,%ecx
  950         movq    %r12,%rdx
  951         movl    %edx,%eax
  952         shrq    $32,%rdx
  953         wrmsr
  954         cmpb    $0, nmi_flush_l1d_sw(%rip)
  955         je      2f
  956         call    flush_l1d_sw            /* bhyve L1TF assist */
  957 2:      movq    %r13,%cr3
  958         movq    %r15,%cr2
  959         RESTORE_REGS
  960         addq    $TF_RIP,%rsp
  961         jmp     doreti_iret
  962 
  963 /*
  964  * MC# handling is similar to NMI.
  965  *
  966  * As with NMIs, machine check exceptions do not respect RFLAGS.IF and
  967  * can occur at any time with a GS.base value that does not correspond
  968  * to the privilege level in CS.
  969  *
  970  * Machine checks are not unblocked by iretq, but it is best to run
  971  * the handler with interrupts disabled since the exception may have
  972  * interrupted a critical section.
  973  *
  974  * The MC# handler runs on its own stack (tss_ist3).  The canonical
  975  * GS.base value for the processor is stored just above the bottom of
  976  * its MC# stack.  For exceptions taken from kernel mode, the current
  977  * value in the processor's GS.base is saved at entry to C-preserved
  978  * register %r12, the canonical value for GS.base is then loaded into
  979  * the processor, and the saved value is restored at exit time.  For
   980  * exceptions taken from user mode, the cheaper 'SWAPGS' instruction
   981  * is used for swapping GS.base.
  982  */
  983 
  984 IDTVEC(mchk)
  985         subq    $TF_RIP,%rsp
  986         movl    $(T_MCHK),TF_TRAPNO(%rsp)
  987         movq    $0,TF_ADDR(%rsp)
  988         movq    $0,TF_ERR(%rsp)
  989         movq    %rdi,TF_RDI(%rsp)
  990         movq    %rsi,TF_RSI(%rsp)
  991         movq    %rdx,TF_RDX(%rsp)
  992         movq    %rcx,TF_RCX(%rsp)
  993         movq    %r8,TF_R8(%rsp)
  994         movq    %r9,TF_R9(%rsp)
  995         movq    %rax,TF_RAX(%rsp)
  996         movq    %rbx,TF_RBX(%rsp)
  997         movq    %rbp,TF_RBP(%rsp)
  998         movq    %r10,TF_R10(%rsp)
  999         movq    %r11,TF_R11(%rsp)
 1000         movq    %r12,TF_R12(%rsp)
 1001         movq    %r13,TF_R13(%rsp)
 1002         movq    %r14,TF_R14(%rsp)
 1003         movq    %r15,TF_R15(%rsp)
 1004         SAVE_SEGS
 1005         movl    $TF_HASSEGS,TF_FLAGS(%rsp)
 1006         pushfq
 1007         andq    $~(PSL_D | PSL_AC),(%rsp)
 1008         popfq
 1009         xorl    %ebx,%ebx
 1010         testb   $SEL_RPL_MASK,TF_CS(%rsp)
 1011         jnz     mchk_fromuserspace
 1012         /*
 1013          * We've interrupted the kernel.  See comment in NMI handler about
  1014          * register use.
 1015          */
 1016         movq    %cr2,%r15
 1017         movl    $MSR_GSBASE,%ecx
 1018         rdmsr
 1019         movq    %rax,%r12
 1020         shlq    $32,%rdx
 1021         orq     %rdx,%r12
 1022         /* Retrieve and load the canonical value for GS.base. */
 1023         movq    TF_SIZE(%rsp),%rdx
 1024         movl    %edx,%eax
 1025         shrq    $32,%rdx
 1026         wrmsr
 1027         movq    %cr3,%r13
 1028         movq    PCPU(KCR3),%rax
 1029         cmpq    $~0,%rax
 1030         je      1f
 1031         movq    %rax,%cr3
 1032 1:      testl   $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
 1033         je      mchk_calltrap
 1034         movl    $MSR_IA32_SPEC_CTRL,%ecx
 1035         rdmsr
 1036         movl    %eax,%r14d
 1037         call    handle_ibrs_entry
 1038         jmp     mchk_calltrap
 1039 mchk_fromuserspace:
 1040         incl    %ebx
 1041         swapgs
 1042         movq    %cr3,%r13
 1043         movq    PCPU(KCR3),%rax
 1044         cmpq    $~0,%rax
 1045         je      1f
 1046         movq    %rax,%cr3
 1047 1:      call    handle_ibrs_entry
 1048 /* Note: this label is also used by ddb and gdb: */
 1049 mchk_calltrap:
 1050         KMSAN_ENTER
 1051         movq    %rsp,%rdi
 1052         call    mca_intr
 1053         KMSAN_LEAVE
 1054         testl   %ebx,%ebx       /* %ebx != 0 => return to userland */
 1055         jnz     doreti_exit
 1056         /*
 1057          * Restore speculation control MSR, if preserved.
 1058          */
 1059         testl   $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
 1060         je      1f
 1061         movl    %r14d,%eax
 1062         xorl    %edx,%edx
 1063         movl    $MSR_IA32_SPEC_CTRL,%ecx
 1064         wrmsr
 1065         /*
 1066          * Put back the preserved MSR_GSBASE value.
 1067          */
 1068 1:      movl    $MSR_GSBASE,%ecx
 1069         movq    %r12,%rdx
 1070         movl    %edx,%eax
 1071         shrq    $32,%rdx
 1072         wrmsr
 1073         movq    %r13,%cr3
 1074         movq    %r15,%cr2
 1075         RESTORE_REGS
 1076         addq    $TF_RIP,%rsp
 1077         jmp     doreti_iret
 1078 
 1079 ENTRY(fork_trampoline)
 1080         movq    %r12,%rdi               /* function */
 1081         movq    %rbx,%rsi               /* arg1 */
 1082         movq    %rsp,%rdx               /* trapframe pointer */
 1083         call    fork_exit
 1084         jmp     doreti                  /* Handle any ASTs */
 1085 
 1086 /*
 1087  * To efficiently implement classification of trap and interrupt handlers
 1088  * for profiling, there must be only trap handlers between the labels btrap
 1089  * and bintr, and only interrupt handlers between the labels bintr and
 1090  * eintr.  This is implemented (partly) by including files that contain
 1091  * some of the handlers.  Before including the files, set up a normal asm
  1092  * environment so that the included files don't need to know that they are
 1093  * included.
 1094  */
 1095 
 1096 #ifdef COMPAT_FREEBSD32
 1097         .data
 1098         .p2align 4
 1099         .text
 1100         SUPERALIGN_TEXT
 1101 
 1102 #include <amd64/ia32/ia32_exception.S>
 1103 #endif
 1104 
 1105         .data
 1106         .p2align 4
 1107         .text
 1108         SUPERALIGN_TEXT
 1109 #include <amd64/amd64/apic_vector.S>
 1110 
 1111 #ifdef DEV_ATPIC
 1112         .data
 1113         .p2align 4
 1114         .text
 1115         SUPERALIGN_TEXT
 1116 
 1117 #include <amd64/amd64/atpic_vector.S>
 1118 #endif
 1119 
 1120 /*
 1121  * void doreti(struct trapframe)
 1122  *
 1123  * Handle return from interrupts, traps and syscalls.
 1124  */
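/*
 * Note: despite the C-style prototype above, doreti is entered with 'jmp'
 * (not 'call') and expects the trapframe at the top of the current stack;
 * it leaves through iretq (or the PTI trampoline) rather than returning
 * to a caller.
 */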
 1125         .text
 1126         SUPERALIGN_TEXT
 1127         .type   doreti,@function
 1128         .globl  doreti
 1129 doreti:
 1130         /*
 1131          * Check if ASTs can be handled now.
 1132          */
 1133         testb   $SEL_RPL_MASK,TF_CS(%rsp) /* are we returning to user mode? */
 1134         jz      doreti_exit             /* can't handle ASTs now if not */
 1135 
 1136 doreti_ast:
 1137         /*
 1138          * Check for ASTs atomically with returning.  Disabling CPU
 1139          * interrupts provides sufficient locking even in the SMP case,
 1140          * since we will be informed of any new ASTs by an IPI.
 1141          */
 1142         cli
 1143         movq    PCPU(CURTHREAD),%rax
 1144         cmpl    $0,TD_AST(%rax)
 1145         je      doreti_exit
 1146         sti
 1147         movq    %rsp,%rdi       /* pass a pointer to the trapframe */
 1148         call    ast
 1149         jmp     doreti_ast
 1150 
 1151         /*
 1152          * doreti_exit: pop registers, iret.
 1153          *
 1154          *      The segment register pop is a special case, since it may
 1155          *      fault if (for example) a sigreturn specifies bad segment
 1156          *      registers.  The fault is handled in trap.c.
 1157          */
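/*
 * The ld_fs/ld_gs/ld_es/ld_ds/ld_fsbase/ld_gsbase labels below are global
 * so that the fault recovery in trap.c can recognize a fault taken on one
 * of these segment or MSR loads and restart execution at the matching
 * *_load_fault handler further down in this file.  (A hedged summary;
 * trap.c is authoritative for the exact matching.)
 */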
 1158 doreti_exit:
 1159         movq    PCPU(CURPCB),%r8
 1160 
 1161         /*
 1162          * Do not reload segment registers for kernel.
  1163          * Since we do not reload segment registers with sane
  1164          * values on kernel entry, descriptors referenced by
  1165          * segment registers might not be valid.  This is fatal
 1166          * for user mode, but is not a problem for the kernel.
 1167          */
 1168         testb   $SEL_RPL_MASK,TF_CS(%rsp)
 1169         jz      ld_regs
 1170         testl   $PCB_FULL_IRET,PCB_FLAGS(%r8)
 1171         jz      ld_regs
 1172         andl    $~PCB_FULL_IRET,PCB_FLAGS(%r8)
 1173         testl   $TF_HASSEGS,TF_FLAGS(%rsp)
 1174         je      set_segs
 1175 
 1176 do_segs:
 1177         /* Restore %fs and fsbase */
 1178         movw    TF_FS(%rsp),%ax
 1179         .globl  ld_fs
 1180 ld_fs:
 1181         movw    %ax,%fs
 1182         cmpw    $KUF32SEL,%ax
 1183         jne     1f
 1184         movl    $MSR_FSBASE,%ecx
 1185         movl    PCB_FSBASE(%r8),%eax
 1186         movl    PCB_FSBASE+4(%r8),%edx
 1187         .globl  ld_fsbase
 1188 ld_fsbase:
 1189         wrmsr
 1190 1:
 1191         /* Restore %gs and gsbase */
 1192         movw    TF_GS(%rsp),%si
 1193         pushfq
 1194         cli
 1195         movl    $MSR_GSBASE,%ecx
 1196         /* Save current kernel %gs base into %r12d:%r13d */
 1197         rdmsr
 1198         movl    %eax,%r12d
 1199         movl    %edx,%r13d
 1200         .globl  ld_gs
 1201 ld_gs:
 1202         movw    %si,%gs
 1203         /* Save user %gs base into %r14d:%r15d */
 1204         rdmsr
 1205         movl    %eax,%r14d
 1206         movl    %edx,%r15d
 1207         /* Restore kernel %gs base */
 1208         movl    %r12d,%eax
 1209         movl    %r13d,%edx
 1210         wrmsr
 1211         popfq
 1212         /*
 1213          * Restore user %gs base, either from PCB if used for TLS, or
 1214          * from the previously saved msr read.
 1215          */
 1216         movl    $MSR_KGSBASE,%ecx
 1217         cmpw    $KUG32SEL,%si
 1218         jne     1f
 1219         movl    PCB_GSBASE(%r8),%eax
 1220         movl    PCB_GSBASE+4(%r8),%edx
 1221         jmp     ld_gsbase
 1222 1:
 1223         movl    %r14d,%eax
 1224         movl    %r15d,%edx
 1225         .globl  ld_gsbase
 1226 ld_gsbase:
 1227         wrmsr   /* May trap if non-canonical, but only for TLS. */
 1228         .globl  ld_es
 1229 ld_es:
 1230         movw    TF_ES(%rsp),%es
 1231         .globl  ld_ds
 1232 ld_ds:
 1233         movw    TF_DS(%rsp),%ds
 1234 ld_regs:
 1235         RESTORE_REGS
 1236         testb   $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
 1237         jz      2f                      /* keep running with kernel GS.base */
 1238         cli
 1239         call    handle_ibrs_exit_rs
 1240         callq   *mds_handler
 1241         cmpq    $~0,PCPU(UCR3)
 1242         je      1f
 1243         pushq   %rdx
 1244         movq    PCPU(PTI_RSP0),%rdx
 1245         subq    $PTI_SIZE,%rdx
 1246         movq    %rax,PTI_RAX(%rdx)
 1247         popq    %rax
 1248         movq    %rax,PTI_RDX(%rdx)
 1249         movq    TF_RIP(%rsp),%rax
 1250         movq    %rax,PTI_RIP(%rdx)
 1251         movq    TF_CS(%rsp),%rax
 1252         movq    %rax,PTI_CS(%rdx)
 1253         movq    TF_RFLAGS(%rsp),%rax
 1254         movq    %rax,PTI_RFLAGS(%rdx)
 1255         movq    TF_RSP(%rsp),%rax
 1256         movq    %rax,PTI_RSP(%rdx)
 1257         movq    TF_SS(%rsp),%rax
 1258         movq    %rax,PTI_SS(%rdx)
 1259         movq    PCPU(UCR3),%rax
 1260         andq    PCPU(UCR3_LOAD_MASK),%rax
 1261         movq    $PMAP_UCR3_NOMASK,PCPU(UCR3_LOAD_MASK)
 1262         swapgs
 1263         movq    %rdx,%rsp
 1264         movq    %rax,%cr3
 1265         popq    %rdx
 1266         popq    %rax
 1267         addq    $8,%rsp
 1268         jmp     doreti_iret
 1269 1:      swapgs
 1270 2:      addq    $TF_RIP,%rsp
 1271         .globl  doreti_iret
 1272 doreti_iret:
 1273         iretq
 1274 
 1275 set_segs:
 1276         movw    $KUDSEL,%ax
 1277         movw    %ax,TF_DS(%rsp)
 1278         movw    %ax,TF_ES(%rsp)
 1279         movw    $KUF32SEL,TF_FS(%rsp)
 1280         movw    $KUG32SEL,TF_GS(%rsp)
 1281         jmp     do_segs
 1282 
 1283         /*
 1284          * doreti_iret_fault.  Alternative return code for
 1285          * the case where we get a fault in the doreti_exit code
 1286          * above.  trap() (amd64/amd64/trap.c) catches this specific
 1287          * case, sends the process a signal and continues in the
 1288          * corresponding place in the code below.
 1289          */
 1290         ALIGN_TEXT
 1291         .globl  doreti_iret_fault
 1292 doreti_iret_fault:
 1293         subq    $TF_RIP,%rsp            /* space including tf_err, tf_trapno */
 1294         movq    %rax,TF_RAX(%rsp)
 1295         movq    %rdx,TF_RDX(%rsp)
 1296         movq    %rcx,TF_RCX(%rsp)
 1297         call    handle_ibrs_entry
 1298         testb   $SEL_RPL_MASK,TF_CS(%rsp)
 1299         jz      1f
 1300         sti
 1301 1:
 1302         SAVE_SEGS
 1303         movl    $TF_HASSEGS,TF_FLAGS(%rsp)
 1304         movq    %rdi,TF_RDI(%rsp)
 1305         movq    %rsi,TF_RSI(%rsp)
 1306         movq    %r8,TF_R8(%rsp)
 1307         movq    %r9,TF_R9(%rsp)
 1308         movq    %rbx,TF_RBX(%rsp)
 1309         movq    %rbp,TF_RBP(%rsp)
 1310         movq    %r10,TF_R10(%rsp)
 1311         movq    %r11,TF_R11(%rsp)
 1312         movq    %r12,TF_R12(%rsp)
 1313         movq    %r13,TF_R13(%rsp)
 1314         movq    %r14,TF_R14(%rsp)
 1315         movq    %r15,TF_R15(%rsp)
 1316         movl    $T_PROTFLT,TF_TRAPNO(%rsp)
 1317         movq    $0,TF_ERR(%rsp) /* XXX should be the error code */
 1318         movq    $0,TF_ADDR(%rsp)
 1319         jmp     calltrap
 1320 
 1321         ALIGN_TEXT
 1322         .globl  ds_load_fault
 1323 ds_load_fault:
 1324         movl    $T_PROTFLT,TF_TRAPNO(%rsp)
 1325         testb   $SEL_RPL_MASK,TF_CS(%rsp)
 1326         jz      1f
 1327         sti
 1328 1:
 1329         movq    %rsp,%rdi
 1330         call    trap
 1331         movw    $KUDSEL,TF_DS(%rsp)
 1332         jmp     doreti
 1333 
 1334         ALIGN_TEXT
 1335         .globl  es_load_fault
 1336 es_load_fault:
 1337         movl    $T_PROTFLT,TF_TRAPNO(%rsp)
 1338         testl   $PSL_I,TF_RFLAGS(%rsp)
 1339         jz      1f
 1340         sti
 1341 1:
 1342         movq    %rsp,%rdi
 1343         call    trap
 1344         movw    $KUDSEL,TF_ES(%rsp)
 1345         jmp     doreti
 1346 
 1347         ALIGN_TEXT
 1348         .globl  fs_load_fault
 1349 fs_load_fault:
 1350         testl   $PSL_I,TF_RFLAGS(%rsp)
 1351         jz      1f
 1352         sti
 1353 1:
 1354         movl    $T_PROTFLT,TF_TRAPNO(%rsp)
 1355         movq    %rsp,%rdi
 1356         call    trap
 1357         movw    $KUF32SEL,TF_FS(%rsp)
 1358         jmp     doreti
 1359 
 1360         ALIGN_TEXT
 1361         .globl  gs_load_fault
 1362 gs_load_fault:
 1363         popfq
 1364         movl    $T_PROTFLT,TF_TRAPNO(%rsp)
 1365         testl   $PSL_I,TF_RFLAGS(%rsp)
 1366         jz      1f
 1367         sti
 1368 1:
 1369         movq    %rsp,%rdi
 1370         call    trap
 1371         movw    $KUG32SEL,TF_GS(%rsp)
 1372         jmp     doreti
 1373 
 1374         ALIGN_TEXT
 1375         .globl  fsbase_load_fault
 1376 fsbase_load_fault:
 1377         movl    $T_PROTFLT,TF_TRAPNO(%rsp)
 1378         testl   $PSL_I,TF_RFLAGS(%rsp)
 1379         jz      1f
 1380         sti
 1381 1:
 1382         movq    %rsp,%rdi
 1383         call    trap
 1384         movq    PCPU(CURTHREAD),%r8
 1385         movq    TD_PCB(%r8),%r8
 1386         movq    $0,PCB_FSBASE(%r8)
 1387         jmp     doreti
 1388 
 1389         ALIGN_TEXT
 1390         .globl  gsbase_load_fault
 1391 gsbase_load_fault:
 1392         movl    $T_PROTFLT,TF_TRAPNO(%rsp)
 1393         testl   $PSL_I,TF_RFLAGS(%rsp)
 1394         jz      1f
 1395         sti
 1396 1:
 1397         movq    %rsp,%rdi
 1398         call    trap
 1399         movq    PCPU(CURTHREAD),%r8
 1400         movq    TD_PCB(%r8),%r8
 1401         movq    $0,PCB_GSBASE(%r8)
 1402         jmp     doreti
 1403 
 1404 #ifdef HWPMC_HOOKS
 1405         ENTRY(end_exceptions)
 1406 #endif
