FreeBSD/Linux Kernel Cross Reference
sys/amd64/amd64/exception.S


    1 /*-
    2  * Copyright (c) 1989, 1990 William F. Jolitz.
    3  * Copyright (c) 1990 The Regents of the University of California.
    4  * Copyright (c) 2007-2018 The FreeBSD Foundation
    5  * All rights reserved.
    6  *
    7  * Portions of this software were developed by A. Joseph Koshy under
    8  * sponsorship from the FreeBSD Foundation and Google, Inc.
    9  *
   10  * Portions of this software were developed by
   11  * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from
   12  * the FreeBSD Foundation.
   13  *
   14  * Redistribution and use in source and binary forms, with or without
   15  * modification, are permitted provided that the following conditions
   16  * are met:
   17  * 1. Redistributions of source code must retain the above copyright
   18  *    notice, this list of conditions and the following disclaimer.
   19  * 2. Redistributions in binary form must reproduce the above copyright
   20  *    notice, this list of conditions and the following disclaimer in the
   21  *    documentation and/or other materials provided with the distribution.
   22  * 3. Neither the name of the University nor the names of its contributors
   23  *    may be used to endorse or promote products derived from this software
   24  *    without specific prior written permission.
   25  *
   26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   36  * SUCH DAMAGE.
   37  *
   38  * $FreeBSD$
   39  */
   40 
   41 #include "opt_atpic.h"
   42 #include "opt_hwpmc_hooks.h"
   43 
   44 #include "assym.inc"
   45 
   46 #include <machine/psl.h>
   47 #include <machine/asmacros.h>
   48 #include <machine/trap.h>
   49 #include <machine/specialreg.h>
   50 
   51 #ifdef KDTRACE_HOOKS
   52         .bss
   53         .globl  dtrace_invop_jump_addr
   54         .align  8
   55         .type   dtrace_invop_jump_addr,@object
   56         .size   dtrace_invop_jump_addr,8
   57 dtrace_invop_jump_addr:
   58         .zero   8
   59         .globl  dtrace_invop_calltrap_addr
   60         .align  8
   61         .type   dtrace_invop_calltrap_addr,@object
   62         .size   dtrace_invop_calltrap_addr,8
   63 dtrace_invop_calltrap_addr:
   64         .zero   8
   65 #endif
   66         .text
   67 #ifdef HWPMC_HOOKS
   68         ENTRY(start_exceptions)
   69 #endif
   70 
   71 /*****************************************************************************/
   72 /* Trap handling                                                             */
   73 /*****************************************************************************/
   74 /*
   75  * Trap and fault vector routines.
   76  *
   77  * All traps are 'interrupt gates', SDT_SYSIGT.  An interrupt gate pushes
   78  * state on the stack but also disables interrupts.  This is important for
   79  * us for the use of the swapgs instruction.  We cannot be interrupted
   80  * until the GS.base value is correct.  For most traps, we automatically
   81  * then enable interrupts if the interrupted context had them enabled.
   82  * This is equivalent to the i386 port's use of SDT_SYS386TGT.
   83  *
   84  * The cpu will push a certain amount of state onto the kernel stack for
   85  * the current process.  See amd64/include/frame.h.
    86  * This includes the current RFLAGS (the status register, which includes
    87  * the interrupt disable state prior to the trap), the code segment
    88  * register, and the return instruction pointer.  The cpu will also push
    89  * an 'error' code for certain traps.  We push a dummy error code for
    90  * those traps where the cpu doesn't, in order to maintain a consistent
    91  * frame.  We also push a contrived 'trap number'.
   92  *
   93  * The CPU does not push the general registers, so we must do that, and we
   94  * must restore them prior to calling 'iret'.  The CPU adjusts %cs and %ss
    95  * but does not mess with %ds, %es, %gs or %fs.  We swap the %gs base for
    96  * kernel mode operation shortly, without changing the loaded selector.
    97  * Since kernel execution in long mode works with any selectors loaded into
    98  * segment registers other than %cs, which makes them mostly unused in
    99  * long mode, and the kernel does not reference %fs, we leave them alone.
   100  * The segment registers are reloaded on return to usermode.
  101  */
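       /*
        * Roughly, the completed frame looks like this, lowest address (the
        * final %rsp) first; see amd64/include/frame.h and assym.inc for the
        * authoritative struct trapframe offsets:
        *
        *   tf_rdi .. tf_r15, tf_trapno,    filled in by the stubs and by
        *   tf_fs/gs/es/ds, tf_addr,        the alltraps code below
        *   tf_flags
        *   tf_err                          pushed by the cpu for some
        *                                   vectors, faked otherwise
        *   tf_rip, tf_cs, tf_rflags,       always pushed by the cpu on
        *   tf_rsp, tf_ss                   interrupt gate delivery
        *
        * Each stub therefore does 'subq $TF_RIP,%rsp', or 'subq $TF_ERR,%rsp'
        * when the cpu supplied an error code: TF_RIP (TF_ERR) is the byte
        * offset of tf_rip (tf_err) within the frame, and the cpu left %rsp
        * pointing at the last field it pushed, so the subtraction leaves
        * %rsp pointing at the base of a full trapframe.
        */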
  102 
  103 MCOUNT_LABEL(user)
  104 MCOUNT_LABEL(btrap)
  105 
  106 /* Traps that we leave interrupts disabled for. */
  107         .macro  TRAP_NOEN       l, trapno
  108         PTI_ENTRY       \l,\l\()_pti_k,\l\()_pti_u
  109 \l\()_pti_k:
  110         subq    $TF_RIP,%rsp
  111         movl    $\trapno,TF_TRAPNO(%rsp)
  112         movq    $0,TF_ADDR(%rsp)
  113         movq    $0,TF_ERR(%rsp)
  114         jmp     alltraps_noen_k
  115 \l\()_pti_u:
  116         subq    $TF_RIP,%rsp
  117         movl    $\trapno,TF_TRAPNO(%rsp)
  118         movq    $0,TF_ADDR(%rsp)
  119         movq    $0,TF_ERR(%rsp)
  120         jmp     alltraps_noen_u
  121         
  122         .globl  X\l
  123         .type   X\l,@function
  124 X\l:
  125         subq    $TF_RIP,%rsp
  126         movl    $\trapno,TF_TRAPNO(%rsp)
  127         movq    $0,TF_ADDR(%rsp)
  128         movq    $0,TF_ERR(%rsp)
  129         testb   $SEL_RPL_MASK,TF_CS(%rsp)
  130         jz      alltraps_noen_k
  131         swapgs
  132         lfence
  133         jmp     alltraps_noen_u
  134         .endm
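       /*
        * Each of the TRAP_NOEN, TRAP and TRAP_ERR macros generates three
        * entry points per vector: X\l, the plain IDT entry used when
        * page-table isolation is off, plus \l\()_pti_k and \l\()_pti_u,
        * the targets reached through the PTI trampoline emitted by
        * PTI_ENTRY (see amd64/include/asmacros.h) for traps taken from
        * the kernel and from userland respectively.  For example,
        * 'TRAP_NOEN bpt, T_BPTFLT' produces Xbpt, bpt_pti_k and bpt_pti_u.
        */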
  135 
  136         TRAP_NOEN       bpt, T_BPTFLT
  137 #ifdef KDTRACE_HOOKS
  138         TRAP_NOEN       dtrace_ret, T_DTRACE_RET
  139 #endif
  140 
   141 /* Regular traps; the cpu does not supply tf_err for these. */
  142         .macro  TRAP    l, trapno
  143         PTI_ENTRY       \l,\l\()_pti_k,\l\()_pti_u
  144 \l\()_pti_k:
  145         subq    $TF_RIP,%rsp
  146         movl    $\trapno,TF_TRAPNO(%rsp)
  147         movq    $0,TF_ADDR(%rsp)
  148         movq    $0,TF_ERR(%rsp)
  149         jmp     alltraps_k
  150 \l\()_pti_u:
  151         subq    $TF_RIP,%rsp
  152         movl    $\trapno,TF_TRAPNO(%rsp)
  153         movq    $0,TF_ADDR(%rsp)
  154         movq    $0,TF_ERR(%rsp)
  155         jmp     alltraps_u
  156 
  157         .globl  X\l
  158         .type   X\l,@function
  159 X\l:
  160         subq    $TF_RIP,%rsp
  161         movl    $\trapno,TF_TRAPNO(%rsp)
  162         movq    $0,TF_ADDR(%rsp)
  163         movq    $0,TF_ERR(%rsp)
  164         testb   $SEL_RPL_MASK,TF_CS(%rsp)
  165         jz      alltraps_k
  166         swapgs
  167         lfence
  168         jmp     alltraps_u
  169         .endm
  170 
  171         TRAP    div, T_DIVIDE
  172         TRAP    ofl, T_OFLOW
  173         TRAP    bnd, T_BOUND
  174         TRAP    ill, T_PRIVINFLT
  175         TRAP    dna, T_DNA
  176         TRAP    fpusegm, T_FPOPFLT
  177         TRAP    rsvd, T_RESERVED
  178         TRAP    fpu, T_ARITHTRAP
  179         TRAP    xmm, T_XMMFLT
  180 
   181 /* This group of traps has tf_err already pushed by the cpu. */
  182         .macro  TRAP_ERR        l, trapno
  183         PTI_ENTRY       \l,\l\()_pti_k,\l\()_pti_u,has_err=1
  184 \l\()_pti_k:
  185         subq    $TF_ERR,%rsp
  186         movl    $\trapno,TF_TRAPNO(%rsp)
  187         movq    $0,TF_ADDR(%rsp)
  188         jmp     alltraps_k
  189 \l\()_pti_u:
  190         subq    $TF_ERR,%rsp
  191         movl    $\trapno,TF_TRAPNO(%rsp)
  192         movq    $0,TF_ADDR(%rsp)
  193         jmp     alltraps_u
  194         .globl  X\l
  195         .type   X\l,@function
  196 X\l:
  197         subq    $TF_ERR,%rsp
  198         movl    $\trapno,TF_TRAPNO(%rsp)
  199         movq    $0,TF_ADDR(%rsp)
  200         testb   $SEL_RPL_MASK,TF_CS(%rsp)
  201         jz      alltraps_k
  202         swapgs
  203         lfence
  204         jmp     alltraps_u
  205         .endm
  206 
  207         TRAP_ERR        tss, T_TSSFLT
  208         TRAP_ERR        align, T_ALIGNFLT
  209 
  210         /*
  211          * alltraps_u/k entry points.
   212          * SWAPGS must already have been performed by the prologue
   213          * if this is the first entry into the kernel from userland.
  214          * Reenable interrupts if they were enabled before the trap.
  215          * This approximates SDT_SYS386TGT on the i386 port.
  216          */
  217         SUPERALIGN_TEXT
  218         .globl  alltraps_u
  219         .type   alltraps_u,@function
  220 alltraps_u:
  221         movq    %rdi,TF_RDI(%rsp)
  222         movq    %rdx,TF_RDX(%rsp)
  223         movq    %rax,TF_RAX(%rsp)
  224         movq    %rcx,TF_RCX(%rsp)
  225         movq    PCPU(CURPCB),%rdi
  226         andl    $~PCB_FULL_IRET,PCB_FLAGS(%rdi)
  227         call    handle_ibrs_entry
  228         jmp     alltraps_save_segs
  229         SUPERALIGN_TEXT
  230         .globl  alltraps_k
  231         .type   alltraps_k,@function
  232 alltraps_k:
  233         lfence
  234         movq    %rdi,TF_RDI(%rsp)
  235         movq    %rdx,TF_RDX(%rsp)
  236         movq    %rax,TF_RAX(%rsp)
  237         movq    %rcx,TF_RCX(%rsp)
  238 alltraps_save_segs:
  239         SAVE_SEGS
  240         testl   $PSL_I,TF_RFLAGS(%rsp)
  241         jz      alltraps_pushregs_no_rax
  242         sti
  243 alltraps_pushregs_no_rax:
  244         movq    %rsi,TF_RSI(%rsp)
  245         movq    %r8,TF_R8(%rsp)
  246         movq    %r9,TF_R9(%rsp)
  247         movq    %rbx,TF_RBX(%rsp)
  248         movq    %rbp,TF_RBP(%rsp)
  249         movq    %r10,TF_R10(%rsp)
  250         movq    %r11,TF_R11(%rsp)
  251         movq    %r12,TF_R12(%rsp)
  252         movq    %r13,TF_R13(%rsp)
  253         movq    %r14,TF_R14(%rsp)
  254         movq    %r15,TF_R15(%rsp)
  255         movl    $TF_HASSEGS,TF_FLAGS(%rsp)
  256         pushfq
  257         andq    $~(PSL_D | PSL_AC),(%rsp)
  258         popfq
  259         FAKE_MCOUNT(TF_RIP(%rsp))
  260 #ifdef KDTRACE_HOOKS
  261         /*
  262          * DTrace Function Boundary Trace (fbt) probes are triggered
  263          * by int3 (0xcc) which causes the #BP (T_BPTFLT) breakpoint
  264          * interrupt. For all other trap types, just handle them in
  265          * the usual way.
  266          */
  267         testb   $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
  268         jnz     calltrap                /* ignore userland traps */
  269         cmpl    $T_BPTFLT,TF_TRAPNO(%rsp)
  270         jne     calltrap
  271 
  272         /* Check if there is no DTrace hook registered. */
  273         cmpq    $0,dtrace_invop_jump_addr
  274         je      calltrap
  275 
  276         /*
  277          * Set our jump address for the jump back in the event that
  278          * the breakpoint wasn't caused by DTrace at all.
  279          */
  280         movq    $calltrap,dtrace_invop_calltrap_addr(%rip)
  281 
  282         /* Jump to the code hooked in by DTrace. */
  283         jmpq    *dtrace_invop_jump_addr
  284 #endif
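       /*
        * Common dispatch into C: %rdi carries the trapframe pointer.
        * trap_check() (amd64/amd64/trap.c) is a thin wrapper that lets
        * DTrace claim the trap first when KDTRACE_HOOKS is configured,
        * then calls trap(); doreti afterwards delivers any pending ASTs
        * and returns to the interrupted context.
        */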
  285         .globl  calltrap
  286         .type   calltrap,@function
  287 calltrap:
  288         movq    %rsp,%rdi
  289         call    trap_check
  290         MEXITCOUNT
  291         jmp     doreti                  /* Handle any pending ASTs */
  292 
  293         /*
  294          * alltraps_noen_u/k entry points.
   295          * Again, SWAPGS must already have been performed by the prologue, if needed.
  296          * Unlike alltraps above, we want to leave the interrupts disabled.
  297          * This corresponds to SDT_SYS386IGT on the i386 port.
  298          */
  299         SUPERALIGN_TEXT
  300         .globl  alltraps_noen_u
  301         .type   alltraps_noen_u,@function
  302 alltraps_noen_u:
  303         movq    %rdi,TF_RDI(%rsp)
  304         movq    PCPU(CURPCB),%rdi
  305         andl    $~PCB_FULL_IRET,PCB_FLAGS(%rdi)
  306         jmp     alltraps_noen_save_segs
  307         SUPERALIGN_TEXT
  308         .globl  alltraps_noen_k
  309         .type   alltraps_noen_k,@function
  310 alltraps_noen_k:
  311         lfence
  312         movq    %rdi,TF_RDI(%rsp)
  313 alltraps_noen_save_segs:
  314         SAVE_SEGS
  315         movq    %rdx,TF_RDX(%rsp)
  316         movq    %rax,TF_RAX(%rsp)
  317         movq    %rcx,TF_RCX(%rsp)
  318         testb   $SEL_RPL_MASK,TF_CS(%rsp)
  319         jz      alltraps_pushregs_no_rax
  320         call    handle_ibrs_entry
  321         jmp     alltraps_pushregs_no_rax
  322 
  323 IDTVEC(dblfault)
  324         subq    $TF_ERR,%rsp
  325         movl    $T_DOUBLEFLT,TF_TRAPNO(%rsp)
  326         movq    $0,TF_ADDR(%rsp)
  327         movq    $0,TF_ERR(%rsp)
  328         movq    %rdi,TF_RDI(%rsp)
  329         movq    %rsi,TF_RSI(%rsp)
  330         movq    %rdx,TF_RDX(%rsp)
  331         movq    %rcx,TF_RCX(%rsp)
  332         movq    %r8,TF_R8(%rsp)
  333         movq    %r9,TF_R9(%rsp)
  334         movq    %rax,TF_RAX(%rsp)
  335         movq    %rbx,TF_RBX(%rsp)
  336         movq    %rbp,TF_RBP(%rsp)
  337         movq    %r10,TF_R10(%rsp)
  338         movq    %r11,TF_R11(%rsp)
  339         movq    %r12,TF_R12(%rsp)
  340         movq    %r13,TF_R13(%rsp)
  341         movq    %r14,TF_R14(%rsp)
  342         movq    %r15,TF_R15(%rsp)
  343         SAVE_SEGS
  344         movl    $TF_HASSEGS,TF_FLAGS(%rsp)
  345         pushfq
  346         andq    $~(PSL_D | PSL_AC),(%rsp)
  347         popfq
  348         testb   $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
  349         jz      1f                      /* already running with kernel GS.base */
  350         swapgs
  351 1:      lfence
  352         movq    %cr3,%rax
  353         movq    %rax,PCPU(SAVED_UCR3)
  354         movq    PCPU(KCR3),%rax
  355         cmpq    $~0,%rax
  356         je      2f
  357         movq    %rax,%cr3
  358 2:      movq    %rsp,%rdi
  359         call    dblfault_handler
  360 3:      hlt
  361         jmp     3b
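       /*
        * A double fault is treated as unrecoverable: dblfault_handler()
        * is not expected to return, and the hlt loop above is the
        * backstop if it does.  The 'cmpq $~0' test against PCPU(KCR3)
        * follows the convention used throughout this file: KCR3/UCR3
        * hold ~0 when page-table isolation is not in use, in which case
        * %cr3 is left alone.
        */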
  362 
  363         ALIGN_TEXT
  364 IDTVEC(page_pti)
  365         testb   $SEL_RPL_MASK,PTI_CS-PTI_ERR(%rsp)
  366         jz      page_k
  367         swapgs
  368         lfence
  369         pushq   %rax
  370         movq    %cr3,%rax
  371         movq    %rax,PCPU(SAVED_UCR3)
  372         cmpq    $~0,PCPU(UCR3)
  373         jne     1f
  374         popq    %rax
  375         jmp     page_u
  376 1:      pushq   %rdx
  377         PTI_UUENTRY has_err=1
  378         jmp     page_u
  379         ALIGN_TEXT
  380 IDTVEC(page)
  381         testb   $SEL_RPL_MASK,TF_CS-TF_ERR(%rsp) /* Did we come from kernel? */
  382         jnz     page_u_swapgs           /* already running with kernel GS.base */
  383 page_k:
  384         lfence
  385         subq    $TF_ERR,%rsp
  386         movq    %rdi,TF_RDI(%rsp)       /* free up GP registers */
  387         movq    %rax,TF_RAX(%rsp)
  388         movq    %rdx,TF_RDX(%rsp)
  389         movq    %rcx,TF_RCX(%rsp)
  390         jmp     page_cr2
  391         ALIGN_TEXT
  392 page_u_swapgs:
  393         swapgs
  394         lfence
  395 page_u:
  396         subq    $TF_ERR,%rsp
  397         movq    %rdi,TF_RDI(%rsp)
  398         movq    %rax,TF_RAX(%rsp)
  399         movq    %rdx,TF_RDX(%rsp)
  400         movq    %rcx,TF_RCX(%rsp)
  401         movq    PCPU(CURPCB),%rdi
  402         andl    $~PCB_FULL_IRET,PCB_FLAGS(%rdi)
  403         movq    PCPU(SAVED_UCR3),%rax
  404         movq    %rax,PCB_SAVED_UCR3(%rdi)
  405         call    handle_ibrs_entry
  406 page_cr2:
  407         movq    %cr2,%rdi               /* preserve %cr2 before ..  */
  408         movq    %rdi,TF_ADDR(%rsp)      /* enabling interrupts. */
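               /*
                * A nested page fault (for example, one taken by an
                * interrupt handler once interrupts are re-enabled below)
                * would overwrite %cr2, so the faulting address must be
                * captured into tf_addr before the sti.
                */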
  409         SAVE_SEGS
  410         movl    $T_PAGEFLT,TF_TRAPNO(%rsp)
  411         testl   $PSL_I,TF_RFLAGS(%rsp)
  412         jz      alltraps_pushregs_no_rax
  413         sti
  414         jmp     alltraps_pushregs_no_rax
  415 
  416         /*
  417          * We have to special-case this one.  If we get a trap in doreti() at
  418          * the iretq stage, we'll reenter with the wrong gs state.  We'll have
   419          * to do a special swapgs in this case, even coming from the kernel.
  420          * XXX linux has a trap handler for their equivalent of load_gs().
  421          *
  422          * On the stack, we have the hardware interrupt frame to return
  423          * to usermode (faulted) and another frame with error code, for
  424          * fault.  For PTI, copy both frames to the main thread stack.
  425          * Handle the potential 16-byte alignment adjustment incurred
  426          * during the second fault by copying both frames independently
  427          * while unwinding the stack in between.
  428          */
  429         .macro PROTF_ENTRY name,trapno
  430 \name\()_pti_doreti:
  431         swapgs
  432         lfence
  433         cmpq    $~0,PCPU(UCR3)
  434         je      1f
  435         pushq   %rax
  436         pushq   %rdx
  437         movq    PCPU(KCR3),%rax
  438         movq    %rax,%cr3
  439         movq    PCPU(RSP0),%rax
  440         subq    $2*PTI_SIZE-3*8,%rax /* no err, %rax, %rdx in faulted frame */
  441         MOVE_STACKS     (PTI_SIZE / 8)
  442         addq    $PTI_SIZE,%rax
  443         movq    PTI_RSP(%rsp),%rsp
  444         MOVE_STACKS     (PTI_SIZE / 8 - 3)
  445         subq    $PTI_SIZE,%rax
  446         movq    %rax,%rsp
  447         popq    %rdx
  448         popq    %rax
  449 1:      swapgs
  450         jmp     X\name
  451 IDTVEC(\name\()_pti)
  452         cmpq    $doreti_iret,PTI_RIP-2*8(%rsp)
  453         je      \name\()_pti_doreti
  454         testb   $SEL_RPL_MASK,PTI_CS-2*8(%rsp) /* %rax, %rdx not yet pushed */
  455         jz      X\name          /* lfence is not needed until %gs: use */
  456         PTI_UENTRY has_err=1
  457         swapgs  /* fence provided by PTI_UENTRY */
  458 IDTVEC(\name)
  459         subq    $TF_ERR,%rsp
  460         movl    $\trapno,TF_TRAPNO(%rsp)
  461         jmp     prot_addrf
  462         .endm
  463 
  464         PROTF_ENTRY     missing, T_SEGNPFLT
  465         PROTF_ENTRY     stk, T_STKFLT
  466         PROTF_ENTRY     prot, T_PROTFLT
  467 
  468 prot_addrf:
  469         movq    $0,TF_ADDR(%rsp)
  470         movq    %rdi,TF_RDI(%rsp)       /* free up a GP register */
  471         movq    %rax,TF_RAX(%rsp)
  472         movq    %rdx,TF_RDX(%rsp)
  473         movq    %rcx,TF_RCX(%rsp)
  474         movw    %fs,TF_FS(%rsp)
  475         movw    %gs,TF_GS(%rsp)
  476         leaq    doreti_iret(%rip),%rdi
  477         cmpq    %rdi,TF_RIP(%rsp)
  478         je      5f                      /* kernel but with user gsbase!! */
  479         testb   $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
  480         jz      6f                      /* already running with kernel GS.base */
  481         testb   $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
  482         jz      2f
  483         cmpw    $KUF32SEL,TF_FS(%rsp)
  484         jne     1f
  485         rdfsbase %rax
  486 1:      cmpw    $KUG32SEL,TF_GS(%rsp)
  487         jne     2f
  488         rdgsbase %rdx
  489 2:      swapgs
  490         lfence
  491         movq    PCPU(CURPCB),%rdi
  492         testb   $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
  493         jz      4f
  494         cmpw    $KUF32SEL,TF_FS(%rsp)
  495         jne     3f
  496         movq    %rax,PCB_FSBASE(%rdi)
  497 3:      cmpw    $KUG32SEL,TF_GS(%rsp)
  498         jne     4f
  499         movq    %rdx,PCB_GSBASE(%rdi)
  500         orl     $PCB_FULL_IRET,PCB_FLAGS(%rdi)  /* full iret from user #gp */
  501 4:      call    handle_ibrs_entry
  502         movw    %es,TF_ES(%rsp)
  503         movw    %ds,TF_DS(%rsp)
  504         testl   $PSL_I,TF_RFLAGS(%rsp)
  505         jz      alltraps_pushregs_no_rax
  506         sti
  507         jmp     alltraps_pushregs_no_rax
  508 
  509 5:      swapgs
  510 6:      lfence
  511         movq    PCPU(CURPCB),%rdi
  512         jmp     4b
  513 
  514 /*
  515  * Fast syscall entry point.  We enter here with just our new %cs/%ss set,
   516  * and the new privilege level.  We are still running on the old user stack
  517  * pointer.  We have to juggle a few things around to find our stack etc.
  518  * swapgs gives us access to our PCPU space only.
  519  *
   520  * We do not support invoking this with custom segment registers,
  521  * esp. %cs, %ss, %fs, %gs, e.g. using entries from an LDT.
  522  */
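       /*
        * For reference, the register convention seen here matches the
        * FreeBSD amd64 userland side: the syscall number arrives in %rax,
        * arguments in %rdi, %rsi, %rdx, %r10, %r8 and %r9 (%r10 stands in
        * for %rcx, which the SYSCALL instruction clobbers with the return
        * %rip, just as %r11 is clobbered with %rflags).  An illustrative,
        * hand-written userland stub would look roughly like:
        *
        *         movl    $4,%eax         # SYS_write
        *         movq    $1,%rdi         # fd (stdout)
        *         leaq    msg(%rip),%rsi  # caller's buffer
        *         movq    $len,%rdx       # caller's length
        *         syscall
        *
        * The libc wrappers generated from syscalls.master are the real
        * interface in practice; the snippet above is only a sketch.
        */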
  523         SUPERALIGN_TEXT
  524 IDTVEC(fast_syscall_pti)
  525         swapgs
  526         lfence
  527         cmpq    $~0,PCPU(UCR3)
  528         je      fast_syscall_common
  529         movq    %rax,PCPU(SCRATCH_RAX)
  530         movq    PCPU(KCR3),%rax
  531         movq    %rax,%cr3
  532         movq    PCPU(SCRATCH_RAX),%rax
  533         jmp     fast_syscall_common
  534         SUPERALIGN_TEXT
  535 IDTVEC(fast_syscall)
  536         swapgs
  537         lfence
  538 fast_syscall_common:
  539         movq    %rsp,PCPU(SCRATCH_RSP)
  540         movq    PCPU(RSP0),%rsp
  541         /* Now emulate a trapframe. Make the 8 byte alignment odd for call. */
  542         subq    $TF_SIZE,%rsp
  543         /* defer TF_RSP till we have a spare register */
  544         movq    %r11,TF_RFLAGS(%rsp)
  545         movq    %rcx,TF_RIP(%rsp)       /* %rcx original value is in %r10 */
  546         movq    PCPU(SCRATCH_RSP),%r11  /* %r11 already saved */
  547         movq    %r11,TF_RSP(%rsp)       /* user stack pointer */
  548         /*
  549          * Save a few arg registers early to free them for use in
  550          * handle_ibrs_entry().  %r10 is especially tricky.  It is not an
  551          * arg register, but it holds the arg register %rcx.  Profiling
  552          * preserves %rcx, but may clobber %r10.  Profiling may also
  553          * clobber %r11, but %r11 (original %eflags) has been saved.
  554          */
  555         movq    %rax,TF_RAX(%rsp)       /* syscall number */
  556         movq    %rdx,TF_RDX(%rsp)       /* arg 3 */
  557         movq    %r10,TF_RCX(%rsp)       /* arg 4 */
  558         SAVE_SEGS
  559         call    handle_ibrs_entry
  560         movq    PCPU(CURPCB),%r11
  561         andl    $~PCB_FULL_IRET,PCB_FLAGS(%r11)
  562         sti
  563         movq    $KUDSEL,TF_SS(%rsp)
  564         movq    $KUCSEL,TF_CS(%rsp)
  565         movq    $2,TF_ERR(%rsp)
  566         movq    %rdi,TF_RDI(%rsp)       /* arg 1 */
  567         movq    %rsi,TF_RSI(%rsp)       /* arg 2 */
  568         movq    %r8,TF_R8(%rsp)         /* arg 5 */
  569         movq    %r9,TF_R9(%rsp)         /* arg 6 */
  570         movq    %rbx,TF_RBX(%rsp)       /* C preserved */
  571         movq    %rbp,TF_RBP(%rsp)       /* C preserved */
  572         movq    %r12,TF_R12(%rsp)       /* C preserved */
  573         movq    %r13,TF_R13(%rsp)       /* C preserved */
  574         movq    %r14,TF_R14(%rsp)       /* C preserved */
  575         movq    %r15,TF_R15(%rsp)       /* C preserved */
  576         movl    $TF_HASSEGS,TF_FLAGS(%rsp)
  577         FAKE_MCOUNT(TF_RIP(%rsp))
  578         movq    PCPU(CURTHREAD),%rdi
  579         movq    %rsp,TD_FRAME(%rdi)
  580         movl    TF_RFLAGS(%rsp),%esi
  581         andl    $PSL_T,%esi
  582         call    amd64_syscall
  583 1:      movq    PCPU(CURPCB),%rax
  584         /* Disable interrupts before testing PCB_FULL_IRET. */
  585         cli
  586         testl   $PCB_FULL_IRET,PCB_FLAGS(%rax)
  587         jnz     4f
  588         /* Check for and handle AST's on return to userland. */
  589         movq    PCPU(CURTHREAD),%rax
  590         testl   $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax)
  591         jne     3f
  592         call    handle_ibrs_exit
  593         callq   *mds_handler
  594         /* Restore preserved registers. */
  595         MEXITCOUNT
   596         movq    TF_RDI(%rsp),%rdi       /* bonus: preserve arg 1 */
  597         movq    TF_RSI(%rsp),%rsi       /* bonus: preserve arg 2 */
  598         movq    TF_RDX(%rsp),%rdx       /* return value 2 */
  599         movq    TF_RAX(%rsp),%rax       /* return value 1 */
  600         movq    TF_RFLAGS(%rsp),%r11    /* original %rflags */
  601         movq    TF_RIP(%rsp),%rcx       /* original %rip */
  602         movq    TF_RSP(%rsp),%rsp       /* user stack pointer */
  603         xorl    %r8d,%r8d               /* zero the rest of GPRs */
  604         xorl    %r10d,%r10d
  605         cmpq    $~0,PCPU(UCR3)
  606         je      2f
  607         movq    PCPU(UCR3),%r9
  608         movq    %r9,%cr3
  609 2:      xorl    %r9d,%r9d
  610         swapgs
  611         sysretq
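       /*
        * SYSRET reloads %rip from %rcx and %rflags from %r11, which is why
        * those two are restored from tf_rip/tf_rflags above.  The
        * callee-saved registers still hold the user's values courtesy of
        * the C ABI, the return values come back in %rax/%rdx, and the
        * remaining caller-saved registers (%r8-%r10) are zeroed rather
        * than reloaded so that stale kernel values do not leak back to
        * userland.
        */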
  612 
  613 3:      /* AST scheduled. */
  614         sti
  615         movq    %rsp,%rdi
  616         call    ast
  617         jmp     1b
  618 
  619 4:      /* Requested full context restore, use doreti for that. */
  620         MEXITCOUNT
  621         jmp     doreti
  622 
  623 /*
  624  * Here for CYA insurance, in case a "syscall" instruction gets
  625  * issued from 32 bit compatibility mode. MSR_CSTAR has to point
  626  * to *something* if EFER_SCE is enabled.
  627  */
  628 IDTVEC(fast_syscall32)
  629         sysret
  630 
  631 /*
   632  * The DB# handler is very similar to the NMI handler, because a
   633  * 'mov/pop %ss' delays generation of the exception until the next
   634  * instruction is executed, which might be a kernel entry.  So we must
   635  * execute the handler on the IST stack, ready for a non-kernel GSBASE.
  636  */
  637 IDTVEC(dbg)
  638         subq    $TF_RIP,%rsp
  639         movl    $(T_TRCTRAP),TF_TRAPNO(%rsp)
  640         movq    $0,TF_ADDR(%rsp)
  641         movq    $0,TF_ERR(%rsp)
  642         movq    %rdi,TF_RDI(%rsp)
  643         movq    %rsi,TF_RSI(%rsp)
  644         movq    %rdx,TF_RDX(%rsp)
  645         movq    %rcx,TF_RCX(%rsp)
  646         movq    %r8,TF_R8(%rsp)
  647         movq    %r9,TF_R9(%rsp)
  648         movq    %rax,TF_RAX(%rsp)
  649         movq    %rbx,TF_RBX(%rsp)
  650         movq    %rbp,TF_RBP(%rsp)
  651         movq    %r10,TF_R10(%rsp)
  652         movq    %r11,TF_R11(%rsp)
  653         movq    %r12,TF_R12(%rsp)
  654         movq    %r13,TF_R13(%rsp)
  655         movq    %r14,TF_R14(%rsp)
  656         movq    %r15,TF_R15(%rsp)
  657         SAVE_SEGS
  658         movl    $TF_HASSEGS,TF_FLAGS(%rsp)
  659         pushfq
  660         andq    $~(PSL_D | PSL_AC),(%rsp)
  661         popfq
  662         testb   $SEL_RPL_MASK,TF_CS(%rsp)
  663         jnz     dbg_fromuserspace
  664         lfence
  665         /*
  666          * We've interrupted the kernel.  Preserve GS.base in %r12,
  667          * %cr3 in %r13, and possibly lower half of MSR_IA32_SPEC_CTL in %r14d.
  668          */
  669         movl    $MSR_GSBASE,%ecx
  670         rdmsr
  671         movq    %rax,%r12
  672         shlq    $32,%rdx
  673         orq     %rdx,%r12
  674         /* Retrieve and load the canonical value for GS.base. */
  675         movq    TF_SIZE(%rsp),%rdx
  676         movl    %edx,%eax
  677         shrq    $32,%rdx
  678         wrmsr
  679         movq    %cr3,%r13
  680         movq    PCPU(KCR3),%rax
  681         cmpq    $~0,%rax
  682         je      1f
  683         movq    %rax,%cr3
  684 1:      testl   $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
  685         je      2f
  686         movl    $MSR_IA32_SPEC_CTRL,%ecx
  687         rdmsr
  688         movl    %eax,%r14d
  689         call    handle_ibrs_entry
  690 2:      FAKE_MCOUNT(TF_RIP(%rsp))
  691         movq    %rsp,%rdi
  692         call    trap
  693         MEXITCOUNT
  694         testl   $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
  695         je      3f
  696         movl    %r14d,%eax
  697         xorl    %edx,%edx
  698         movl    $MSR_IA32_SPEC_CTRL,%ecx
  699         wrmsr
  700         /*
  701          * Put back the preserved MSR_GSBASE value.
  702          */
  703 3:      movl    $MSR_GSBASE,%ecx
  704         movq    %r12,%rdx
  705         movl    %edx,%eax
  706         shrq    $32,%rdx
  707         wrmsr
  708         movq    %r13,%cr3
  709         RESTORE_REGS
  710         addq    $TF_RIP,%rsp
  711         jmp     doreti_iret
  712 dbg_fromuserspace:
  713         /*
  714          * Switch to kernel GSBASE and kernel page table, and copy frame
  715          * from the IST stack to the normal kernel stack, since trap()
  716          * re-enables interrupts, and since we might trap on DB# while
  717          * in trap().
  718          */
  719         swapgs
  720         lfence
  721         movq    PCPU(KCR3),%rax
  722         cmpq    $~0,%rax
  723         je      1f
  724         movq    %rax,%cr3
  725 1:      movq    PCPU(RSP0),%rax
  726         movl    $TF_SIZE,%ecx
  727         subq    %rcx,%rax
  728         movq    %rax,%rdi
  729         movq    %rsp,%rsi
  730         rep;movsb
  731         movq    %rax,%rsp
  732         call    handle_ibrs_entry
  733         movq    PCPU(CURPCB),%rdi
  734         orl     $PCB_FULL_IRET,PCB_FLAGS(%rdi)
  735         testb   $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
  736         jz      3f
  737         cmpw    $KUF32SEL,TF_FS(%rsp)
  738         jne     2f
  739         rdfsbase %rax
  740         movq    %rax,PCB_FSBASE(%rdi)
  741 2:      cmpw    $KUG32SEL,TF_GS(%rsp)
  742         jne     3f
  743         movl    $MSR_KGSBASE,%ecx
  744         rdmsr
  745         shlq    $32,%rdx
  746         orq     %rdx,%rax
  747         movq    %rax,PCB_GSBASE(%rdi)
  748 3:      jmp     calltrap
  749 
  750 /*
  751  * NMI handling is special.
  752  *
  753  * First, NMIs do not respect the state of the processor's RFLAGS.IF
  754  * bit.  The NMI handler may be entered at any time, including when
  755  * the processor is in a critical section with RFLAGS.IF == 0.
  756  * The processor's GS.base value could be invalid on entry to the
  757  * handler.
  758  *
  759  * Second, the processor treats NMIs specially, blocking further NMIs
  760  * until an 'iretq' instruction is executed.  We thus need to execute
  761  * the NMI handler with interrupts disabled, to prevent a nested interrupt
  762  * from executing an 'iretq' instruction and inadvertently taking the
  763  * processor out of NMI mode.
  764  *
  765  * Third, the NMI handler runs on its own stack (tss_ist2). The canonical
  766  * GS.base value for the processor is stored just above the bottom of its
  767  * NMI stack.  For NMIs taken from kernel mode, the current value in
  768  * the processor's GS.base is saved at entry to C-preserved register %r12,
  769  * the canonical value for GS.base is then loaded into the processor, and
  770  * the saved value is restored at exit time.  For NMIs taken from user mode,
  771  * the cheaper 'SWAPGS' instructions are used for swapping GS.base.
  772  */
  773 
  774 IDTVEC(nmi)
  775         subq    $TF_RIP,%rsp
  776         movl    $(T_NMI),TF_TRAPNO(%rsp)
  777         movq    $0,TF_ADDR(%rsp)
  778         movq    $0,TF_ERR(%rsp)
  779         movq    %rdi,TF_RDI(%rsp)
  780         movq    %rsi,TF_RSI(%rsp)
  781         movq    %rdx,TF_RDX(%rsp)
  782         movq    %rcx,TF_RCX(%rsp)
  783         movq    %r8,TF_R8(%rsp)
  784         movq    %r9,TF_R9(%rsp)
  785         movq    %rax,TF_RAX(%rsp)
  786         movq    %rbx,TF_RBX(%rsp)
  787         movq    %rbp,TF_RBP(%rsp)
  788         movq    %r10,TF_R10(%rsp)
  789         movq    %r11,TF_R11(%rsp)
  790         movq    %r12,TF_R12(%rsp)
  791         movq    %r13,TF_R13(%rsp)
  792         movq    %r14,TF_R14(%rsp)
  793         movq    %r15,TF_R15(%rsp)
  794         SAVE_SEGS
  795         movl    $TF_HASSEGS,TF_FLAGS(%rsp)
  796         pushfq
  797         andq    $~(PSL_D | PSL_AC),(%rsp)
  798         popfq
  799         xorl    %ebx,%ebx
  800         testb   $SEL_RPL_MASK,TF_CS(%rsp)
  801         jnz     nmi_fromuserspace
  802         /*
  803          * We've interrupted the kernel.  Preserve GS.base in %r12,
  804          * %cr3 in %r13, and possibly lower half of MSR_IA32_SPEC_CTL in %r14d.
  805          */
  806         lfence
  807         movl    $MSR_GSBASE,%ecx
  808         rdmsr
  809         movq    %rax,%r12
  810         shlq    $32,%rdx
  811         orq     %rdx,%r12
  812         /* Retrieve and load the canonical value for GS.base. */
  813         movq    TF_SIZE(%rsp),%rdx
  814         movl    %edx,%eax
  815         shrq    $32,%rdx
  816         wrmsr
  817         movq    %cr3,%r13
  818         movq    PCPU(KCR3),%rax
  819         cmpq    $~0,%rax
  820         je      1f
  821         movq    %rax,%cr3
  822 1:      testl   $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
  823         je      nmi_calltrap
  824         movl    $MSR_IA32_SPEC_CTRL,%ecx
  825         rdmsr
  826         movl    %eax,%r14d
  827         call    handle_ibrs_entry
  828         jmp     nmi_calltrap
  829 nmi_fromuserspace:
  830         incl    %ebx
  831         swapgs
  832         lfence
  833         movq    %cr3,%r13
  834         movq    PCPU(KCR3),%rax
  835         cmpq    $~0,%rax
  836         je      1f
  837         movq    %rax,%cr3
  838 1:      call    handle_ibrs_entry
  839         movq    PCPU(CURPCB),%rdi
  840         testq   %rdi,%rdi
  841         jz      3f
  842         orl     $PCB_FULL_IRET,PCB_FLAGS(%rdi)
  843         testb   $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
  844         jz      3f
  845         cmpw    $KUF32SEL,TF_FS(%rsp)
  846         jne     2f
  847         rdfsbase %rax
  848         movq    %rax,PCB_FSBASE(%rdi)
  849 2:      cmpw    $KUG32SEL,TF_GS(%rsp)
  850         jne     3f
  851         movl    $MSR_KGSBASE,%ecx
  852         rdmsr
  853         shlq    $32,%rdx
  854         orq     %rdx,%rax
  855         movq    %rax,PCB_GSBASE(%rdi)
  856 3:
  857 /* Note: this label is also used by ddb and gdb: */
  858 nmi_calltrap:
  859         FAKE_MCOUNT(TF_RIP(%rsp))
  860         movq    %rsp,%rdi
  861         call    trap
  862         MEXITCOUNT
  863 #ifdef HWPMC_HOOKS
  864         /*
  865          * Capture a userspace callchain if needed.
  866          *
  867          * - Check if the current trap was from user mode.
  868          * - Check if the current thread is valid.
  869          * - Check if the thread requires a user call chain to be
  870          *   captured.
  871          *
  872          * We are still in NMI mode at this point.
  873          */
  874         testl   %ebx,%ebx
  875         jz      nocallchain     /* not from userspace */
  876         movq    PCPU(CURTHREAD),%rax
  877         orq     %rax,%rax       /* curthread present? */
  878         jz      nocallchain
  879         /*
  880          * Move execution to the regular kernel stack, because we
  881          * committed to return through doreti.
  882          */
  883         movq    %rsp,%rsi       /* source stack pointer */
  884         movq    $TF_SIZE,%rcx
  885         movq    PCPU(RSP0),%rdx
  886         subq    %rcx,%rdx
  887         movq    %rdx,%rdi       /* destination stack pointer */
  888         shrq    $3,%rcx         /* trap frame size in long words */
  889         pushfq
  890         andq    $~(PSL_D | PSL_AC),(%rsp)
  891         popfq
  892         rep
  893         movsq                   /* copy trapframe */
  894         movq    %rdx,%rsp       /* we are on the regular kstack */
  895 
  896         testl   $TDP_CALLCHAIN,TD_PFLAGS(%rax) /* flagged for capture? */
  897         jz      nocallchain
  898         /*
  899          * A user callchain is to be captured, so:
  900          * - Take the processor out of "NMI" mode by faking an "iret",
  901          *   to allow for nested NMI interrupts.
  902          * - Enable interrupts, so that copyin() can work.
  903          */
  904         movl    %ss,%eax
  905         pushq   %rax            /* tf_ss */
  906         pushq   %rdx            /* tf_rsp (on kernel stack) */
  907         pushfq                  /* tf_rflags */
  908         movl    %cs,%eax
  909         pushq   %rax            /* tf_cs */
  910         pushq   $outofnmi       /* tf_rip */
  911         iretq
  912 outofnmi:
  913         /*
  914          * At this point the processor has exited NMI mode and is running
  915          * with interrupts turned off on the normal kernel stack.
  916          *
  917          * If a pending NMI gets recognized at or after this point, it
  918          * will cause a kernel callchain to be traced.
  919          *
  920          * We turn interrupts back on, and call the user callchain capture hook.
  921          */
  922         movq    pmc_hook,%rax
  923         orq     %rax,%rax
  924         jz      nocallchain
  925         movq    PCPU(CURTHREAD),%rdi            /* thread */
  926         movq    $PMC_FN_USER_CALLCHAIN,%rsi     /* command */
  927         movq    %rsp,%rdx                       /* frame */
  928         sti
  929         call    *%rax
  930         cli
  931 nocallchain:
  932 #endif
   933         testl   %ebx,%ebx       /* %ebx != 0 => return to userland */
  934         jnz     doreti_exit
  935         /*
  936          * Restore speculation control MSR, if preserved.
  937          */
  938         testl   $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
  939         je      1f
  940         movl    %r14d,%eax
  941         xorl    %edx,%edx
  942         movl    $MSR_IA32_SPEC_CTRL,%ecx
  943         wrmsr
  944         /*
  945          * Put back the preserved MSR_GSBASE value.
  946          */
  947 1:      movl    $MSR_GSBASE,%ecx
  948         movq    %r12,%rdx
  949         movl    %edx,%eax
  950         shrq    $32,%rdx
  951         wrmsr
  952         cmpb    $0, nmi_flush_l1d_sw(%rip)
  953         je      2f
  954         call    flush_l1d_sw            /* bhyve L1TF assist */
  955 2:      movq    %r13,%cr3
  956         RESTORE_REGS
  957         addq    $TF_RIP,%rsp
  958         jmp     doreti_iret
  959 
  960 /*
  961  * MC# handling is similar to NMI.
  962  *
  963  * As with NMIs, machine check exceptions do not respect RFLAGS.IF and
  964  * can occur at any time with a GS.base value that does not correspond
  965  * to the privilege level in CS.
  966  *
  967  * Machine checks are not unblocked by iretq, but it is best to run
  968  * the handler with interrupts disabled since the exception may have
  969  * interrupted a critical section.
  970  *
  971  * The MC# handler runs on its own stack (tss_ist3).  The canonical
  972  * GS.base value for the processor is stored just above the bottom of
  973  * its MC# stack.  For exceptions taken from kernel mode, the current
  974  * value in the processor's GS.base is saved at entry to C-preserved
  975  * register %r12, the canonical value for GS.base is then loaded into
  976  * the processor, and the saved value is restored at exit time.  For
  977  * exceptions taken from user mode, the cheaper 'SWAPGS' instructions
  978  * are used for swapping GS.base.
  979  */
  980 
  981 IDTVEC(mchk)
  982         subq    $TF_RIP,%rsp
  983         movl    $(T_MCHK),TF_TRAPNO(%rsp)
  984         movq    $0,TF_ADDR(%rsp)
  985         movq    $0,TF_ERR(%rsp)
  986         movq    %rdi,TF_RDI(%rsp)
  987         movq    %rsi,TF_RSI(%rsp)
  988         movq    %rdx,TF_RDX(%rsp)
  989         movq    %rcx,TF_RCX(%rsp)
  990         movq    %r8,TF_R8(%rsp)
  991         movq    %r9,TF_R9(%rsp)
  992         movq    %rax,TF_RAX(%rsp)
  993         movq    %rbx,TF_RBX(%rsp)
  994         movq    %rbp,TF_RBP(%rsp)
  995         movq    %r10,TF_R10(%rsp)
  996         movq    %r11,TF_R11(%rsp)
  997         movq    %r12,TF_R12(%rsp)
  998         movq    %r13,TF_R13(%rsp)
  999         movq    %r14,TF_R14(%rsp)
 1000         movq    %r15,TF_R15(%rsp)
 1001         SAVE_SEGS
 1002         movl    $TF_HASSEGS,TF_FLAGS(%rsp)
 1003         pushfq
 1004         andq    $~(PSL_D | PSL_AC),(%rsp)
 1005         popfq
 1006         xorl    %ebx,%ebx
 1007         testb   $SEL_RPL_MASK,TF_CS(%rsp)
 1008         jnz     mchk_fromuserspace
 1009         /*
 1010          * We've interrupted the kernel.  Preserve GS.base in %r12,
 1011          * %cr3 in %r13, and possibly lower half of MSR_IA32_SPEC_CTL in %r14d.
 1012          */
 1013         movl    $MSR_GSBASE,%ecx
 1014         rdmsr
 1015         movq    %rax,%r12
 1016         shlq    $32,%rdx
 1017         orq     %rdx,%r12
 1018         /* Retrieve and load the canonical value for GS.base. */
 1019         movq    TF_SIZE(%rsp),%rdx
 1020         movl    %edx,%eax
 1021         shrq    $32,%rdx
 1022         wrmsr
 1023         movq    %cr3,%r13
 1024         movq    PCPU(KCR3),%rax
 1025         cmpq    $~0,%rax
 1026         je      1f
 1027         movq    %rax,%cr3
 1028 1:      testl   $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
 1029         je      mchk_calltrap
 1030         movl    $MSR_IA32_SPEC_CTRL,%ecx
 1031         rdmsr
 1032         movl    %eax,%r14d
 1033         call    handle_ibrs_entry
 1034         jmp     mchk_calltrap
 1035 mchk_fromuserspace:
 1036         incl    %ebx
 1037         swapgs
 1038         movq    %cr3,%r13
 1039         movq    PCPU(KCR3),%rax
 1040         cmpq    $~0,%rax
 1041         je      1f
 1042         movq    %rax,%cr3
 1043 1:      call    handle_ibrs_entry
 1044 /* Note: this label is also used by ddb and gdb: */
 1045 mchk_calltrap:
 1046         FAKE_MCOUNT(TF_RIP(%rsp))
 1047         movq    %rsp,%rdi
 1048         call    mca_intr
 1049         MEXITCOUNT
  1050         testl   %ebx,%ebx       /* %ebx != 0 => return to userland */
 1051         jnz     doreti_exit
 1052         /*
 1053          * Restore speculation control MSR, if preserved.
 1054          */
 1055         testl   $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
 1056         je      1f
 1057         movl    %r14d,%eax
 1058         xorl    %edx,%edx
 1059         movl    $MSR_IA32_SPEC_CTRL,%ecx
 1060         wrmsr
 1061         /*
 1062          * Put back the preserved MSR_GSBASE value.
 1063          */
 1064 1:      movl    $MSR_GSBASE,%ecx
 1065         movq    %r12,%rdx
 1066         movl    %edx,%eax
 1067         shrq    $32,%rdx
 1068         wrmsr
 1069         movq    %r13,%cr3
 1070         RESTORE_REGS
 1071         addq    $TF_RIP,%rsp
 1072         jmp     doreti_iret
 1073 
 1074 ENTRY(fork_trampoline)
 1075         movq    %r12,%rdi               /* function */
 1076         movq    %rbx,%rsi               /* arg1 */
 1077         movq    %rsp,%rdx               /* trapframe pointer */
 1078         call    fork_exit
 1079         MEXITCOUNT
 1080         jmp     doreti                  /* Handle any ASTs */
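       /*
        * Newly created threads begin execution here: cpu_fork() and
        * cpu_copy_thread() arrange for the saved %r12 and %rbx of the new
        * thread to hold a callout function and its argument and build a
        * trapframe on the new stack, so fork_exit() can finish the switch,
        * invoke the callout, and return through doreti.
        */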
 1081 
 1082 /*
 1083  * To efficiently implement classification of trap and interrupt handlers
 1084  * for profiling, there must be only trap handlers between the labels btrap
 1085  * and bintr, and only interrupt handlers between the labels bintr and
 1086  * eintr.  This is implemented (partly) by including files that contain
 1087  * some of the handlers.  Before including the files, set up a normal asm
  1088  * environment so that the included files don't need to know that they are
 1089  * included.
 1090  */
 1091 
 1092 #ifdef COMPAT_FREEBSD32
 1093         .data
 1094         .p2align 4
 1095         .text
 1096         SUPERALIGN_TEXT
 1097 
 1098 #include <amd64/ia32/ia32_exception.S>
 1099 #endif
 1100 
 1101         .data
 1102         .p2align 4
 1103         .text
 1104         SUPERALIGN_TEXT
 1105 MCOUNT_LABEL(bintr)
 1106 
 1107 #include <amd64/amd64/apic_vector.S>
 1108 
 1109 #ifdef DEV_ATPIC
 1110         .data
 1111         .p2align 4
 1112         .text
 1113         SUPERALIGN_TEXT
 1114 
 1115 #include <amd64/amd64/atpic_vector.S>
 1116 #endif
 1117 
 1118         .text
 1119 MCOUNT_LABEL(eintr)
 1120 
 1121 /*
 1122  * void doreti(struct trapframe)
 1123  *
 1124  * Handle return from interrupts, traps and syscalls.
 1125  */
 1126         .text
 1127         SUPERALIGN_TEXT
 1128         .type   doreti,@function
 1129         .globl  doreti
 1130 doreti:
 1131         FAKE_MCOUNT($bintr)             /* init "from" bintr -> doreti */
 1132         /*
 1133          * Check if ASTs can be handled now.
 1134          */
 1135         testb   $SEL_RPL_MASK,TF_CS(%rsp) /* are we returning to user mode? */
 1136         jz      doreti_exit             /* can't handle ASTs now if not */
 1137 
 1138 doreti_ast:
 1139         /*
 1140          * Check for ASTs atomically with returning.  Disabling CPU
 1141          * interrupts provides sufficient locking even in the SMP case,
 1142          * since we will be informed of any new ASTs by an IPI.
 1143          */
 1144         cli
 1145         movq    PCPU(CURTHREAD),%rax
 1146         testl   $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax)
 1147         je      doreti_exit
 1148         sti
 1149         movq    %rsp,%rdi       /* pass a pointer to the trapframe */
 1150         call    ast
 1151         jmp     doreti_ast
 1152 
 1153         /*
 1154          * doreti_exit: pop registers, iret.
 1155          *
 1156          *      The segment register pop is a special case, since it may
 1157          *      fault if (for example) a sigreturn specifies bad segment
 1158          *      registers.  The fault is handled in trap.c.
 1159          */
 1160 doreti_exit:
 1161         MEXITCOUNT
 1162         movq    PCPU(CURPCB),%r8
 1163 
 1164         /*
  1165          * Do not reload segment registers for the kernel.
  1166          * Since we do not reload segment registers with sane
  1167          * values on kernel entry, descriptors referenced by
  1168          * segment registers might not be valid.  This is fatal
  1169          * for user mode, but is not a problem for the kernel.
 1170          */
 1171         testb   $SEL_RPL_MASK,TF_CS(%rsp)
 1172         jz      ld_regs
 1173         testl   $PCB_FULL_IRET,PCB_FLAGS(%r8)
 1174         jz      ld_regs
 1175         andl    $~PCB_FULL_IRET,PCB_FLAGS(%r8)
 1176         testl   $TF_HASSEGS,TF_FLAGS(%rsp)
 1177         je      set_segs
 1178 
 1179 do_segs:
 1180         /* Restore %fs and fsbase */
 1181         movw    TF_FS(%rsp),%ax
 1182         .globl  ld_fs
 1183 ld_fs:
 1184         movw    %ax,%fs
 1185         cmpw    $KUF32SEL,%ax
 1186         jne     1f
 1187         movl    $MSR_FSBASE,%ecx
 1188         movl    PCB_FSBASE(%r8),%eax
 1189         movl    PCB_FSBASE+4(%r8),%edx
 1190         .globl  ld_fsbase
 1191 ld_fsbase:
 1192         wrmsr
 1193 1:
 1194         /* Restore %gs and gsbase */
 1195         movw    TF_GS(%rsp),%si
 1196         pushfq
 1197         cli
 1198         movl    $MSR_GSBASE,%ecx
 1199         /* Save current kernel %gs base into %r12d:%r13d */
 1200         rdmsr
 1201         movl    %eax,%r12d
 1202         movl    %edx,%r13d
 1203         .globl  ld_gs
 1204 ld_gs:
 1205         movw    %si,%gs
 1206         /* Save user %gs base into %r14d:%r15d */
 1207         rdmsr
 1208         movl    %eax,%r14d
 1209         movl    %edx,%r15d
 1210         /* Restore kernel %gs base */
 1211         movl    %r12d,%eax
 1212         movl    %r13d,%edx
 1213         wrmsr
 1214         popfq
 1215         /*
 1216          * Restore user %gs base, either from PCB if used for TLS, or
 1217          * from the previously saved msr read.
 1218          */
 1219         movl    $MSR_KGSBASE,%ecx
 1220         cmpw    $KUG32SEL,%si
 1221         jne     1f
 1222         movl    PCB_GSBASE(%r8),%eax
 1223         movl    PCB_GSBASE+4(%r8),%edx
 1224         jmp     ld_gsbase
 1225 1:
 1226         movl    %r14d,%eax
 1227         movl    %r15d,%edx
 1228         .globl  ld_gsbase
 1229 ld_gsbase:
 1230         wrmsr   /* May trap if non-canonical, but only for TLS. */
 1231         .globl  ld_es
 1232 ld_es:
 1233         movw    TF_ES(%rsp),%es
 1234         .globl  ld_ds
 1235 ld_ds:
 1236         movw    TF_DS(%rsp),%ds
 1237 ld_regs:
 1238         RESTORE_REGS
 1239         testb   $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
 1240         jz      2f                      /* keep running with kernel GS.base */
 1241         cli
 1242         call    handle_ibrs_exit_rs
 1243         callq   *mds_handler
 1244         cmpq    $~0,PCPU(UCR3)
 1245         je      1f
 1246         pushq   %rdx
 1247         movq    PCPU(PTI_RSP0),%rdx
 1248         subq    $PTI_SIZE,%rdx
 1249         movq    %rax,PTI_RAX(%rdx)
 1250         popq    %rax
 1251         movq    %rax,PTI_RDX(%rdx)
 1252         movq    TF_RIP(%rsp),%rax
 1253         movq    %rax,PTI_RIP(%rdx)
 1254         movq    TF_CS(%rsp),%rax
 1255         movq    %rax,PTI_CS(%rdx)
 1256         movq    TF_RFLAGS(%rsp),%rax
 1257         movq    %rax,PTI_RFLAGS(%rdx)
 1258         movq    TF_RSP(%rsp),%rax
 1259         movq    %rax,PTI_RSP(%rdx)
 1260         movq    TF_SS(%rsp),%rax
 1261         movq    %rax,PTI_SS(%rdx)
 1262         movq    PCPU(UCR3),%rax
 1263         swapgs
 1264         movq    %rdx,%rsp
 1265         movq    %rax,%cr3
 1266         popq    %rdx
 1267         popq    %rax
 1268         addq    $8,%rsp
 1269         jmp     doreti_iret
 1270 1:      swapgs
 1271 2:      addq    $TF_RIP,%rsp
 1272         .globl  doreti_iret
 1273 doreti_iret:
 1274         iretq
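       /*
        * doreti_iret is a global label because this iretq itself can
        * fault (e.g. on a bad selector from a corrupted signal context):
        * the prot_addrf and PROTF_ENTRY code above compare the faulting
        * %rip against it, and trap() redirects such a fault to
        * doreti_iret_fault below.
        */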
 1275 
 1276 set_segs:
 1277         movw    $KUDSEL,%ax
 1278         movw    %ax,TF_DS(%rsp)
 1279         movw    %ax,TF_ES(%rsp)
 1280         movw    $KUF32SEL,TF_FS(%rsp)
 1281         movw    $KUG32SEL,TF_GS(%rsp)
 1282         jmp     do_segs
 1283 
 1284         /*
 1285          * doreti_iret_fault.  Alternative return code for
 1286          * the case where we get a fault in the doreti_exit code
 1287          * above.  trap() (amd64/amd64/trap.c) catches this specific
 1288          * case, sends the process a signal and continues in the
 1289          * corresponding place in the code below.
 1290          */
 1291         ALIGN_TEXT
 1292         .globl  doreti_iret_fault
 1293 doreti_iret_fault:
 1294         subq    $TF_RIP,%rsp            /* space including tf_err, tf_trapno */
 1295         movq    %rax,TF_RAX(%rsp)
 1296         movq    %rdx,TF_RDX(%rsp)
 1297         movq    %rcx,TF_RCX(%rsp)
 1298         call    handle_ibrs_entry
 1299         testb   $SEL_RPL_MASK,TF_CS(%rsp)
 1300         jz      1f
 1301         sti
 1302 1:
 1303         SAVE_SEGS
 1304         movl    $TF_HASSEGS,TF_FLAGS(%rsp)
 1305         movq    %rdi,TF_RDI(%rsp)
 1306         movq    %rsi,TF_RSI(%rsp)
 1307         movq    %r8,TF_R8(%rsp)
 1308         movq    %r9,TF_R9(%rsp)
 1309         movq    %rbx,TF_RBX(%rsp)
 1310         movq    %rbp,TF_RBP(%rsp)
 1311         movq    %r10,TF_R10(%rsp)
 1312         movq    %r11,TF_R11(%rsp)
 1313         movq    %r12,TF_R12(%rsp)
 1314         movq    %r13,TF_R13(%rsp)
 1315         movq    %r14,TF_R14(%rsp)
 1316         movq    %r15,TF_R15(%rsp)
 1317         movl    $T_PROTFLT,TF_TRAPNO(%rsp)
 1318         movq    $0,TF_ERR(%rsp) /* XXX should be the error code */
 1319         movq    $0,TF_ADDR(%rsp)
 1320         FAKE_MCOUNT(TF_RIP(%rsp))
 1321         jmp     calltrap
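       /*
        * The *_load_fault entries below serve the same purpose for the
        * individual segment and base reloads in doreti_exit: when one of
        * the global ld_* instructions above faults, trap() resumes
        * execution at the matching *_load_fault label, which reports the
        * fault and substitutes a sane selector or base value before
        * retrying the return through doreti.
        */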
 1322 
 1323         ALIGN_TEXT
 1324         .globl  ds_load_fault
 1325 ds_load_fault:
 1326         movl    $T_PROTFLT,TF_TRAPNO(%rsp)
 1327         testb   $SEL_RPL_MASK,TF_CS(%rsp)
 1328         jz      1f
 1329         sti
 1330 1:
 1331         movq    %rsp,%rdi
 1332         call    trap
 1333         movw    $KUDSEL,TF_DS(%rsp)
 1334         jmp     doreti
 1335 
 1336         ALIGN_TEXT
 1337         .globl  es_load_fault
 1338 es_load_fault:
 1339         movl    $T_PROTFLT,TF_TRAPNO(%rsp)
 1340         testl   $PSL_I,TF_RFLAGS(%rsp)
 1341         jz      1f
 1342         sti
 1343 1:
 1344         movq    %rsp,%rdi
 1345         call    trap
 1346         movw    $KUDSEL,TF_ES(%rsp)
 1347         jmp     doreti
 1348 
 1349         ALIGN_TEXT
 1350         .globl  fs_load_fault
 1351 fs_load_fault:
 1352         testl   $PSL_I,TF_RFLAGS(%rsp)
 1353         jz      1f
 1354         sti
 1355 1:
 1356         movl    $T_PROTFLT,TF_TRAPNO(%rsp)
 1357         movq    %rsp,%rdi
 1358         call    trap
 1359         movw    $KUF32SEL,TF_FS(%rsp)
 1360         jmp     doreti
 1361 
 1362         ALIGN_TEXT
 1363         .globl  gs_load_fault
 1364 gs_load_fault:
 1365         popfq
 1366         movl    $T_PROTFLT,TF_TRAPNO(%rsp)
 1367         testl   $PSL_I,TF_RFLAGS(%rsp)
 1368         jz      1f
 1369         sti
 1370 1:
 1371         movq    %rsp,%rdi
 1372         call    trap
 1373         movw    $KUG32SEL,TF_GS(%rsp)
 1374         jmp     doreti
 1375 
 1376         ALIGN_TEXT
 1377         .globl  fsbase_load_fault
 1378 fsbase_load_fault:
 1379         movl    $T_PROTFLT,TF_TRAPNO(%rsp)
 1380         testl   $PSL_I,TF_RFLAGS(%rsp)
 1381         jz      1f
 1382         sti
 1383 1:
 1384         movq    %rsp,%rdi
 1385         call    trap
 1386         movq    PCPU(CURTHREAD),%r8
 1387         movq    TD_PCB(%r8),%r8
 1388         movq    $0,PCB_FSBASE(%r8)
 1389         jmp     doreti
 1390 
 1391         ALIGN_TEXT
 1392         .globl  gsbase_load_fault
 1393 gsbase_load_fault:
 1394         movl    $T_PROTFLT,TF_TRAPNO(%rsp)
 1395         testl   $PSL_I,TF_RFLAGS(%rsp)
 1396         jz      1f
 1397         sti
 1398 1:
 1399         movq    %rsp,%rdi
 1400         call    trap
 1401         movq    PCPU(CURTHREAD),%r8
 1402         movq    TD_PCB(%r8),%r8
 1403         movq    $0,PCB_GSBASE(%r8)
 1404         jmp     doreti
 1405 
 1406 #ifdef HWPMC_HOOKS
 1407         ENTRY(end_exceptions)
 1408 #endif

This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.