The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/amd64/amd64/exception.S

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1989, 1990 William F. Jolitz.
    3  * Copyright (c) 1990 The Regents of the University of California.
    4  * Copyright (c) 2007-2018 The FreeBSD Foundation
    5  * All rights reserved.
    6  *
    7  * Portions of this software were developed by A. Joseph Koshy under
    8  * sponsorship from the FreeBSD Foundation and Google, Inc.
    9  *
   10  * Portions of this software were developed by
   11  * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from
   12  * the FreeBSD Foundation.
   13  *
   14  * Redistribution and use in source and binary forms, with or without
   15  * modification, are permitted provided that the following conditions
   16  * are met:
   17  * 1. Redistributions of source code must retain the above copyright
   18  *    notice, this list of conditions and the following disclaimer.
   19  * 2. Redistributions in binary form must reproduce the above copyright
   20  *    notice, this list of conditions and the following disclaimer in the
   21  *    documentation and/or other materials provided with the distribution.
   22  * 4. Neither the name of the University nor the names of its contributors
   23  *    may be used to endorse or promote products derived from this software
   24  *    without specific prior written permission.
   25  *
   26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   36  * SUCH DAMAGE.
   37  *
   38  * $FreeBSD$
   39  */
   40 
   41 #include "opt_atpic.h"
   42 #include "opt_compat.h"
   43 #include "opt_hwpmc_hooks.h"
   44 
   45 #include "assym.s"
   46 
   47 #include <machine/asmacros.h>
   48 #include <machine/psl.h>
   49 #include <machine/trap.h>
   50 #include <machine/specialreg.h>
   51 
    52 #ifdef KDTRACE_HOOKS
    53         .bss                            /* zero-initialized storage for DTrace fbt hooks */
    54         .globl  dtrace_invop_jump_addr
    55         .align  8
    56         .type   dtrace_invop_jump_addr,@object
    57         .size   dtrace_invop_jump_addr,8
    58 dtrace_invop_jump_addr:                 /* DTrace invalid-opcode hook; 0 = not registered */
    59         .zero   8
    60         .globl  dtrace_invop_calltrap_addr
    61         .align  8
    62         .type   dtrace_invop_calltrap_addr,@object
    63         .size   dtrace_invop_calltrap_addr,8
    64 dtrace_invop_calltrap_addr:             /* set to &calltrap before jumping to the hook */
    65         .zero   8
    66 #endif
    67         .text
    68 #ifdef HWPMC_HOOKS
    69         ENTRY(start_exceptions)         /* NOTE(review): presumably paired with an end_exceptions label for hwpmc range checks — confirm */
    70 #endif
   71 
   72 /*****************************************************************************/
   73 /* Trap handling                                                             */
   74 /*****************************************************************************/
   75 /*
   76  * Trap and fault vector routines.
   77  *
   78  * All traps are 'interrupt gates', SDT_SYSIGT.  An interrupt gate pushes
   79  * state on the stack but also disables interrupts.  This is important for
   80  * us for the use of the swapgs instruction.  We cannot be interrupted
   81  * until the GS.base value is correct.  For most traps, we automatically
   82  * then enable interrupts if the interrupted context had them enabled.
   83  * This is equivalent to the i386 port's use of SDT_SYS386TGT.
   84  *
   85  * The cpu will push a certain amount of state onto the kernel stack for
   86  * the current process.  See amd64/include/frame.h.
   87  * This includes the current RFLAGS (status register, which includes
   88  * the interrupt disable state prior to the trap), the code segment register,
   89  * and the return instruction pointer are pushed by the cpu.  The cpu
   90  * will also push an 'error' code for certain traps.  We push a dummy
   91  * error code for those traps where the cpu doesn't in order to maintain
   92  * a consistent frame.  We also push a contrived 'trap number'.
   93  *
   94  * The CPU does not push the general registers, so we must do that, and we
   95  * must restore them prior to calling 'iret'.  The CPU adjusts %cs and %ss
    96  * but does not mess with %ds, %es, %gs or %fs.  We swap the %gs base
    97  * for the kernel mode operation shortly, without changes to the selector
    98  * loaded.  Since superuser long mode works with any selectors loaded into
    99  * segment registers other than %cs, which makes them mostly unused in long
  100  * mode, and kernel does not reference %fs, leave them alone.  The segment
  101  * registers are reloaded on return to the usermode.
  102  */
  103 
   104 MCOUNT_LABEL(user)                      /* profiling boundary labels used by FAKE_MCOUNT */
   105 MCOUNT_LABEL(btrap)
   106 
   107 /* Traps that we leave interrupts disabled for. */
   108         .macro  TRAP_NOEN       l, trapno
   109         PTI_ENTRY       \l,\l\()_pti_k,\l\()_pti_u      /* PTI trampoline entry points */
   110 \l\()_pti_k:                            /* PTI entry: trap raised in kernel mode */
   111         subq    $TF_RIP,%rsp            /* %rsp -> base of trapframe */
   112         movl    $\trapno,TF_TRAPNO(%rsp)
   113         movq    $0,TF_ADDR(%rsp)
   114         movq    $0,TF_ERR(%rsp)         /* CPU pushed no error code; supply dummy */
   115         jmp     alltraps_noen_k
   116 \l\()_pti_u:                            /* PTI entry: trap raised in user mode */
   117         subq    $TF_RIP,%rsp
   118         movl    $\trapno,TF_TRAPNO(%rsp)
   119         movq    $0,TF_ADDR(%rsp)
   120         movq    $0,TF_ERR(%rsp)
   121         jmp     alltraps_noen_u
   122         
   123         .globl  X\l
   124         .type   X\l,@function
   125 X\l:                                    /* non-PTI IDT entry */
   126         subq    $TF_RIP,%rsp
   127         movl    $\trapno,TF_TRAPNO(%rsp)
   128         movq    $0,TF_ADDR(%rsp)
   129         movq    $0,TF_ERR(%rsp)
   130         testb   $SEL_RPL_MASK,TF_CS(%rsp) /* did we come from kernel mode? */
   131         jz      alltraps_noen_k
   132         swapgs                          /* from user: install kernel GS.base */
   133         lfence                          /* speculation barrier before any %gs: use */
   134         jmp     alltraps_noen_u
   135         .endm
   136 
   137         TRAP_NOEN       bpt, T_BPTFLT
   138 #ifdef KDTRACE_HOOKS
   139         TRAP_NOEN       dtrace_ret, T_DTRACE_RET
   140 #endif
  141 
   142 /* Regular traps; The cpu does not supply tf_err for these. */
   143         .macro  TRAP    l, trapno
   144         PTI_ENTRY       \l,\l\()_pti_k,\l\()_pti_u      /* PTI trampoline entry points */
   145 \l\()_pti_k:                            /* PTI entry: trap raised in kernel mode */
   146         subq    $TF_RIP,%rsp            /* %rsp -> base of trapframe */
   147         movl    $\trapno,TF_TRAPNO(%rsp)
   148         movq    $0,TF_ADDR(%rsp)
   149         movq    $0,TF_ERR(%rsp)         /* CPU pushed no error code; supply dummy */
   150         jmp     alltraps_k
   151 \l\()_pti_u:                            /* PTI entry: trap raised in user mode */
   152         subq    $TF_RIP,%rsp
   153         movl    $\trapno,TF_TRAPNO(%rsp)
   154         movq    $0,TF_ADDR(%rsp)
   155         movq    $0,TF_ERR(%rsp)
   156         jmp     alltraps_u
   157 
   158         .globl  X\l
   159         .type   X\l,@function
   160 X\l:                                    /* non-PTI IDT entry */
   161         subq    $TF_RIP,%rsp
   162         movl    $\trapno,TF_TRAPNO(%rsp)
   163         movq    $0,TF_ADDR(%rsp)
   164         movq    $0,TF_ERR(%rsp)
   165         testb   $SEL_RPL_MASK,TF_CS(%rsp) /* did we come from kernel mode? */
   166         jz      alltraps_k
   167         swapgs                          /* from user: install kernel GS.base */
   168         lfence                          /* speculation barrier before any %gs: use */
   169         jmp     alltraps_u
   170         .endm
   171 
   172         TRAP    div, T_DIVIDE
   173         TRAP    ofl, T_OFLOW
   174         TRAP    bnd, T_BOUND
   175         TRAP    ill, T_PRIVINFLT
   176         TRAP    dna, T_DNA
   177         TRAP    fpusegm, T_FPOPFLT
   178         TRAP    rsvd, T_RESERVED
   179         TRAP    fpu, T_ARITHTRAP
   180         TRAP    xmm, T_XMMFLT
  181 
   182 /* This group of traps have tf_err already pushed by the cpu. */
   183         .macro  TRAP_ERR        l, trapno
   184         PTI_ENTRY       \l,\l\()_pti_k,\l\()_pti_u,has_err=1
   185 \l\()_pti_k:                            /* PTI entry: trap raised in kernel mode */
   186         subq    $TF_ERR,%rsp            /* err already on stack; alloc rest of frame */
   187         movl    $\trapno,TF_TRAPNO(%rsp)
   188         movq    $0,TF_ADDR(%rsp)
   189         jmp     alltraps_k
   190 \l\()_pti_u:                            /* PTI entry: trap raised in user mode */
   191         subq    $TF_ERR,%rsp
   192         movl    $\trapno,TF_TRAPNO(%rsp)
   193         movq    $0,TF_ADDR(%rsp)
   194         jmp     alltraps_u
   195         .globl  X\l
   196         .type   X\l,@function
   197 X\l:                                    /* non-PTI IDT entry */
   198         subq    $TF_ERR,%rsp
   199         movl    $\trapno,TF_TRAPNO(%rsp)
   200         movq    $0,TF_ADDR(%rsp)
   201         testb   $SEL_RPL_MASK,TF_CS(%rsp) /* did we come from kernel mode? */
   202         jz      alltraps_k
   203         swapgs                          /* from user: install kernel GS.base */
   204         lfence                          /* speculation barrier before any %gs: use */
   205         jmp     alltraps_u
   206         .endm
   207 
   208         TRAP_ERR        tss, T_TSSFLT
   209         TRAP_ERR        align, T_ALIGNFLT
  210 
   211         /*
   212          * alltraps_u/k entry points.
   213          * SWAPGS must be already performed by prologue,
   214          * if this is the first time in the kernel from userland.
   215          * Reenable interrupts if they were enabled before the trap.
   216          * This approximates SDT_SYS386TGT on the i386 port.
   217          */
   218         SUPERALIGN_TEXT
   219         .globl  alltraps_u
   220         .type   alltraps_u,@function
   221 alltraps_u:                             /* trap came from user mode */
   222         movq    %rdi,TF_RDI(%rsp)
   223         movq    %rdx,TF_RDX(%rsp)
   224         movq    %rax,TF_RAX(%rsp)
   225         movq    %rcx,TF_RCX(%rsp)
   226         movq    PCPU(CURPCB),%rdi       /* %rdi = curpcb */
   227         andl    $~PCB_FULL_IRET,PCB_FLAGS(%rdi) /* clear PCB_FULL_IRET for this entry */
   228         call    handle_ibrs_entry       /* IBRS mitigation on entry from user */
   229         jmp     alltraps_save_segs
   230         SUPERALIGN_TEXT
   231         .globl  alltraps_k
   232         .type   alltraps_k,@function
   233 alltraps_k:                             /* trap came from kernel mode */
   234         lfence                          /* speculation barrier */
   235         movq    %rdi,TF_RDI(%rsp)
   236         movq    %rdx,TF_RDX(%rsp)
   237         movq    %rax,TF_RAX(%rsp)
   238         movq    %rcx,TF_RCX(%rsp)
   239 alltraps_save_segs:
   240         SAVE_SEGS
   241         testl   $PSL_I,TF_RFLAGS(%rsp)  /* were interrupts on at trap time? */
   242         jz      alltraps_pushregs_no_rax
   243         sti                             /* yes: re-enable them */
   244 alltraps_pushregs_no_rax:               /* save the remaining GP registers */
   245         movq    %rsi,TF_RSI(%rsp)
   246         movq    %r8,TF_R8(%rsp)
   247         movq    %r9,TF_R9(%rsp)
   248         movq    %rbx,TF_RBX(%rsp)
   249         movq    %rbp,TF_RBP(%rsp)
   250         movq    %r10,TF_R10(%rsp)
   251         movq    %r11,TF_R11(%rsp)
   252         movq    %r12,TF_R12(%rsp)
   253         movq    %r13,TF_R13(%rsp)
   254         movq    %r14,TF_R14(%rsp)
   255         movq    %r15,TF_R15(%rsp)
   256         movl    $TF_HASSEGS,TF_FLAGS(%rsp)
   257         cld
   258         FAKE_MCOUNT(TF_RIP(%rsp))
   259 #ifdef KDTRACE_HOOKS
   260         /*
   261          * DTrace Function Boundary Trace (fbt) probes are triggered
   262          * by int3 (0xcc) which causes the #BP (T_BPTFLT) breakpoint
   263          * interrupt. For all other trap types, just handle them in
   264          * the usual way.
   265          */
   266         testb   $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
   267         jnz     calltrap                /* ignore userland traps */
   268         cmpl    $T_BPTFLT,TF_TRAPNO(%rsp)
   269         jne     calltrap
   270 
   271         /* Check if there is no DTrace hook registered. */
   272         cmpq    $0,dtrace_invop_jump_addr
   273         je      calltrap
   274 
   275         /*
   276          * Set our jump address for the jump back in the event that
   277          * the breakpoint wasn't caused by DTrace at all.
   278          */
   279         movq    $calltrap,dtrace_invop_calltrap_addr(%rip)
   280 
   281         /* Jump to the code hooked in by DTrace. */
   282         jmpq    *dtrace_invop_jump_addr
   283 #endif
   284         .globl  calltrap
   285         .type   calltrap,@function
   286 calltrap:
   287         movq    %rsp,%rdi               /* arg 0: trapframe pointer */
   288         call    trap_check
   289         MEXITCOUNT
   290         jmp     doreti                  /* Handle any pending ASTs */
  291 
   292         /*
   293          * alltraps_noen_u/k entry points.
   294          * Again, SWAPGS must be already performed by prologue, if needed.
   295          * Unlike alltraps above, we want to leave the interrupts disabled.
   296          * This corresponds to SDT_SYS386IGT on the i386 port.
   297          */
   298         SUPERALIGN_TEXT
   299         .globl  alltraps_noen_u
   300         .type   alltraps_noen_u,@function
   301 alltraps_noen_u:                        /* trap came from user mode */
   302         movq    %rdi,TF_RDI(%rsp)
   303         movq    PCPU(CURPCB),%rdi       /* %rdi = curpcb */
   304         andl    $~PCB_FULL_IRET,PCB_FLAGS(%rdi) /* clear PCB_FULL_IRET for this entry */
   305         jmp     alltraps_noen_save_segs
   306         SUPERALIGN_TEXT
   307         .globl  alltraps_noen_k
   308         .type   alltraps_noen_k,@function
   309 alltraps_noen_k:                        /* trap came from kernel mode */
   310         lfence                          /* speculation barrier */
   311         movq    %rdi,TF_RDI(%rsp)
   312 alltraps_noen_save_segs:
   313         SAVE_SEGS
   314         movq    %rdx,TF_RDX(%rsp)
   315         movq    %rax,TF_RAX(%rsp)
   316         movq    %rcx,TF_RCX(%rsp)
   317         testb   $SEL_RPL_MASK,TF_CS(%rsp) /* user-mode entry needs IBRS handling */
   318         jz      alltraps_pushregs_no_rax
   319         call    handle_ibrs_entry
   320         jmp     alltraps_pushregs_no_rax
  321 
   322 IDTVEC(dblfault)                        /* #DF: build a full frame, then halt forever */
   323         subq    $TF_ERR,%rsp
   324         movl    $T_DOUBLEFLT,TF_TRAPNO(%rsp)
   325         movq    $0,TF_ADDR(%rsp)
   326         movq    $0,TF_ERR(%rsp)
   327         movq    %rdi,TF_RDI(%rsp)
   328         movq    %rsi,TF_RSI(%rsp)
   329         movq    %rdx,TF_RDX(%rsp)
   330         movq    %rcx,TF_RCX(%rsp)
   331         movq    %r8,TF_R8(%rsp)
   332         movq    %r9,TF_R9(%rsp)
   333         movq    %rax,TF_RAX(%rsp)
   334         movq    %rbx,TF_RBX(%rsp)
   335         movq    %rbp,TF_RBP(%rsp)
   336         movq    %r10,TF_R10(%rsp)
   337         movq    %r11,TF_R11(%rsp)
   338         movq    %r12,TF_R12(%rsp)
   339         movq    %r13,TF_R13(%rsp)
   340         movq    %r14,TF_R14(%rsp)
   341         movq    %r15,TF_R15(%rsp)
   342         SAVE_SEGS
   343         movl    $TF_HASSEGS,TF_FLAGS(%rsp)
   344         cld
   345         testb   $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
   346         jz      1f                      /* already running with kernel GS.base */
   347         swapgs
   348 1:      lfence
   349         movq    PCPU(KCR3),%rax         /* switch to kernel page table unless */
   350         cmpq    $~0,%rax                /* KCR3 is the ~0 "PTI off" sentinel */
   351         je      2f
   352         movq    %rax,%cr3
   353 2:      movq    %rsp,%rdi               /* arg 0: trapframe pointer */
   354         call    dblfault_handler
   355 3:      hlt                             /* handler does not return; spin halted */
   356         jmp     3b
  357 
   358         ALIGN_TEXT
   359 IDTVEC(page_pti)                        /* #PF via PTI trampoline */
   360         testb   $SEL_RPL_MASK,PTI_CS-PTI_ERR(%rsp) /* from kernel mode? */
   361         jz      page_k
   362         swapgs                          /* from user: install kernel GS.base */
   363         lfence
   364         pushq   %rax
   365         movq    %cr3,%rax
   366         movq    %rax,PCPU(SAVED_UCR3)   /* stash faulting %cr3; copied to pcb in page_u */
   367         cmpq    $~0,PCPU(UCR3)          /* PTI disabled? no page table switch needed */
   368         jne     1f
   369         popq    %rax
   370         jmp     page_u
   371 1:      pushq   %rdx
   372         PTI_UUENTRY has_err=1           /* switch stacks/page table for PTI */
   373         jmp     page_u
   374         ALIGN_TEXT
   375 IDTVEC(page)
   376         testb   $SEL_RPL_MASK,TF_CS-TF_ERR(%rsp) /* Did we come from kernel? */
   377         jnz     page_u_swapgs           /* from user: need swapgs first */
   378 page_k:
   379         lfence                          /* speculation barrier */
   380         subq    $TF_ERR,%rsp
   381         movq    %rdi,TF_RDI(%rsp)       /* free up GP registers */
   382         movq    %rax,TF_RAX(%rsp)
   383         movq    %rdx,TF_RDX(%rsp)
   384         movq    %rcx,TF_RCX(%rsp)
   385         jmp     page_cr2
   386         ALIGN_TEXT
   387 page_u_swapgs:
   388         swapgs
   389         lfence
   390 page_u:
   391         subq    $TF_ERR,%rsp
   392         movq    %rdi,TF_RDI(%rsp)
   393         movq    %rax,TF_RAX(%rsp)
   394         movq    %rdx,TF_RDX(%rsp)
   395         movq    %rcx,TF_RCX(%rsp)
   396         movq    PCPU(CURPCB),%rdi
   397         andl    $~PCB_FULL_IRET,PCB_FLAGS(%rdi)
   398         movq    PCPU(SAVED_UCR3),%rax   /* record faulting %cr3 in the pcb */
   399         movq    %rax,PCB_SAVED_UCR3(%rdi)
   400         call    handle_ibrs_entry
   401 page_cr2:
   402         movq    %cr2,%rdi               /* preserve %cr2 before ..  */
   403         movq    %rdi,TF_ADDR(%rsp)      /* enabling interrupts. */
   404         SAVE_SEGS
   405         movl    $T_PAGEFLT,TF_TRAPNO(%rsp)
   406         testl   $PSL_I,TF_RFLAGS(%rsp)  /* re-enable interrupts if they were on */
   407         jz      alltraps_pushregs_no_rax
   408         sti
   409         jmp     alltraps_pushregs_no_rax
  410 
   411         /*
   412          * We have to special-case this one.  If we get a trap in doreti() at
   413          * the iretq stage, we'll reenter with the wrong gs state.  We'll have
   414          * to do the special swapgs in this case even coming from the kernel.
   415          * XXX linux has a trap handler for their equivalent of load_gs().
   416          *
   417          * On the stack, we have the hardware interrupt frame to return
   418          * to usermode (faulted) and another frame with error code, for
   419          * fault.  For PTI, copy both frames to the main thread stack.
   420          * Handle the potential 16-byte alignment adjustment incurred
   421          * during the second fault by copying both frames independently
   422          * while unwinding the stack in between.
   423          */
   424         .macro PROTF_ENTRY name,trapno
   425 \name\()_pti_doreti:                    /* faulted at doreti's iretq: gs is user's */
   426         swapgs
   427         lfence
   428         cmpq    $~0,PCPU(UCR3)          /* PTI disabled? skip the frame copy */
   429         je      1f
   430         pushq   %rax
   431         pushq   %rdx
   432         movq    PCPU(KCR3),%rax
   433         movq    %rax,%cr3               /* switch to kernel page table */
   434         movq    PCPU(RSP0),%rax
   435         subq    $2*PTI_SIZE-3*8,%rax /* no err, %rax, %rdx in faulted frame */
   436         MOVE_STACKS     (PTI_SIZE / 8)  /* copy fault frame to thread stack */
   437         addq    $PTI_SIZE,%rax
   438         movq    PTI_RSP(%rsp),%rsp      /* unwind to the faulted frame */
   439         MOVE_STACKS     (PTI_SIZE / 8 - 3)
   440         subq    $PTI_SIZE,%rax
   441         movq    %rax,%rsp
   442         popq    %rdx
   443         popq    %rax
   444 1:      swapgs                          /* restore gs state expected by X\name */
   445         jmp     X\name
   446 IDTVEC(\name\()_pti)
   447         cmpq    $doreti_iret,PTI_RIP-2*8(%rsp) /* fault at doreti's iretq? */
   448         je      \name\()_pti_doreti
   449         testb   $SEL_RPL_MASK,PTI_CS-2*8(%rsp) /* %rax, %rdx not yet pushed */
   450         jz      X\name          /* lfence is not needed until %gs: use */
   451         PTI_UENTRY has_err=1
   452         swapgs  /* fence provided by PTI_UENTRY */
   453 IDTVEC(\name)
   454         subq    $TF_ERR,%rsp            /* err already pushed by the CPU */
   455         movl    $\trapno,TF_TRAPNO(%rsp)
   456         jmp     prot_addrf
   457         .endm
   458 
   459         PROTF_ENTRY     missing, T_SEGNPFLT
   460         PROTF_ENTRY     stk, T_STKFLT
   461         PROTF_ENTRY     prot, T_PROTFLT
  462 
   463 prot_addrf:                             /* common tail for #NP/#SS/#GP */
   464         movq    $0,TF_ADDR(%rsp)
   465         movq    %rdi,TF_RDI(%rsp)       /* free up a GP register */
   466         movq    %rax,TF_RAX(%rsp)
   467         movq    %rdx,TF_RDX(%rsp)
   468         movq    %rcx,TF_RCX(%rsp)
   469         movw    %fs,TF_FS(%rsp)
   470         movw    %gs,TF_GS(%rsp)
   471         leaq    doreti_iret(%rip),%rdi
   472         cmpq    %rdi,TF_RIP(%rsp)       /* faulted at doreti's iretq? */
   473         je      5f                      /* kernel but with user gsbase!! */
   474         testb   $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
   475         jz      6f                      /* already running with kernel GS.base */
   476         testb   $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip) /* rd{f,g}sbase usable? */
   477         jz      2f
   478         cmpw    $KUF32SEL,TF_FS(%rsp)
   479         jne     1f
   480         rdfsbase %rax                   /* snapshot user FS.base */
   481 1:      cmpw    $KUG32SEL,TF_GS(%rsp)
   482         jne     2f
   483         rdgsbase %rdx                   /* snapshot user GS.base (before swapgs) */
   484 2:      swapgs
   485         lfence
   486         movq    PCPU(CURPCB),%rdi
   487         testb   $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
   488         jz      4f
   489         cmpw    $KUF32SEL,TF_FS(%rsp)
   490         jne     3f
   491         movq    %rax,PCB_FSBASE(%rdi)   /* record user FS.base in pcb */
   492 3:      cmpw    $KUG32SEL,TF_GS(%rsp)
   493         jne     4f
   494         movq    %rdx,PCB_GSBASE(%rdi)   /* record user GS.base in pcb */
   495         orl     $PCB_FULL_IRET,PCB_FLAGS(%rdi)  /* full iret from user #gp */
   496 4:      call    handle_ibrs_entry
   497         movw    %es,TF_ES(%rsp)
   498         movw    %ds,TF_DS(%rsp)
   499         testl   $PSL_I,TF_RFLAGS(%rsp)  /* re-enable interrupts if they were on */
   500         jz      alltraps_pushregs_no_rax
   501         sti
   502         jmp     alltraps_pushregs_no_rax
   503 
   504 5:      swapgs                          /* doreti_iret fault: gs was still user's */
   505 6:      lfence
   506         movq    PCPU(CURPCB),%rdi
   507         jmp     4b
  508 
   509 /*
   510  * Fast syscall entry point.  We enter here with just our new %cs/%ss set,
   511  * and the new privilege level.  We are still running on the old user stack
   512  * pointer.  We have to juggle a few things around to find our stack etc.
   513  * swapgs gives us access to our PCPU space only.
   514  *
   515  * We do not support invoking this from a custom segment registers,
   516  * esp. %cs, %ss, %fs, %gs, e.g. using entries from an LDT.
   517  */
   518         SUPERALIGN_TEXT
   519 IDTVEC(fast_syscall_pti)
   520         swapgs
   521         lfence
   522         movq    %rax,PCPU(SCRATCH_RAX)
   523         cmpq    $~0,PCPU(UCR3)          /* PTI disabled? */
   524         je      fast_syscall_common
   525         movq    PCPU(KCR3),%rax
   526         movq    %rax,%cr3               /* switch to kernel page table */
   527         jmp     fast_syscall_common
   528         SUPERALIGN_TEXT
   529 IDTVEC(fast_syscall)
   530         swapgs
   531         lfence
   532         movq    %rax,PCPU(SCRATCH_RAX)
   533 fast_syscall_common:
   534         movq    %rsp,PCPU(SCRATCH_RSP)
   535         movq    PCPU(RSP0),%rsp         /* switch to the kernel stack */
   536         /* Now emulate a trapframe. Make the 8 byte alignment odd for call. */
   537         subq    $TF_SIZE,%rsp
   538         /* defer TF_RSP till we have a spare register */
   539         movq    %r11,TF_RFLAGS(%rsp)    /* syscall saved rflags in %r11 */
   540         movq    %rcx,TF_RIP(%rsp)       /* %rcx original value is in %r10 */
   541         movq    PCPU(SCRATCH_RSP),%r11  /* %r11 already saved */
   542         movq    %r11,TF_RSP(%rsp)       /* user stack pointer */
   543         movq    PCPU(SCRATCH_RAX),%rax
   544         movq    %rax,TF_RAX(%rsp)       /* syscall number */
   545         movq    %rdx,TF_RDX(%rsp)       /* arg 3 */
   546         SAVE_SEGS
   547         call    handle_ibrs_entry
   548         movq    PCPU(CURPCB),%r11
   549         andl    $~PCB_FULL_IRET,PCB_FLAGS(%r11)
   550         sti
   551         movq    $KUDSEL,TF_SS(%rsp)
   552         movq    $KUCSEL,TF_CS(%rsp)
   553         movq    $2,TF_ERR(%rsp)         /* NOTE(review): 2 appears to tag a syscall frame — confirm */
   554         movq    %rdi,TF_RDI(%rsp)       /* arg 1 */
   555         movq    %rsi,TF_RSI(%rsp)       /* arg 2 */
   556         movq    %r10,TF_RCX(%rsp)       /* arg 4 */
   557         movq    %r8,TF_R8(%rsp)         /* arg 5 */
   558         movq    %r9,TF_R9(%rsp)         /* arg 6 */
   559         movq    %rbx,TF_RBX(%rsp)       /* C preserved */
   560         movq    %rbp,TF_RBP(%rsp)       /* C preserved */
   561         movq    %r12,TF_R12(%rsp)       /* C preserved */
   562         movq    %r13,TF_R13(%rsp)       /* C preserved */
   563         movq    %r14,TF_R14(%rsp)       /* C preserved */
   564         movq    %r15,TF_R15(%rsp)       /* C preserved */
   565         movl    $TF_HASSEGS,TF_FLAGS(%rsp)
   566         FAKE_MCOUNT(TF_RIP(%rsp))
   567         movq    PCPU(CURTHREAD),%rdi
   568         movq    %rsp,TD_FRAME(%rdi)
   569         movl    TF_RFLAGS(%rsp),%esi
   570         andl    $PSL_T,%esi             /* arg 1: single-step (trace) flag */
   571         call    amd64_syscall
   572 1:      movq    PCPU(CURPCB),%rax
   573         /* Disable interrupts before testing PCB_FULL_IRET. */
   574         cli
   575         testl   $PCB_FULL_IRET,PCB_FLAGS(%rax)
   576         jnz     4f
   577         /* Check for and handle AST's on return to userland. */
   578         movq    PCPU(CURTHREAD),%rax
   579         testl   $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax)
   580         jne     3f
   581         call    handle_ibrs_exit
   582         callq   *mds_handler            /* scrub CPU buffers (MDS mitigation) */
   583         /* Restore preserved registers. */
   584         MEXITCOUNT
   585         movq    TF_RDI(%rsp),%rdi       /* bonus; preserve arg 1 */
   586         movq    TF_RSI(%rsp),%rsi       /* bonus: preserve arg 2 */
   587         movq    TF_RDX(%rsp),%rdx       /* return value 2 */
   588         movq    TF_RAX(%rsp),%rax       /* return value 1 */
   589         movq    TF_RFLAGS(%rsp),%r11    /* original %rflags */
   590         movq    TF_RIP(%rsp),%rcx       /* original %rip */
   591         movq    TF_RSP(%rsp),%rsp       /* user stack pointer */
   592         xorl    %r8d,%r8d               /* zero the rest of GPRs */
   593         xorl    %r10d,%r10d
   594         cmpq    $~0,PCPU(UCR3)          /* PTI disabled? */
   595         je      2f
   596         movq    PCPU(UCR3),%r9
   597         movq    %r9,%cr3                /* back to the user page table */
   598 2:      xorl    %r9d,%r9d
   599         swapgs                          /* restore user GS.base */
   600         sysretq
   601 
   602 3:      /* AST scheduled. */
   603         sti
   604         movq    %rsp,%rdi
   605         call    ast
   606         jmp     1b
   607 
   608 4:      /* Requested full context restore, use doreti for that. */
   609         MEXITCOUNT
   610         jmp     doreti
  611 
   612 /*
   613  * Here for CYA insurance, in case a "syscall" instruction gets
   614  * issued from 32 bit compatibility mode. MSR_CSTAR has to point
   615  * to *something* if EFER_SCE is enabled.
   616  */
   617 IDTVEC(fast_syscall32)
   618         sysret                          /* no service: return immediately */
  619 
   620 /*
   621  * DB# handler is very similar to NM#, because 'mov/pop %ss' delay
   622  * generation of exception until the next instruction is executed,
   623  * which might be a kernel entry.  So we must execute the handler
   624  * on IST stack and be ready for non-kernel GSBASE.
   625  */
   626 IDTVEC(dbg)
   627         subq    $TF_RIP,%rsp            /* %rsp -> base of trapframe */
   628         movl    $(T_TRCTRAP),TF_TRAPNO(%rsp)
   629         movq    $0,TF_ADDR(%rsp)
   630         movq    $0,TF_ERR(%rsp)
   631         movq    %rdi,TF_RDI(%rsp)
   632         movq    %rsi,TF_RSI(%rsp)
   633         movq    %rdx,TF_RDX(%rsp)
   634         movq    %rcx,TF_RCX(%rsp)
   635         movq    %r8,TF_R8(%rsp)
   636         movq    %r9,TF_R9(%rsp)
   637         movq    %rax,TF_RAX(%rsp)
   638         movq    %rbx,TF_RBX(%rsp)
   639         movq    %rbp,TF_RBP(%rsp)
   640         movq    %r10,TF_R10(%rsp)
   641         movq    %r11,TF_R11(%rsp)
   642         movq    %r12,TF_R12(%rsp)
   643         movq    %r13,TF_R13(%rsp)
   644         movq    %r14,TF_R14(%rsp)
   645         movq    %r15,TF_R15(%rsp)
   646         SAVE_SEGS
   647         movl    $TF_HASSEGS,TF_FLAGS(%rsp)
   648         cld
   649         testb   $SEL_RPL_MASK,TF_CS(%rsp) /* did we come from kernel mode? */
   650         jnz     dbg_fromuserspace
   651         lfence
   652         /*
   653          * We've interrupted the kernel.  Preserve GS.base in %r12,
   654          * %cr3 in %r13, and possibly lower half of MSR_IA32_SPEC_CTL in %r14d.
   655          */
   656         movl    $MSR_GSBASE,%ecx
   657         rdmsr
   658         movq    %rax,%r12               /* %r12 = interrupted GS.base (edx:eax) */
   659         shlq    $32,%rdx
   660         orq     %rdx,%r12
   661         /* Retrieve and load the canonical value for GS.base. */
   662         movq    TF_SIZE(%rsp),%rdx      /* stored just above this IST frame */
   663         movl    %edx,%eax
   664         shrq    $32,%rdx
   665         wrmsr
   666         movq    %cr3,%r13               /* %r13 = interrupted %cr3 */
   667         movq    PCPU(KCR3),%rax
   668         cmpq    $~0,%rax                /* ~0 sentinel: PTI disabled */
   669         je      1f
   670         movq    %rax,%cr3
   671 1:      testl   $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
   672         je      2f
   673         movl    $MSR_IA32_SPEC_CTRL,%ecx
   674         rdmsr
   675         movl    %eax,%r14d              /* %r14d = SPEC_CTRL low half to restore */
   676         call    handle_ibrs_entry
   677 2:      FAKE_MCOUNT(TF_RIP(%rsp))
   678         movq    %rsp,%rdi               /* arg 0: trapframe pointer */
   679         call    trap
   680         MEXITCOUNT
   681         testl   $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
   682         je      3f
   683         movl    %r14d,%eax              /* restore saved SPEC_CTRL value */
   684         xorl    %edx,%edx
   685         movl    $MSR_IA32_SPEC_CTRL,%ecx
   686         wrmsr
   687         /*
   688          * Put back the preserved MSR_GSBASE value.
   689          */
   690 3:      movl    $MSR_GSBASE,%ecx
   691         movq    %r12,%rdx
   692         movl    %edx,%eax
   693         shrq    $32,%rdx
   694         wrmsr
   695         movq    %r13,%cr3               /* restore interrupted page table */
   696         RESTORE_REGS
   697         addq    $TF_RIP,%rsp
   698         jmp     doreti_iret
   699 dbg_fromuserspace:
   700         /*
   701          * Switch to kernel GSBASE and kernel page table, and copy frame
   702          * from the IST stack to the normal kernel stack, since trap()
   703          * re-enables interrupts, and since we might trap on DB# while
   704          * in trap().
   705          */
   706         swapgs
   707         lfence
   708         movq    PCPU(KCR3),%rax
   709         cmpq    $~0,%rax                /* ~0 sentinel: PTI disabled */
   710         je      1f
   711         movq    %rax,%cr3
   712 1:      movq    PCPU(RSP0),%rax
   713         movl    $TF_SIZE,%ecx
   714         subq    %rcx,%rax
   715         movq    %rax,%rdi
   716         movq    %rsp,%rsi
   717         rep;movsb                       /* copy TF_SIZE bytes onto the kernel stack */
   718         movq    %rax,%rsp
   719         call    handle_ibrs_entry
   720         movq    PCPU(CURPCB),%rdi
   721         orl     $PCB_FULL_IRET,PCB_FLAGS(%rdi) /* force full iret on return */
   722         testb   $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
   723         jz      3f
   724         cmpw    $KUF32SEL,TF_FS(%rsp)
   725         jne     2f
   726         rdfsbase %rax
   727         movq    %rax,PCB_FSBASE(%rdi)   /* record user FS.base in pcb */
   728 2:      cmpw    $KUG32SEL,TF_GS(%rsp)
   729         jne     3f
   730         movl    $MSR_KGSBASE,%ecx       /* user GS.base sits in KGSBASE after swapgs */
   731         rdmsr
   732         shlq    $32,%rdx
   733         orq     %rdx,%rax
   734         movq    %rax,PCB_GSBASE(%rdi)   /* record user GS.base in pcb */
   735 3:      jmp     calltrap
  736 
  737 /*
  738  * NMI handling is special.
  739  *
  740  * First, NMIs do not respect the state of the processor's RFLAGS.IF
  741  * bit.  The NMI handler may be entered at any time, including when
  742  * the processor is in a critical section with RFLAGS.IF == 0.
  743  * The processor's GS.base value could be invalid on entry to the
  744  * handler.
  745  *
  746  * Second, the processor treats NMIs specially, blocking further NMIs
  747  * until an 'iretq' instruction is executed.  We thus need to execute
  748  * the NMI handler with interrupts disabled, to prevent a nested interrupt
  749  * from executing an 'iretq' instruction and inadvertently taking the
  750  * processor out of NMI mode.
  751  *
  752  * Third, the NMI handler runs on its own stack (tss_ist2). The canonical
  753  * GS.base value for the processor is stored just above the bottom of its
  754  * NMI stack.  For NMIs taken from kernel mode, the current value in
  755  * the processor's GS.base is saved at entry to C-preserved register %r12,
  756  * the canonical value for GS.base is then loaded into the processor, and
  757  * the saved value is restored at exit time.  For NMIs taken from user mode,
  758  * the cheaper 'SWAPGS' instructions are used for swapping GS.base.
  759  */
  760 
IDTVEC(nmi)
	subq	$TF_RIP,%rsp		/* hw pushed ss/rsp/rflags/cs/rip; alloc rest of frame */
	movl	$(T_NMI),TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	movq	$0,TF_ERR(%rsp)
	/* Spill all general-purpose registers into the trapframe. */
	movq	%rdi,TF_RDI(%rsp)
	movq	%rsi,TF_RSI(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	movq	%r8,TF_R8(%rsp)
	movq	%r9,TF_R9(%rsp)
	movq	%rax,TF_RAX(%rsp)
	movq	%rbx,TF_RBX(%rsp)
	movq	%rbp,TF_RBP(%rsp)
	movq	%r10,TF_R10(%rsp)
	movq	%r11,TF_R11(%rsp)
	movq	%r12,TF_R12(%rsp)
	movq	%r13,TF_R13(%rsp)
	movq	%r14,TF_R14(%rsp)
	movq	%r15,TF_R15(%rsp)
	SAVE_SEGS
	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
	cld
	xorl	%ebx,%ebx		/* %ebx != 0 will mean "NMI came from user mode" */
	testb	$SEL_RPL_MASK,TF_CS(%rsp)
	jnz	nmi_fromuserspace
	/*
	 * We've interrupted the kernel.  Preserve GS.base in %r12,
	 * %cr3 in %r13, and possibly lower half of MSR_IA32_SPEC_CTL in %r14d.
	 */
	lfence				/* barrier: no speculation past the CPL test */
	movl	$MSR_GSBASE,%ecx
	rdmsr				/* current GS.base -> %edx:%eax */
	movq	%rax,%r12
	shlq	$32,%rdx
	orq	%rdx,%r12		/* %r12 = interrupted 64-bit GS.base */
	/* Retrieve and load the canonical value for GS.base. */
	movq	TF_SIZE(%rsp),%rdx	/* stored just above the NMI stack bottom (see comment above) */
	movl	%edx,%eax
	shrq	$32,%rdx
	wrmsr
	movq	%cr3,%r13		/* preserve %cr3 across a possible PTI switch */
	movq	PCPU(KCR3),%rax
	cmpq	$~0,%rax		/* ~0 => PTI disabled, keep current %cr3 */
	je	1f
	movq	%rax,%cr3		/* switch to the kernel page tables */
1:	testl	$CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
	je	nmi_calltrap
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	rdmsr
	movl	%eax,%r14d		/* preserve SPEC_CTRL low half for restore at exit */
	call	handle_ibrs_entry
	jmp	nmi_calltrap
nmi_fromuserspace:
	incl	%ebx			/* mark: must return to userland via doreti */
	swapgs				/* install kernel GS.base */
	lfence				/* barrier: no speculation past the conditional swapgs */
	movq	%cr3,%r13
	movq	PCPU(KCR3),%rax
	cmpq	$~0,%rax		/* ~0 => PTI disabled */
	je	1f
	movq	%rax,%cr3
1:	call	handle_ibrs_entry
	/*
	 * If curpcb is set up, request a full iret and cache the user
	 * FS/GS base values into the PCB while they are still live.
	 */
	movq	PCPU(CURPCB),%rdi
	testq	%rdi,%rdi
	jz	3f
	orl	$PCB_FULL_IRET,PCB_FLAGS(%rdi)
	testb	$CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
	jz	3f
	cmpw	$KUF32SEL,TF_FS(%rsp)	/* 32-bit TLS selector loaded in %fs? */
	jne	2f
	rdfsbase %rax
	movq	%rax,PCB_FSBASE(%rdi)
2:	cmpw	$KUG32SEL,TF_GS(%rsp)
	jne	3f
	movl	$MSR_KGSBASE,%ecx	/* after swapgs the user GS.base sits in KGSBASE */
	rdmsr
	shlq	$32,%rdx
	orq	%rdx,%rax
	movq	%rax,PCB_GSBASE(%rdi)
3:
/* Note: this label is also used by ddb and gdb: */
nmi_calltrap:
	FAKE_MCOUNT(TF_RIP(%rsp))
	movq	%rsp,%rdi		/* pass the trapframe pointer */
	call	trap
	MEXITCOUNT
#ifdef HWPMC_HOOKS
	/*
	 * Capture a userspace callchain if needed.
	 *
	 * - Check if the current trap was from user mode.
	 * - Check if the current thread is valid.
	 * - Check if the thread requires a user call chain to be
	 *   captured.
	 *
	 * We are still in NMI mode at this point.
	 */
	testl	%ebx,%ebx
	jz	nocallchain	/* not from userspace */
	movq	PCPU(CURTHREAD),%rax
	orq	%rax,%rax	/* curthread present? */
	jz	nocallchain
	/*
	 * Move execution to the regular kernel stack, because we
	 * committed to return through doreti.
	 */
	movq	%rsp,%rsi	/* source stack pointer */
	movq	$TF_SIZE,%rcx
	movq	PCPU(RSP0),%rdx
	subq	%rcx,%rdx
	movq	%rdx,%rdi	/* destination stack pointer */
	shrq	$3,%rcx		/* trap frame size in long words */
	cld
	rep
	movsq			/* copy trapframe */
	movq	%rdx,%rsp	/* we are on the regular kstack */

	testl	$TDP_CALLCHAIN,TD_PFLAGS(%rax) /* flagged for capture? */
	jz	nocallchain
	/*
	 * A user callchain is to be captured, so:
	 * - Take the processor out of "NMI" mode by faking an "iret",
	 *   to allow for nested NMI interrupts.
	 * - Enable interrupts, so that copyin() can work.
	 */
	movl	%ss,%eax
	pushq	%rax		/* tf_ss */
	pushq	%rdx		/* tf_rsp (on kernel stack) */
	pushfq			/* tf_rflags */
	movl	%cs,%eax
	pushq	%rax		/* tf_cs */
	pushq	$outofnmi	/* tf_rip */
	iretq			/* exits NMI mode without changing stack/CPL */
outofnmi:
	/*
	 * At this point the processor has exited NMI mode and is running
	 * with interrupts turned off on the normal kernel stack.
	 *
	 * If a pending NMI gets recognized at or after this point, it
	 * will cause a kernel callchain to be traced.
	 *
	 * We turn interrupts back on, and call the user callchain capture hook.
	 */
	movq	pmc_hook,%rax
	orq	%rax,%rax
	jz	nocallchain
	movq	PCPU(CURTHREAD),%rdi		/* thread */
	movq	$PMC_FN_USER_CALLCHAIN,%rsi	/* command */
	movq	%rsp,%rdx			/* frame */
	sti
	call	*%rax
	cli
nocallchain:
#endif
	testl	%ebx,%ebx	/* %ebx != 0 => from user mode, return via doreti */
	jnz	doreti_exit
	/*
	 * NMI from kernel mode: undo everything preserved at entry.
	 * Restore speculation control MSR, if preserved.
	 */
	testl	$CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
	je	1f
	movl	%r14d,%eax
	xorl	%edx,%edx
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	wrmsr
	/*
	 * Put back the preserved MSR_GSBASE value.
	 */
1:	movl	$MSR_GSBASE,%ecx
	movq	%r12,%rdx
	movl	%edx,%eax
	shrq	$32,%rdx
	wrmsr
	cmpb	$0, nmi_flush_l1d_sw(%rip)
	je	2f
	call	flush_l1d_sw		/* bhyve L1TF assist */
2:	movq	%r13,%cr3		/* restore the interrupted %cr3 */
	RESTORE_REGS
	addq	$TF_RIP,%rsp		/* discard frame head; hw part consumed by iretq */
	jmp	doreti_iret
  942 
  943 /*
  944  * MC# handling is similar to NMI.
  945  *
  946  * As with NMIs, machine check exceptions do not respect RFLAGS.IF and
  947  * can occur at any time with a GS.base value that does not correspond
  948  * to the privilege level in CS.
  949  *
  950  * Machine checks are not unblocked by iretq, but it is best to run
  951  * the handler with interrupts disabled since the exception may have
  952  * interrupted a critical section.
  953  *
  954  * The MC# handler runs on its own stack (tss_ist3).  The canonical
  955  * GS.base value for the processor is stored just above the bottom of
  956  * its MC# stack.  For exceptions taken from kernel mode, the current
  957  * value in the processor's GS.base is saved at entry to C-preserved
  958  * register %r12, the canonical value for GS.base is then loaded into
  959  * the processor, and the saved value is restored at exit time.  For
  960  * exceptions taken from user mode, the cheaper 'SWAPGS' instructions
  961  * are used for swapping GS.base.
  962  */
  963 
  964 IDTVEC(mchk)
  965         subq    $TF_RIP,%rsp
  966         movl    $(T_MCHK),TF_TRAPNO(%rsp)
  967         movq    $0,TF_ADDR(%rsp)
  968         movq    $0,TF_ERR(%rsp)
  969         movq    %rdi,TF_RDI(%rsp)
  970         movq    %rsi,TF_RSI(%rsp)
  971         movq    %rdx,TF_RDX(%rsp)
  972         movq    %rcx,TF_RCX(%rsp)
  973         movq    %r8,TF_R8(%rsp)
  974         movq    %r9,TF_R9(%rsp)
  975         movq    %rax,TF_RAX(%rsp)
  976         movq    %rbx,TF_RBX(%rsp)
  977         movq    %rbp,TF_RBP(%rsp)
  978         movq    %r10,TF_R10(%rsp)
  979         movq    %r11,TF_R11(%rsp)
  980         movq    %r12,TF_R12(%rsp)
  981         movq    %r13,TF_R13(%rsp)
  982         movq    %r14,TF_R14(%rsp)
  983         movq    %r15,TF_R15(%rsp)
  984         SAVE_SEGS
  985         movl    $TF_HASSEGS,TF_FLAGS(%rsp)
  986         cld
  987         xorl    %ebx,%ebx
  988         testb   $SEL_RPL_MASK,TF_CS(%rsp)
  989         jnz     mchk_fromuserspace
  990         /*
  991          * We've interrupted the kernel.  Preserve GS.base in %r12,
  992          * %cr3 in %r13, and possibly lower half of MSR_IA32_SPEC_CTL in %r14d.
  993          */
  994         movl    $MSR_GSBASE,%ecx
  995         rdmsr
  996         movq    %rax,%r12
  997         shlq    $32,%rdx
  998         orq     %rdx,%r12
  999         /* Retrieve and load the canonical value for GS.base. */
 1000         movq    TF_SIZE(%rsp),%rdx
 1001         movl    %edx,%eax
 1002         shrq    $32,%rdx
 1003         wrmsr
 1004         movq    %cr3,%r13
 1005         movq    PCPU(KCR3),%rax
 1006         cmpq    $~0,%rax
 1007         je      1f
 1008         movq    %rax,%cr3
 1009 1:      testl   $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
 1010         je      mchk_calltrap
 1011         movl    $MSR_IA32_SPEC_CTRL,%ecx
 1012         rdmsr
 1013         movl    %eax,%r14d
 1014         call    handle_ibrs_entry
 1015         jmp     mchk_calltrap
 1016 mchk_fromuserspace:
 1017         incl    %ebx
 1018         swapgs
 1019         movq    %cr3,%r13
 1020         movq    PCPU(KCR3),%rax
 1021         cmpq    $~0,%rax
 1022         je      1f
 1023         movq    %rax,%cr3
 1024 1:      call    handle_ibrs_entry
 1025 /* Note: this label is also used by ddb and gdb: */
 1026 mchk_calltrap:
 1027         FAKE_MCOUNT(TF_RIP(%rsp))
 1028         movq    %rsp,%rdi
 1029         call    mca_intr
 1030         MEXITCOUNT
 1031         testl   %ebx,%ebx       /* %ebx == 0 => return to userland */
 1032         jnz     doreti_exit
 1033         /*
 1034          * Restore speculation control MSR, if preserved.
 1035          */
 1036         testl   $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
 1037         je      1f
 1038         movl    %r14d,%eax
 1039         xorl    %edx,%edx
 1040         movl    $MSR_IA32_SPEC_CTRL,%ecx
 1041         wrmsr
 1042         /*
 1043          * Put back the preserved MSR_GSBASE value.
 1044          */
 1045 1:      movl    $MSR_GSBASE,%ecx
 1046         movq    %r12,%rdx
 1047         movl    %edx,%eax
 1048         shrq    $32,%rdx
 1049         wrmsr
 1050         movq    %r13,%cr3
 1051         RESTORE_REGS
 1052         addq    $TF_RIP,%rsp
 1053         jmp     doreti_iret
 1054 
/*
 * First instructions executed by a newly created thread when it is
 * switched to; %r12/%rbx/%rsp were staged by the context-switch/fork
 * code (not visible in this file).  Hands off to fork_exit() and then
 * returns to user mode through doreti.
 */
ENTRY(fork_trampoline)
	movq	%r12,%rdi		/* function */
	movq	%rbx,%rsi		/* arg1 */
	movq	%rsp,%rdx		/* trapframe pointer */
	call	fork_exit
	MEXITCOUNT
	jmp	doreti			/* Handle any ASTs */
 1062 
 1063 /*
 1064  * To efficiently implement classification of trap and interrupt handlers
 1065  * for profiling, there must be only trap handlers between the labels btrap
 1066  * and bintr, and only interrupt handlers between the labels bintr and
 1067  * eintr.  This is implemented (partly) by including files that contain
 1068  * some of the handlers.  Before including the files, set up a normal asm
 * environment so that the included files don't need to know that they are
 1070  * included.
 1071  */
 1072 
 1073 #ifdef COMPAT_FREEBSD32
 1074         .data
 1075         .p2align 4
 1076         .text
 1077         SUPERALIGN_TEXT
 1078 
 1079 #include <amd64/ia32/ia32_exception.S>
 1080 #endif
 1081 
 1082         .data
 1083         .p2align 4
 1084         .text
 1085         SUPERALIGN_TEXT
 1086 MCOUNT_LABEL(bintr)
 1087 
 1088 #include <amd64/amd64/apic_vector.S>
 1089 
 1090 #ifdef DEV_ATPIC
 1091         .data
 1092         .p2align 4
 1093         .text
 1094         SUPERALIGN_TEXT
 1095 
 1096 #include <amd64/amd64/atpic_vector.S>
 1097 #endif
 1098 
 1099         .text
 1100 MCOUNT_LABEL(eintr)
 1101 
 1102 /*
 1103  * void doreti(struct trapframe)
 1104  *
 1105  * Handle return from interrupts, traps and syscalls.
 1106  */
	.text
	SUPERALIGN_TEXT
	.type	doreti,@function
	.globl	doreti
doreti:
	FAKE_MCOUNT($bintr)		/* init "from" bintr -> doreti */
	/*
	 * Check if ASTs can be handled now.
	 */
	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* are we returning to user mode? */
	jz	doreti_exit		/* can't handle ASTs now if not */

doreti_ast:
	/*
	 * Check for ASTs atomically with returning.  Disabling CPU
	 * interrupts provides sufficient locking even in the SMP case,
	 * since we will be informed of any new ASTs by an IPI.
	 */
	cli
	movq	PCPU(CURTHREAD),%rax
	testl	$TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax)
	je	doreti_exit
	sti
	movq	%rsp,%rdi	/* pass a pointer to the trapframe */
	call	ast
	jmp	doreti_ast	/* loop: ast() may have raised new AST flags */

	/*
	 * doreti_exit:	pop registers, iret.
	 *
	 *	The segment register pop is a special case, since it may
	 *	fault if (for example) a sigreturn specifies bad segment
	 *	registers.  The fault is handled in trap.c.
	 */
doreti_exit:
	MEXITCOUNT
	movq	PCPU(CURPCB),%r8

	/*
	 * Do not reload segment registers for kernel.
	 * Since we do not reload segment registers with sane
	 * values on kernel entry, descriptors referenced by
	 * segment registers might be not valid.  This is fatal
	 * for user mode, but is not a problem for the kernel.
	 */
	testb	$SEL_RPL_MASK,TF_CS(%rsp)
	jz	ld_regs
	testl	$PCB_FULL_IRET,PCB_FLAGS(%r8)
	jz	ld_regs
	andl	$~PCB_FULL_IRET,PCB_FLAGS(%r8)
	testl	$TF_HASSEGS,TF_FLAGS(%rsp)
	je	set_segs		/* frame lacks segs: synthesize defaults */

do_segs:
	/* Restore %fs and fsbase */
	movw	TF_FS(%rsp),%ax
	.globl	ld_fs
ld_fs:
	movw	%ax,%fs			/* may fault; trap.c resumes at fs_load_fault */
	cmpw	$KUF32SEL,%ax
	jne	1f
	movl	$MSR_FSBASE,%ecx
	movl	PCB_FSBASE(%r8),%eax
	movl	PCB_FSBASE+4(%r8),%edx
	.globl	ld_fsbase
ld_fsbase:
	wrmsr				/* may fault; see fsbase_load_fault */
1:
	/* Restore %gs and gsbase */
	movw	TF_GS(%rsp),%si
	pushfq				/* keep interrupts off while GS.base is juggled */
	cli
	movl	$MSR_GSBASE,%ecx
	/* Save current kernel %gs base into %r12d:%r13d */
	rdmsr
	movl	%eax,%r12d
	movl	%edx,%r13d
	.globl	ld_gs
ld_gs:
	movw	%si,%gs			/* clobbers GS.base; may fault (gs_load_fault) */
	/* Save user %gs base into %r14d:%r15d */
	rdmsr
	movl	%eax,%r14d
	movl	%edx,%r15d
	/* Restore kernel %gs base */
	movl	%r12d,%eax
	movl	%r13d,%edx
	wrmsr
	popfq				/* matching pop for the pushfq above */
	/*
	 * Restore user %gs base, either from PCB if used for TLS, or
	 * from the previously saved msr read.
	 */
	movl	$MSR_KGSBASE,%ecx
	cmpw	$KUG32SEL,%si
	jne	1f
	movl	PCB_GSBASE(%r8),%eax
	movl	PCB_GSBASE+4(%r8),%edx
	jmp	ld_gsbase
1:
	movl	%r14d,%eax
	movl	%r15d,%edx
	.globl	ld_gsbase
ld_gsbase:
	wrmsr	/* May trap if non-canonical, but only for TLS. */
	.globl	ld_es
ld_es:
	movw	TF_ES(%rsp),%es		/* may fault; see es_load_fault */
	.globl	ld_ds
ld_ds:
	movw	TF_DS(%rsp),%ds		/* may fault; see ds_load_fault */
ld_regs:
	RESTORE_REGS
	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
	jz	2f			/* keep running with kernel GS.base */
	cli				/* no interrupts once user state is being loaded */
	call	handle_ibrs_exit_rs
	callq	*mds_handler		/* CPU-specific MDS buffer clearing */
	cmpq	$~0,PCPU(UCR3)		/* ~0 => PTI disabled for this thread */
	je	1f
	/*
	 * PTI return path: copy %rax/%rdx and the hardware iret frame
	 * onto the per-CPU PTI trampoline stack, switch to the user
	 * page tables, and iret from the trampoline.
	 */
	pushq	%rdx
	movq	PCPU(PTI_RSP0),%rdx
	subq	$PTI_SIZE,%rdx
	movq	%rax,PTI_RAX(%rdx)
	popq	%rax			/* original %rdx */
	movq	%rax,PTI_RDX(%rdx)
	movq	TF_RIP(%rsp),%rax
	movq	%rax,PTI_RIP(%rdx)
	movq	TF_CS(%rsp),%rax
	movq	%rax,PTI_CS(%rdx)
	movq	TF_RFLAGS(%rsp),%rax
	movq	%rax,PTI_RFLAGS(%rdx)
	movq	TF_RSP(%rsp),%rax
	movq	%rax,PTI_RSP(%rdx)
	movq	TF_SS(%rsp),%rax
	movq	%rax,PTI_SS(%rdx)
	movq	PCPU(UCR3),%rax
	swapgs				/* install user GS.base */
	movq	%rdx,%rsp		/* move onto the PTI trampoline stack */
	movq	%rax,%cr3		/* switch to user page tables */
	popq	%rdx
	popq	%rax
	addq	$8,%rsp			/* skip the slot between regs and PTI_RIP;
					   NOTE(review): confirm against PTI frame layout */
	jmp	doreti_iret
1:	swapgs				/* non-PTI user return: just swap GS.base */
2:	addq	$TF_RIP,%rsp		/* drop frame head; iretq consumes the rest */
	.globl	doreti_iret
doreti_iret:
	iretq
 1256 
set_segs:
	/*
	 * The trapframe has no valid saved segment registers
	 * (TF_HASSEGS clear): fill in the default user data and
	 * 32-bit TLS selectors, then restore them via do_segs.
	 */
	movw	$KUDSEL,%ax
	movw	%ax,TF_DS(%rsp)
	movw	%ax,TF_ES(%rsp)
	movw	$KUF32SEL,TF_FS(%rsp)
	movw	$KUG32SEL,TF_GS(%rsp)
	jmp	do_segs
 1264 
 1265         /*
 1266          * doreti_iret_fault.  Alternative return code for
 1267          * the case where we get a fault in the doreti_exit code
 1268          * above.  trap() (amd64/amd64/trap.c) catches this specific
 1269          * case, sends the process a signal and continues in the
 1270          * corresponding place in the code below.
 1271          */
	ALIGN_TEXT
	.globl	doreti_iret_fault
doreti_iret_fault:
	/* Rebuild a full trapframe and deliver the fault via calltrap. */
	subq	$TF_RIP,%rsp		/* space including tf_err, tf_trapno */
	movq	%rax,TF_RAX(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	call	handle_ibrs_entry
	testb	$SEL_RPL_MASK,TF_CS(%rsp)
	jz	1f
	sti				/* faulted returning to user: interrupts are safe */
1:
	SAVE_SEGS
	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
	movq	%rdi,TF_RDI(%rsp)
	movq	%rsi,TF_RSI(%rsp)
	movq	%r8,TF_R8(%rsp)
	movq	%r9,TF_R9(%rsp)
	movq	%rbx,TF_RBX(%rsp)
	movq	%rbp,TF_RBP(%rsp)
	movq	%r10,TF_R10(%rsp)
	movq	%r11,TF_R11(%rsp)
	movq	%r12,TF_R12(%rsp)
	movq	%r13,TF_R13(%rsp)
	movq	%r14,TF_R14(%rsp)
	movq	%r15,TF_R15(%rsp)
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	movq	$0,TF_ERR(%rsp)	/* XXX should be the error code */
	movq	$0,TF_ADDR(%rsp)
	FAKE_MCOUNT(TF_RIP(%rsp))
	jmp	calltrap
 1303 
	ALIGN_TEXT
	.globl	ds_load_fault
ds_load_fault:
	/*
	 * Entered from trap.c when the '%ds' load at ld_ds faults;
	 * %rsp still points at the trapframe being restored.
	 * NOTE(review): the sibling handlers below gate 'sti' on
	 * PSL_I in TF_RFLAGS, while this one tests the CS RPL --
	 * confirm the difference is intentional.
	 */
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	testb	$SEL_RPL_MASK,TF_CS(%rsp)
	jz	1f
	sti
1:
	movq	%rsp,%rdi
	call	trap
	movw	$KUDSEL,TF_DS(%rsp)	/* retry restore with the default user %ds */
	jmp	doreti
 1316 
	ALIGN_TEXT
	.globl	es_load_fault
es_load_fault:
	/*
	 * Entered from trap.c when the '%es' load at ld_es faults;
	 * %rsp still points at the trapframe being restored.
	 */
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)	/* re-enable interrupts iff frame had them on */
	jz	1f
	sti
1:
	movq	%rsp,%rdi
	call	trap
	movw	$KUDSEL,TF_ES(%rsp)	/* retry restore with the default user %es */
	jmp	doreti
 1329 
	ALIGN_TEXT
	.globl	fs_load_fault
fs_load_fault:
	/*
	 * Entered from trap.c when the '%fs' load at ld_fs faults;
	 * %rsp still points at the trapframe being restored.
	 */
	testl	$PSL_I,TF_RFLAGS(%rsp)	/* re-enable interrupts iff frame had them on */
	jz	1f
	sti
1:
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	movq	%rsp,%rdi
	call	trap
	movw	$KUF32SEL,TF_FS(%rsp)	/* retry restore with the default TLS %fs */
	jmp	doreti
 1342 
	ALIGN_TEXT
	.globl	gs_load_fault
gs_load_fault:
	/*
	 * Entered from trap.c when the '%gs' load at ld_gs faults.
	 * That load sits inside the pushfq/cli window in do_segs, so
	 * the saved flags are still on the stack: discard them first.
	 */
	popfq
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)	/* re-enable interrupts iff frame had them on */
	jz	1f
	sti
1:
	movq	%rsp,%rdi
	call	trap
	movw	$KUG32SEL,TF_GS(%rsp)	/* retry restore with the default TLS %gs */
	jmp	doreti
 1356 
	ALIGN_TEXT
	.globl	fsbase_load_fault
fsbase_load_fault:
	/*
	 * Entered from trap.c when the FS.base wrmsr at ld_fsbase
	 * faults (e.g. a non-canonical base requested via sigreturn).
	 */
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)	/* re-enable interrupts iff frame had them on */
	jz	1f
	sti
1:
	movq	%rsp,%rdi
	call	trap
	movq	PCPU(CURTHREAD),%r8
	movq	TD_PCB(%r8),%r8
	movq	$0,PCB_FSBASE(%r8)	/* invalidate the offending base for the retry */
	jmp	doreti
 1371 
	ALIGN_TEXT
	.globl	gsbase_load_fault
gsbase_load_fault:
	/*
	 * Entered from trap.c when the GS.base wrmsr at ld_gsbase
	 * faults (e.g. a non-canonical base requested via sigreturn).
	 */
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)	/* re-enable interrupts iff frame had them on */
	jz	1f
	sti
1:
	movq	%rsp,%rdi
	call	trap
	movq	PCPU(CURTHREAD),%r8
	movq	TD_PCB(%r8),%r8
	movq	$0,PCB_GSBASE(%r8)	/* invalidate the offending base for the retry */
	jmp	doreti
 1386 
#ifdef HWPMC_HOOKS
	/* Marker label: end of the exception handlers' text, for hwpmc. */
	ENTRY(end_exceptions)
#endif

Cache object: 6d3172b99cff37d710b0bfaaa751dc5c


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.