1 /*-
2 * Copyright (c) 1989, 1990 William F. Jolitz.
3 * Copyright (c) 1990 The Regents of the University of California.
4 * Copyright (c) 2007-2018 The FreeBSD Foundation
5 * All rights reserved.
6 *
7 * Portions of this software were developed by A. Joseph Koshy under
8 * sponsorship from the FreeBSD Foundation and Google, Inc.
9 *
10 * Portions of this software were developed by
11 * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from
12 * the FreeBSD Foundation.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
22 * 3. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * $FreeBSD$
39 */
40
41 #include "opt_atpic.h"
42 #include "opt_hwpmc_hooks.h"
43
44 #include "assym.inc"
45
46 #include <machine/psl.h>
47 #include <machine/asmacros.h>
48 #include <machine/trap.h>
49 #include <machine/specialreg.h>
50
51 #ifdef KDTRACE_HOOKS
/*
 * Two 8-byte BSS pointers used by the DTrace invalid-opcode (FBT) hook:
 * dtrace_invop_jump_addr holds the address jumped to on a kernel #BP
 * (see the KDTRACE_HOOKS check in the trap path below), and
 * dtrace_invop_calltrap_addr is filled with &calltrap so the hook can
 * resume normal trap handling when the breakpoint was not DTrace's.
 */
52 .bss
53 .globl dtrace_invop_jump_addr
54 .align 8
55 .type dtrace_invop_jump_addr,@object
56 .size dtrace_invop_jump_addr,8
57 dtrace_invop_jump_addr:
58 .zero 8
59 .globl dtrace_invop_calltrap_addr
60 .align 8
61 .type dtrace_invop_calltrap_addr,@object
62 .size dtrace_invop_calltrap_addr,8
63 dtrace_invop_calltrap_addr:
64 .zero 8
65 #endif
66 .text
67 #ifdef HWPMC_HOOKS
68 ENTRY(start_exceptions)
69 #endif
70
71 /*****************************************************************************/
72 /* Trap handling */
73 /*****************************************************************************/
74 /*
75 * Trap and fault vector routines.
76 *
77 * All traps are 'interrupt gates', SDT_SYSIGT. An interrupt gate pushes
78 * state on the stack but also disables interrupts. This is important for
79 * us for the use of the swapgs instruction. We cannot be interrupted
80 * until the GS.base value is correct. For most traps, we automatically
81 * then enable interrupts if the interrupted context had them enabled.
82 * This is equivalent to the i386 port's use of SDT_SYS386TGT.
83 *
84 * The cpu will push a certain amount of state onto the kernel stack for
85 * the current process. See amd64/include/frame.h.
86 * This includes the current RFLAGS (status register, which includes
87 * the interrupt disable state prior to the trap), the code segment register,
88 * and the return instruction pointer are pushed by the cpu. The cpu
89 * will also push an 'error' code for certain traps. We push a dummy
90 * error code for those traps where the cpu doesn't in order to maintain
91 * a consistent frame. We also push a contrived 'trap number'.
92 *
93 * The CPU does not push the general registers, so we must do that, and we
94 * must restore them prior to calling 'iret'. The CPU adjusts %cs and %ss
95 * but does not mess with %ds, %es, %gs or %fs. We swap the %gs base
96 * for the kernel mode operation shortly, without changes to the selector
97 * loaded. Since superuser long mode works with any selectors loaded into
98 * segment registers other than %cs, which makes them mostly unused in long
99 * mode, and kernel does not reference %fs, leave them alone. The segment
100 * registers are reloaded on return to the usermode.
101 */
102
103 MCOUNT_LABEL(user)
104 MCOUNT_LABEL(btrap)
105
106 /* Traps that we leave interrupts disabled for. */
107 .macro TRAP_NOEN l, trapno
/*
 * Emit three entry points for trap \l (vector number \trapno):
 * the two PTI entries (kernel/user) and the plain X\l vector.
 * Each reserves trapframe space down to tf_rip (the CPU pushed no
 * error code for these), records the trap number, zeroes tf_addr
 * and tf_err, then joins the common alltraps_noen_{k,u} path which
 * keeps interrupts disabled.
 */
108 PTI_ENTRY \l,\l\()_pti_k,\l\()_pti_u
109 \l\()_pti_k:
110 subq $TF_RIP,%rsp
111 movl $\trapno,TF_TRAPNO(%rsp)
112 movq $0,TF_ADDR(%rsp)
113 movq $0,TF_ERR(%rsp)
114 jmp alltraps_noen_k
115 \l\()_pti_u:
116 subq $TF_RIP,%rsp
117 movl $\trapno,TF_TRAPNO(%rsp)
118 movq $0,TF_ADDR(%rsp)
119 movq $0,TF_ERR(%rsp)
120 jmp alltraps_noen_u
121
122 .globl X\l
123 .type X\l,@function
124 X\l:
125 subq $TF_RIP,%rsp
126 movl $\trapno,TF_TRAPNO(%rsp)
127 movq $0,TF_ADDR(%rsp)
128 movq $0,TF_ERR(%rsp)
/* RPL bits of the saved %cs tell whether we trapped from userland. */
129 testb $SEL_RPL_MASK,TF_CS(%rsp)
130 jz alltraps_noen_k
/* From userland: load kernel GS.base; lfence is the speculation barrier. */
131 swapgs
132 lfence
133 jmp alltraps_noen_u
134 .endm
135
136 TRAP_NOEN bpt, T_BPTFLT
137 #ifdef KDTRACE_HOOKS
138 TRAP_NOEN dtrace_ret, T_DTRACE_RET
139 #endif
140
141 /* Regular traps; The cpu does not supply tf_err for these. */
142 .macro TRAP l, trapno
/*
 * Same layout as TRAP_NOEN, but the common code reached
 * (alltraps_{k,u}) re-enables interrupts if the interrupted
 * context had them enabled.
 */
143 PTI_ENTRY \l,\l\()_pti_k,\l\()_pti_u
144 \l\()_pti_k:
145 subq $TF_RIP,%rsp
146 movl $\trapno,TF_TRAPNO(%rsp)
147 movq $0,TF_ADDR(%rsp)
148 movq $0,TF_ERR(%rsp)
149 jmp alltraps_k
150 \l\()_pti_u:
151 subq $TF_RIP,%rsp
152 movl $\trapno,TF_TRAPNO(%rsp)
153 movq $0,TF_ADDR(%rsp)
154 movq $0,TF_ERR(%rsp)
155 jmp alltraps_u
156
157 .globl X\l
158 .type X\l,@function
159 X\l:
160 subq $TF_RIP,%rsp
161 movl $\trapno,TF_TRAPNO(%rsp)
162 movq $0,TF_ADDR(%rsp)
163 movq $0,TF_ERR(%rsp)
/* Kernel or user origin?  swapgs only when coming from userland. */
164 testb $SEL_RPL_MASK,TF_CS(%rsp)
165 jz alltraps_k
166 swapgs
167 lfence
168 jmp alltraps_u
169 .endm
170
171 TRAP div, T_DIVIDE
172 TRAP ofl, T_OFLOW
173 TRAP bnd, T_BOUND
174 TRAP ill, T_PRIVINFLT
175 TRAP dna, T_DNA
176 TRAP fpusegm, T_FPOPFLT
177 TRAP rsvd, T_RESERVED
178 TRAP fpu, T_ARITHTRAP
179 TRAP xmm, T_XMMFLT
180
181 /* This group of traps have tf_err already pushed by the cpu. */
182 .macro TRAP_ERR l, trapno
/*
 * As TRAP, but the CPU has already pushed an error code, so the
 * stack is only adjusted down to tf_err and tf_err is left as the
 * hardware wrote it.
 */
183 PTI_ENTRY \l,\l\()_pti_k,\l\()_pti_u,has_err=1
184 \l\()_pti_k:
185 subq $TF_ERR,%rsp
186 movl $\trapno,TF_TRAPNO(%rsp)
187 movq $0,TF_ADDR(%rsp)
188 jmp alltraps_k
189 \l\()_pti_u:
190 subq $TF_ERR,%rsp
191 movl $\trapno,TF_TRAPNO(%rsp)
192 movq $0,TF_ADDR(%rsp)
193 jmp alltraps_u
194 .globl X\l
195 .type X\l,@function
196 X\l:
197 subq $TF_ERR,%rsp
198 movl $\trapno,TF_TRAPNO(%rsp)
199 movq $0,TF_ADDR(%rsp)
200 testb $SEL_RPL_MASK,TF_CS(%rsp)
201 jz alltraps_k
202 swapgs
203 lfence
204 jmp alltraps_u
205 .endm
206
207 TRAP_ERR tss, T_TSSFLT
208 TRAP_ERR align, T_ALIGNFLT
209
210 /*
211 * alltraps_u/k entry points.
212 * SWAPGS must be already performed by prologue,
213 * if this is the first time in the kernel from userland.
214 * Reenable interrupts if they were enabled before the trap.
215 * This approximates SDT_SYS386TGT on the i386 port.
216 */
217 SUPERALIGN_TEXT
218 .globl alltraps_u
219 .type alltraps_u,@function
220 alltraps_u:
/*
 * User-mode entry: save the registers needed as scratch below,
 * clear the deferred full-iret flag in the PCB, and apply the
 * IBRS entry mitigation before doing anything else.
 */
221 movq %rdi,TF_RDI(%rsp)
222 movq %rdx,TF_RDX(%rsp)
223 movq %rax,TF_RAX(%rsp)
224 movq %rcx,TF_RCX(%rsp)
225 movq PCPU(CURPCB),%rdi
226 andl $~PCB_FULL_IRET,PCB_FLAGS(%rdi)
227 call handle_ibrs_entry
228 jmp alltraps_save_segs
229 SUPERALIGN_TEXT
230 .globl alltraps_k
231 .type alltraps_k,@function
232 alltraps_k:
/* Kernel-mode entry: speculation barrier after the origin branch. */
233 lfence
234 movq %rdi,TF_RDI(%rsp)
235 movq %rdx,TF_RDX(%rsp)
236 movq %rax,TF_RAX(%rsp)
237 movq %rcx,TF_RCX(%rsp)
238 alltraps_save_segs:
239 SAVE_SEGS
/* Re-enable interrupts only if the interrupted context had PSL_I set. */
240 testl $PSL_I,TF_RFLAGS(%rsp)
241 jz alltraps_pushregs_no_rax
242 sti
243 alltraps_pushregs_no_rax:
/* Finish the trapframe with the remaining general registers. */
244 movq %rsi,TF_RSI(%rsp)
245 movq %r8,TF_R8(%rsp)
246 movq %r9,TF_R9(%rsp)
247 movq %rbx,TF_RBX(%rsp)
248 movq %rbp,TF_RBP(%rsp)
249 movq %r10,TF_R10(%rsp)
250 movq %r11,TF_R11(%rsp)
251 movq %r12,TF_R12(%rsp)
252 movq %r13,TF_R13(%rsp)
253 movq %r14,TF_R14(%rsp)
254 movq %r15,TF_R15(%rsp)
255 movl $TF_HASSEGS,TF_FLAGS(%rsp)
/* Run the kernel with the direction and alignment-check flags clear. */
256 pushfq
257 andq $~(PSL_D | PSL_AC),(%rsp)
258 popfq
259 FAKE_MCOUNT(TF_RIP(%rsp))
260 #ifdef KDTRACE_HOOKS
261 /*
262 * DTrace Function Boundary Trace (fbt) probes are triggered
263 * by int3 (0xcc) which causes the #BP (T_BPTFLT) breakpoint
264 * interrupt. For all other trap types, just handle them in
265 * the usual way.
266 */
267 testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
268 jnz calltrap /* ignore userland traps */
269 cmpl $T_BPTFLT,TF_TRAPNO(%rsp)
270 jne calltrap
271
272 /* Check if there is no DTrace hook registered. */
273 cmpq $0,dtrace_invop_jump_addr
274 je calltrap
275
276 /*
277 * Set our jump address for the jump back in the event that
278 * the breakpoint wasn't caused by DTrace at all.
279 */
280 movq $calltrap,dtrace_invop_calltrap_addr(%rip)
281
282 /* Jump to the code hooked in by DTrace. */
283 jmpq *dtrace_invop_jump_addr
284 #endif
285 .globl calltrap
286 .type calltrap,@function
287 calltrap:
/* Common trap dispatch: hand the trapframe to C, then exit via doreti. */
288 movq %rsp,%rdi
289 call trap_check
290 MEXITCOUNT
291 jmp doreti /* Handle any pending ASTs */
292
293 /*
294 * alltraps_noen_u/k entry points.
295 * Again, SWAPGS must be already performed by prologue, if needed.
296 * Unlike alltraps above, we want to leave the interrupts disabled.
297 * This corresponds to SDT_SYS386IGT on the i386 port.
298 */
299 SUPERALIGN_TEXT
300 .globl alltraps_noen_u
301 .type alltraps_noen_u,@function
302 alltraps_noen_u:
303 movq %rdi,TF_RDI(%rsp)
304 movq PCPU(CURPCB),%rdi
305 andl $~PCB_FULL_IRET,PCB_FLAGS(%rdi)
306 jmp alltraps_noen_save_segs
307 SUPERALIGN_TEXT
308 .globl alltraps_noen_k
309 .type alltraps_noen_k,@function
310 alltraps_noen_k:
311 lfence
312 movq %rdi,TF_RDI(%rsp)
313 alltraps_noen_save_segs:
314 SAVE_SEGS
315 movq %rdx,TF_RDX(%rsp)
316 movq %rax,TF_RAX(%rsp)
317 movq %rcx,TF_RCX(%rsp)
/* IBRS entry handling is applied only when we came from userland. */
318 testb $SEL_RPL_MASK,TF_CS(%rsp)
319 jz alltraps_pushregs_no_rax
320 call handle_ibrs_entry
321 jmp alltraps_pushregs_no_rax
322
/*
 * Double fault (#DF).  Build a full trapframe by hand, switch to
 * kernel GS.base and kernel page tables if necessary, then call the
 * C handler.  The handler is not expected to return; we halt if it
 * does.
 */
323 IDTVEC(dblfault)
324 subq $TF_ERR,%rsp
325 movl $T_DOUBLEFLT,TF_TRAPNO(%rsp)
326 movq $0,TF_ADDR(%rsp)
327 movq $0,TF_ERR(%rsp)
328 movq %rdi,TF_RDI(%rsp)
329 movq %rsi,TF_RSI(%rsp)
330 movq %rdx,TF_RDX(%rsp)
331 movq %rcx,TF_RCX(%rsp)
332 movq %r8,TF_R8(%rsp)
333 movq %r9,TF_R9(%rsp)
334 movq %rax,TF_RAX(%rsp)
335 movq %rbx,TF_RBX(%rsp)
336 movq %rbp,TF_RBP(%rsp)
337 movq %r10,TF_R10(%rsp)
338 movq %r11,TF_R11(%rsp)
339 movq %r12,TF_R12(%rsp)
340 movq %r13,TF_R13(%rsp)
341 movq %r14,TF_R14(%rsp)
342 movq %r15,TF_R15(%rsp)
343 SAVE_SEGS
344 movl $TF_HASSEGS,TF_FLAGS(%rsp)
345 pushfq
346 andq $~(PSL_D | PSL_AC),(%rsp)
347 popfq
348 testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
349 jz 1f /* already running with kernel GS.base */
350 swapgs
351 1: lfence
/* Save current %cr3, then switch to the kernel (PTI) page table. */
352 movq %cr3,%rax
353 movq %rax,PCPU(SAVED_UCR3)
354 movq PCPU(KCR3),%rax
/* KCR3 of ~0 is the "no switch needed" sentinel (PTI disabled). */
355 cmpq $~0,%rax
356 je 2f
357 movq %rax,%cr3
358 2: movq %rsp,%rdi
359 call dblfault_handler
/* dblfault_handler should not return; spin in hlt if it does. */
360 3: hlt
361 jmp 3b
362
/*
 * Page fault (#PF).  The fault address in %cr2 must be captured
 * before interrupts are re-enabled, since a nested fault would
 * overwrite it.  The PTI entry additionally handles the user->kernel
 * page-table switch and preserves the faulting %cr3 in SAVED_UCR3.
 */
363 ALIGN_TEXT
364 IDTVEC(page_pti)
365 testb $SEL_RPL_MASK,PTI_CS-PTI_ERR(%rsp)
366 jz page_k
367 swapgs
368 lfence
369 pushq %rax
370 movq %cr3,%rax
371 movq %rax,PCPU(SAVED_UCR3)
/* UCR3 == ~0: PTI not active for this CPU/process, no switch needed. */
372 cmpq $~0,PCPU(UCR3)
373 jne 1f
374 popq %rax
375 jmp page_u
376 1: pushq %rdx
377 PTI_UUENTRY has_err=1
378 jmp page_u
379 ALIGN_TEXT
380 IDTVEC(page)
381 testb $SEL_RPL_MASK,TF_CS-TF_ERR(%rsp) /* Did we come from kernel? */
382 jnz page_u_swapgs /* from user: need swapgs; kernel GS.base otherwise */
383 page_k:
384 lfence
385 subq $TF_ERR,%rsp
386 movq %rdi,TF_RDI(%rsp) /* free up GP registers */
387 movq %rax,TF_RAX(%rsp)
388 movq %rdx,TF_RDX(%rsp)
389 movq %rcx,TF_RCX(%rsp)
390 jmp page_cr2
391 ALIGN_TEXT
392 page_u_swapgs:
393 swapgs
394 lfence
395 page_u:
396 subq $TF_ERR,%rsp
397 movq %rdi,TF_RDI(%rsp)
398 movq %rax,TF_RAX(%rsp)
399 movq %rdx,TF_RDX(%rsp)
400 movq %rcx,TF_RCX(%rsp)
401 movq PCPU(CURPCB),%rdi
402 andl $~PCB_FULL_IRET,PCB_FLAGS(%rdi)
/* Record the faulting user %cr3 in the PCB for the fault handler. */
403 movq PCPU(SAVED_UCR3),%rax
404 movq %rax,PCB_SAVED_UCR3(%rdi)
405 call handle_ibrs_entry
406 page_cr2:
407 movq %cr2,%rdi /* preserve %cr2 before .. */
408 movq %rdi,TF_ADDR(%rsp) /* enabling interrupts. */
409 SAVE_SEGS
410 movl $T_PAGEFLT,TF_TRAPNO(%rsp)
411 testl $PSL_I,TF_RFLAGS(%rsp)
412 jz alltraps_pushregs_no_rax
413 sti
414 jmp alltraps_pushregs_no_rax
415
416 /*
417 * We have to special-case this one. If we get a trap in doreti() at
418 * the iretq stage, we'll reenter with the wrong gs state. We'll have
419 * to do a special swapgs in this case even coming from the kernel.
420 * XXX linux has a trap handler for their equivalent of load_gs().
421 *
422 * On the stack, we have the hardware interrupt frame to return
423 * to usermode (faulted) and another frame with error code, for
424 * fault. For PTI, copy both frames to the main thread stack.
425 * Handle the potential 16-byte alignment adjustment incurred
426 * during the second fault by copying both frames independently
427 * while unwinding the stack in between.
428 */
429 .macro PROTF_ENTRY name,trapno
430 \name\()_pti_doreti:
/*
 * Fault during the doreti iretq: GS.base is in the user state even
 * though %cs says kernel, so swap it here.  Then, if PTI is active
 * (UCR3 != ~0), switch to the kernel page table and copy both the
 * faulted frame and the error-code frame onto the main thread stack.
 */
431 swapgs
432 lfence
433 cmpq $~0,PCPU(UCR3)
434 je 1f
435 pushq %rax
436 pushq %rdx
437 movq PCPU(KCR3),%rax
438 movq %rax,%cr3
439 movq PCPU(RSP0),%rax
440 subq $2*PTI_SIZE-3*8,%rax /* no err, %rax, %rdx in faulted frame */
441 MOVE_STACKS (PTI_SIZE / 8)
442 addq $PTI_SIZE,%rax
443 movq PTI_RSP(%rsp),%rsp
444 MOVE_STACKS (PTI_SIZE / 8 - 3)
445 subq $PTI_SIZE,%rax
446 movq %rax,%rsp
447 popq %rdx
448 popq %rax
/* Undo the swapgs above; X\name below performs its own user check. */
449 1: swapgs
450 jmp X\name
451 IDTVEC(\name\()_pti)
452 cmpq $doreti_iret,PTI_RIP-2*8(%rsp)
453 je \name\()_pti_doreti
454 testb $SEL_RPL_MASK,PTI_CS-2*8(%rsp) /* %rax, %rdx not yet pushed */
455 jz X\name /* lfence is not needed until %gs: use */
456 PTI_UENTRY has_err=1
457 swapgs /* fence provided by PTI_UENTRY */
458 IDTVEC(\name)
459 subq $TF_ERR,%rsp
460 movl $\trapno,TF_TRAPNO(%rsp)
461 jmp prot_addrf
462 .endm
463
464 PROTF_ENTRY missing, T_SEGNPFLT
465 PROTF_ENTRY stk, T_STKFLT
466 PROTF_ENTRY prot, T_PROTFLT
467
/*
 * Common tail for the protection-style faults emitted by PROTF_ENTRY.
 * Besides building the trapframe, this captures the user %fs/%gs base
 * values (via rdfsbase/rdgsbase when FSGSBASE is available) before
 * swapgs destroys the user GS.base, so the PCB stays accurate.
 */
468 prot_addrf:
469 movq $0,TF_ADDR(%rsp)
470 movq %rdi,TF_RDI(%rsp) /* free up a GP register */
471 movq %rax,TF_RAX(%rsp)
472 movq %rdx,TF_RDX(%rsp)
473 movq %rcx,TF_RCX(%rsp)
474 movw %fs,TF_FS(%rsp)
475 movw %gs,TF_GS(%rsp)
476 leaq doreti_iret(%rip),%rdi
477 cmpq %rdi,TF_RIP(%rsp)
478 je 5f /* kernel but with user gsbase!! */
479 testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
480 jz 6f /* already running with kernel GS.base */
/* From user with FSGSBASE: read user fs/gs base before swapgs. */
481 testb $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
482 jz 2f
483 cmpw $KUF32SEL,TF_FS(%rsp)
484 jne 1f
485 rdfsbase %rax
486 1: cmpw $KUG32SEL,TF_GS(%rsp)
487 jne 2f
488 rdgsbase %rdx
489 2: swapgs
490 lfence
491 movq PCPU(CURPCB),%rdi
/* Now that GS.base is the kernel's, stash the user values in the PCB. */
492 testb $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
493 jz 4f
494 cmpw $KUF32SEL,TF_FS(%rsp)
495 jne 3f
496 movq %rax,PCB_FSBASE(%rdi)
497 3: cmpw $KUG32SEL,TF_GS(%rsp)
498 jne 4f
499 movq %rdx,PCB_GSBASE(%rdi)
500 orl $PCB_FULL_IRET,PCB_FLAGS(%rdi) /* full iret from user #gp */
501 4: call handle_ibrs_entry
502 movw %es,TF_ES(%rsp)
503 movw %ds,TF_DS(%rsp)
504 testl $PSL_I,TF_RFLAGS(%rsp)
505 jz alltraps_pushregs_no_rax
506 sti
507 jmp alltraps_pushregs_no_rax
508
/* 5: faulted at doreti_iret with user GS.base; 6: plain kernel fault. */
509 5: swapgs
510 6: lfence
511 movq PCPU(CURPCB),%rdi
512 jmp 4b
513
514 /*
515 * Fast syscall entry point. We enter here with just our new %cs/%ss set,
516 * and the new privilege level. We are still running on the old user stack
517 * pointer. We have to juggle a few things around to find our stack etc.
518 * swapgs gives us access to our PCPU space only.
519 *
520 * We do not support invoking this with custom segment registers,
521 * esp. %cs, %ss, %fs, %gs, e.g. using entries from an LDT.
522 */
523 SUPERALIGN_TEXT
524 IDTVEC(fast_syscall_pti)
/* PTI variant: switch to kernel page table first (UCR3 == ~0: no PTI). */
525 swapgs
526 lfence
527 cmpq $~0,PCPU(UCR3)
528 je fast_syscall_common
529 movq %rax,PCPU(SCRATCH_RAX)
530 movq PCPU(KCR3),%rax
531 movq %rax,%cr3
532 movq PCPU(SCRATCH_RAX),%rax
533 jmp fast_syscall_common
534 SUPERALIGN_TEXT
535 IDTVEC(fast_syscall)
536 swapgs
537 lfence
538 fast_syscall_common:
/* Switch from the user stack to the per-CPU kernel stack (RSP0). */
539 movq %rsp,PCPU(SCRATCH_RSP)
540 movq PCPU(RSP0),%rsp
541 /* Now emulate a trapframe. Make the 8 byte alignment odd for call. */
542 subq $TF_SIZE,%rsp
543 /* defer TF_RSP till we have a spare register */
544 movq %r11,TF_RFLAGS(%rsp)
545 movq %rcx,TF_RIP(%rsp) /* %rcx original value is in %r10 */
546 movq PCPU(SCRATCH_RSP),%r11 /* %r11 already saved */
547 movq %r11,TF_RSP(%rsp) /* user stack pointer */
548 /*
549 * Save a few arg registers early to free them for use in
550 * handle_ibrs_entry(). %r10 is especially tricky. It is not an
551 * arg register, but it holds the arg register %rcx. Profiling
552 * preserves %rcx, but may clobber %r10. Profiling may also
553 * clobber %r11, but %r11 (original %eflags) has been saved.
554 */
555 movq %rax,TF_RAX(%rsp) /* syscall number */
556 movq %rdx,TF_RDX(%rsp) /* arg 3 */
557 movq %r10,TF_RCX(%rsp) /* arg 4 */
558 SAVE_SEGS
559 call handle_ibrs_entry
560 movq PCPU(CURPCB),%r11
561 andl $~PCB_FULL_IRET,PCB_FLAGS(%r11)
562 sti
563 movq $KUDSEL,TF_SS(%rsp)
564 movq $KUCSEL,TF_CS(%rsp)
565 movq $2,TF_ERR(%rsp)
566 movq %rdi,TF_RDI(%rsp) /* arg 1 */
567 movq %rsi,TF_RSI(%rsp) /* arg 2 */
568 movq %r8,TF_R8(%rsp) /* arg 5 */
569 movq %r9,TF_R9(%rsp) /* arg 6 */
570 movq %rbx,TF_RBX(%rsp) /* C preserved */
571 movq %rbp,TF_RBP(%rsp) /* C preserved */
572 movq %r12,TF_R12(%rsp) /* C preserved */
573 movq %r13,TF_R13(%rsp) /* C preserved */
574 movq %r14,TF_R14(%rsp) /* C preserved */
575 movq %r15,TF_R15(%rsp) /* C preserved */
576 movl $TF_HASSEGS,TF_FLAGS(%rsp)
577 FAKE_MCOUNT(TF_RIP(%rsp))
578 movq PCPU(CURTHREAD),%rdi
579 movq %rsp,TD_FRAME(%rdi)
580 movl TF_RFLAGS(%rsp),%esi
581 andl $PSL_T,%esi
582 call amd64_syscall
583 1: movq PCPU(CURPCB),%rax
584 /* Disable interrupts before testing PCB_FULL_IRET. */
585 cli
586 testl $PCB_FULL_IRET,PCB_FLAGS(%rax)
587 jnz 4f
588 /* Check for and handle AST's on return to userland. */
589 movq PCPU(CURTHREAD),%rax
590 testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax)
591 jne 3f
592 call handle_ibrs_exit
593 callq *mds_handler
594 /* Restore preserved registers. */
595 MEXITCOUNT
596 movq TF_RDI(%rsp),%rdi /* bonus; preserve arg 1 */
597 movq TF_RSI(%rsp),%rsi /* bonus: preserve arg 2 */
598 movq TF_RDX(%rsp),%rdx /* return value 2 */
599 movq TF_RAX(%rsp),%rax /* return value 1 */
600 movq TF_RFLAGS(%rsp),%r11 /* original %rflags */
601 movq TF_RIP(%rsp),%rcx /* original %rip */
602 movq TF_RSP(%rsp),%rsp /* user stack pointer */
603 xorl %r8d,%r8d /* zero the rest of GPRs */
604 xorl %r10d,%r10d
/* Restore the user page table before sysretq when PTI is active. */
605 cmpq $~0,PCPU(UCR3)
606 je 2f
607 movq PCPU(UCR3),%r9
608 movq %r9,%cr3
609 2: xorl %r9d,%r9d
610 swapgs
611 sysretq
612
613 3: /* AST scheduled. */
614 sti
615 movq %rsp,%rdi
616 call ast
617 jmp 1b
618
619 4: /* Requested full context restore, use doreti for that. */
620 MEXITCOUNT
621 jmp doreti
622
623 /*
624 * Here for CYA insurance, in case a "syscall" instruction gets
625 * issued from 32 bit compatibility mode. MSR_CSTAR has to point
626 * to *something* if EFER_SCE is enabled.
627 */
628 IDTVEC(fast_syscall32)
629 sysret
630
631 /*
632 * DB# handler is very similar to NM#, because 'mov/pop %ss' delay
633 * generation of exception until the next instruction is executed,
634 * which might be a kernel entry. So we must execute the handler
635 * on IST stack and be ready for non-kernel GSBASE.
636 */
637 IDTVEC(dbg)
638 subq $TF_RIP,%rsp
639 movl $(T_TRCTRAP),TF_TRAPNO(%rsp)
640 movq $0,TF_ADDR(%rsp)
641 movq $0,TF_ERR(%rsp)
642 movq %rdi,TF_RDI(%rsp)
643 movq %rsi,TF_RSI(%rsp)
644 movq %rdx,TF_RDX(%rsp)
645 movq %rcx,TF_RCX(%rsp)
646 movq %r8,TF_R8(%rsp)
647 movq %r9,TF_R9(%rsp)
648 movq %rax,TF_RAX(%rsp)
649 movq %rbx,TF_RBX(%rsp)
650 movq %rbp,TF_RBP(%rsp)
651 movq %r10,TF_R10(%rsp)
652 movq %r11,TF_R11(%rsp)
653 movq %r12,TF_R12(%rsp)
654 movq %r13,TF_R13(%rsp)
655 movq %r14,TF_R14(%rsp)
656 movq %r15,TF_R15(%rsp)
657 SAVE_SEGS
658 movl $TF_HASSEGS,TF_FLAGS(%rsp)
659 pushfq
660 andq $~(PSL_D | PSL_AC),(%rsp)
661 popfq
662 testb $SEL_RPL_MASK,TF_CS(%rsp)
663 jnz dbg_fromuserspace
664 lfence
665 /*
666 * We've interrupted the kernel. Preserve GS.base in %r12,
667 * %cr3 in %r13, and possibly lower half of MSR_IA32_SPEC_CTL in %r14d.
668 */
669 movl $MSR_GSBASE,%ecx
670 rdmsr
671 movq %rax,%r12
672 shlq $32,%rdx
673 orq %rdx,%r12
674 /* Retrieve and load the canonical value for GS.base. */
/* The canonical GS.base is stored just above the IST frame (TF_SIZE). */
675 movq TF_SIZE(%rsp),%rdx
676 movl %edx,%eax
677 shrq $32,%rdx
678 wrmsr
679 movq %cr3,%r13
680 movq PCPU(KCR3),%rax
681 cmpq $~0,%rax
682 je 1f
683 movq %rax,%cr3
684 1: testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
685 je 2f
/* Preserve the low word of MSR_IA32_SPEC_CTRL for restore on exit. */
686 movl $MSR_IA32_SPEC_CTRL,%ecx
687 rdmsr
688 movl %eax,%r14d
689 call handle_ibrs_entry
690 2: FAKE_MCOUNT(TF_RIP(%rsp))
691 movq %rsp,%rdi
692 call trap
693 MEXITCOUNT
694 testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
695 je 3f
696 movl %r14d,%eax
697 xorl %edx,%edx
698 movl $MSR_IA32_SPEC_CTRL,%ecx
699 wrmsr
700 /*
701 * Put back the preserved MSR_GSBASE value.
702 */
703 3: movl $MSR_GSBASE,%ecx
704 movq %r12,%rdx
705 movl %edx,%eax
706 shrq $32,%rdx
707 wrmsr
708 movq %r13,%cr3
709 RESTORE_REGS
710 addq $TF_RIP,%rsp
711 jmp doreti_iret
712 dbg_fromuserspace:
713 /*
714 * Switch to kernel GSBASE and kernel page table, and copy frame
715 * from the IST stack to the normal kernel stack, since trap()
716 * re-enables interrupts, and since we might trap on DB# while
717 * in trap().
718 */
719 swapgs
720 lfence
721 movq PCPU(KCR3),%rax
722 cmpq $~0,%rax
723 je 1f
724 movq %rax,%cr3
725 1: movq PCPU(RSP0),%rax
726 movl $TF_SIZE,%ecx
727 subq %rcx,%rax
728 movq %rax,%rdi
729 movq %rsp,%rsi
730 rep;movsb
731 movq %rax,%rsp
732 call handle_ibrs_entry
733 movq PCPU(CURPCB),%rdi
734 orl $PCB_FULL_IRET,PCB_FLAGS(%rdi)
/* Capture user fs/gs base into the PCB, as on other user entries. */
735 testb $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
736 jz 3f
737 cmpw $KUF32SEL,TF_FS(%rsp)
738 jne 2f
739 rdfsbase %rax
740 movq %rax,PCB_FSBASE(%rdi)
741 2: cmpw $KUG32SEL,TF_GS(%rsp)
742 jne 3f
/* After swapgs, the user GS.base lives in MSR_KGSBASE. */
743 movl $MSR_KGSBASE,%ecx
744 rdmsr
745 shlq $32,%rdx
746 orq %rdx,%rax
747 movq %rax,PCB_GSBASE(%rdi)
748 3: jmp calltrap
749
750 /*
751 * NMI handling is special.
752 *
753 * First, NMIs do not respect the state of the processor's RFLAGS.IF
754 * bit. The NMI handler may be entered at any time, including when
755 * the processor is in a critical section with RFLAGS.IF == 0.
756 * The processor's GS.base value could be invalid on entry to the
757 * handler.
758 *
759 * Second, the processor treats NMIs specially, blocking further NMIs
760 * until an 'iretq' instruction is executed. We thus need to execute
761 * the NMI handler with interrupts disabled, to prevent a nested interrupt
762 * from executing an 'iretq' instruction and inadvertently taking the
763 * processor out of NMI mode.
764 *
765 * Third, the NMI handler runs on its own stack (tss_ist2). The canonical
766 * GS.base value for the processor is stored just above the bottom of its
767 * NMI stack. For NMIs taken from kernel mode, the current value in
768 * the processor's GS.base is saved at entry to C-preserved register %r12,
769 * the canonical value for GS.base is then loaded into the processor, and
770 * the saved value is restored at exit time. For NMIs taken from user mode,
771 * the cheaper 'SWAPGS' instructions are used for swapping GS.base.
772 */
773
774 IDTVEC(nmi)
775 subq $TF_RIP,%rsp
776 movl $(T_NMI),TF_TRAPNO(%rsp)
777 movq $0,TF_ADDR(%rsp)
778 movq $0,TF_ERR(%rsp)
779 movq %rdi,TF_RDI(%rsp)
780 movq %rsi,TF_RSI(%rsp)
781 movq %rdx,TF_RDX(%rsp)
782 movq %rcx,TF_RCX(%rsp)
783 movq %r8,TF_R8(%rsp)
784 movq %r9,TF_R9(%rsp)
785 movq %rax,TF_RAX(%rsp)
786 movq %rbx,TF_RBX(%rsp)
787 movq %rbp,TF_RBP(%rsp)
788 movq %r10,TF_R10(%rsp)
789 movq %r11,TF_R11(%rsp)
790 movq %r12,TF_R12(%rsp)
791 movq %r13,TF_R13(%rsp)
792 movq %r14,TF_R14(%rsp)
793 movq %r15,TF_R15(%rsp)
794 SAVE_SEGS
795 movl $TF_HASSEGS,TF_FLAGS(%rsp)
796 pushfq
797 andq $~(PSL_D | PSL_AC),(%rsp)
798 popfq
/* %ebx records the origin: 0 = kernel, nonzero = userland (see exit). */
799 xorl %ebx,%ebx
800 testb $SEL_RPL_MASK,TF_CS(%rsp)
801 jnz nmi_fromuserspace
802 /*
803 * We've interrupted the kernel. Preserve GS.base in %r12,
804 * %cr3 in %r13, and possibly lower half of MSR_IA32_SPEC_CTL in %r14d.
805 */
806 lfence
807 movl $MSR_GSBASE,%ecx
808 rdmsr
809 movq %rax,%r12
810 shlq $32,%rdx
811 orq %rdx,%r12
812 /* Retrieve and load the canonical value for GS.base. */
/* The canonical GS.base is stored just above the NMI IST frame. */
813 movq TF_SIZE(%rsp),%rdx
814 movl %edx,%eax
815 shrq $32,%rdx
816 wrmsr
817 movq %cr3,%r13
818 movq PCPU(KCR3),%rax
819 cmpq $~0,%rax
820 je 1f
821 movq %rax,%cr3
822 1: testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
823 je nmi_calltrap
824 movl $MSR_IA32_SPEC_CTRL,%ecx
825 rdmsr
826 movl %eax,%r14d
827 call handle_ibrs_entry
828 jmp nmi_calltrap
829 nmi_fromuserspace:
830 incl %ebx
831 swapgs
832 lfence
833 movq %cr3,%r13
834 movq PCPU(KCR3),%rax
835 cmpq $~0,%rax
836 je 1f
837 movq %rax,%cr3
838 1: call handle_ibrs_entry
839 movq PCPU(CURPCB),%rdi
/* curpcb may be NULL very early in boot; skip the PCB updates then. */
840 testq %rdi,%rdi
841 jz 3f
842 orl $PCB_FULL_IRET,PCB_FLAGS(%rdi)
843 testb $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
844 jz 3f
845 cmpw $KUF32SEL,TF_FS(%rsp)
846 jne 2f
847 rdfsbase %rax
848 movq %rax,PCB_FSBASE(%rdi)
849 2: cmpw $KUG32SEL,TF_GS(%rsp)
850 jne 3f
851 movl $MSR_KGSBASE,%ecx
852 rdmsr
853 shlq $32,%rdx
854 orq %rdx,%rax
855 movq %rax,PCB_GSBASE(%rdi)
856 3:
857 /* Note: this label is also used by ddb and gdb: */
858 nmi_calltrap:
859 FAKE_MCOUNT(TF_RIP(%rsp))
860 movq %rsp,%rdi
861 call trap
862 MEXITCOUNT
863 #ifdef HWPMC_HOOKS
864 /*
865 * Capture a userspace callchain if needed.
866 *
867 * - Check if the current trap was from user mode.
868 * - Check if the current thread is valid.
869 * - Check if the thread requires a user call chain to be
870 * captured.
871 *
872 * We are still in NMI mode at this point.
873 */
874 testl %ebx,%ebx
875 jz nocallchain /* not from userspace */
876 movq PCPU(CURTHREAD),%rax
877 orq %rax,%rax /* curthread present? */
878 jz nocallchain
879 /*
880 * Move execution to the regular kernel stack, because we
881 * committed to return through doreti.
882 */
883 movq %rsp,%rsi /* source stack pointer */
884 movq $TF_SIZE,%rcx
885 movq PCPU(RSP0),%rdx
886 subq %rcx,%rdx
887 movq %rdx,%rdi /* destination stack pointer */
888 shrq $3,%rcx /* trap frame size in long words */
/* Clear DF so the rep movsq below copies forward. */
889 pushfq
890 andq $~(PSL_D | PSL_AC),(%rsp)
891 popfq
892 rep
893 movsq /* copy trapframe */
894 movq %rdx,%rsp /* we are on the regular kstack */
895
896 testl $TDP_CALLCHAIN,TD_PFLAGS(%rax) /* flagged for capture? */
897 jz nocallchain
898 /*
899 * A user callchain is to be captured, so:
900 * - Take the processor out of "NMI" mode by faking an "iret",
901 * to allow for nested NMI interrupts.
902 * - Enable interrupts, so that copyin() can work.
903 */
904 movl %ss,%eax
905 pushq %rax /* tf_ss */
906 pushq %rdx /* tf_rsp (on kernel stack) */
907 pushfq /* tf_rflags */
908 movl %cs,%eax
909 pushq %rax /* tf_cs */
910 pushq $outofnmi /* tf_rip */
911 iretq
912 outofnmi:
913 /*
914 * At this point the processor has exited NMI mode and is running
915 * with interrupts turned off on the normal kernel stack.
916 *
917 * If a pending NMI gets recognized at or after this point, it
918 * will cause a kernel callchain to be traced.
919 *
920 * We turn interrupts back on, and call the user callchain capture hook.
921 */
922 movq pmc_hook,%rax
923 orq %rax,%rax
924 jz nocallchain
925 movq PCPU(CURTHREAD),%rdi /* thread */
926 movq $PMC_FN_USER_CALLCHAIN,%rsi /* command */
927 movq %rsp,%rdx /* frame */
928 sti
929 call *%rax
930 cli
931 nocallchain:
932 #endif
933 testl %ebx,%ebx /* %ebx != 0 => return to userland via doreti */
934 jnz doreti_exit
935 /*
936 * Restore speculation control MSR, if preserved.
937 */
938 testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
939 je 1f
940 movl %r14d,%eax
941 xorl %edx,%edx
942 movl $MSR_IA32_SPEC_CTRL,%ecx
943 wrmsr
944 /*
945 * Put back the preserved MSR_GSBASE value.
946 */
947 1: movl $MSR_GSBASE,%ecx
948 movq %r12,%rdx
949 movl %edx,%eax
950 shrq $32,%rdx
951 wrmsr
952 cmpb $0, nmi_flush_l1d_sw(%rip)
953 je 2f
954 call flush_l1d_sw /* bhyve L1TF assist */
955 2: movq %r13,%cr3
956 RESTORE_REGS
957 addq $TF_RIP,%rsp
958 jmp doreti_iret
959
960 /*
961 * MC# handling is similar to NMI.
962 *
963 * As with NMIs, machine check exceptions do not respect RFLAGS.IF and
964 * can occur at any time with a GS.base value that does not correspond
965 * to the privilege level in CS.
966 *
967 * Machine checks are not unblocked by iretq, but it is best to run
968 * the handler with interrupts disabled since the exception may have
969 * interrupted a critical section.
970 *
971 * The MC# handler runs on its own stack (tss_ist3). The canonical
972 * GS.base value for the processor is stored just above the bottom of
973 * its MC# stack. For exceptions taken from kernel mode, the current
974 * value in the processor's GS.base is saved at entry to C-preserved
975 * register %r12, the canonical value for GS.base is then loaded into
976 * the processor, and the saved value is restored at exit time. For
977 * exceptions taken from user mode, the cheaper 'SWAPGS' instructions
978 * are used for swapping GS.base.
979 */
980
981 IDTVEC(mchk)
982 subq $TF_RIP,%rsp
983 movl $(T_MCHK),TF_TRAPNO(%rsp)
984 movq $0,TF_ADDR(%rsp)
985 movq $0,TF_ERR(%rsp)
986 movq %rdi,TF_RDI(%rsp)
987 movq %rsi,TF_RSI(%rsp)
988 movq %rdx,TF_RDX(%rsp)
989 movq %rcx,TF_RCX(%rsp)
990 movq %r8,TF_R8(%rsp)
991 movq %r9,TF_R9(%rsp)
992 movq %rax,TF_RAX(%rsp)
993 movq %rbx,TF_RBX(%rsp)
994 movq %rbp,TF_RBP(%rsp)
995 movq %r10,TF_R10(%rsp)
996 movq %r11,TF_R11(%rsp)
997 movq %r12,TF_R12(%rsp)
998 movq %r13,TF_R13(%rsp)
999 movq %r14,TF_R14(%rsp)
1000 movq %r15,TF_R15(%rsp)
1001 SAVE_SEGS
1002 movl $TF_HASSEGS,TF_FLAGS(%rsp)
1003 pushfq
1004 andq $~(PSL_D | PSL_AC),(%rsp)
1005 popfq
/* %ebx records the origin: 0 = kernel, nonzero = userland (see exit). */
1006 xorl %ebx,%ebx
1007 testb $SEL_RPL_MASK,TF_CS(%rsp)
1008 jnz mchk_fromuserspace
1009 /*
1010 * We've interrupted the kernel. Preserve GS.base in %r12,
1011 * %cr3 in %r13, and possibly lower half of MSR_IA32_SPEC_CTL in %r14d.
1012 */
1013 movl $MSR_GSBASE,%ecx
1014 rdmsr
1015 movq %rax,%r12
1016 shlq $32,%rdx
1017 orq %rdx,%r12
1018 /* Retrieve and load the canonical value for GS.base. */
/* The canonical GS.base is stored just above the MC# IST frame. */
1019 movq TF_SIZE(%rsp),%rdx
1020 movl %edx,%eax
1021 shrq $32,%rdx
1022 wrmsr
1023 movq %cr3,%r13
1024 movq PCPU(KCR3),%rax
1025 cmpq $~0,%rax
1026 je 1f
1027 movq %rax,%cr3
1028 1: testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
1029 je mchk_calltrap
1030 movl $MSR_IA32_SPEC_CTRL,%ecx
1031 rdmsr
1032 movl %eax,%r14d
1033 call handle_ibrs_entry
1034 jmp mchk_calltrap
1035 mchk_fromuserspace:
1036 incl %ebx
1037 swapgs
1038 movq %cr3,%r13
1039 movq PCPU(KCR3),%rax
1040 cmpq $~0,%rax
1041 je 1f
1042 movq %rax,%cr3
1043 1: call handle_ibrs_entry
1044 /* Note: this label is also used by ddb and gdb: */
1045 mchk_calltrap:
1046 FAKE_MCOUNT(TF_RIP(%rsp))
1047 movq %rsp,%rdi
1048 call mca_intr
1049 MEXITCOUNT
1050 testl %ebx,%ebx /* %ebx != 0 => return to userland via doreti */
1051 jnz doreti_exit
1052 /*
1053 * Restore speculation control MSR, if preserved.
1054 */
1055 testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
1056 je 1f
1057 movl %r14d,%eax
1058 xorl %edx,%edx
1059 movl $MSR_IA32_SPEC_CTRL,%ecx
1060 wrmsr
1061 /*
1062 * Put back the preserved MSR_GSBASE value.
1063 */
1064 1: movl $MSR_GSBASE,%ecx
1065 movq %r12,%rdx
1066 movl %edx,%eax
1067 shrq $32,%rdx
1068 wrmsr
1069 movq %r13,%cr3
1070 RESTORE_REGS
1071 addq $TF_RIP,%rsp
1072 jmp doreti_iret
1073
/*
 * First instructions a newly forked/created kernel thread runs.
 * cpu_fork() arranged %r12 = callout function, %rbx = its argument,
 * and %rsp = the trapframe; hand these to fork_exit() and return
 * to usermode through doreti.
 */
1074 ENTRY(fork_trampoline)
1075 movq %r12,%rdi /* function */
1076 movq %rbx,%rsi /* arg1 */
1077 movq %rsp,%rdx /* trapframe pointer */
1078 call fork_exit
1079 MEXITCOUNT
1080 jmp doreti /* Handle any ASTs */
1081
1082 /*
1083 * To efficiently implement classification of trap and interrupt handlers
1084 * for profiling, there must be only trap handlers between the labels btrap
1085 * and bintr, and only interrupt handlers between the labels bintr and
1086 * eintr. This is implemented (partly) by including files that contain
1087 * some of the handlers. Before including the files, set up a normal asm
1088 * environment so that the included files don't need to know that they are
1089 * included.
1090 */
1091
1092 #ifdef COMPAT_FREEBSD32
1093 .data
1094 .p2align 4
1095 .text
1096 SUPERALIGN_TEXT
1097
1098 #include <amd64/ia32/ia32_exception.S>
1099 #endif
1100
1101 .data
1102 .p2align 4
1103 .text
1104 SUPERALIGN_TEXT
1105 MCOUNT_LABEL(bintr)
1106
1107 #include <amd64/amd64/apic_vector.S>
1108
1109 #ifdef DEV_ATPIC
1110 .data
1111 .p2align 4
1112 .text
1113 SUPERALIGN_TEXT
1114
1115 #include <amd64/amd64/atpic_vector.S>
1116 #endif
1117
1118 .text
1119 MCOUNT_LABEL(eintr)
1120
1121 /*
1122 * void doreti(struct trapframe)
1123 *
1124 * Handle return from interrupts, traps and syscalls.
1125 */
1126 .text
1127 SUPERALIGN_TEXT
1128 .type doreti,@function
1129 .globl doreti
1130 doreti:
1131 FAKE_MCOUNT($bintr) /* init "from" bintr -> doreti */
1132 /*
1133 * Check if ASTs can be handled now.
1134 */
1135 testb $SEL_RPL_MASK,TF_CS(%rsp) /* are we returning to user mode? */
1136 jz doreti_exit /* can't handle ASTs now if not */
1137
/*
 * AST loop: keep servicing ASTs until none are pending, then fall
 * out to doreti_exit with interrupts disabled.
 */
1138 doreti_ast:
1139 /*
1140 * Check for ASTs atomically with returning. Disabling CPU
1141 * interrupts provides sufficient locking even in the SMP case,
1142 * since we will be informed of any new ASTs by an IPI.
1143 */
1144 cli
1145 movq PCPU(CURTHREAD),%rax
1146 testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax)
/* No AST pending: proceed to the exit path, interrupts still off. */
1147 je doreti_exit
1148 sti
1149 movq %rsp,%rdi /* pass a pointer to the trapframe */
1150 call ast
/* ast() may have generated new work; re-check under cli. */
1151 jmp doreti_ast
1152
1153 /*
1154 * doreti_exit: pop registers, iret.
1155 *
1156 * The segment register pop is a special case, since it may
1157 * fault if (for example) a sigreturn specifies bad segment
1158 * registers. The fault is handled in trap.c.
1159 */
1160 doreti_exit:
1161 MEXITCOUNT
1162 movq PCPU(CURPCB),%r8
1163
1164 /*
1165 * Do not reload segment registers for kernel.
1166 * Since we do not reload segments registers with sane
1167 * values on kernel entry, descriptors referenced by
1168 * segments registers might be not valid. This is fatal
1169 * for user mode, but is not a problem for the kernel.
1170 */
1171 testb $SEL_RPL_MASK,TF_CS(%rsp)
1172 jz ld_regs
/* Full segment reload is requested per-thread via PCB_FULL_IRET. */
1173 testl $PCB_FULL_IRET,PCB_FLAGS(%r8)
1174 jz ld_regs
1175 andl $~PCB_FULL_IRET,PCB_FLAGS(%r8)
/* If the frame carries no segment values, synthesize defaults first. */
1176 testl $TF_HASSEGS,TF_FLAGS(%rsp)
1177 je set_segs
1178
1179 do_segs:
1180 /* Restore %fs and fsbase */
/*
 * The ld_* labels below are known to trap.c: a fault on any of
 * these instructions resumes at the corresponding *_load_fault
 * recovery stub further down.
 */
1181 movw TF_FS(%rsp),%ax
1182 .globl ld_fs
1183 ld_fs:
1184 movw %ax,%fs
/* Only reload MSR_FSBASE for the 32-bit user %fs selector. */
1185 cmpw $KUF32SEL,%ax
1186 jne 1f
1187 movl $MSR_FSBASE,%ecx
1188 movl PCB_FSBASE(%r8),%eax
1189 movl PCB_FSBASE+4(%r8),%edx
1190 .globl ld_fsbase
1191 ld_fsbase:
1192 wrmsr
1193 1:
1194 /* Restore %gs and gsbase */
/*
 * Loading %gs clobbers GS.base, which PCPU access depends on, so
 * do the dance with interrupts disabled: save the kernel base,
 * load %gs, capture the base the descriptor installed, then put
 * the kernel base back before re-enabling interrupts via popfq.
 */
1195 movw TF_GS(%rsp),%si
1196 pushfq
1197 cli
1198 movl $MSR_GSBASE,%ecx
1199 /* Save current kernel %gs base into %r12d:%r13d */
1200 rdmsr
1201 movl %eax,%r12d
1202 movl %edx,%r13d
1203 .globl ld_gs
1204 ld_gs:
1205 movw %si,%gs
1206 /* Save user %gs base into %r14d:%r15d */
1207 rdmsr
1208 movl %eax,%r14d
1209 movl %edx,%r15d
1210 /* Restore kernel %gs base */
1211 movl %r12d,%eax
1212 movl %r13d,%edx
1213 wrmsr
1214 popfq
1215 /*
1216 * Restore user %gs base, either from PCB if used for TLS, or
1217 * from the previously saved msr read.
1218 */
/* MSR_KGSBASE becomes the user GS.base after the exit swapgs. */
1219 movl $MSR_KGSBASE,%ecx
1220 cmpw $KUG32SEL,%si
1221 jne 1f
1222 movl PCB_GSBASE(%r8),%eax
1223 movl PCB_GSBASE+4(%r8),%edx
1224 jmp ld_gsbase
1225 1:
1226 movl %r14d,%eax
1227 movl %r15d,%edx
1228 .globl ld_gsbase
1229 ld_gsbase:
1230 wrmsr /* May trap if non-canonical, but only for TLS. */
1231 .globl ld_es
1232 ld_es:
1233 movw TF_ES(%rsp),%es
1234 .globl ld_ds
1235 ld_ds:
1236 movw TF_DS(%rsp),%ds
1237 ld_regs:
1238 RESTORE_REGS
1239 testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
1240 jz 2f /* keep running with kernel GS.base */
/*
 * Returning to user mode: run the speculative-execution exit
 * mitigations with interrupts off, then (under PTI) bounce through
 * the per-CPU trampoline stack so the final iretq executes on the
 * user page tables.
 */
1241 cli
1242 call handle_ibrs_exit_rs
1243 callq *mds_handler
/* PCPU UCR3 == ~0 means PTI is disabled; plain swapgs+iret then. */
1244 cmpq $~0,PCPU(UCR3)
1245 je 1f
/*
 * Copy the hardware iret frame plus scratch %rax/%rdx onto the
 * PTI trampoline stack.  Note the shuffle: the caller's %rdx is
 * pushed, %rax is stored, then the popq reloads that saved %rdx
 * value into %rax so it can be stored as PTI_RDX.
 */
1246 pushq %rdx
1247 movq PCPU(PTI_RSP0),%rdx
1248 subq $PTI_SIZE,%rdx
1249 movq %rax,PTI_RAX(%rdx)
1250 popq %rax
1251 movq %rax,PTI_RDX(%rdx)
1252 movq TF_RIP(%rsp),%rax
1253 movq %rax,PTI_RIP(%rdx)
1254 movq TF_CS(%rsp),%rax
1255 movq %rax,PTI_CS(%rdx)
1256 movq TF_RFLAGS(%rsp),%rax
1257 movq %rax,PTI_RFLAGS(%rdx)
1258 movq TF_RSP(%rsp),%rax
1259 movq %rax,PTI_RSP(%rdx)
1260 movq TF_SS(%rsp),%rax
1261 movq %rax,PTI_SS(%rdx)
/* Switch to user GS.base, the trampoline stack, and user %cr3. */
1262 movq PCPU(UCR3),%rax
1263 swapgs
1264 movq %rdx,%rsp
1265 movq %rax,%cr3
/* Recover the scratch registers and skip the err/trapno slot. */
1266 popq %rdx
1267 popq %rax
1268 addq $8,%rsp
1269 jmp doreti_iret
1270 1: swapgs
1271 2: addq $TF_RIP,%rsp
/* This iretq may fault; trap.c resumes at doreti_iret_fault below. */
1272 .globl doreti_iret
1273 doreti_iret:
1274 iretq
1275
/*
 * set_segs: the trapframe carries no segment values (TF_HASSEGS
 * clear), so seed it with the default user data/code selectors
 * before retrying the full segment reload in do_segs.
 */
1276 set_segs:
1277 movw $KUDSEL,%ax
1278 movw %ax,TF_DS(%rsp)
1279 movw %ax,TF_ES(%rsp)
1280 movw $KUF32SEL,TF_FS(%rsp)
1281 movw $KUG32SEL,TF_GS(%rsp)
1282 jmp do_segs
1283
1284 /*
1285 * doreti_iret_fault. Alternative return code for
1286 * the case where we get a fault in the doreti_exit code
1287 * above. trap() (amd64/amd64/trap.c) catches this specific
1288 * case, sends the process a signal and continues in the
1289 * corresponding place in the code below.
1290 */
1291 ALIGN_TEXT
1292 .globl doreti_iret_fault
1293 doreti_iret_fault:
/*
 * Rebuild a full trapframe describing the faulted iretq.  %rsp
 * currently points at the hardware error-code frame, so reserve
 * the rest of the trapframe and save the scratch registers the
 * entry mitigation call below is free to clobber.
 */
1294 subq $TF_RIP,%rsp /* space including tf_err, tf_trapno */
1295 movq %rax,TF_RAX(%rsp)
1296 movq %rdx,TF_RDX(%rsp)
1297 movq %rcx,TF_RCX(%rsp)
1298 call handle_ibrs_entry
/* Re-enable interrupts only if the faulting frame was user mode. */
1299 testb $SEL_RPL_MASK,TF_CS(%rsp)
1300 jz 1f
1301 sti
1302 1:
1303 SAVE_SEGS
1304 movl $TF_HASSEGS,TF_FLAGS(%rsp)
/* Save the remaining GP registers into the reconstructed frame. */
1305 movq %rdi,TF_RDI(%rsp)
1306 movq %rsi,TF_RSI(%rsp)
1307 movq %r8,TF_R8(%rsp)
1308 movq %r9,TF_R9(%rsp)
1309 movq %rbx,TF_RBX(%rsp)
1310 movq %rbp,TF_RBP(%rsp)
1311 movq %r10,TF_R10(%rsp)
1312 movq %r11,TF_R11(%rsp)
1313 movq %r12,TF_R12(%rsp)
1314 movq %r13,TF_R13(%rsp)
1315 movq %r14,TF_R14(%rsp)
1316 movq %r15,TF_R15(%rsp)
/* Present the event to trap() as a protection fault. */
1317 movl $T_PROTFLT,TF_TRAPNO(%rsp)
1318 movq $0,TF_ERR(%rsp) /* XXX should be the error code */
1319 movq $0,TF_ADDR(%rsp)
1320 FAKE_MCOUNT(TF_RIP(%rsp))
1321 jmp calltrap
1322
/*
 * ds_load_fault: recovery stub entered from trap() when the %ds
 * reload at ld_ds in doreti_exit faulted (e.g. a sigreturn(2)
 * installed a bogus selector).  Report the event as T_PROTFLT so
 * trap() can post a signal, then retry the user return with the
 * known-good default user data selector.
 */
1323 ALIGN_TEXT
1324 .globl ds_load_fault
1325 ds_load_fault:
1326 movl $T_PROTFLT,TF_TRAPNO(%rsp)
/*
 * Re-enable interrupts iff the interrupted context had them
 * enabled: test PSL_I in the saved %rflags, as all the sibling
 * *_load_fault handlers below do.  The previous test of
 * SEL_RPL_MASK in TF_CS was inconsistent with them.
 */
1327 testl $PSL_I,TF_RFLAGS(%rsp)
1328 jz 1f
1329 sti
1330 1:
1331 movq %rsp,%rdi
1332 call trap
1333 movw $KUDSEL,TF_DS(%rsp)
1334 jmp doreti
1335
/*
 * es_load_fault: recovery stub entered from trap() when the %es
 * reload at ld_es faulted.  Report T_PROTFLT, let trap() post a
 * signal, then retry with the default user data selector.
 */
1336 ALIGN_TEXT
1337 .globl es_load_fault
1338 es_load_fault:
1339 movl $T_PROTFLT,TF_TRAPNO(%rsp)
/* Re-enable interrupts iff the faulting context had them enabled. */
1340 testl $PSL_I,TF_RFLAGS(%rsp)
1341 jz 1f
1342 sti
1343 1:
1344 movq %rsp,%rdi
1345 call trap
1346 movw $KUDSEL,TF_ES(%rsp)
1347 jmp doreti
1348
/*
 * fs_load_fault: recovery stub entered from trap() when the %fs
 * reload at ld_fs faulted.  Report T_PROTFLT, let trap() post a
 * signal, then retry with the default 32-bit user %fs selector.
 */
1349 ALIGN_TEXT
1350 .globl fs_load_fault
1351 fs_load_fault:
/* Re-enable interrupts iff the faulting context had them enabled. */
1352 testl $PSL_I,TF_RFLAGS(%rsp)
1353 jz 1f
1354 sti
1355 1:
1356 movl $T_PROTFLT,TF_TRAPNO(%rsp)
1357 movq %rsp,%rdi
1358 call trap
1359 movw $KUF32SEL,TF_FS(%rsp)
1360 jmp doreti
1361
/*
 * gs_load_fault: recovery stub entered from trap() when the %gs
 * reload at ld_gs faulted.  The faulting sequence ran between the
 * pushfq/popfq pair in doreti_exit, so the saved flags word is
 * presumably still on the stack — the popfq below balances it
 * (confirm against trap()'s fixup).  Then report T_PROTFLT and
 * retry with the default 32-bit user %gs selector.
 */
1362 ALIGN_TEXT
1363 .globl gs_load_fault
1364 gs_load_fault:
1365 popfq
1366 movl $T_PROTFLT,TF_TRAPNO(%rsp)
/* Re-enable interrupts iff the faulting context had them enabled. */
1367 testl $PSL_I,TF_RFLAGS(%rsp)
1368 jz 1f
1369 sti
1370 1:
1371 movq %rsp,%rdi
1372 call trap
1373 movw $KUG32SEL,TF_GS(%rsp)
1374 jmp doreti
1375
/*
 * fsbase_load_fault: recovery stub entered from trap() when the
 * wrmsr at ld_fsbase faulted (non-canonical FS base).  Report
 * T_PROTFLT, then zero the thread's saved PCB_FSBASE so the next
 * return to user mode does not fault again.
 */
1376 ALIGN_TEXT
1377 .globl fsbase_load_fault
1378 fsbase_load_fault:
1379 movl $T_PROTFLT,TF_TRAPNO(%rsp)
/* Re-enable interrupts iff the faulting context had them enabled. */
1380 testl $PSL_I,TF_RFLAGS(%rsp)
1381 jz 1f
1382 sti
1383 1:
1384 movq %rsp,%rdi
1385 call trap
1386 movq PCPU(CURTHREAD),%r8
1387 movq TD_PCB(%r8),%r8
1388 movq $0,PCB_FSBASE(%r8)
1389 jmp doreti
1390
/*
 * gsbase_load_fault: recovery stub entered from trap() when the
 * wrmsr at ld_gsbase faulted (non-canonical GS base).  Report
 * T_PROTFLT, then zero the thread's saved PCB_GSBASE so the next
 * return to user mode does not fault again.
 */
1391 ALIGN_TEXT
1392 .globl gsbase_load_fault
1393 gsbase_load_fault:
1394 movl $T_PROTFLT,TF_TRAPNO(%rsp)
/* Re-enable interrupts iff the faulting context had them enabled. */
1395 testl $PSL_I,TF_RFLAGS(%rsp)
1396 jz 1f
1397 sti
1398 1:
1399 movq %rsp,%rdi
1400 call trap
1401 movq PCPU(CURTHREAD),%r8
1402 movq TD_PCB(%r8),%r8
1403 movq $0,PCB_GSBASE(%r8)
1404 jmp doreti
1405
/*
 * Marker label used by hwpmc(4) to classify PC samples: it bounds
 * the end of the exception-handler text (paired with a matching
 * start marker outside this window).
 */
1406 #ifdef HWPMC_HOOKS
1407 ENTRY(end_exceptions)
1408 #endif
/* Cache object: a9ff8071a46b138239fe53af256f9834 */