/*	$OpenBSD: locore.S,v 1.132 2023/01/20 16:01:04 deraadt Exp $	*/
/*	$NetBSD: locore.S,v 1.13 2004/03/25 18:33:17 drochner Exp $	*/

/*
 * Copyright-o-rama!
 */

/*
 * Copyright (c) 2001 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Frank van der Linden for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */


/*-
 * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Charles M. Hannum.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)locore.s	7.3 (Berkeley) 5/13/91
 */
106
107 #include "assym.h"
108 #include "efi.h"
109 #include "lapic.h"
110 #include "ksyms.h"
111 #include "xen.h"
112 #include "hyperv.h"
113
114 #include <sys/syscall.h>
115
116 #include <machine/param.h>
117 #include <machine/codepatch.h>
118 #include <machine/psl.h>
119 #include <machine/segments.h>
120 #include <machine/specialreg.h>
121 #include <machine/trap.h> /* T_PROTFLT */
122 #include <machine/frameasm.h>
123
124 #if NLAPIC > 0
125 #include <machine/i82489reg.h>
126 #endif
127
128 /*
129 * override user-land alignment before including asm.h
130 */
131 #define ALIGN_DATA .align 8,0xcc
132
133 #include <machine/asm.h>
134
135 #define SET_CURPROC(proc,cpu) \
136 movq CPUVAR(SELF),cpu ; \
137 movq proc,CPUVAR(CURPROC) ; \
138 movq cpu,P_CPU(proc)
139
140 #define GET_CURPCB(reg) movq CPUVAR(CURPCB),reg
141 #define SET_CURPCB(reg) movq reg,CPUVAR(CURPCB)
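
/*
 * CPUVAR(x) (defined elsewhere in the machine headers) expands to a
 * %gs-relative reference to field x of the current CPU's cpu_info, so
 * the macros above read and write per-CPU state without first loading
 * its address into a register.
 */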


/*
 * Initialization
 */
	.data

#if NLAPIC > 0
	.align	NBPG, 0xcc
	.globl	local_apic, lapic_id, lapic_tpr
local_apic:
	.space	LAPIC_ID
lapic_id:
	.long	0x00000000
	.space	LAPIC_TPRI-(LAPIC_ID+4)
lapic_tpr:
	.space	LAPIC_PPRI-LAPIC_TPRI
lapic_ppr:
	.space	LAPIC_ISR-LAPIC_PPRI
lapic_isr:
	.space	NBPG-LAPIC_ISR
#endif
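
/*
 * The page above is expected to be remapped at boot so that these
 * labels overlay the local APIC's register window; lapic_id, lapic_tpr,
 * etc. then address the hardware registers at the offsets (LAPIC_ID,
 * LAPIC_TPRI, ...) baked into the layout above.
 */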

/*****************************************************************************/

/*
 * Signal trampoline; copied to a page mapped into userspace.
 * gdb's backtrace logic matches against the instructions in this.
 */
	.section .rodata
	.globl	sigcode
sigcode:
	call	1f
	movq	%rsp,%rdi
	pushq	%rdi			/* fake return address */
	movq	$SYS_sigreturn,%rax
	syscall
	.globl	sigcoderet
sigcoderet:
	movq	$SYS_exit,%rax
	syscall
	_ALIGN_TRAPS
1:	JMP_RETPOLINE(rax)
	.globl	esigcode
esigcode:

	.globl	sigfill
sigfill:
	int3
esigfill:
	.globl	sigfillsiz
sigfillsiz:
	.long	esigfill - sigfill
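
/*
 * sigfill above is the fill pattern: the rest of the trampoline page
 * is presumably padded with int3 so that a stray jump into it traps
 * rather than executing anything useful.
 */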

	.text
/*
 * void lgdt(struct region_descriptor *rdp);
 * Change the global descriptor table.
 */
NENTRY(lgdt)
	RETGUARD_SETUP(lgdt, r11)
	/* Reload the descriptor table. */
	movq	%rdi,%rax
	lgdt	(%rax)
	/* Flush the prefetch q. */
	jmp	1f
	nop
1:	/* Reload "stale" selectors. */
	movl	$GSEL(GDATA_SEL, SEL_KPL),%eax
	movl	%eax,%ds
	movl	%eax,%es
	movl	%eax,%ss
	/* Reload code selector by doing intersegment return. */
	popq	%rax
	pushq	$GSEL(GCODE_SEL, SEL_KPL)
	pushq	%rax
	RETGUARD_CHECK(lgdt, r11)
	lretq
END(lgdt)

#if defined(DDB) || NEFI > 0
ENTRY(setjmp)
	RETGUARD_SETUP(setjmp, r11)
	/*
	 * Only save registers that must be preserved across function
	 * calls according to the ABI (%rbx, %rsp, %rbp, %r12-%r15)
	 * and %rip.
	 */
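	/*
	 * Buffer layout (8 bytes per slot, derived from the stores below):
	 *   0 %rbx, 8 %rsp, 16 %rbp, 24 %r12, 32 %r13, 40 %r14,
	 *   48 %r15, 56 return %rip; longjmp below reloads the same slots.
	 */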
	movq	%rdi,%rax
	movq	%rbx,(%rax)
	movq	%rsp,8(%rax)
	movq	%rbp,16(%rax)
	movq	%r12,24(%rax)
	movq	%r13,32(%rax)
	movq	%r14,40(%rax)
	movq	%r15,48(%rax)
	movq	(%rsp),%rdx
	movq	%rdx,56(%rax)
	xorl	%eax,%eax
	RETGUARD_CHECK(setjmp, r11)
	ret
	lfence
END(setjmp)

ENTRY(longjmp)
	movq	%rdi,%rax
	movq	8(%rax),%rsp
	movq	56(%rax),%rdx
	movq	%rdx,(%rsp)
	RETGUARD_SETUP(longjmp, r11)
	movq	(%rax),%rbx
	movq	16(%rax),%rbp
	movq	24(%rax),%r12
	movq	32(%rax),%r13
	movq	40(%rax),%r14
	movq	48(%rax),%r15
	xorl	%eax,%eax
	incl	%eax
	RETGUARD_CHECK(longjmp, r11)
	ret
	lfence
END(longjmp)
#endif /* DDB || NEFI > 0 */

/*****************************************************************************/

/*
 * int cpu_switchto(struct proc *old, struct proc *new)
 * Switch from "old" proc to "new".
 */
ENTRY(cpu_switchto)
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

	movq	%rdi, %r13
	movq	%rsi, %r12

	/* Record new proc. */
	movb	$SONPROC,P_STAT(%r12)	# p->p_stat = SONPROC
	SET_CURPROC(%r12,%rcx)

	movl	CPUVAR(CPUID),%r9d

	/* for the FPU/"extended CPU state" handling below */
	movq	xsave_mask(%rip),%rdx
	movl	%edx,%eax
	shrq	$32,%rdx
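	/* xsave/xrstor expect the 64-bit feature mask split across %edx:%eax */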

	/* If old proc exited, don't bother. */
	xorl	%ecx,%ecx
	testq	%r13,%r13
	jz	switch_exited

	/*
	 * Save old context.
	 *
	 * Registers:
	 *   %rax - scratch
	 *   %r13 - old proc, then old pcb
	 *   %rcx - old pmap if not P_SYSTEM
	 *   %r12 - new proc
	 *   %r9d - cpuid
	 */

	/* remember the pmap if not P_SYSTEM */
	testl	$P_SYSTEM,P_FLAG(%r13)
	movq	P_ADDR(%r13),%r13
	jnz	0f
	movq	PCB_PMAP(%r13),%rcx
0:

	/* Save stack pointers. */
	movq	%rsp,PCB_RSP(%r13)
	movq	%rbp,PCB_RBP(%r13)

	/*
	 * If the old proc ran in userspace then save the
	 * floating-point/"extended state" registers
	 */
	testl	$CPUPF_USERXSTATE,CPUVAR(PFLAGS)
	jz	.Lxstate_reset

	movq	%r13, %rdi
#if PCB_SAVEFPU != 0
	addq	$PCB_SAVEFPU,%rdi
#endif
	CODEPATCH_START
	fxsave64	(%rdi)
	CODEPATCH_END(CPTAG_XSAVE)

switch_exited:
	/* now clear the xstate */
	movq	proc0paddr(%rip),%rdi
#if PCB_SAVEFPU != 0
	addq	$PCB_SAVEFPU,%rdi
#endif
	CODEPATCH_START
	fxrstor64	(%rdi)
	CODEPATCH_END(CPTAG_XRSTOR)
	andl	$~CPUPF_USERXSTATE,CPUVAR(PFLAGS)

.Lxstate_reset:
	/*
	 * If the segment registers haven't been reset since the old proc
	 * ran in userspace then reset them now
	 */
	testl	$CPUPF_USERSEGS,CPUVAR(PFLAGS)
	jz	restore_saved
	andl	$~CPUPF_USERSEGS,CPUVAR(PFLAGS)

	/* set %ds, %es, %fs, and %gs to expected value to prevent info leak */
	movw	$(GSEL(GUDATA_SEL, SEL_UPL)),%ax
	movw	%ax,%ds
	movw	%ax,%es
	movw	%ax,%fs
	cli			/* block interrupts when on user GS.base */
	swapgs			/* switch from kernel to user GS.base */
	movw	%ax,%gs		/* set %gs to UDATA and GS.base to 0 */
	swapgs			/* back to kernel GS.base */

restore_saved:
	/*
	 * Restore saved context.
	 *
	 * Registers:
	 *   %rax, %rdx - scratch
	 *   %rcx - old pmap if not P_SYSTEM
	 *   %r12 - new process
	 *   %r13 - new pcb
	 *   %rbx - new pmap if not P_SYSTEM
	 */

	movq	P_ADDR(%r12),%r13

	/* remember the pmap if not P_SYSTEM */
	xorl	%ebx,%ebx
	testl	$P_SYSTEM,P_FLAG(%r12)
	jnz	1f
	movq	PCB_PMAP(%r13),%rbx
1:

	/* No interrupts while loading new state. */
	cli

	/* Restore stack pointers. */
	movq	PCB_RSP(%r13),%rsp
	movq	PCB_RBP(%r13),%rbp

	/* Stack pivot done, setup RETGUARD */
	RETGUARD_SETUP_OFF(cpu_switchto, r11, 6*8)

	/* don't switch cr3 to the same thing it already was */
	movq	PCB_CR3(%r13),%rax
	movq	%cr3,%rdi
	xorq	%rax,%rdi
	btrq	$63,%rdi	/* ignore CR3_REUSE_PCID */
	testq	%rdi,%rdi
	jz	.Lsame_cr3

#ifdef DIAGNOSTIC
	/* verify ci_proc_pmap had been updated properly */
	cmpq	%rcx,CPUVAR(PROC_PMAP)
	jnz	.Lbogus_proc_pmap
#endif
	/* record which pmap this CPU should get IPIs for */
	movq	%rbx,CPUVAR(PROC_PMAP)

.Lset_cr3:
	movq	%rax,%cr3			/* %rax used below too */

.Lsame_cr3:
	/*
	 * If we switched from a userland thread with a shallow call stack
	 * (e.g. interrupt->ast->mi_ast->preempt->mi_switch->cpu_switchto)
	 * then the RSB may have attacker-controlled entries when we switch
	 * to a deeper call stack in the new thread.  Refill the RSB with
	 * entries safe to speculate into/through.
	 */
	RET_STACK_REFILL_WITH_RCX

	/* Don't bother with the rest if switching to a system process. */
	testq	%rbx,%rbx
	jz	switch_restored

	/* record the bits needed for future U-->K transition */
	movq	PCB_KSTACK(%r13),%rdx
	subq	$FRAMESIZE,%rdx
	movq	%rdx,CPUVAR(KERN_RSP)
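	/*
	 * The syscall/trap trampolines presumably pivot onto
	 * CPUVAR(KERN_RSP) at the next user-to-kernel transition, so it
	 * must leave room for a full trapframe on the new proc's
	 * kernel stack.
	 */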

	CODEPATCH_START
	/*
	 * Meltdown: iff we're doing separate U+K and U-K page tables,
	 * then record them in cpu_info for easy access in syscall and
	 * interrupt trampolines.
	 */
	movq	PM_PDIRPA_INTEL(%rbx),%rdx
	orq	cr3_reuse_pcid,%rax
	orq	cr3_pcid_proc_intel,%rdx
	movq	%rax,CPUVAR(KERN_CR3)
	movq	%rdx,CPUVAR(USER_CR3)
	CODEPATCH_END(CPTAG_MELTDOWN_NOP)

switch_restored:
	SET_CURPCB(%r13)

	/* Interrupts are okay again. */
	sti
	popq	%r15
	popq	%r14
	popq	%r13
	popq	%r12
	popq	%rbp
	popq	%rbx
	RETGUARD_CHECK(cpu_switchto, r11)
	ret
	lfence

#ifdef DIAGNOSTIC
.Lbogus_proc_pmap:
	leaq	bogus_proc_pmap,%rdi
	call	panic
	int3	/* NOTREACHED */
	.pushsection .rodata
bogus_proc_pmap:
	.asciz	"curcpu->ci_proc_pmap didn't point to previous pmap"
	.popsection
#endif /* DIAGNOSTIC */
END(cpu_switchto)

ENTRY(cpu_idle_enter)
	ret
	lfence
END(cpu_idle_enter)

ENTRY(cpu_idle_leave)
	ret
	lfence
END(cpu_idle_leave)

/* placed here for correct static branch prediction in cpu_idle_* */
NENTRY(retpoline_rax)
	JMP_RETPOLINE(rax)

ENTRY(cpu_idle_cycle)
	RETGUARD_SETUP(cpu_idle_cycle, r11)
	movq	cpu_idle_cycle_fcn,%rax
	cmpq	$0,%rax
	jne	retpoline_rax
	sti
	hlt
	RETGUARD_CHECK(cpu_idle_cycle, r11)
	ret
	lfence
END(cpu_idle_cycle)

/*
 * savectx(struct pcb *pcb);
 * Update pcb, saving current processor state.
 */
ENTRY(savectx)
	RETGUARD_SETUP(savectx, r11)
	/* Save stack pointers. */
	movq	%rsp,PCB_RSP(%rdi)
	movq	%rbp,PCB_RBP(%rdi)
	RETGUARD_CHECK(savectx, r11)
	ret
	lfence
END(savectx)

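/*
 * SYSCALL entry from 32-bit (compat) mode: not supported, so just
 * bounce straight back to userspace.
 */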
IDTVEC(syscall32)
	sysret		/* go away please */
END(Xsyscall32)

/*
 * syscall insn entry.
 * Enter here with interrupts blocked; %rcx contains the caller's
 * %rip and the original rflags has been copied to %r11.  %cs and
 * %ss have been updated to the kernel segments, but %rsp is still
 * the user-space value.
 * First order of business is to swap to the kernel GS.base so that
 * we can access our struct cpu_info.  After possibly mucking with
 * pagetables, we switch to our kernel stack.  Once that's in place
 * we can save the rest of the syscall frame and unblock interrupts.
 */
KUTEXT_PAGE_START
	.align	NBPG, 0xcc
XUsyscall_meltdown:
	/*
	 * This is the real Xsyscall_meltdown page, which is mapped into
	 * the U-K page tables at the same location as Xsyscall_meltdown
	 * below.  For this, the Meltdown case, we use the scratch space
	 * in cpu_info so we can switch to the kernel page tables
	 * (thank you, Intel), at which point we'll continue at the
	 * "SYSCALL_ENTRY" after Xsyscall below.
	 * In case the CPU speculates past the mov to cr3, we put a
	 * retpoline-style pause-lfence-jmp-to-pause loop.
	 */
	swapgs
	movq	%rax,CPUVAR(SCRATCH)
	movq	CPUVAR(KERN_CR3),%rax
	movq	%rax,%cr3
0:	pause
	lfence
	jmp	0b
KUTEXT_PAGE_END

KTEXT_PAGE_START
	.align	NBPG, 0xcc
IDTVEC_NOALIGN(syscall_meltdown)
	/* pad to match real Xsyscall_meltdown positioning above */
	movq	CPUVAR(KERN_CR3),%rax
	movq	%rax,%cr3
IDTVEC_NOALIGN(syscall)
	swapgs
	movq	%rax,CPUVAR(SCRATCH)
	SYSCALL_ENTRY			/* create trapframe */
	sti

	movq	CPUVAR(CURPROC),%r14
	movq	%rsp,P_MD_REGS(%r14)	# save pointer to frame
	andl	$~MDP_IRET,P_MD_FLAGS(%r14)
	movq	%rsp,%rdi
	call	syscall

.Lsyscall_check_asts:
	/* Check for ASTs on exit to user mode. */
	cli
	CHECK_ASTPENDING(%r11)
	je	2f
	CLEAR_ASTPENDING(%r11)
	sti
	movq	%rsp,%rdi
	call	ast
	jmp	.Lsyscall_check_asts

2:
#ifdef DIAGNOSTIC
	cmpl	$IPL_NONE,CPUVAR(ILEVEL)
	jne	.Lsyscall_spl_not_lowered
#endif /* DIAGNOSTIC */

	/* Could registers have been changed that require an iretq? */
	testl	$MDP_IRET, P_MD_FLAGS(%r14)
	jne	intr_user_exit_post_ast

	/* Restore FPU/"extended CPU state" if it's not already in the CPU */
	testl	$CPUPF_USERXSTATE,CPUVAR(PFLAGS)
	jz	.Lsyscall_restore_xstate

	/* Restore FS.base if it's not already in the CPU */
	testl	$CPUPF_USERSEGS,CPUVAR(PFLAGS)
	jz	.Lsyscall_restore_fsbase

.Lsyscall_restore_registers:
	call	pku_xonly
	RET_STACK_REFILL_WITH_RCX

	movq	TF_R8(%rsp),%r8
	movq	TF_R9(%rsp),%r9
	movq	TF_R10(%rsp),%r10
	movq	TF_R12(%rsp),%r12
	movq	TF_R13(%rsp),%r13
	movq	TF_R14(%rsp),%r14
	movq	TF_R15(%rsp),%r15

	CODEPATCH_START
	movw	%ds,TF_R8(%rsp)
	verw	TF_R8(%rsp)
	CODEPATCH_END(CPTAG_MDS)

	movq	TF_RDI(%rsp),%rdi
	movq	TF_RSI(%rsp),%rsi
	movq	TF_RBP(%rsp),%rbp
	movq	TF_RBX(%rsp),%rbx

	/*
	 * We need to finish reading from the trapframe, then switch
	 * to the user page tables, swapgs, and return.  We need
	 * to get the final value for the register that was used
	 * for the mov to %cr3 from somewhere accessible on the
	 * user page tables, so save it in CPUVAR(SCRATCH) across
	 * the switch.
	 */
	movq	TF_RDX(%rsp),%rdx
	movq	TF_RAX(%rsp),%rax
	movq	TF_RIP(%rsp),%rcx
	movq	TF_RFLAGS(%rsp),%r11
	movq	TF_RSP(%rsp),%rsp
	CODEPATCH_START
	movq	%rax,CPUVAR(SCRATCH)
	movq	CPUVAR(USER_CR3),%rax
	PCID_SET_REUSE_NOP
	movq	%rax,%cr3
Xsyscall_trampback:
0:	pause
	lfence
	jmp	0b
	CODEPATCH_END(CPTAG_MELTDOWN_NOP)
	swapgs
	sysretq
END(Xsyscall)
END(Xsyscall_meltdown)
KTEXT_PAGE_END

KUTEXT_PAGE_START
	.space	(Xsyscall_trampback - Xsyscall_meltdown) - \
		(. - XUsyscall_meltdown), 0xcc
	movq	%rax,%cr3
	movq	CPUVAR(SCRATCH),%rax
	swapgs
	sysretq
KUTEXT_PAGE_END

	.text
	_ALIGN_TRAPS
/* in this case, need FS.base but not xstate, rarely happens */
.Lsyscall_restore_fsbase:	/* CPU doesn't have curproc's FS.base */
	orl	$CPUPF_USERSEGS,CPUVAR(PFLAGS)
	movq	CPUVAR(CURPCB),%rdi
	jmp	.Lsyscall_restore_fsbase_real

	_ALIGN_TRAPS
.Lsyscall_restore_xstate:	/* CPU doesn't have curproc's xstate */
	orl	$(CPUPF_USERXSTATE|CPUPF_USERSEGS),CPUVAR(PFLAGS)
	movq	CPUVAR(CURPCB),%rdi
	movq	xsave_mask(%rip),%rdx
	movl	%edx,%eax
	shrq	$32,%rdx
#if PCB_SAVEFPU != 0
	addq	$PCB_SAVEFPU,%rdi
#endif
	/* untouched state so can't fault */
	CODEPATCH_START
	fxrstor64	(%rdi)
	CODEPATCH_END(CPTAG_XRSTOR)
#if PCB_SAVEFPU != 0
	subq	$PCB_SAVEFPU,%rdi
#endif
.Lsyscall_restore_fsbase_real:
	movq	PCB_FSBASE(%rdi),%rdx
	movl	%edx,%eax
	shrq	$32,%rdx
	movl	$MSR_FSBASE,%ecx
	wrmsr
	jmp	.Lsyscall_restore_registers

#ifdef DIAGNOSTIC
.Lsyscall_spl_not_lowered:
	leaq	spl_lowered(%rip), %rdi
	movl	TF_ERR(%rsp),%esi	/* syscall # stashed above */
	movl	TF_RDI(%rsp),%edx
	movl	%ebx,%ecx
	movl	CPUVAR(ILEVEL),%r8d
	xorq	%rax,%rax
	call	printf
#ifdef DDB
	int	$3
#endif /* DDB */
	movl	$IPL_NONE,CPUVAR(ILEVEL)
	jmp	.Lsyscall_check_asts

	.section .rodata
spl_lowered:
	.asciz	"WARNING: SPL NOT LOWERED ON SYSCALL %d %d EXIT %x %x\n"
	.text
#endif

NENTRY(proc_trampoline)
#ifdef MULTIPROCESSOR
	call	proc_trampoline_mp
#endif
	movl	$IPL_NONE,CPUVAR(ILEVEL)
	movq	%r13,%rdi
	movq	%r12,%rax
	call	retpoline_rax
	movq	CPUVAR(CURPROC),%r14
	jmp	.Lsyscall_check_asts
END(proc_trampoline)


/*
 * Returning to userspace via iretq.  We do things in this order:
 *  - check for ASTs
 *  - restore FPU/"extended CPU state" if it's not already in the CPU
 *  - DIAGNOSTIC: no more C calls after this, so check the SPL
 *  - restore FS.base if it's not already in the CPU
 *  - restore most registers
 *  - update the iret frame from the trapframe
 *  - finish reading from the trapframe
 *  - switch to the trampoline stack	\
 *  - jump to the .kutext segment	|-- Meltdown workaround
 *  - switch to the user page tables	/
 *  - swapgs
 *  - iretq
 */
KTEXT_PAGE_START
	_ALIGN_TRAPS
GENTRY(intr_user_exit)
#ifdef DIAGNOSTIC
	pushfq
	popq	%rdx
	testq	$PSL_I,%rdx
	jnz	.Lintr_user_exit_not_blocked
#endif /* DIAGNOSTIC */

	/* Check for ASTs */
	CHECK_ASTPENDING(%r11)
	je	intr_user_exit_post_ast
	CLEAR_ASTPENDING(%r11)
	sti
	movq	%rsp,%rdi
	call	ast
	cli
	jmp	intr_user_exit

intr_user_exit_post_ast:
	/* Restore FPU/"extended CPU state" if it's not already in the CPU */
	testl	$CPUPF_USERXSTATE,CPUVAR(PFLAGS)
	jz	.Lintr_restore_xstate

#ifdef DIAGNOSTIC
	/* no more C calls after this, so check the SPL */
	cmpl	$0,CPUVAR(ILEVEL)
	jne	.Luser_spl_not_lowered
#endif /* DIAGNOSTIC */

	/* Restore FS.base if it's not already in the CPU */
	testl	$CPUPF_USERSEGS,CPUVAR(PFLAGS)
	jz	.Lintr_restore_fsbase

.Lintr_restore_registers:
	call	pku_xonly
	RET_STACK_REFILL_WITH_RCX

	movq	TF_R8(%rsp),%r8
	movq	TF_R9(%rsp),%r9
	movq	TF_R10(%rsp),%r10
	movq	TF_R12(%rsp),%r12
	movq	TF_R13(%rsp),%r13
	movq	TF_R14(%rsp),%r14
	movq	TF_R15(%rsp),%r15

	CODEPATCH_START
	movw	%ds,TF_R8(%rsp)
	verw	TF_R8(%rsp)
	CODEPATCH_END(CPTAG_MDS)

	movq	TF_RDI(%rsp),%rdi
	movq	TF_RSI(%rsp),%rsi
	movq	TF_RBP(%rsp),%rbp
	movq	TF_RBX(%rsp),%rbx

	/*
	 * To get the final value for the register that was used
	 * for the mov to %cr3, we need it saved somewhere that is
	 * still accessible on the user page tables, so stash it in
	 * CPUVAR(SCRATCH) across the switch.
	 */
	/* update iret frame */
	movq	CPUVAR(INTR_RSP),%rdx
	movq	$(GSEL(GUCODE_SEL,SEL_UPL)),IRETQ_CS(%rdx)
	movq	TF_RIP(%rsp),%rax
	movq	%rax,IRETQ_RIP(%rdx)
	movq	TF_RFLAGS(%rsp),%rax
	movq	%rax,IRETQ_RFLAGS(%rdx)
	movq	TF_RSP(%rsp),%rax
	movq	%rax,IRETQ_RSP(%rdx)
	movq	$(GSEL(GUDATA_SEL,SEL_UPL)),IRETQ_SS(%rdx)
	/* finish with the trap frame */
	movq	TF_RAX(%rsp),%rax
	movq	TF_RCX(%rsp),%rcx
	movq	TF_R11(%rsp),%r11
	/* switch to the trampoline stack */
	xchgq	%rdx,%rsp
	movq	TF_RDX(%rdx),%rdx
	CODEPATCH_START
	movq	%rax,CPUVAR(SCRATCH)
	movq	CPUVAR(USER_CR3),%rax
	PCID_SET_REUSE_NOP
	movq	%rax,%cr3
Xiretq_trampback:
KTEXT_PAGE_END
/* the movq %cr3 switches to this "KUTEXT" page */
KUTEXT_PAGE_START
	.space	(Xiretq_trampback - Xsyscall_meltdown) - \
		(. - XUsyscall_meltdown), 0xcc
	movq	CPUVAR(SCRATCH),%rax
.Liretq_swapgs:
	swapgs
doreti_iret_meltdown:
	iretq
KUTEXT_PAGE_END
/*
 * Back to the "KTEXT" page to fill in the speculation trap and the
 * swapgs+iretq used for non-Meltdown kernels.  This switching back
 * and forth between segments is so that we can do the .space
 * calculation below to guarantee the iretq's above and below line
 * up, so the 'doreti_iret' label lines up with the iretq whether
 * the CPU is affected by Meltdown or not.
 */
KTEXT_PAGE_START
0:	pause
	lfence
	jmp	0b
	.space	(.Liretq_swapgs - XUsyscall_meltdown) - \
		(. - Xsyscall_meltdown), 0xcc
	CODEPATCH_END(CPTAG_MELTDOWN_NOP)
	swapgs

	.globl	doreti_iret
doreti_iret:
	iretq
KTEXT_PAGE_END

	.text
	_ALIGN_TRAPS
.Lintr_restore_xstate:		/* CPU doesn't have curproc's xstate */
	orl	$CPUPF_USERXSTATE,CPUVAR(PFLAGS)
	movq	CPUVAR(CURPCB),%rdi
#if PCB_SAVEFPU != 0
	addq	$PCB_SAVEFPU,%rdi
#endif
	movq	xsave_mask(%rip),%rsi
	call	xrstor_user
	testl	%eax,%eax
	jnz	.Lintr_xrstor_faulted
.Lintr_restore_fsbase:		/* CPU doesn't have curproc's FS.base */
	orl	$CPUPF_USERSEGS,CPUVAR(PFLAGS)
	movq	CPUVAR(CURPCB),%rdx
	movq	PCB_FSBASE(%rdx),%rdx
	movl	%edx,%eax
	shrq	$32,%rdx
	movl	$MSR_FSBASE,%ecx
	wrmsr
	jmp	.Lintr_restore_registers

.Lintr_xrstor_faulted:
	/*
	 * xrstor faulted; we need to reset the FPU state and call trap()
	 * to post a signal, which requires interrupts be enabled.
	 */
	sti
	movq	proc0paddr(%rip),%rdi
#if PCB_SAVEFPU != 0
	addq	$PCB_SAVEFPU,%rdi
#endif
	CODEPATCH_START
	fxrstor64	(%rdi)
	CODEPATCH_END(CPTAG_XRSTOR)
	movq	$T_PROTFLT,TF_TRAPNO(%rsp)
	jmp	recall_trap

#ifdef DIAGNOSTIC
.Lintr_user_exit_not_blocked:
	movl	warn_once(%rip),%edi
	testl	%edi,%edi
	jnz	1f
	incl	%edi
	movl	%edi,warn_once(%rip)
	leaq	.Lnot_blocked(%rip),%rdi
	call	printf
#ifdef DDB
	int	$3
#endif /* DDB */
1:	cli
	jmp	intr_user_exit

.Luser_spl_not_lowered:
	sti
	leaq	intr_spl_lowered(%rip),%rdi
	movl	CPUVAR(ILEVEL),%esi
	xorl	%edx,%edx		/* always SPL zero for userspace */
	xorl	%eax,%eax
	call	printf
#ifdef DDB
	int	$3
#endif /* DDB */
	movl	$0,CPUVAR(ILEVEL)
	cli
	jmp	intr_user_exit

	.section .rodata
intr_spl_lowered:
	.asciz	"WARNING: SPL NOT LOWERED ON TRAP EXIT %x %x\n"
	.text
#endif /* DIAGNOSTIC */
END(Xintr_user_exit)


/*
 * Return to supervisor mode from trap or interrupt
 */
NENTRY(intr_fast_exit)
#ifdef DIAGNOSTIC
	pushfq
	popq	%rdx
	testq	$PSL_I,%rdx
	jnz	.Lintr_exit_not_blocked
#endif /* DIAGNOSTIC */
	call	pku_xonly	/* XXX guenther disapproves, but foo3 locks */
	movq	TF_RDI(%rsp),%rdi
	movq	TF_RSI(%rsp),%rsi
	movq	TF_R8(%rsp),%r8
	movq	TF_R9(%rsp),%r9
	movq	TF_R10(%rsp),%r10
	movq	TF_R12(%rsp),%r12
	movq	TF_R13(%rsp),%r13
	movq	TF_R14(%rsp),%r14
	movq	TF_R15(%rsp),%r15
	movq	TF_RBP(%rsp),%rbp
	movq	TF_RBX(%rsp),%rbx
	movq	TF_RDX(%rsp),%rdx
	movq	TF_RCX(%rsp),%rcx
	movq	TF_R11(%rsp),%r11
	movq	TF_RAX(%rsp),%rax
	addq	$TF_RIP,%rsp
	iretq

#ifdef DIAGNOSTIC
.Lintr_exit_not_blocked:
	movl	warn_once(%rip),%edi
	testl	%edi,%edi
	jnz	1f
	incl	%edi
	movl	%edi,warn_once(%rip)
	leaq	.Lnot_blocked(%rip),%rdi
	call	printf
#ifdef DDB
	int	$3
#endif /* DDB */
1:	cli
	jmp	intr_fast_exit

	.data
	.global	warn_once
warn_once:
	.long	0
	.section .rodata
.Lnot_blocked:
	.asciz	"WARNING: INTERRUPTS NOT BLOCKED ON INTERRUPT RETURN: 0x%x 0x%x\n"
	.text
#endif
END(intr_fast_exit)

/*
 * FPU/"extended CPU state" handling
 *	int xrstor_user(sfp, mask)
 *		load given state, returns 0/1 if okay/it trapped
 *	void fpusave(sfp)
 *		save current state, but retain it in the FPU
 *	void fpusavereset(sfp)
 *		save current state and reset FPU to initial/kernel state
 *	int xsetbv_user(reg, mask)
 *		load specified %xcr# register, returns 0/1 if okay/it trapped
 */

ENTRY(xrstor_user)
	RETGUARD_SETUP(xrstor_user, r11)
	movq	%rsi, %rdx
	movl	%esi, %eax
	shrq	$32, %rdx
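	/*
	 * If the restore below faults on bad user state, the fault
	 * handler is expected to recognize xrstor_fault and resume
	 * at xrstor_resume below, turning the fault into a return
	 * value of 1.
	 */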
	.globl	xrstor_fault
xrstor_fault:
	CODEPATCH_START
	fxrstor64	(%rdi)
	CODEPATCH_END(CPTAG_XRSTOR)
	xorl	%eax, %eax
	RETGUARD_CHECK(xrstor_user, r11)
	ret
	lfence
NENTRY(xrstor_resume)
	movl	$1, %eax
	RETGUARD_CHECK(xrstor_user, r11)
	ret
	lfence
END(xrstor_user)

ENTRY(fpusave)
	RETGUARD_SETUP(fpusave, r11)
	movq	xsave_mask(%rip),%rdx
	movl	%edx,%eax
	shrq	$32,%rdx
	CODEPATCH_START
	fxsave64	(%rdi)
	CODEPATCH_END(CPTAG_XSAVE)
	RETGUARD_CHECK(fpusave, r11)
	ret
	lfence
END(fpusave)

ENTRY(fpusavereset)
	RETGUARD_SETUP(fpusavereset, r11)
	movq	xsave_mask(%rip),%rdx
	movl	%edx,%eax
	shrq	$32,%rdx
	CODEPATCH_START
	fxsave64	(%rdi)
	CODEPATCH_END(CPTAG_XSAVE)
	movq	proc0paddr(%rip),%rdi
#if PCB_SAVEFPU != 0
	addq	$PCB_SAVEFPU,%rdi
#endif
	CODEPATCH_START
	fxrstor64	(%rdi)
	CODEPATCH_END(CPTAG_XRSTOR)
	RETGUARD_CHECK(fpusavereset, r11)
	ret
	lfence
END(fpusavereset)

ENTRY(xsetbv_user)
	RETGUARD_SETUP(xsetbv_user, r11)
	movl	%edi, %ecx
	movq	%rsi, %rdx
	movl	%esi, %eax
	shrq	$32, %rdx
	.globl	xsetbv_fault
xsetbv_fault:
	xsetbv
	xorl	%eax, %eax
	RETGUARD_CHECK(xsetbv_user, r11)
	ret
	lfence
NENTRY(xsetbv_resume)
	movl	$1, %eax
	RETGUARD_CHECK(xsetbv_user, r11)
	ret
	lfence
END(xsetbv_user)

	.section .rodata
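/*
 * Single-instruction templates: presumably copied by the codepatch
 * machinery over the fxsave64/fxrstor64 defaults (and the
 * PCID_SET_REUSE_NOP) above when the CPU supports the corresponding
 * feature.
 */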
	.globl	_xrstor
_xrstor:
	xrstor64	(%rdi)

	.globl	_xsave
_xsave:
	xsave64	(%rdi)

	.globl	_xsaveopt
_xsaveopt:
	xsaveopt64	(%rdi)

	.globl	_pcid_set_reuse
_pcid_set_reuse:
	orl	$(CR3_REUSE_PCID >> 32),CPUVAR(USER_CR3 + 4)

ENTRY(pagezero)
	RETGUARD_SETUP(pagezero, r11)
	movq	$-PAGE_SIZE,%rdx
	subq	%rdx,%rdi
	xorq	%rax,%rax
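	/*
	 * %rdi now points past the end of the page and %rdx counts up
	 * from -PAGE_SIZE to 0, so (%rdi,%rdx) walks the page.  movnti
	 * does non-temporal (cache-bypassing) stores; the sfence below
	 * orders them before we return.
	 */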
1:
	movnti	%rax,(%rdi,%rdx)
	movnti	%rax,8(%rdi,%rdx)
	movnti	%rax,16(%rdi,%rdx)
	movnti	%rax,24(%rdi,%rdx)
	addq	$32,%rdx
	jne	1b
	sfence
	RETGUARD_CHECK(pagezero, r11)
	ret
	lfence
END(pagezero)

/* void pku_xonly(void) */
ENTRY(pku_xonly)
	movq	pg_xo,%rax	/* have PKU support? */
	cmpq	$0,%rax
	je	1f
	movl	$0,%ecx		/* force PKRU for xonly restriction */
	movl	$0,%edx
	movl	$PGK_VALUE,%eax	/* key0 normal, key1 is exec without read */
	wrpkru
1:	ret
	lfence
END(pku_xonly)

/* int rdmsr_safe(u_int msr, uint64_t *data) */
ENTRY(rdmsr_safe)
	RETGUARD_SETUP(rdmsr_safe, r10)

	movl	%edi, %ecx	/* u_int msr */
	.globl	rdmsr_safe_fault
rdmsr_safe_fault:
	rdmsr
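	/*
	 * rdmsr returns the value split across %edx:%eax; the
	 * "movl %eax, %eax" below zero-extends, clearing bits 63:32
	 * of %rax before the merge.  A faulting MSR read is expected
	 * to resume at rdmsr_resume below and return nonzero.
	 */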
	salq	$32, %rdx
	movl	%eax, %eax
	orq	%rdx, %rax
	movq	%rax, (%rsi)	/* *data */
	xorq	%rax, %rax

	RETGUARD_CHECK(rdmsr_safe, r10)
	ret
	lfence

NENTRY(rdmsr_resume)
	movl	$0x1, %eax
	RETGUARD_CHECK(rdmsr_safe, r10)
	ret
	lfence
END(rdmsr_safe)

#if NXEN > 0
	/* Hypercall page needs to be page aligned */
	.text
	.align	NBPG, 0xcc
	.globl	xen_hypercall_page
xen_hypercall_page:
	.skip	0x1000, 0xcc
#endif /* NXEN > 0 */

#if NHYPERV > 0
	/* Hypercall page needs to be page aligned */
	.text
	.align	NBPG, 0xcc
	.globl	hv_hypercall_page
hv_hypercall_page:
	.skip	0x1000, 0xcc
#endif /* NHYPERV > 0 */