1 /*-
2 * Copyright (c) 2003 Peter Wemm.
3 * Copyright (c) 1992 Terrence R. Lambert.
4 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * William Jolitz.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91
39 */
40
41 #include <sys/cdefs.h>
42 __FBSDID("$FreeBSD: releng/5.2/sys/amd64/amd64/machdep.c 144700 2005-04-06 01:44:36Z peter $");
43
44 #include "opt_atalk.h"
45 #include "opt_atpic.h"
46 #include "opt_compat.h"
47 #include "opt_cpu.h"
48 #include "opt_ddb.h"
49 #include "opt_inet.h"
50 #include "opt_ipx.h"
51 #include "opt_isa.h"
52 #include "opt_kstack_pages.h"
53 #include "opt_maxmem.h"
54 #include "opt_msgbuf.h"
55 #include "opt_perfmon.h"
56
57 #include <sys/param.h>
58 #include <sys/systm.h>
59 #include <sys/sysproto.h>
60 #include <sys/signalvar.h>
61 #include <sys/imgact.h>
62 #include <sys/kernel.h>
63 #include <sys/ktr.h>
64 #include <sys/linker.h>
65 #include <sys/lock.h>
66 #include <sys/malloc.h>
67 #include <sys/mutex.h>
68 #include <sys/pcpu.h>
69 #include <sys/proc.h>
70 #include <sys/bio.h>
71 #include <sys/buf.h>
72 #include <sys/reboot.h>
73 #include <sys/callout.h>
74 #include <sys/msgbuf.h>
75 #include <sys/sched.h>
76 #include <sys/sysent.h>
77 #include <sys/sysctl.h>
78 #include <sys/ucontext.h>
79 #include <sys/vmmeter.h>
80 #include <sys/bus.h>
81 #include <sys/eventhandler.h>
82
83 #include <vm/vm.h>
84 #include <vm/vm_param.h>
85 #include <vm/vm_kern.h>
86 #include <vm/vm_object.h>
87 #include <vm/vm_page.h>
88 #include <vm/vm_map.h>
89 #include <vm/vm_pager.h>
90 #include <vm/vm_extern.h>
91
92 #include <sys/user.h>
93 #include <sys/exec.h>
94 #include <sys/cons.h>
95
96 #include <ddb/ddb.h>
97
98 #include <net/netisr.h>
99
100 #include <machine/cpu.h>
101 #include <machine/cputypes.h>
102 #include <machine/reg.h>
103 #include <machine/clock.h>
104 #include <machine/specialreg.h>
105 #include <machine/intr_machdep.h>
106 #include <machine/md_var.h>
107 #include <machine/metadata.h>
108 #include <machine/proc.h>
109 #ifdef PERFMON
110 #include <machine/perfmon.h>
111 #endif
112 #include <machine/tss.h>
113 #ifdef SMP
114 #include <machine/smp.h>
115 #endif
116
117 #include <amd64/isa/icu.h>
118
119 #include <isa/isareg.h>
120 #include <isa/rtc.h>
121 #include <sys/ptrace.h>
122 #include <machine/sigframe.h>
123
124 /* Sanity check for __curthread() */
125 CTASSERT(offsetof(struct pcpu, pc_curthread) == 0);
126
127 extern u_int64_t hammer_time(u_int64_t, u_int64_t);
128 extern void dblfault_handler(void);
129
130 extern void printcpuinfo(void); /* XXX header file */
131 extern void identify_cpu(void);
132 extern void panicifcpuunsupported(void);
133 extern void initializecpu(void);
134
135 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
136 #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
137
138 static void cpu_startup(void *);
139 static void get_fpcontext(struct thread *td, mcontext_t *mcp);
140 static int set_fpcontext(struct thread *td, const mcontext_t *mcp);
141 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)
142
143 int _udatasel, _ucodesel, _ucode32sel;
144 u_long atdevbase;
145
146 int cold = 1;
147
148 long Maxmem = 0;
149
150 vm_paddr_t phys_avail[10];
151
152 /* must be 2 less so that a terminating pair of zeroes can signal the end of chunks */
153 #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2)
154
155 struct kva_md_info kmi;
156
157 static struct trapframe proc0_tf;
158 struct region_descriptor r_gdt, r_idt;
159
160 struct pcpu __pcpu[MAXCPU];
161
162 struct mtx icu_lock;
163
164 static void
165 cpu_startup(dummy)
166 void *dummy;
167 {
168 /*
169 * Good {morning,afternoon,evening,night}.
170 */
171 startrtclock();
172 printcpuinfo();
173 panicifcpuunsupported();
174 #ifdef PERFMON
175 perfmon_init();
176 #endif
177 printf("real memory = %ju (%ju MB)\n", ptoa((uintmax_t)Maxmem),
178 ptoa((uintmax_t)Maxmem) / 1048576);
179 /*
180 * Display any holes after the first chunk of extended memory.
181 */
182 if (bootverbose) {
183 int indx;
184
185 printf("Physical memory chunk(s):\n");
186 for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
187 vm_paddr_t size;
188
189 size = phys_avail[indx + 1] - phys_avail[indx];
190 printf(
191 "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n",
192 (uintmax_t)phys_avail[indx],
193 (uintmax_t)phys_avail[indx + 1] - 1,
194 (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
195 }
196 }
197
198 vm_ksubmap_init(&kmi);
199
200 printf("avail memory = %ju (%ju MB)\n",
201 ptoa((uintmax_t)cnt.v_free_count),
202 ptoa((uintmax_t)cnt.v_free_count) / 1048576);
203
204 /*
205 * Set up buffers, so they can be used to read disk labels.
206 */
207 bufinit();
208 vm_pager_bufferinit();
209
210 cpu_setregs();
211 }
212
213 /*
214 * Send an interrupt to process.
215 *
216 * The stack is set up so that the sigcode stored at the top
217 * can call the handler routine, followed by a system call
218 * to the sigreturn routine below it. After sigreturn
219 * resets the signal mask, the stack, and the frame
220 * pointer, it returns to the user-specified
221 * pc and psl.
222 */
223 void
224 sendsig(catcher, sig, mask, code)
225 sig_t catcher;
226 int sig;
227 sigset_t *mask;
228 u_long code;
229 {
230 struct sigframe sf, *sfp;
231 struct proc *p;
232 struct thread *td;
233 struct sigacts *psp;
234 char *sp;
235 struct trapframe *regs;
236 int oonstack;
237
238 td = curthread;
239 p = td->td_proc;
240 PROC_LOCK_ASSERT(p, MA_OWNED);
241 psp = p->p_sigacts;
242 mtx_assert(&psp->ps_mtx, MA_OWNED);
243 regs = td->td_frame;
244 oonstack = sigonstack(regs->tf_rsp);
245
246 /* Save user context. */
247 bzero(&sf, sizeof(sf));
248 sf.sf_uc.uc_sigmask = *mask;
249 sf.sf_uc.uc_stack = p->p_sigstk;
250 sf.sf_uc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
251 ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
252 sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
253 bcopy(regs, &sf.sf_uc.uc_mcontext.mc_rdi, sizeof(*regs));
254 sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
255 get_fpcontext(td, &sf.sf_uc.uc_mcontext);
256 fpstate_drop(td);
257
258 /* Allocate space for the signal handler context. */
259 if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack &&
260 SIGISMEMBER(psp->ps_sigonstack, sig)) {
261 sp = p->p_sigstk.ss_sp +
262 p->p_sigstk.ss_size - sizeof(struct sigframe);
263 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
264 p->p_sigstk.ss_flags |= SS_ONSTACK;
265 #endif
266 } else
267 sp = (char *)regs->tf_rsp - sizeof(struct sigframe) - 128;
268 /* Align to 16 bytes. */
269 sfp = (struct sigframe *)((unsigned long)sp & ~0xF);
270
271 /* Translate the signal if appropriate. */
272 if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
273 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
274
275 /* Build the argument list for the signal handler. */
276 regs->tf_rdi = sig; /* arg 1 in %rdi */
277 regs->tf_rdx = (register_t)&sfp->sf_uc; /* arg 3 in %rdx */
278 if (SIGISMEMBER(psp->ps_siginfo, sig)) {
279 /* Signal handler installed with SA_SIGINFO. */
280 regs->tf_rsi = (register_t)&sfp->sf_si; /* arg 2 in %rsi */
281 sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;
282
283 /* Fill in POSIX parts */
284 sf.sf_si.si_signo = sig;
285 sf.sf_si.si_code = code;
286 regs->tf_rcx = regs->tf_addr; /* arg 4 in %rcx */
287 } else {
288 /* Old FreeBSD-style arguments. */
289 regs->tf_rsi = code; /* arg 2 in %rsi */
290 regs->tf_rcx = regs->tf_addr; /* arg 4 in %rcx */
291 sf.sf_ahu.sf_handler = catcher;
292 }
293 mtx_unlock(&psp->ps_mtx);
294 PROC_UNLOCK(p);
295
296 /*
297 * Copy the sigframe out to the user's stack.
298 */
299 if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
300 #ifdef DEBUG
301 printf("process %ld has trashed its stack\n", (long)p->p_pid);
302 #endif
303 PROC_LOCK(p);
304 sigexit(td, SIGILL);
305 }
306
307 regs->tf_rsp = (long)sfp;
308 regs->tf_rip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
309 regs->tf_rflags &= ~PSL_T;
310 regs->tf_cs = _ucodesel;
311 PROC_LOCK(p);
312 mtx_lock(&psp->ps_mtx);
313 }
314
315 /*
316 * Build siginfo_t for SA thread
317 */
318 void
319 cpu_thread_siginfo(int sig, u_long code, siginfo_t *si)
320 {
321 struct proc *p;
322 struct thread *td;
323 struct trapframe *regs;
324
325 td = curthread;
326 p = td->td_proc;
327 regs = td->td_frame;
328 PROC_LOCK_ASSERT(p, MA_OWNED);
329
330 bzero(si, sizeof(*si));
331 si->si_signo = sig;
332 si->si_code = code;
333 si->si_addr = (void *)regs->tf_addr;
334 /* XXXKSE fill other fields */
335 }
336
337 /*
338 * System call to clean up state after a signal
339 * has been taken. Reset signal mask and
340 * stack state from context left by sendsig (above).
341 * Return to previous pc and psl as specified by
342 * context left by sendsig. Check carefully to
343 * make sure that the user has not modified the
344 * state to gain improper privileges.
345 *
346 * MPSAFE
347 */
348 int
349 sigreturn(td, uap)
350 struct thread *td;
351 struct sigreturn_args /* {
352 const __ucontext *sigcntxp;
353 } */ *uap;
354 {
355 ucontext_t uc;
356 struct proc *p = td->td_proc;
357 struct trapframe *regs;
358 const ucontext_t *ucp;
359 long rflags;
360 int cs, error, ret;
361
362 error = copyin(uap->sigcntxp, &uc, sizeof(uc));
363 if (error != 0)
364 return (error);
365 ucp = &uc;
366 regs = td->td_frame;
367 rflags = ucp->uc_mcontext.mc_rflags;
368 /*
369 * Don't allow users to change privileged or reserved flags.
370 */
371 /*
372 * XXX do allow users to change the privileged flag PSL_RF.
373 * The cpu sets PSL_RF in tf_rflags for faults. Debuggers
374 * should sometimes set it there too. tf_rflags is kept in
375 * the signal context during signal handling and there is no
376 * other place to remember it, so the PSL_RF bit may be
377 * corrupted by the signal handler without us knowing.
378 * Corruption of the PSL_RF bit at worst causes one more or
379 * one less debugger trap, so allowing it is fairly harmless.
380 */
381 if (!EFL_SECURE(rflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) {
382 printf("sigreturn: rflags = 0x%lx\n", rflags);
383 return (EINVAL);
384 }
385
386 /*
387 * Don't allow users to load a valid privileged %cs. Let the
388 * hardware check for invalid selectors, excess privilege in
389 * other selectors, invalid %eip's and invalid %esp's.
390 */
391 cs = ucp->uc_mcontext.mc_cs;
392 if (!CS_SECURE(cs)) {
393 printf("sigreturn: cs = 0x%x\n", cs);
394 trapsignal(td, SIGBUS, T_PROTFLT);
395 return (EINVAL);
396 }
397
398 ret = set_fpcontext(td, &ucp->uc_mcontext);
399 if (ret != 0)
400 return (ret);
401 bcopy(&ucp->uc_mcontext.mc_rdi, regs, sizeof(*regs));
402
403 PROC_LOCK(p);
404 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
405 if (ucp->uc_mcontext.mc_onstack & 1)
406 p->p_sigstk.ss_flags |= SS_ONSTACK;
407 else
408 p->p_sigstk.ss_flags &= ~SS_ONSTACK;
409 #endif
410
411 td->td_sigmask = ucp->uc_sigmask;
412 SIG_CANTMASK(td->td_sigmask);
413 signotify(td);
414 PROC_UNLOCK(p);
415 td->td_pcb->pcb_flags |= PCB_FULLCTX;
416 return (EJUSTRETURN);
417 }
418
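/*
 * COMPAT_FREEBSD4 sigreturn(2): the 4.x ucontext is assumed to be laid
 * out compatibly here, so the old syscall just forwards to the native
 * sigreturn() above.
 */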
419 #ifdef COMPAT_FREEBSD4
420 int
421 freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap)
422 {
423
424 return sigreturn(td, (struct sigreturn_args *)uap);
425 }
426 #endif
427
428
429 /*
430 * Machine dependent boot() routine
431 *
432 * I haven't seen anything to put here yet.
433 * Possibly some stuff might be grafted back here from boot().
434 */
435 void
436 cpu_boot(int howto)
437 {
438 }
439
440 /*
441 * Shut down the CPU as much as possible
442 */
443 void
444 cpu_halt(void)
445 {
446 for (;;)
447 __asm__ ("hlt");
448 }
449
450 /*
451 * Hook to idle the CPU when possible. In the SMP case we default to
452 * off because a halted cpu will not currently pick up a new thread in the
453 * run queue until the next timer tick. If turned on, this will result in
454 * approximately a 4.2% loss in real time performance in buildworld tests
455 * (but improves user and sys times oddly enough), and saves approximately
456 * 5% in power consumption on an idle machine (tests w/2xCPU 1.1GHz P3).
457 *
458 * XXX we need to have a cpu mask of idle cpus and generate an IPI or
459 * otherwise generate some sort of interrupt to wake up cpus sitting in HLT.
460 * Then we can have our cake and eat it too.
461 *
462 * XXX I'm turning it on for SMP as well by default for now. It seems to
463 * help lock contention somewhat, and this is critical for HTT. -Peter
464 */
465 static int cpu_idle_hlt = 1;
466 SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW,
467 &cpu_idle_hlt, 0, "Idle loop HLT enable");
468
469 static void
470 cpu_idle_default(void)
471 {
472 /*
473 * We must absolutely guarantee that hlt is the
474 * very next instruction after sti, or we
475 * introduce a timing window.
476 */
477 __asm __volatile("sti; hlt");
478 }
479
480 /*
481 * Note that we have to be careful here to avoid a race between checking
482 * sched_runnable() and actually halting. If we don't do this, we may waste
483 * the time between calling hlt and the next interrupt even though there
484 * is a runnable process.
485 */
486 void
487 cpu_idle(void)
488 {
489
490 if (cpu_idle_hlt) {
491 disable_intr();
492 if (sched_runnable())
493 enable_intr();
494 else
495 (*cpu_idle_hook)();
496 }
497 }
498
499 /* Other subsystems (e.g., ACPI) can hook this later. */
500 void (*cpu_idle_hook)(void) = cpu_idle_default;
501
502 /*
503 * Clear registers on exec
504 */
505 void
506 exec_setregs(td, entry, stack, ps_strings)
507 struct thread *td;
508 u_long entry;
509 u_long stack;
510 u_long ps_strings;
511 {
512 struct trapframe *regs = td->td_frame;
513 struct pcb *pcb = td->td_pcb;
514
515 wrmsr(MSR_FSBASE, 0);
516 wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */
517 pcb->pcb_fsbase = 0;
518 pcb->pcb_gsbase = 0;
519 load_ds(_udatasel);
520 load_es(_udatasel);
521 load_fs(_udatasel);
522 load_gs(_udatasel);
523 pcb->pcb_ds = _udatasel;
524 pcb->pcb_es = _udatasel;
525 pcb->pcb_fs = _udatasel;
526 pcb->pcb_gs = _udatasel;
527
528 bzero((char *)regs, sizeof(struct trapframe));
529 regs->tf_rip = entry;
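/* Leave %rsp congruent to 8 mod 16, as if a return address had just been pushed. */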
530 regs->tf_rsp = ((stack - 8) & ~0xF) + 8;
531 regs->tf_rdi = stack; /* argv */
532 regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
533 regs->tf_ss = _udatasel;
534 regs->tf_cs = _ucodesel;
535
536 /*
537 * Arrange to trap the next fpu or `fwait' instruction (see fpu.c
538 * for why fwait must be trapped at least if there is an fpu or an
539 * emulator). This is mainly to handle the case where npx0 is not
540 * configured, since the fpu routines normally set up the trap
541 * otherwise. It should be done only at boot time, but doing it
542 * here allows modifying `fpu_exists' for testing the emulator on
543 * systems with an fpu.
544 */
545 load_cr0(rcr0() | CR0_MP | CR0_TS);
546
547 /* Initialize the fpu (if any) for the current process. */
548 /*
549 * XXX the above load_cr0() also initializes it and is a layering
550 * violation. It drops the fpu state partially
551 * and this would be fatal if we were interrupted now, and decided
552 * to force the state to the pcb, and checked the invariant
553 * (CR0_TS clear if and only if PCPU_GET(fpcurthread) != NULL).
554 * ALL of this can happen except the check. The check used to
555 * happen and be fatal later when we didn't complete the drop
556 * before returning to user mode. This should be fixed properly
557 * soon.
558 */
559 fpstate_drop(td);
560 }
561
562 void
563 cpu_setregs(void)
564 {
565 register_t cr0;
566
567 cr0 = rcr0();
568 cr0 |= CR0_NE; /* Done by fpuinit() */
569 cr0 |= CR0_MP | CR0_TS; /* Done at every execve() too. */
570 cr0 |= CR0_WP | CR0_AM;
571 load_cr0(cr0);
572 }
573
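/*
 * machdep.adjkerntz: writes push the adjusted time back out to the
 * real-time clock via resettodr().
 */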
574 static int
575 sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS)
576 {
577 int error;
578 error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
579 req);
580 if (!error && req->newptr)
581 resettodr();
582 return (error);
583 }
584
585 SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW,
586 &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "");
587
588 SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
589 CTLFLAG_RW, &disable_rtc_set, 0, "");
590
591 SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock,
592 CTLFLAG_RW, &wall_cmos_clock, 0, "");
593
594 /*
595 * Initialize amd64 and configure to run kernel
596 */
597
598 /*
599 * Initialize segments & interrupt table
600 */
601
602 struct user_segment_descriptor gdt[NGDT * MAXCPU];/* global descriptor table */
603 static struct gate_descriptor idt0[NIDT];
604 struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */
605
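/* Dedicated stack for the double fault handler; installed as IST1 in the TSS. */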
606 static char dblfault_stack[PAGE_SIZE] __aligned(16);
607
608 struct amd64tss common_tss[MAXCPU];
609
610 /* software prototypes -- in more palatable form */
611 struct soft_segment_descriptor gdt_segs[] = {
612 /* GNULL_SEL 0 Null Descriptor */
613 { 0x0, /* segment base address */
614 0x0, /* length */
615 0, /* segment type */
616 0, /* segment descriptor priority level */
617 0, /* segment descriptor present */
618 0, /* long */
619 0, /* default 32 vs 16 bit size */
620 0 /* limit granularity (byte/page units)*/ },
621 /* GCODE_SEL 1 Code Descriptor for kernel */
622 { 0x0, /* segment base address */
623 0xfffff, /* length - all address space */
624 SDT_MEMERA, /* segment type */
625 SEL_KPL, /* segment descriptor priority level */
626 1, /* segment descriptor present */
627 1, /* long */
628 0, /* default 32 vs 16 bit size */
629 1 /* limit granularity (byte/page units)*/ },
630 /* GDATA_SEL 2 Data Descriptor for kernel */
631 { 0x0, /* segment base address */
632 0xfffff, /* length - all address space */
633 SDT_MEMRWA, /* segment type */
634 SEL_KPL, /* segment descriptor priority level */
635 1, /* segment descriptor present */
636 1, /* long */
637 0, /* default 32 vs 16 bit size */
638 1 /* limit granularity (byte/page units)*/ },
639 /* GUCODE32_SEL 3 32 bit Code Descriptor for user */
640 { 0x0, /* segment base address */
641 0xfffff, /* length - all address space */
642 SDT_MEMERA, /* segment type */
643 SEL_UPL, /* segment descriptor priority level */
644 1, /* segment descriptor present */
645 0, /* long */
646 1, /* default 32 vs 16 bit size */
647 1 /* limit granularity (byte/page units)*/ },
648 /* GUDATA_SEL 4 32/64 bit Data Descriptor for user */
649 { 0x0, /* segment base address */
650 0xfffff, /* length - all address space */
651 SDT_MEMRWA, /* segment type */
652 SEL_UPL, /* segment descriptor priority level */
653 1, /* segment descriptor present */
654 0, /* long */
655 1, /* default 32 vs 16 bit size */
656 1 /* limit granularity (byte/page units)*/ },
657 /* GUCODE_SEL 5 64 bit Code Descriptor for user */
658 { 0x0, /* segment base address */
659 0xfffff, /* length - all address space */
660 SDT_MEMERA, /* segment type */
661 SEL_UPL, /* segment descriptor priority level */
662 1, /* segment descriptor present */
663 1, /* long */
664 0, /* default 32 vs 16 bit size */
665 1 /* limit granularity (byte/page units)*/ },
666 /* GPROC0_SEL 6 Proc 0 Tss Descriptor */
667 {
668 0x0, /* segment base address */
669 sizeof(struct amd64tss)-1,/* length - all address space */
670 SDT_SYSTSS, /* segment type */
671 SEL_KPL, /* segment descriptor priority level */
672 1, /* segment descriptor present */
673 0, /* long */
674 0, /* unused - default 32 vs 16 bit size */
675 0 /* limit granularity (byte/page units)*/ },
676 /* Actually, the TSS is a system descriptor which is double size */
677 { 0x0, /* segment base address */
678 0x0, /* length */
679 0, /* segment type */
680 0, /* segment descriptor priority level */
681 0, /* segment descriptor present */
682 0, /* long */
683 0, /* default 32 vs 16 bit size */
684 0 /* limit granularity (byte/page units)*/ },
685 };
686
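/*
 * Install an interrupt/trap gate: IDT slot idx gets handler func with
 * the kernel code selector, gate type typ, privilege level dpl and
 * interrupt-stack-table index ist.
 */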
687 void
688 setidt(idx, func, typ, dpl, ist)
689 int idx;
690 inthand_t *func;
691 int typ;
692 int dpl;
693 int ist;
694 {
695 struct gate_descriptor *ip;
696
697 ip = idt + idx;
698 ip->gd_looffset = (uintptr_t)func;
699 ip->gd_selector = GSEL(GCODE_SEL, SEL_KPL);
700 ip->gd_ist = ist;
701 ip->gd_xx = 0;
702 ip->gd_type = typ;
703 ip->gd_dpl = dpl;
704 ip->gd_p = 1;
705 ip->gd_hioffset = ((uintptr_t)func)>>16 ;
706 }
707
708 #define IDTVEC(name) __CONCAT(X,name)
709
710 extern inthand_t
711 IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
712 IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
713 IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
714 IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
715 IDTVEC(xmm), IDTVEC(dblfault),
716 IDTVEC(fast_syscall), IDTVEC(fast_syscall32);
717
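/*
 * Conversions between the packed hardware descriptor layout and the
 * unpacked "soft" form: sdtossd() unpacks a user segment descriptor,
 * ssdtosd() packs one, and ssdtosyssd() packs a (larger) system
 * segment descriptor such as the TSS.
 */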
718 void
719 sdtossd(sd, ssd)
720 struct user_segment_descriptor *sd;
721 struct soft_segment_descriptor *ssd;
722 {
723
724 ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase;
725 ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
726 ssd->ssd_type = sd->sd_type;
727 ssd->ssd_dpl = sd->sd_dpl;
728 ssd->ssd_p = sd->sd_p;
729 ssd->ssd_long = sd->sd_long;
730 ssd->ssd_def32 = sd->sd_def32;
731 ssd->ssd_gran = sd->sd_gran;
732 }
733
734 void
735 ssdtosd(ssd, sd)
736 struct soft_segment_descriptor *ssd;
737 struct user_segment_descriptor *sd;
738 {
739
740 sd->sd_lobase = (ssd->ssd_base) & 0xffffff;
741 sd->sd_hibase = (ssd->ssd_base >> 24) & 0xff;
742 sd->sd_lolimit = (ssd->ssd_limit) & 0xffff;
743 sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf;
744 sd->sd_type = ssd->ssd_type;
745 sd->sd_dpl = ssd->ssd_dpl;
746 sd->sd_p = ssd->ssd_p;
747 sd->sd_long = ssd->ssd_long;
748 sd->sd_def32 = ssd->ssd_def32;
749 sd->sd_gran = ssd->ssd_gran;
750 }
751
752 void
753 ssdtosyssd(ssd, sd)
754 struct soft_segment_descriptor *ssd;
755 struct system_segment_descriptor *sd;
756 {
757
758 sd->sd_lobase = (ssd->ssd_base) & 0xffffff;
759 sd->sd_hibase = (ssd->ssd_base >> 24) & 0xfffffffffful;
760 sd->sd_lolimit = (ssd->ssd_limit) & 0xffff;
761 sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf;
762 sd->sd_type = ssd->ssd_type;
763 sd->sd_dpl = ssd->ssd_dpl;
764 sd->sd_p = ssd->ssd_p;
765 sd->sd_gran = ssd->ssd_gran;
766 }
767
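/*
 * With the legacy ATPIC compiled out there are no ISA PICs to poll,
 * so report no pending ISA interrupts.
 */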
768 #if !defined(DEV_ATPIC) && defined(DEV_ISA)
769 #include <isa/isavar.h>
770 u_int
771 isa_irq_pending(void)
772 {
773
774 return (0);
775 }
776 #endif
777
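/* Room for 8 base/bound pairs in the physmap[] array built by getmemsize(). */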
778 #define PHYSMAP_SIZE (2 * 8)
779
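/* One BIOS INT 15h/E820 memory map entry, as passed in by the loader. */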
780 struct bios_smap {
781 u_int64_t base;
782 u_int64_t length;
783 u_int32_t type;
784 } __packed;
785
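/* Amount of "base" (conventional) memory, in kilobytes. */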
786 u_int basemem;
787
788 /*
789 * Populate the (physmap) array with base/bound pairs describing the
790 * available physical memory in the system, then test this memory and
791 * build the phys_avail array describing the actually-available memory.
792 *
793 * If we cannot accurately determine the physical memory map, then use
794 * the value from the 0xE801 call, and failing that, the RTC.
795 *
796 * Total memory size may be set by the kernel environment variable
797 * hw.physmem or the compile-time define MAXMEM.
798 *
799 * XXX first should be vm_paddr_t.
800 */
801 static void
802 getmemsize(caddr_t kmdp, u_int64_t first)
803 {
804 int i, physmap_idx, pa_indx;
805 vm_paddr_t pa, physmap[PHYSMAP_SIZE];
806 pt_entry_t *pte;
807 char *cp;
808 struct bios_smap *smapbase, *smap, *smapend;
809 u_int32_t smapsize;
810
811 bzero(physmap, sizeof(physmap));
812 basemem = 0;
813 physmap_idx = 0;
814
815 /*
816 * get memory map from INT 15:E820, kindly supplied by the loader.
817 *
818 * subr_module.c says:
819 * "Consumer may safely assume that size value precedes data."
820 * i.e., an int32_t immediately precedes smap.
821 */
822 smapbase = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP);
823 if (smapbase == NULL)
824 panic("No BIOS smap info from loader!");
825
826 smapsize = *((u_int32_t *)smapbase - 1);
827 smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize);
828
829 for (smap = smapbase; smap < smapend; smap++) {
830 if (boothowto & RB_VERBOSE)
831 printf("SMAP type=%02x base=%016lx len=%016lx\n",
832 smap->type, smap->base, smap->length);
833
834 if (smap->type != 0x01)
835 continue;
836
837 if (smap->length == 0)
838 continue;
839
840 for (i = 0; i <= physmap_idx; i += 2) {
841 if (smap->base < physmap[i + 1]) {
842 if (boothowto & RB_VERBOSE)
843 printf(
844 "Overlapping or non-montonic memory region, ignoring second region\n");
845 goto next_run;
846 }
847 }
848
849 if (smap->base == physmap[physmap_idx + 1]) {
850 physmap[physmap_idx + 1] += smap->length;
851 next_run:
852 continue;
853 }
854
855 physmap_idx += 2;
856 if (physmap_idx == PHYSMAP_SIZE) {
857 printf(
858 "Too many segments in the physical address map, giving up\n");
859 break;
860 }
861 physmap[physmap_idx] = smap->base;
862 physmap[physmap_idx + 1] = smap->base + smap->length;
863 }
864
865 /*
866 * Find the 'base memory' segment for SMP
867 */
868 basemem = 0;
869 for (i = 0; i <= physmap_idx; i += 2) {
870 if (physmap[i] == 0x00000000) {
871 basemem = physmap[i + 1] / 1024;
872 break;
873 }
874 }
875 if (basemem == 0)
876 panic("BIOS smap did not include a basemem segment!");
877
878 #ifdef SMP
879 /* make hole for AP bootstrap code */
880 physmap[1] = mp_bootaddress(physmap[1] / 1024);
881 #endif
882
883 /*
884 * Maxmem isn't the "maximum memory", it's one larger than the
885 * highest page of the physical address space. It should be
886 * called something like "Maxphyspage". We may adjust this
887 * based on ``hw.physmem'' and the results of the memory test.
888 */
889 Maxmem = atop(physmap[physmap_idx + 1]);
890
891 #ifdef MAXMEM
892 Maxmem = MAXMEM / 4;
893 #endif
894
895 /*
896 * hw.physmem is a size in bytes; we also allow k, m, and g suffixes
897 * for the appropriate modifiers. This overrides MAXMEM.
898 */
899 cp = getenv("hw.physmem");
900 if (cp != NULL) {
901 u_int64_t AllowMem, sanity;
902 char *ep;
903
904 sanity = AllowMem = strtouq(cp, &ep, 0);
905 if ((ep != cp) && (*ep != 0)) {
906 switch(*ep) {
907 case 'g':
908 case 'G':
909 AllowMem <<= 10;
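/* FALLTHROUGH */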
910 case 'm':
911 case 'M':
912 AllowMem <<= 10;
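/* FALLTHROUGH */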
913 case 'k':
914 case 'K':
915 AllowMem <<= 10;
916 break;
917 default:
918 AllowMem = sanity = 0;
919 }
920 if (AllowMem < sanity)
921 AllowMem = 0;
922 }
923 if (AllowMem == 0)
924 printf("Ignoring invalid memory size of '%s'\n", cp);
925 else
926 Maxmem = atop(AllowMem);
927 freeenv(cp);
928 }
929
930 if (atop(physmap[physmap_idx + 1]) != Maxmem &&
931 (boothowto & RB_VERBOSE))
932 printf("Physical memory use set to %ldK\n", Maxmem * 4);
933
934 /*
935 * If Maxmem has been increased beyond what the system has detected,
936 * extend the last memory segment to the new limit.
937 */
938 if (atop(physmap[physmap_idx + 1]) < Maxmem)
939 physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem);
940
941 /* call pmap initialization to make new kernel address space */
942 pmap_bootstrap(&first);
943
944 /*
945 * Size up each available chunk of physical memory.
946 */
947 physmap[0] = PAGE_SIZE; /* mask off page 0 */
948 pa_indx = 0;
949 phys_avail[pa_indx++] = physmap[0];
950 phys_avail[pa_indx] = physmap[0];
951 pte = CMAP1;
952
953 /*
954 * physmap is in bytes, so when converting to page boundaries,
955 * round up the start address and round down the end address.
956 */
957 for (i = 0; i <= physmap_idx; i += 2) {
958 vm_paddr_t end;
959
960 end = ptoa((vm_paddr_t)Maxmem);
961 if (physmap[i + 1] < end)
962 end = trunc_page(physmap[i + 1]);
963 for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
964 int tmp, page_bad;
965 int *ptr = (int *)CADDR1;
966
967 /*
968 * block out kernel memory as not available.
969 */
970 if (pa >= 0x100000 && pa < first)
971 continue;
972
973 page_bad = FALSE;
974
975 /*
976 * map page into kernel: valid, read/write, non-cacheable
977 */
978 *pte = pa | PG_V | PG_RW | PG_N;
979 invltlb();
980
981 tmp = *(int *)ptr;
982 /*
983 * Test for alternating 1's and 0's
984 */
985 *(volatile int *)ptr = 0xaaaaaaaa;
986 if (*(volatile int *)ptr != 0xaaaaaaaa) {
987 page_bad = TRUE;
988 }
989 /*
990 * Test for alternating 0's and 1's
991 */
992 *(volatile int *)ptr = 0x55555555;
993 if (*(volatile int *)ptr != 0x55555555) {
994 page_bad = TRUE;
995 }
996 /*
997 * Test for all 1's
998 */
999 *(volatile int *)ptr = 0xffffffff;
1000 if (*(volatile int *)ptr != 0xffffffff) {
1001 page_bad = TRUE;
1002 }
1003 /*
1004 * Test for all 0's
1005 */
1006 *(volatile int *)ptr = 0x0;
1007 if (*(volatile int *)ptr != 0x0) {
1008 page_bad = TRUE;
1009 }
1010 /*
1011 * Restore original value.
1012 */
1013 *(int *)ptr = tmp;
1014
1015 /*
1016 * Adjust array of valid/good pages.
1017 */
1018 if (page_bad == TRUE) {
1019 continue;
1020 }
1021 /*
1022 * If this good page is a continuation of the
1023 * previous set of good pages, then just increase
1024 * the end pointer. Otherwise start a new chunk.
1025 * Note that "end" points one past the last valid address,
1026 * making the range >= start and < end.
1027 * If we're also doing a speculative memory
1028 * test and we are at or past the end, bump up Maxmem
1029 * so that we keep going. The first bad page
1030 * will terminate the loop.
1031 */
1032 if (phys_avail[pa_indx] == pa) {
1033 phys_avail[pa_indx] += PAGE_SIZE;
1034 } else {
1035 pa_indx++;
1036 if (pa_indx == PHYS_AVAIL_ARRAY_END) {
1037 printf(
1038 "Too many holes in the physical address space, giving up\n");
1039 pa_indx--;
1040 break;
1041 }
1042 phys_avail[pa_indx++] = pa; /* start */
1043 phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */
1044 }
1045 physmem++;
1046 }
1047 }
1048 *pte = 0;
1049 invltlb();
1050
1051 /*
1052 * XXX
1053 * The last chunk must contain at least one page plus the message
1054 * buffer to avoid complicating other code (message buffer address
1055 * calculation, etc.).
1056 */
1057 while (phys_avail[pa_indx - 1] + PAGE_SIZE +
1058 round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) {
1059 physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
1060 phys_avail[pa_indx--] = 0;
1061 phys_avail[pa_indx--] = 0;
1062 }
1063
1064 Maxmem = atop(phys_avail[pa_indx]);
1065
1066 /* Trim off space for the message buffer. */
1067 phys_avail[pa_indx] -= round_page(MSGBUF_SIZE);
1068
1069 avail_end = phys_avail[pa_indx];
1070 }
1071
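/*
 * hammer_time() is the early amd64 machine-dependent startup routine.
 * Called from locore with the preloaded module pointer and the first
 * free physical address, it sets up proc0/thread0, the GDT, IDT and
 * TSS, per-CPU data, the console, the fast syscall MSRs and the
 * physical memory map, and returns the address locore should use as
 * the initial kernel stack pointer.
 */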
1072 u_int64_t
1073 hammer_time(u_int64_t modulep, u_int64_t physfree)
1074 {
1075 caddr_t kmdp;
1076 int gsel_tss, off, x;
1077 struct pcpu *pc;
1078 u_int64_t msr;
1079 char *env;
1080
1081 #ifdef DEV_ISA
1082 /* Preemptively mask the atpics and leave them shut down */
1083 outb(IO_ICU1 + ICU_IMR_OFFSET, 0xff);
1084 outb(IO_ICU2 + ICU_IMR_OFFSET, 0xff);
1085 #else
1086 #error "have you forgotten the isa device?";
1087 #endif
1088
1089 /* Turn on PTE NX (no execute) bit */
1090 msr = rdmsr(MSR_EFER) | EFER_NXE;
1091 wrmsr(MSR_EFER, msr);
1092
1093 proc0.p_uarea = (struct user *)(physfree + KERNBASE);
1094 bzero(proc0.p_uarea, UAREA_PAGES * PAGE_SIZE);
1095 physfree += UAREA_PAGES * PAGE_SIZE;
1096 thread0.td_kstack = physfree + KERNBASE;
1097 bzero((void *)thread0.td_kstack, KSTACK_PAGES * PAGE_SIZE);
1098 physfree += KSTACK_PAGES * PAGE_SIZE;
1099 thread0.td_pcb = (struct pcb *)
1100 (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1;
1101
1102 atdevbase = ISA_HOLE_START + KERNBASE;
1103
1104 /*
1105 * This may be done better later if it gets more high level
1106 * components in it. If so just link td->td_proc here.
1107 */
1108 proc_linkup(&proc0, &ksegrp0, &kse0, &thread0);
1109
1110 preload_metadata = (caddr_t)(uintptr_t)(modulep + KERNBASE);
1111 preload_bootstrap_relocate(KERNBASE);
1112 kmdp = preload_search_by_type("elf kernel");
1113 if (kmdp == NULL)
1114 kmdp = preload_search_by_type("elf64 kernel");
1115 boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int);
1116 kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *) + KERNBASE;
1117
1118 /* Init basic tunables, hz etc */
1119 init_param1();
1120
1121 /*
1122 * make gdt memory segments
1123 */
1124 gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&common_tss[0];
1125
1126 for (x = 0; x < NGDT; x++) {
1127 if (x != GPROC0_SEL && x != (GPROC0_SEL + 1))
1128 ssdtosd(&gdt_segs[x], &gdt[x]);
1129 }
1130 ssdtosyssd(&gdt_segs[GPROC0_SEL], (struct system_segment_descriptor *)&gdt[GPROC0_SEL]);
1131
1132 r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
1133 r_gdt.rd_base = (long) gdt;
1134 lgdt(&r_gdt);
1135 pc = &__pcpu[0];
1136
1137 wrmsr(MSR_FSBASE, 0); /* User value */
1138 wrmsr(MSR_GSBASE, (u_int64_t)pc);
1139 wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */
1140
1141 pcpu_init(pc, 0, sizeof(struct pcpu));
1142 PCPU_SET(prvspace, pc);
1143 PCPU_SET(curthread, &thread0);
1144 PCPU_SET(tssp, &common_tss[0]);
1145
1146 /*
1147 * Initialize mutexes.
1148 *
1149 * icu_lock: in order to allow an interrupt to occur in a critical
1150 * section, to set pcpu->ipending (etc...) properly, we
1151 * must be able to get the icu lock, so it can't be
1152 * under witness.
1153 */
1154 mutex_init();
1155 mtx_init(&clock_lock, "clk", NULL, MTX_SPIN);
1156 mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS);
1157
1158 /* exceptions */
1159 for (x = 0; x < NIDT; x++)
1160 setidt(x, &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
1161 setidt(IDT_DE, &IDTVEC(div), SDT_SYSIGT, SEL_KPL, 0);
1162 setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 0);
1163 setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 0);
1164 setidt(IDT_BP, &IDTVEC(bpt), SDT_SYSIGT, SEL_UPL, 0);
1165 setidt(IDT_OF, &IDTVEC(ofl), SDT_SYSIGT, SEL_KPL, 0);
1166 setidt(IDT_BR, &IDTVEC(bnd), SDT_SYSIGT, SEL_KPL, 0);
1167 setidt(IDT_UD, &IDTVEC(ill), SDT_SYSIGT, SEL_KPL, 0);
1168 setidt(IDT_NM, &IDTVEC(dna), SDT_SYSIGT, SEL_KPL, 0);
1169 setidt(IDT_DF, &IDTVEC(dblfault), SDT_SYSIGT, SEL_KPL, 1);
1170 setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYSIGT, SEL_KPL, 0);
1171 setidt(IDT_TS, &IDTVEC(tss), SDT_SYSIGT, SEL_KPL, 0);
1172 setidt(IDT_NP, &IDTVEC(missing), SDT_SYSIGT, SEL_KPL, 0);
1173 setidt(IDT_SS, &IDTVEC(stk), SDT_SYSIGT, SEL_KPL, 0);
1174 setidt(IDT_GP, &IDTVEC(prot), SDT_SYSIGT, SEL_KPL, 0);
1175 setidt(IDT_PF, &IDTVEC(page), SDT_SYSIGT, SEL_KPL, 0);
1176 setidt(IDT_MF, &IDTVEC(fpu), SDT_SYSIGT, SEL_KPL, 0);
1177 setidt(IDT_AC, &IDTVEC(align), SDT_SYSIGT, SEL_KPL, 0);
1178 setidt(IDT_MC, &IDTVEC(mchk), SDT_SYSIGT, SEL_KPL, 0);
1179 setidt(IDT_XF, &IDTVEC(xmm), SDT_SYSIGT, SEL_KPL, 0);
1180
1181 r_idt.rd_limit = sizeof(idt0) - 1;
1182 r_idt.rd_base = (long) idt;
1183 lidt(&r_idt);
1184
1185 /*
1186 * Initialize the console before we print anything out.
1187 */
1188 cninit();
1189
1190 #ifdef DEV_ATPIC
1191 atpic_startup();
1192 #endif
1193
1194 #ifdef DDB
1195 kdb_init();
1196 if (boothowto & RB_KDB)
1197 Debugger("Boot flags requested debugger");
1198 #endif
1199
1200 identify_cpu(); /* Final stage of CPU initialization */
1201 initializecpu(); /* Initialize CPU registers */
1202
1203 /* make an initial tss so cpu can get interrupt stack on syscall! */
1204 common_tss[0].tss_rsp0 = thread0.td_kstack + \
1205 KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb);
1206 /* Ensure the stack is aligned to 16 bytes */
1207 common_tss[0].tss_rsp0 &= ~0xF;
1208 PCPU_SET(rsp0, common_tss[0].tss_rsp0);
1209
1210 /* doublefault stack space, runs on ist1 */
1211 common_tss[0].tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)];
1212
1213 /* Set the IO permission bitmap (empty due to tss seg limit) */
1214 common_tss[0].tss_iobase = sizeof(struct amd64tss);
1215
1216 gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
1217 ltr(gsel_tss);
1218
1219 /* Set up the fast syscall stuff */
1220 msr = rdmsr(MSR_EFER) | EFER_SCE;
1221 wrmsr(MSR_EFER, msr);
1222 wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall));
1223 wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32));
1224 msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) |
1225 ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48);
1226 wrmsr(MSR_STAR, msr);
1227 wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D);
1228
1229 getmemsize(kmdp, physfree);
1230 init_param2(physmem);
1231
1232 /* now running on new page tables, configured, and u/iom is accessible */
1233
1234 /* Map the message buffer. */
1235 for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE)
1236 pmap_kenter((vm_offset_t)msgbufp + off, avail_end + off);
1237
1238 msgbufinit(msgbufp, MSGBUF_SIZE);
1239 fpuinit();
1240
1241 /* transfer to user mode */
1242
1243 _ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
1244 _udatasel = GSEL(GUDATA_SEL, SEL_UPL);
1245 _ucode32sel = GSEL(GUCODE32_SEL, SEL_UPL);
1246
1247 /* setup proc 0's pcb */
1248 thread0.td_pcb->pcb_flags = 0; /* XXXKSE */
1249 thread0.td_pcb->pcb_cr3 = KPML4phys;
1250 thread0.td_frame = &proc0_tf;
1251
1252 env = getenv("kernelname");
1253 if (env != NULL)
1254 strlcpy(kernelname, env, sizeof(kernelname));
1255
1256 /* Location of kernel stack for locore */
1257 return ((u_int64_t)thread0.td_pcb);
1258 }
1259
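/*
 * Per-CPU data MD initialization hook; the ACPI id starts out as
 * "unknown" and is presumably filled in later by the ACPI code.
 */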
1260 void
1261 cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
1262 {
1263
1264 pcpu->pc_acpi_id = 0xffffffff;
1265 }
1266
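/* Redirect a stopped thread to resume at addr (ptrace support). */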
1267 int
1268 ptrace_set_pc(struct thread *td, unsigned long addr)
1269 {
1270 td->td_frame->tf_rip = addr;
1271 return (0);
1272 }
1273
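/* Single-step by setting the trace flag (PSL_T) in the saved rflags. */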
1274 int
1275 ptrace_single_step(struct thread *td)
1276 {
1277 td->td_frame->tf_rflags |= PSL_T;
1278 return (0);
1279 }
1280
1281 int
1282 fill_regs(struct thread *td, struct reg *regs)
1283 {
1284 struct pcb *pcb;
1285 struct trapframe *tp;
1286
1287 tp = td->td_frame;
1288 regs->r_r15 = tp->tf_r15;
1289 regs->r_r14 = tp->tf_r14;
1290 regs->r_r13 = tp->tf_r13;
1291 regs->r_r12 = tp->tf_r12;
1292 regs->r_r11 = tp->tf_r11;
1293 regs->r_r10 = tp->tf_r10;
1294 regs->r_r9 = tp->tf_r9;
1295 regs->r_r8 = tp->tf_r8;
1296 regs->r_rdi = tp->tf_rdi;
1297 regs->r_rsi = tp->tf_rsi;
1298 regs->r_rbp = tp->tf_rbp;
1299 regs->r_rbx = tp->tf_rbx;
1300 regs->r_rdx = tp->tf_rdx;
1301 regs->r_rcx = tp->tf_rcx;
1302 regs->r_rax = tp->tf_rax;
1303 regs->r_rip = tp->tf_rip;
1304 regs->r_cs = tp->tf_cs;
1305 regs->r_rflags = tp->tf_rflags;
1306 regs->r_rsp = tp->tf_rsp;
1307 regs->r_ss = tp->tf_ss;
1308 pcb = td->td_pcb;
1309 return (0);
1310 }
1311
1312 int
1313 set_regs(struct thread *td, struct reg *regs)
1314 {
1315 struct pcb *pcb;
1316 struct trapframe *tp;
1317
1318 tp = td->td_frame;
1319 if (!EFL_SECURE(regs->r_rflags, tp->tf_rflags) ||
1320 !CS_SECURE(regs->r_cs))
1321 return (EINVAL);
1322 tp->tf_r15 = regs->r_r15;
1323 tp->tf_r14 = regs->r_r14;
1324 tp->tf_r13 = regs->r_r13;
1325 tp->tf_r12 = regs->r_r12;
1326 tp->tf_r11 = regs->r_r11;
1327 tp->tf_r10 = regs->r_r10;
1328 tp->tf_r9 = regs->r_r9;
1329 tp->tf_r8 = regs->r_r8;
1330 tp->tf_rdi = regs->r_rdi;
1331 tp->tf_rsi = regs->r_rsi;
1332 tp->tf_rbp = regs->r_rbp;
1333 tp->tf_rbx = regs->r_rbx;
1334 tp->tf_rdx = regs->r_rdx;
1335 tp->tf_rcx = regs->r_rcx;
1336 tp->tf_rax = regs->r_rax;
1337 tp->tf_rip = regs->r_rip;
1338 tp->tf_cs = regs->r_cs;
1339 tp->tf_rflags = regs->r_rflags;
1340 tp->tf_rsp = regs->r_rsp;
1341 tp->tf_ss = regs->r_ss;
1342 pcb = td->td_pcb;
1343 return (0);
1344 }
1345
1346 /* XXX check all this stuff! */
1347 /* externalize from sv_xmm */
1348 static void
1349 fill_fpregs_xmm(struct savefpu *sv_xmm, struct fpreg *fpregs)
1350 {
1351 struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env;
1352 struct envxmm *penv_xmm = &sv_xmm->sv_env;
1353 int i;
1354
1355 /* pcb -> fpregs */
1356 bzero(fpregs, sizeof(*fpregs));
1357
1358 /* FPU control/status */
1359 penv_fpreg->en_cw = penv_xmm->en_cw;
1360 penv_fpreg->en_sw = penv_xmm->en_sw;
1361 penv_fpreg->en_tw = penv_xmm->en_tw;
1362 penv_fpreg->en_opcode = penv_xmm->en_opcode;
1363 penv_fpreg->en_rip = penv_xmm->en_rip;
1364 penv_fpreg->en_rdp = penv_xmm->en_rdp;
1365 penv_fpreg->en_mxcsr = penv_xmm->en_mxcsr;
1366 penv_fpreg->en_mxcsr_mask = penv_xmm->en_mxcsr_mask;
1367
1368 /* FPU registers */
1369 for (i = 0; i < 8; ++i)
1370 bcopy(sv_xmm->sv_fp[i].fp_acc.fp_bytes, fpregs->fpr_acc[i], 10);
1371
1372 /* SSE registers */
1373 for (i = 0; i < 16; ++i)
1374 bcopy(sv_xmm->sv_xmm[i].xmm_bytes, fpregs->fpr_xacc[i], 16);
1375 }
1376
1377 /* internalize from fpregs into sv_xmm */
1378 static void
1379 set_fpregs_xmm(struct fpreg *fpregs, struct savefpu *sv_xmm)
1380 {
1381 struct envxmm *penv_xmm = &sv_xmm->sv_env;
1382 struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env;
1383 int i;
1384
1385 /* fpregs -> pcb */
1386 /* FPU control/status */
1387 penv_xmm->en_cw = penv_fpreg->en_cw;
1388 penv_xmm->en_sw = penv_fpreg->en_sw;
1389 penv_xmm->en_tw = penv_fpreg->en_tw;
1390 penv_xmm->en_opcode = penv_fpreg->en_opcode;
1391 penv_xmm->en_rip = penv_fpreg->en_rip;
1392 penv_xmm->en_rdp = penv_fpreg->en_rdp;
1393 penv_xmm->en_mxcsr = penv_fpreg->en_mxcsr;
1394 penv_xmm->en_mxcsr_mask = penv_fpreg->en_mxcsr_mask;
1395
1396 /* FPU registers */
1397 for (i = 0; i < 8; ++i)
1398 bcopy(fpregs->fpr_acc[i], sv_xmm->sv_fp[i].fp_acc.fp_bytes, 10);
1399
1400 /* SSE registers */
1401 for (i = 0; i < 16; ++i)
1402 bcopy(fpregs->fpr_xacc[i], sv_xmm->sv_xmm[i].xmm_bytes, 16);
1403 }
1404
1405 /* externalize from td->pcb */
1406 int
1407 fill_fpregs(struct thread *td, struct fpreg *fpregs)
1408 {
1409
1410 fill_fpregs_xmm(&td->td_pcb->pcb_save, fpregs);
1411 return (0);
1412 }
1413
1414 /* internalize to td->pcb */
1415 int
1416 set_fpregs(struct thread *td, struct fpreg *fpregs)
1417 {
1418
1419 set_fpregs_xmm(fpregs, &td->td_pcb->pcb_save);
1420 return (0);
1421 }
1422
1423 /*
1424 * Get machine context.
1425 */
1426 int
1427 get_mcontext(struct thread *td, mcontext_t *mcp, int flags)
1428 {
1429 struct trapframe *tp;
1430
1431 tp = td->td_frame;
1432 PROC_LOCK(curthread->td_proc);
1433 mcp->mc_onstack = sigonstack(tp->tf_rsp);
1434 PROC_UNLOCK(curthread->td_proc);
1435 mcp->mc_r15 = tp->tf_r15;
1436 mcp->mc_r14 = tp->tf_r14;
1437 mcp->mc_r13 = tp->tf_r13;
1438 mcp->mc_r12 = tp->tf_r12;
1439 mcp->mc_r11 = tp->tf_r11;
1440 mcp->mc_r10 = tp->tf_r10;
1441 mcp->mc_r9 = tp->tf_r9;
1442 mcp->mc_r8 = tp->tf_r8;
1443 mcp->mc_rdi = tp->tf_rdi;
1444 mcp->mc_rsi = tp->tf_rsi;
1445 mcp->mc_rbp = tp->tf_rbp;
1446 mcp->mc_rbx = tp->tf_rbx;
1447 mcp->mc_rcx = tp->tf_rcx;
1448 if (flags & GET_MC_CLEAR_RET) {
1449 mcp->mc_rax = 0;
1450 mcp->mc_rdx = 0;
1451 } else {
1452 mcp->mc_rax = tp->tf_rax;
1453 mcp->mc_rdx = tp->tf_rdx;
1454 }
1455 mcp->mc_rip = tp->tf_rip;
1456 mcp->mc_cs = tp->tf_cs;
1457 mcp->mc_rflags = tp->tf_rflags;
1458 mcp->mc_rsp = tp->tf_rsp;
1459 mcp->mc_ss = tp->tf_ss;
1460 mcp->mc_len = sizeof(*mcp);
1461 get_fpcontext(td, mcp);
1462 return (0);
1463 }
1464
1465 /*
1466 * Set machine context.
1467 *
1468 * However, we don't set any but the user modifiable flags, and we won't
1469 * touch the cs selector.
1470 */
1471 int
1472 set_mcontext(struct thread *td, const mcontext_t *mcp)
1473 {
1474 struct trapframe *tp;
1475 long rflags;
1476 int ret;
1477
1478 tp = td->td_frame;
1479 if (mcp->mc_len != sizeof(*mcp))
1480 return (EINVAL);
1481 rflags = (mcp->mc_rflags & PSL_USERCHANGE) |
1482 (tp->tf_rflags & ~PSL_USERCHANGE);
1483 ret = set_fpcontext(td, mcp);
1484 if (ret != 0)
1485 return (ret);
1486 tp->tf_r15 = mcp->mc_r15;
1487 tp->tf_r14 = mcp->mc_r14;
1488 tp->tf_r13 = mcp->mc_r13;
1489 tp->tf_r12 = mcp->mc_r12;
1490 tp->tf_r11 = mcp->mc_r11;
1491 tp->tf_r10 = mcp->mc_r10;
1492 tp->tf_r9 = mcp->mc_r9;
1493 tp->tf_r8 = mcp->mc_r8;
1494 tp->tf_rdi = mcp->mc_rdi;
1495 tp->tf_rsi = mcp->mc_rsi;
1496 tp->tf_rbp = mcp->mc_rbp;
1497 tp->tf_rbx = mcp->mc_rbx;
1498 tp->tf_rdx = mcp->mc_rdx;
1499 tp->tf_rcx = mcp->mc_rcx;
1500 tp->tf_rax = mcp->mc_rax;
1501 tp->tf_rip = mcp->mc_rip;
1502 tp->tf_rflags = rflags;
1503 tp->tf_rsp = mcp->mc_rsp;
1504 tp->tf_ss = mcp->mc_ss;
1505 return (0);
1506 }
1507
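/* Capture the FPU state, its format and its ownership into the mcontext. */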
1508 static void
1509 get_fpcontext(struct thread *td, mcontext_t *mcp)
1510 {
1511
1512 mcp->mc_ownedfp = fpugetregs(td, (struct savefpu *)&mcp->mc_fpstate);
1513 mcp->mc_fpformat = fpuformat();
1514 }
1515
1516 static int
1517 set_fpcontext(struct thread *td, const mcontext_t *mcp)
1518 {
1519
1520 if (mcp->mc_fpformat == _MC_FPFMT_NODEV)
1521 return (0);
1522 else if (mcp->mc_fpformat != _MC_FPFMT_XMM)
1523 return (EINVAL);
1524 else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE)
1525 /* We don't care what state is left in the FPU or PCB. */
1526 fpstate_drop(td);
1527 else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU ||
1528 mcp->mc_ownedfp == _MC_FPOWNED_PCB) {
1529 /*
1530 * XXX we violate the dubious requirement that fpusetregs()
1531 * be called with interrupts disabled.
1532 * XXX obsolete on trap-16 systems?
1533 */
1534 fpusetregs(td, (struct savefpu *)&mcp->mc_fpstate);
1535 } else
1536 return (EINVAL);
1537 return (0);
1538 }
1539
1540 void
1541 fpstate_drop(struct thread *td)
1542 {
1543 register_t s;
1544
1545 s = intr_disable();
1546 if (PCPU_GET(fpcurthread) == td)
1547 fpudrop();
1548 /*
1549 * XXX force a full drop of the fpu. The above only drops it if we
1550 * owned it.
1551 *
1552 * XXX I don't much like fpugetregs()'s semantics of doing a full
1553 * drop. Dropping only to the pcb matches fnsave's behaviour.
1554 * We only need to drop to !PCB_INITDONE in sendsig(). But
1555 * sendsig() is the only caller of fpugetregs()... perhaps we just
1556 * have too many layers.
1557 */
1558 curthread->td_pcb->pcb_flags &= ~PCB_FPUINITDONE;
1559 intr_restore(s);
1560 }
1561
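/* XXX Debug register access is not implemented yet; these are no-op stubs. */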
1562 int
1563 fill_dbregs(struct thread *td, struct dbreg *dbregs)
1564 {
1565
1566 return (0);
1567 }
1568
1569 int
1570 set_dbregs(struct thread *td, struct dbreg *dbregs)
1571 {
1572
1573 return (0);
1574 }
1575
1576 #ifndef DDB
1577 void
1578 Debugger(const char *msg)
1579 {
1580 printf("Debugger(\"%s\") called.\n", msg);
1581 }
1582 #endif /* no DDB */
1583
1584 #ifdef DDB
1585
1586 /*
1587 * Provide inb() and outb() as functions. They are normally only
1588 * available as macros calling inlined functions, thus cannot be
1589 * called inside DDB.
1590 *
1591 * The actual code is stolen from <machine/cpufunc.h>, and de-inlined.
1592 */
1593
1594 #undef inb
1595 #undef outb
1596
1597 /* silence compiler warnings */
1598 u_char inb(u_int);
1599 void outb(u_int, u_char);
1600
1601 u_char
1602 inb(u_int port)
1603 {
1604 u_char data;
1605 /*
1606 * We use %%dx and not %1 here because i/o is done at %dx and not at
1607 * %edx, while gcc generates inferior code (movw instead of movl)
1608 * if we tell it to load (u_short) port.
1609 */
1610 __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
1611 return (data);
1612 }
1613
1614 void
1615 outb(u_int port, u_char data)
1616 {
1617 u_char al;
1618 /*
1619 * Use an unnecessary assignment to help gcc's register allocator.
1620 * This makes a large difference for gcc-1.40 and a tiny difference
1621 * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for
1622 * best results. gcc-2.6.0 can't handle this.
1623 */
1624 al = data;
1625 __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
1626 }
1627
1628 #endif /* DDB */