1 /*-
2 * Copyright (c) 1992 Terrence R. Lambert.
3 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * William Jolitz.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the University of
20 * California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91
38 * $FreeBSD: releng/5.0/sys/i386/i386/machdep.c 107521 2002-12-02 19:58:55Z deischen $
39 */
40
41 #include "opt_atalk.h"
42 #include "opt_compat.h"
43 #include "opt_cpu.h"
44 #include "opt_ddb.h"
45 #include "opt_inet.h"
46 #include "opt_ipx.h"
47 #include "opt_isa.h"
48 #include "opt_maxmem.h"
49 #include "opt_msgbuf.h"
50 #include "opt_npx.h"
51 #include "opt_perfmon.h"
52 #include "opt_kstack_pages.h"
53
54 #include <sys/param.h>
55 #include <sys/systm.h>
56 #include <sys/sysproto.h>
57 #include <sys/signalvar.h>
58 #include <sys/imgact.h>
59 #include <sys/kernel.h>
60 #include <sys/ktr.h>
61 #include <sys/linker.h>
62 #include <sys/lock.h>
63 #include <sys/malloc.h>
64 #include <sys/mutex.h>
65 #include <sys/pcpu.h>
66 #include <sys/proc.h>
67 #include <sys/bio.h>
68 #include <sys/buf.h>
69 #include <sys/reboot.h>
70 #include <sys/callout.h>
71 #include <sys/msgbuf.h>
72 #include <sys/sched.h>
73 #include <sys/sysent.h>
74 #include <sys/sysctl.h>
75 #include <sys/ucontext.h>
76 #include <sys/vmmeter.h>
77 #include <sys/bus.h>
78 #include <sys/eventhandler.h>
79
80 #include <vm/vm.h>
81 #include <vm/vm_param.h>
82 #include <vm/vm_kern.h>
83 #include <vm/vm_object.h>
84 #include <vm/vm_page.h>
85 #include <vm/vm_map.h>
86 #include <vm/vm_pager.h>
87 #include <vm/vm_extern.h>
88
89 #include <sys/user.h>
90 #include <sys/exec.h>
91 #include <sys/cons.h>
92
93 #include <ddb/ddb.h>
94
95 #include <net/netisr.h>
96
97 #include <machine/cpu.h>
98 #include <machine/cputypes.h>
99 #include <machine/reg.h>
100 #include <machine/clock.h>
101 #include <machine/specialreg.h>
102 #include <machine/bootinfo.h>
103 #include <machine/md_var.h>
104 #include <machine/pc/bios.h>
105 #include <machine/pcb_ext.h> /* pcb.h included via sys/user.h */
106 #include <machine/proc.h>
107 #ifdef PERFMON
108 #include <machine/perfmon.h>
109 #endif
110 #ifdef SMP
111 #include <machine/privatespace.h>
112 #include <machine/smp.h>
113 #endif
114
115 #include <i386/isa/icu.h>
116 #include <i386/isa/intr_machdep.h>
117 #include <isa/rtc.h>
118 #include <machine/vm86.h>
119 #include <sys/ptrace.h>
120 #include <machine/sigframe.h>
121
122 extern void init386(int first);
123 extern void dblfault_handler(void);
124
125 extern void printcpuinfo(void); /* XXX header file */
126 extern void earlysetcpuclass(void); /* same header file */
127 extern void finishidentcpu(void);
128 extern void panicifcpuunsupported(void);
129 extern void initializecpu(void);
130
131 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
132 #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
133
134 #if !defined(CPU_ENABLE_SSE) && defined(I686_CPU)
135 #define CPU_ENABLE_SSE
136 #endif
137 #if defined(CPU_DISABLE_SSE)
138 #undef CPU_ENABLE_SSE
139 #endif
140
141 static void cpu_startup(void *);
142 static void fpstate_drop(struct thread *td);
143 static void get_fpcontext(struct thread *td, mcontext_t *mcp);
144 static int set_fpcontext(struct thread *td, const mcontext_t *mcp);
145 #ifdef CPU_ENABLE_SSE
146 static void set_fpregs_xmm(struct save87 *, struct savexmm *);
147 static void fill_fpregs_xmm(struct savexmm *, struct save87 *);
148 #endif /* CPU_ENABLE_SSE */
149 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)
150
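/* User code/data segment selectors (initialized in init386()). */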
151 int _udatasel, _ucodesel;
152 u_int atdevbase;
153
154 #if defined(SWTCH_OPTIM_STATS)
155 extern int swtch_optim_stats;
156 SYSCTL_INT(_debug, OID_AUTO, swtch_optim_stats,
157 CTLFLAG_RD, &swtch_optim_stats, 0, "");
158 SYSCTL_INT(_debug, OID_AUTO, tlb_flush_count,
159 CTLFLAG_RD, &tlb_flush_count, 0, "");
160 #endif
161
162 int cold = 1;
163
164 #ifdef COMPAT_43
165 static void osendsig(sig_t catcher, int sig, sigset_t *mask, u_long code);
166 #endif
167 #ifdef COMPAT_FREEBSD4
168 static void freebsd4_sendsig(sig_t catcher, int sig, sigset_t *mask,
169 u_long code);
170 #endif
171
172 long Maxmem = 0;
173
174 vm_offset_t phys_avail[10];
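/*
 * phys_avail[] holds { start0, end0, start1, end1, ... } pairs of usable
 * physical memory (inclusive start, exclusive end), terminated by a pair
 * of zeroes; getmemsize() below fills it in.
 */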
175
176 /* must be 2 less so 0 0 can signal end of chunks */
177 #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2)
178
179 struct kva_md_info kmi;
180
181 static struct trapframe proc0_tf;
182 #ifndef SMP
183 static struct pcpu __pcpu;
184 #endif
185
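/* Protects access to the interrupt controller (ICU) hardware. */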
186 struct mtx icu_lock;
187
188 static void
189 cpu_startup(dummy)
190 void *dummy;
191 {
192 /*
193 * Good {morning,afternoon,evening,night}.
194 */
195 earlysetcpuclass();
196 startrtclock();
197 printcpuinfo();
198 panicifcpuunsupported();
199 #ifdef PERFMON
200 perfmon_init();
201 #endif
202 printf("real memory = %u (%u MB)\n", ptoa(Maxmem),
203 ptoa(Maxmem) / 1048576);
204 /*
205 * Display any holes after the first chunk of extended memory.
206 */
207 if (bootverbose) {
208 int indx;
209
210 printf("Physical memory chunk(s):\n");
211 for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
212 unsigned int size1;
213
214 size1 = phys_avail[indx + 1] - phys_avail[indx];
215 printf("0x%08x - 0x%08x, %u bytes (%u pages)\n",
216 phys_avail[indx], phys_avail[indx + 1] - 1, size1,
217 size1 / PAGE_SIZE);
218 }
219 }
220
221 vm_ksubmap_init(&kmi);
222
223 printf("avail memory = %u (%u MB)\n", ptoa(cnt.v_free_count),
224 ptoa(cnt.v_free_count) / 1048576);
225
226 /*
227 * Set up buffers, so they can be used to read disk labels.
228 */
229 bufinit();
230 vm_pager_bufferinit();
231
232 #ifndef SMP
233 /* For SMP, we delay the cpu_setregs() until after SMP startup. */
234 cpu_setregs();
235 #endif
236 }
237
238 /*
239  * Send an interrupt to a process.
240  *
241  * The stack is set up so that the sigcode stored
242  * at its top calls the handler routine, followed by a call
243  * to the sigreturn routine below.  After sigreturn
244  * resets the signal mask, the stack, and the
245  * frame pointer, it returns to the user-specified
246  * pc and psl.
247 */
248 #ifdef COMPAT_43
249 static void
250 osendsig(catcher, sig, mask, code)
251 sig_t catcher;
252 int sig;
253 sigset_t *mask;
254 u_long code;
255 {
256 struct osigframe sf, *fp;
257 struct proc *p;
258 struct thread *td;
259 struct sigacts *psp;
260 struct trapframe *regs;
261 int oonstack;
262
263 td = curthread;
264 p = td->td_proc;
265 PROC_LOCK_ASSERT(p, MA_OWNED);
266 psp = p->p_sigacts;
267 regs = td->td_frame;
268 oonstack = sigonstack(regs->tf_esp);
269
270 /* Allocate space for the signal handler context. */
271 if ((p->p_flag & P_ALTSTACK) && !oonstack &&
272 SIGISMEMBER(psp->ps_sigonstack, sig)) {
273 fp = (struct osigframe *)(p->p_sigstk.ss_sp +
274 p->p_sigstk.ss_size - sizeof(struct osigframe));
275 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
276 p->p_sigstk.ss_flags |= SS_ONSTACK;
277 #endif
278 } else
279 fp = (struct osigframe *)regs->tf_esp - 1;
280 PROC_UNLOCK(p);
281
282 /* Translate the signal if appropriate. */
283 if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
284 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
285
286 /* Build the argument list for the signal handler. */
287 sf.sf_signum = sig;
288 sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc;
289 PROC_LOCK(p);
290 if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
291 /* Signal handler installed with SA_SIGINFO. */
292 sf.sf_arg2 = (register_t)&fp->sf_siginfo;
293 sf.sf_siginfo.si_signo = sig;
294 sf.sf_siginfo.si_code = code;
295 sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher;
296 } else {
297 /* Old FreeBSD-style arguments. */
298 sf.sf_arg2 = code;
299 sf.sf_addr = regs->tf_err;
300 sf.sf_ahu.sf_handler = catcher;
301 }
302 PROC_UNLOCK(p);
303
304 /* Save most if not all of trap frame. */
305 sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax;
306 sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx;
307 sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx;
308 sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx;
309 sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi;
310 sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi;
311 sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs;
312 sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds;
313 sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss;
314 sf.sf_siginfo.si_sc.sc_es = regs->tf_es;
315 sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs;
316 sf.sf_siginfo.si_sc.sc_gs = rgs();
317 sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp;
318
319 /* Build the signal context to be used by osigreturn(). */
320 sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0;
321 SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask);
322 sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp;
323 sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp;
324 sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip;
325 sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags;
326 sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno;
327 sf.sf_siginfo.si_sc.sc_err = regs->tf_err;
328
329 /*
330 * If we're a vm86 process, we want to save the segment registers.
331 * We also change eflags to be our emulated eflags, not the actual
332 * eflags.
333 */
334 if (regs->tf_eflags & PSL_VM) {
335 /* XXX confusing names: `tf' isn't a trapframe; `regs' is. */
336 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
337 struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86;
338
339 sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs;
340 sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs;
341 sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es;
342 sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds;
343
344 if (vm86->vm86_has_vme == 0)
345 sf.sf_siginfo.si_sc.sc_ps =
346 (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
347 (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
348
349 /* See sendsig() for comments. */
350 tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
351 }
352
353 /*
354 * Copy the sigframe out to the user's stack.
355 */
356 if (copyout(&sf, fp, sizeof(*fp)) != 0) {
357 #ifdef DEBUG
358 printf("process %ld has trashed its stack\n", (long)p->p_pid);
359 #endif
360 PROC_LOCK(p);
361 sigexit(td, SIGILL);
362 }
363
364 regs->tf_esp = (int)fp;
365 regs->tf_eip = PS_STRINGS - szosigcode;
366 regs->tf_eflags &= ~PSL_T;
367 regs->tf_cs = _ucodesel;
368 regs->tf_ds = _udatasel;
369 regs->tf_es = _udatasel;
370 regs->tf_fs = _udatasel;
371 load_gs(_udatasel);
372 regs->tf_ss = _udatasel;
373 PROC_LOCK(p);
374 }
375 #endif /* COMPAT_43 */
376
377 #ifdef COMPAT_FREEBSD4
378 static void
379 freebsd4_sendsig(catcher, sig, mask, code)
380 sig_t catcher;
381 int sig;
382 sigset_t *mask;
383 u_long code;
384 {
385 struct sigframe4 sf, *sfp;
386 struct proc *p;
387 struct thread *td;
388 struct sigacts *psp;
389 struct trapframe *regs;
390 int oonstack;
391
392 td = curthread;
393 p = td->td_proc;
394 PROC_LOCK_ASSERT(p, MA_OWNED);
395 psp = p->p_sigacts;
396 regs = td->td_frame;
397 oonstack = sigonstack(regs->tf_esp);
398
399 /* Save user context. */
400 bzero(&sf, sizeof(sf));
401 sf.sf_uc.uc_sigmask = *mask;
402 sf.sf_uc.uc_stack = p->p_sigstk;
403 sf.sf_uc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
404 ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
405 sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
406 sf.sf_uc.uc_mcontext.mc_gs = rgs();
407 bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs));
408
409 /* Allocate space for the signal handler context. */
410 if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack &&
411 SIGISMEMBER(psp->ps_sigonstack, sig)) {
412 sfp = (struct sigframe4 *)(p->p_sigstk.ss_sp +
413 p->p_sigstk.ss_size - sizeof(struct sigframe4));
414 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
415 p->p_sigstk.ss_flags |= SS_ONSTACK;
416 #endif
417 } else
418 sfp = (struct sigframe4 *)regs->tf_esp - 1;
419 PROC_UNLOCK(p);
420
421 /* Translate the signal if appropriate. */
422 if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
423 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
424
425 /* Build the argument list for the signal handler. */
426 sf.sf_signum = sig;
427 sf.sf_ucontext = (register_t)&sfp->sf_uc;
428 PROC_LOCK(p);
429 if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
430 /* Signal handler installed with SA_SIGINFO. */
431 sf.sf_siginfo = (register_t)&sfp->sf_si;
432 sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;
433
434 /* Fill in POSIX parts */
435 sf.sf_si.si_signo = sig;
436 sf.sf_si.si_code = code;
437 sf.sf_si.si_addr = (void *)regs->tf_err;
438 } else {
439 /* Old FreeBSD-style arguments. */
440 sf.sf_siginfo = code;
441 sf.sf_addr = regs->tf_err;
442 sf.sf_ahu.sf_handler = catcher;
443 }
444 PROC_UNLOCK(p);
445
446 /*
447 * If we're a vm86 process, we want to save the segment registers.
448 * We also change eflags to be our emulated eflags, not the actual
449 * eflags.
450 */
451 if (regs->tf_eflags & PSL_VM) {
452 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
453 struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86;
454
455 sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs;
456 sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs;
457 sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es;
458 sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds;
459
460 if (vm86->vm86_has_vme == 0)
461 sf.sf_uc.uc_mcontext.mc_eflags =
462 (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
463 (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
464
465 /*
466 * Clear PSL_NT to inhibit T_TSSFLT faults on return from
467 * syscalls made by the signal handler. This just avoids
468 * wasting time for our lazy fixup of such faults. PSL_NT
469 * does nothing in vm86 mode, but vm86 programs can set it
470 * almost legitimately in probes for old cpu types.
471 */
472 tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
473 }
474
475 /*
476 * Copy the sigframe out to the user's stack.
477 */
478 if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
479 #ifdef DEBUG
480 printf("process %ld has trashed its stack\n", (long)p->p_pid);
481 #endif
482 PROC_LOCK(p);
483 sigexit(td, SIGILL);
484 }
485
486 regs->tf_esp = (int)sfp;
487 regs->tf_eip = PS_STRINGS - szfreebsd4_sigcode;
488 regs->tf_eflags &= ~PSL_T;
489 regs->tf_cs = _ucodesel;
490 regs->tf_ds = _udatasel;
491 regs->tf_es = _udatasel;
492 regs->tf_fs = _udatasel;
493 regs->tf_ss = _udatasel;
494 PROC_LOCK(p);
495 }
496 #endif /* COMPAT_FREEBSD4 */
497
498 void
499 sendsig(catcher, sig, mask, code)
500 sig_t catcher;
501 int sig;
502 sigset_t *mask;
503 u_long code;
504 {
505 struct sigframe sf, *sfp;
506 struct proc *p;
507 struct thread *td;
508 struct sigacts *psp;
509 char *sp;
510 struct trapframe *regs;
511 int oonstack;
512
513 td = curthread;
514 p = td->td_proc;
515 PROC_LOCK_ASSERT(p, MA_OWNED);
516 psp = p->p_sigacts;
517 #ifdef COMPAT_FREEBSD4
518 if (SIGISMEMBER(psp->ps_freebsd4, sig)) {
519 freebsd4_sendsig(catcher, sig, mask, code);
520 return;
521 }
522 #endif
523 #ifdef COMPAT_43
524 if (SIGISMEMBER(psp->ps_osigset, sig)) {
525 osendsig(catcher, sig, mask, code);
526 return;
527 }
528 #endif
529 regs = td->td_frame;
530 oonstack = sigonstack(regs->tf_esp);
531
532 /* Save user context. */
533 bzero(&sf, sizeof(sf));
534 sf.sf_uc.uc_sigmask = *mask;
535 sf.sf_uc.uc_stack = p->p_sigstk;
536 sf.sf_uc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
537 ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
538 sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
539 sf.sf_uc.uc_mcontext.mc_gs = rgs();
540 bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs));
541 sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
542 get_fpcontext(td, &sf.sf_uc.uc_mcontext);
543 fpstate_drop(td);
544
545 /* Allocate space for the signal handler context. */
546 if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack &&
547 SIGISMEMBER(psp->ps_sigonstack, sig)) {
548 sp = p->p_sigstk.ss_sp +
549 p->p_sigstk.ss_size - sizeof(struct sigframe);
550 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
551 p->p_sigstk.ss_flags |= SS_ONSTACK;
552 #endif
553 } else
554 sp = (char *)regs->tf_esp - sizeof(struct sigframe);
555 /* Align to 16 bytes. */
556 sfp = (struct sigframe *)((unsigned int)sp & ~0xF);
557 PROC_UNLOCK(p);
558
559 /* Translate the signal if appropriate. */
560 if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
561 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
562
563 /* Build the argument list for the signal handler. */
564 sf.sf_signum = sig;
565 sf.sf_ucontext = (register_t)&sfp->sf_uc;
566 PROC_LOCK(p);
567 if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
568 /* Signal handler installed with SA_SIGINFO. */
569 sf.sf_siginfo = (register_t)&sfp->sf_si;
570 sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;
571
572 /* Fill in POSIX parts */
573 sf.sf_si.si_signo = sig;
574 sf.sf_si.si_code = code;
575 sf.sf_si.si_addr = (void *)regs->tf_err;
576 } else {
577 /* Old FreeBSD-style arguments. */
578 sf.sf_siginfo = code;
579 sf.sf_addr = regs->tf_err;
580 sf.sf_ahu.sf_handler = catcher;
581 }
582 PROC_UNLOCK(p);
583
584 /*
585 * If we're a vm86 process, we want to save the segment registers.
586 * We also change eflags to be our emulated eflags, not the actual
587 * eflags.
588 */
589 if (regs->tf_eflags & PSL_VM) {
590 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
591 struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86;
592
593 sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs;
594 sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs;
595 sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es;
596 sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds;
597
598 if (vm86->vm86_has_vme == 0)
599 sf.sf_uc.uc_mcontext.mc_eflags =
600 (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
601 (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
602
603 /*
604 * Clear PSL_NT to inhibit T_TSSFLT faults on return from
605 * syscalls made by the signal handler. This just avoids
606 * wasting time for our lazy fixup of such faults. PSL_NT
607 * does nothing in vm86 mode, but vm86 programs can set it
608 * almost legitimately in probes for old cpu types.
609 */
610 tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
611 }
612
613 /*
614 * Copy the sigframe out to the user's stack.
615 */
616 if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
617 #ifdef DEBUG
618 printf("process %ld has trashed its stack\n", (long)p->p_pid);
619 #endif
620 PROC_LOCK(p);
621 sigexit(td, SIGILL);
622 }
623
624 regs->tf_esp = (int)sfp;
625 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
626 regs->tf_eflags &= ~PSL_T;
627 regs->tf_cs = _ucodesel;
628 regs->tf_ds = _udatasel;
629 regs->tf_es = _udatasel;
630 regs->tf_fs = _udatasel;
631 regs->tf_ss = _udatasel;
632 PROC_LOCK(p);
633 }
634
635 /*
636 * System call to cleanup state after a signal
637 * has been taken. Reset signal mask and
638 * stack state from context left by sendsig (above).
639 * Return to previous pc and psl as specified by
640 * context left by sendsig. Check carefully to
641 * make sure that the user has not modified the
642 * state to gain improper privileges.
643 *
644 * MPSAFE
645 */
646 #ifdef COMPAT_43
647 int
648 osigreturn(td, uap)
649 struct thread *td;
650 struct osigreturn_args /* {
651 struct osigcontext *sigcntxp;
652 } */ *uap;
653 {
654 struct osigcontext sc;
655 struct trapframe *regs;
656 struct osigcontext *scp;
657 struct proc *p = td->td_proc;
658 int eflags, error;
659
660 regs = td->td_frame;
661 error = copyin(uap->sigcntxp, &sc, sizeof(sc));
662 if (error != 0)
663 return (error);
664 scp = ≻
665 eflags = scp->sc_ps;
666 if (eflags & PSL_VM) {
667 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
668 struct vm86_kernel *vm86;
669
670 /*
671 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
672 * set up the vm86 area, and we can't enter vm86 mode.
673 */
674 if (td->td_pcb->pcb_ext == 0)
675 return (EINVAL);
676 vm86 = &td->td_pcb->pcb_ext->ext_vm86;
677 if (vm86->vm86_inited == 0)
678 return (EINVAL);
679
680 /* Go back to user mode if both flags are set. */
681 if ((eflags & PSL_VIP) && (eflags & PSL_VIF))
682 trapsignal(p, SIGBUS, 0);
683
684 if (vm86->vm86_has_vme) {
685 eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
686 (eflags & VME_USERCHANGE) | PSL_VM;
687 } else {
688 vm86->vm86_eflags = eflags; /* save VIF, VIP */
689 eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
690 (eflags & VM_USERCHANGE) | PSL_VM;
691 }
692 tf->tf_vm86_ds = scp->sc_ds;
693 tf->tf_vm86_es = scp->sc_es;
694 tf->tf_vm86_fs = scp->sc_fs;
695 tf->tf_vm86_gs = scp->sc_gs;
696 tf->tf_ds = _udatasel;
697 tf->tf_es = _udatasel;
698 tf->tf_fs = _udatasel;
699 } else {
700 /*
701 * Don't allow users to change privileged or reserved flags.
702 */
703 /*
704 * XXX do allow users to change the privileged flag PSL_RF.
705 * The cpu sets PSL_RF in tf_eflags for faults. Debuggers
706 * should sometimes set it there too. tf_eflags is kept in
707 * the signal context during signal handling and there is no
708 * other place to remember it, so the PSL_RF bit may be
709 * corrupted by the signal handler without us knowing.
710 * Corruption of the PSL_RF bit at worst causes one more or
711 * one less debugger trap, so allowing it is fairly harmless.
712 */
713 if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
714 return (EINVAL);
715 }
716
717 /*
718 * Don't allow users to load a valid privileged %cs. Let the
719 * hardware check for invalid selectors, excess privilege in
720 * other selectors, invalid %eip's and invalid %esp's.
721 */
722 if (!CS_SECURE(scp->sc_cs)) {
723 trapsignal(p, SIGBUS, T_PROTFLT);
724 return (EINVAL);
725 }
726 regs->tf_ds = scp->sc_ds;
727 regs->tf_es = scp->sc_es;
728 regs->tf_fs = scp->sc_fs;
729 }
730
731 /* Restore remaining registers. */
732 regs->tf_eax = scp->sc_eax;
733 regs->tf_ebx = scp->sc_ebx;
734 regs->tf_ecx = scp->sc_ecx;
735 regs->tf_edx = scp->sc_edx;
736 regs->tf_esi = scp->sc_esi;
737 regs->tf_edi = scp->sc_edi;
738 regs->tf_cs = scp->sc_cs;
739 regs->tf_ss = scp->sc_ss;
740 regs->tf_isp = scp->sc_isp;
741 regs->tf_ebp = scp->sc_fp;
742 regs->tf_esp = scp->sc_sp;
743 regs->tf_eip = scp->sc_pc;
744 regs->tf_eflags = eflags;
745
746 PROC_LOCK(p);
747 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
748 if (scp->sc_onstack & 1)
749 p->p_sigstk.ss_flags |= SS_ONSTACK;
750 else
751 p->p_sigstk.ss_flags &= ~SS_ONSTACK;
752 #endif
753 SIGSETOLD(p->p_sigmask, scp->sc_mask);
754 SIG_CANTMASK(p->p_sigmask);
755 signotify(p);
756 PROC_UNLOCK(p);
757 return (EJUSTRETURN);
758 }
759 #endif /* COMPAT_43 */
760
761 #ifdef COMPAT_FREEBSD4
762 /*
763 * MPSAFE
764 */
765 int
766 freebsd4_sigreturn(td, uap)
767 struct thread *td;
768 struct freebsd4_sigreturn_args /* {
769 const ucontext4 *sigcntxp;
770 } */ *uap;
771 {
772 struct ucontext4 uc;
773 struct proc *p = td->td_proc;
774 struct trapframe *regs;
775 const struct ucontext4 *ucp;
776 int cs, eflags, error;
777
778 error = copyin(uap->sigcntxp, &uc, sizeof(uc));
779 if (error != 0)
780 return (error);
781 ucp = &uc;
782 regs = td->td_frame;
783 eflags = ucp->uc_mcontext.mc_eflags;
784 if (eflags & PSL_VM) {
785 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
786 struct vm86_kernel *vm86;
787
788 /*
789 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
790 * set up the vm86 area, and we can't enter vm86 mode.
791 */
792 if (td->td_pcb->pcb_ext == 0)
793 return (EINVAL);
794 vm86 = &td->td_pcb->pcb_ext->ext_vm86;
795 if (vm86->vm86_inited == 0)
796 return (EINVAL);
797
798 /* Go back to user mode if both flags are set. */
799 if ((eflags & PSL_VIP) && (eflags & PSL_VIF))
800 trapsignal(p, SIGBUS, 0);
801
802 if (vm86->vm86_has_vme) {
803 eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
804 (eflags & VME_USERCHANGE) | PSL_VM;
805 } else {
806 vm86->vm86_eflags = eflags; /* save VIF, VIP */
807 eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
808 (eflags & VM_USERCHANGE) | PSL_VM;
809 }
810 bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe));
811 tf->tf_eflags = eflags;
812 tf->tf_vm86_ds = tf->tf_ds;
813 tf->tf_vm86_es = tf->tf_es;
814 tf->tf_vm86_fs = tf->tf_fs;
815 tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs;
816 tf->tf_ds = _udatasel;
817 tf->tf_es = _udatasel;
818 tf->tf_fs = _udatasel;
819 } else {
820 /*
821 * Don't allow users to change privileged or reserved flags.
822 */
823 /*
824 * XXX do allow users to change the privileged flag PSL_RF.
825 * The cpu sets PSL_RF in tf_eflags for faults. Debuggers
826 * should sometimes set it there too. tf_eflags is kept in
827 * the signal context during signal handling and there is no
828 * other place to remember it, so the PSL_RF bit may be
829 * corrupted by the signal handler without us knowing.
830 * Corruption of the PSL_RF bit at worst causes one more or
831 * one less debugger trap, so allowing it is fairly harmless.
832 */
833 if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
834 printf("freebsd4_sigreturn: eflags = 0x%x\n", eflags);
835 return (EINVAL);
836 }
837
838 /*
839 * Don't allow users to load a valid privileged %cs. Let the
840 * hardware check for invalid selectors, excess privilege in
841 * other selectors, invalid %eip's and invalid %esp's.
842 */
843 cs = ucp->uc_mcontext.mc_cs;
844 if (!CS_SECURE(cs)) {
845 printf("freebsd4_sigreturn: cs = 0x%x\n", cs);
846 trapsignal(p, SIGBUS, T_PROTFLT);
847 return (EINVAL);
848 }
849
850 bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
851 }
852
853 PROC_LOCK(p);
854 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
855 if (ucp->uc_mcontext.mc_onstack & 1)
856 p->p_sigstk.ss_flags |= SS_ONSTACK;
857 else
858 p->p_sigstk.ss_flags &= ~SS_ONSTACK;
859 #endif
860
861 p->p_sigmask = ucp->uc_sigmask;
862 SIG_CANTMASK(p->p_sigmask);
863 signotify(p);
864 PROC_UNLOCK(p);
865 return (EJUSTRETURN);
866 }
867 #endif /* COMPAT_FREEBSD4 */
868
869 /*
870 * MPSAFE
871 */
872 int
873 sigreturn(td, uap)
874 struct thread *td;
875 struct sigreturn_args /* {
876 const __ucontext *sigcntxp;
877 } */ *uap;
878 {
879 ucontext_t uc;
880 struct proc *p = td->td_proc;
881 struct trapframe *regs;
882 const ucontext_t *ucp;
883 int cs, eflags, error, ret;
884
885 error = copyin(uap->sigcntxp, &uc, sizeof(uc));
886 if (error != 0)
887 return (error);
888 ucp = &uc;
889 regs = td->td_frame;
890 eflags = ucp->uc_mcontext.mc_eflags;
891 if (eflags & PSL_VM) {
892 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
893 struct vm86_kernel *vm86;
894
895 /*
896 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
897 * set up the vm86 area, and we can't enter vm86 mode.
898 */
899 if (td->td_pcb->pcb_ext == 0)
900 return (EINVAL);
901 vm86 = &td->td_pcb->pcb_ext->ext_vm86;
902 if (vm86->vm86_inited == 0)
903 return (EINVAL);
904
905 /* Go back to user mode if both flags are set. */
906 if ((eflags & PSL_VIP) && (eflags & PSL_VIF))
907 trapsignal(p, SIGBUS, 0);
908
909 if (vm86->vm86_has_vme) {
910 eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
911 (eflags & VME_USERCHANGE) | PSL_VM;
912 } else {
913 vm86->vm86_eflags = eflags; /* save VIF, VIP */
914 eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
915 (eflags & VM_USERCHANGE) | PSL_VM;
916 }
917 bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe));
918 tf->tf_eflags = eflags;
919 tf->tf_vm86_ds = tf->tf_ds;
920 tf->tf_vm86_es = tf->tf_es;
921 tf->tf_vm86_fs = tf->tf_fs;
922 tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs;
923 tf->tf_ds = _udatasel;
924 tf->tf_es = _udatasel;
925 tf->tf_fs = _udatasel;
926 } else {
927 /*
928 * Don't allow users to change privileged or reserved flags.
929 */
930 /*
931 * XXX do allow users to change the privileged flag PSL_RF.
932 * The cpu sets PSL_RF in tf_eflags for faults. Debuggers
933 * should sometimes set it there too. tf_eflags is kept in
934 * the signal context during signal handling and there is no
935 * other place to remember it, so the PSL_RF bit may be
936 * corrupted by the signal handler without us knowing.
937 * Corruption of the PSL_RF bit at worst causes one more or
938 * one less debugger trap, so allowing it is fairly harmless.
939 */
940 if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
941 printf("sigreturn: eflags = 0x%x\n", eflags);
942 return (EINVAL);
943 }
944
945 /*
946 * Don't allow users to load a valid privileged %cs. Let the
947 * hardware check for invalid selectors, excess privilege in
948 * other selectors, invalid %eip's and invalid %esp's.
949 */
950 cs = ucp->uc_mcontext.mc_cs;
951 if (!CS_SECURE(cs)) {
952 printf("sigreturn: cs = 0x%x\n", cs);
953 trapsignal(p, SIGBUS, T_PROTFLT);
954 return (EINVAL);
955 }
956
957 ret = set_fpcontext(td, &ucp->uc_mcontext);
958 if (ret != 0)
959 return (ret);
960 bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
961 }
962
963 PROC_LOCK(p);
964 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
965 if (ucp->uc_mcontext.mc_onstack & 1)
966 p->p_sigstk.ss_flags |= SS_ONSTACK;
967 else
968 p->p_sigstk.ss_flags &= ~SS_ONSTACK;
969 #endif
970
971 p->p_sigmask = ucp->uc_sigmask;
972 SIG_CANTMASK(p->p_sigmask);
973 signotify(p);
974 PROC_UNLOCK(p);
975 return (EJUSTRETURN);
976 }
977
978 /*
979 * Machine dependent boot() routine
980 *
981 * I haven't seen anything to put here yet
982 * Possibly some stuff might be grafted back here from boot()
983 */
984 void
985 cpu_boot(int howto)
986 {
987 }
988
989 /*
990 * Shutdown the CPU as much as possible
991 */
992 void
993 cpu_halt(void)
994 {
995 for (;;)
996 __asm__ ("hlt");
997 }
998
999 /*
1000 * Hook to idle the CPU when possible. In the SMP case we default to
1001 * off because a halted cpu will not currently pick up a new thread in the
1002 * run queue until the next timer tick. If turned on this will result in
1003 * approximately a 4.2% loss in real time performance in buildworld tests
1004 * (but improves user and sys times oddly enough), and saves approximately
1005 * 5% in power consumption on an idle machine (tests w/2xCPU 1.1GHz P3).
1006 *
1007 * XXX we need to have a cpu mask of idle cpus and generate an IPI or
1008 * otherwise generate some sort of interrupt to wake up cpus sitting in HLT.
1009 * Then we can have our cake and eat it too.
1010 */
1011 #ifdef SMP
1012 static int cpu_idle_hlt = 0;
1013 #else
1014 static int cpu_idle_hlt = 1;
1015 #endif
1016 SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW,
1017 &cpu_idle_hlt, 0, "Idle loop HLT enable");
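/* The policy can be flipped at run time, e.g. `sysctl machdep.cpu_idle_hlt=1'. */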
1018
1019 /*
1020 * Note that we have to be careful here to avoid a race between checking
1021 * sched_runnable() and actually halting. If we don't do this, we may waste
1022 * the time between calling hlt and the next interrupt even though there
1023 * is a runnable process.
1024 */
1025 void
1026 cpu_idle(void)
1027 {
1028 if (cpu_idle_hlt) {
1029 disable_intr();
1030 if (sched_runnable()) {
1031 enable_intr();
1032 } else {
1033 /*
1034 * We must guarantee that hlt is the very next
1035 * instruction after sti, or we introduce a
1036 * timing window.
1037 */
1038 __asm __volatile("sti; hlt");
1039 }
1040 }
1041 }
1042
1043 /*
1044 * Clear registers on exec
1045 */
1046 void
1047 exec_setregs(td, entry, stack, ps_strings)
1048 struct thread *td;
1049 u_long entry;
1050 u_long stack;
1051 u_long ps_strings;
1052 {
1053 struct trapframe *regs = td->td_frame;
1054 struct pcb *pcb = td->td_pcb;
1055
1056 /* Reset pcb->pcb_gs and %gs before possibly invalidating it. */
1057 pcb->pcb_gs = _udatasel;
1058 load_gs(_udatasel);
1059
1060 if (td->td_proc->p_md.md_ldt)
1061 user_ldt_free(td);
1062
1063 bzero((char *)regs, sizeof(struct trapframe));
1064 regs->tf_eip = entry;
1065 regs->tf_esp = stack;
1066 regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T);
1067 regs->tf_ss = _udatasel;
1068 regs->tf_ds = _udatasel;
1069 regs->tf_es = _udatasel;
1070 regs->tf_fs = _udatasel;
1071 regs->tf_cs = _ucodesel;
1072
1073 /* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */
1074 regs->tf_ebx = ps_strings;
1075
1076 /*
1077 * Reset the hardware debug registers if they were in use.
1078 * They won't have any meaning for the newly exec'd process.
1079 */
1080 if (pcb->pcb_flags & PCB_DBREGS) {
1081 pcb->pcb_dr0 = 0;
1082 pcb->pcb_dr1 = 0;
1083 pcb->pcb_dr2 = 0;
1084 pcb->pcb_dr3 = 0;
1085 pcb->pcb_dr6 = 0;
1086 pcb->pcb_dr7 = 0;
1087 if (pcb == PCPU_GET(curpcb)) {
1088 /*
1089 * Clear the debug registers on the running
1090 * CPU, otherwise they will end up affecting
1091 * the next process we switch to.
1092 */
1093 reset_dbregs();
1094 }
1095 pcb->pcb_flags &= ~PCB_DBREGS;
1096 }
1097
1098 /*
1099 * Initialize the math emulator (if any) for the current process.
1100 * Actually, just clear the bit that says that the emulator has
1101 * been initialized. Initialization is delayed until the process
1102 * traps to the emulator (if it is done at all) mainly because
1103 * emulators don't provide an entry point for initialization.
1104 */
1105 td->td_pcb->pcb_flags &= ~FP_SOFTFP;
1106
1107 /*
1108 * Arrange to trap the next npx or `fwait' instruction (see npx.c
1109 * for why fwait must be trapped at least if there is an npx or an
1110 * emulator). This is mainly to handle the case where npx0 is not
1111 * configured, since the npx routines normally set up the trap
1112 * otherwise. It should be done only at boot time, but doing it
1113 * here allows modifying `npx_exists' for testing the emulator on
1114 * systems with an npx.
1115 */
1116 load_cr0(rcr0() | CR0_MP | CR0_TS);
1117
1118 /* Initialize the npx (if any) for the current process. */
1119 /*
1120 * XXX the above load_cr0() also initializes it and is a layering
1121 * violation if NPX is configured. It drops the npx partially
1122 * and this would be fatal if we were interrupted now, and decided
1123 * to force the state to the pcb, and checked the invariant
1124 * (CR0_TS clear) if and only if PCPU_GET(fpcurthread) != NULL.
1125 * ALL of this can happen except the check. The check used to
1126 * happen and be fatal later when we didn't complete the drop
1127 * before returning to user mode. This should be fixed properly
1128 * soon.
1129 */
1130 fpstate_drop(td);
1131
1132 /*
1133 * XXX - Linux emulator
1134 * Make sure edx is 0x0 on entry.  Linux binaries depend
1135 * on it.
1136 */
1137 td->td_retval[1] = 0;
1138 }
1139
1140 void
1141 cpu_setregs(void)
1142 {
1143 unsigned int cr0;
1144
1145 cr0 = rcr0();
1146 #ifdef SMP
1147 cr0 |= CR0_NE; /* Done by npxinit() */
1148 #endif
1149 cr0 |= CR0_MP | CR0_TS; /* Done at every execve() too. */
1150 #ifndef I386_CPU
1151 cr0 |= CR0_WP | CR0_AM;
1152 #endif
1153 load_cr0(cr0);
1154 load_gs(_udatasel);
1155 }
1156
1157 static int
1158 sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS)
1159 {
1160 int error;
1161 error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
1162 req);
1163 if (!error && req->newptr)
1164 resettodr();
1165 return (error);
1166 }
1167
1168 SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW,
1169 &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "");
1170
1171 SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
1172 CTLFLAG_RW, &disable_rtc_set, 0, "");
1173
1174 SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo,
1175 CTLFLAG_RD, &bootinfo, bootinfo, "");
1176
1177 SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock,
1178 CTLFLAG_RW, &wall_cmos_clock, 0, "");
1179
1180 u_long bootdev; /* not a dev_t - encoding is different */
1181 SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev,
1182 CTLFLAG_RD, &bootdev, 0, "Maybe the Boot device (not in dev_t format)");
1183
1184 /*
1185 * Initialize 386 and configure to run kernel
1186 */
1187
1188 /*
1189 * Initialize segments & interrupt table
1190 */
1191
1192 int _default_ldt;
1193 union descriptor gdt[NGDT * MAXCPU]; /* global descriptor table */
1194 static struct gate_descriptor idt0[NIDT];
1195 struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */
1196 union descriptor ldt[NLDT]; /* local descriptor table */
1197 #ifdef SMP
1198 /* table descriptors - used to load tables by the microprocessor */
1199 struct region_descriptor r_gdt, r_idt;
1200 #endif
1201
1202 int private_tss; /* flag indicating private tss */
1203
1204 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
1205 extern int has_f00f_bug;
1206 #endif
1207
1208 static struct i386tss dblfault_tss;
1209 static char dblfault_stack[PAGE_SIZE];
1210
1211 extern struct user *proc0uarea;
1212 extern vm_offset_t proc0kstack;
1213
1214
1215 /* software prototypes -- in more palatable form */
1216 struct soft_segment_descriptor gdt_segs[] = {
1217 /* GNULL_SEL 0 Null Descriptor */
1218 { 0x0, /* segment base address */
1219 0x0, /* length */
1220 0, /* segment type */
1221 0, /* segment descriptor priority level */
1222 0, /* segment descriptor present */
1223 0, 0,
1224 0, /* default 32 vs 16 bit size */
1225 0 /* limit granularity (byte/page units)*/ },
1226 /* GCODE_SEL 1 Code Descriptor for kernel */
1227 { 0x0, /* segment base address */
1228 0xfffff, /* length - all address space */
1229 SDT_MEMERA, /* segment type */
1230 0, /* segment descriptor priority level */
1231 1, /* segment descriptor present */
1232 0, 0,
1233 1, /* default 32 vs 16 bit size */
1234 1 /* limit granularity (byte/page units)*/ },
1235 /* GDATA_SEL 2 Data Descriptor for kernel */
1236 { 0x0, /* segment base address */
1237 0xfffff, /* length - all address space */
1238 SDT_MEMRWA, /* segment type */
1239 0, /* segment descriptor priority level */
1240 1, /* segment descriptor present */
1241 0, 0,
1242 1, /* default 32 vs 16 bit size */
1243 1 /* limit granularity (byte/page units)*/ },
1244 /* GPRIV_SEL 3 SMP Per-Processor Private Data Descriptor */
1245 { 0x0, /* segment base address */
1246 0xfffff, /* length - all address space */
1247 SDT_MEMRWA, /* segment type */
1248 0, /* segment descriptor priority level */
1249 1, /* segment descriptor present */
1250 0, 0,
1251 1, /* default 32 vs 16 bit size */
1252 1 /* limit granularity (byte/page units)*/ },
1253 /* GPROC0_SEL 4 Proc 0 Tss Descriptor */
1254 {
1255 0x0, /* segment base address */
1256 sizeof(struct i386tss)-1,/* length - all address space */
1257 SDT_SYS386TSS, /* segment type */
1258 0, /* segment descriptor priority level */
1259 1, /* segment descriptor present */
1260 0, 0,
1261 0, /* unused - default 32 vs 16 bit size */
1262 0 /* limit granularity (byte/page units)*/ },
1263 /* GLDT_SEL 5 LDT Descriptor */
1264 { (int) ldt, /* segment base address */
1265 sizeof(ldt)-1, /* length - all address space */
1266 SDT_SYSLDT, /* segment type */
1267 SEL_UPL, /* segment descriptor priority level */
1268 1, /* segment descriptor present */
1269 0, 0,
1270 0, /* unused - default 32 vs 16 bit size */
1271 0 /* limit granularity (byte/page units)*/ },
1272 /* GUSERLDT_SEL 6 User LDT Descriptor per process */
1273 { (int) ldt, /* segment base address */
1274 (512 * sizeof(union descriptor)-1), /* length */
1275 SDT_SYSLDT, /* segment type */
1276 0, /* segment descriptor priority level */
1277 1, /* segment descriptor present */
1278 0, 0,
1279 0, /* unused - default 32 vs 16 bit size */
1280 0 /* limit granularity (byte/page units)*/ },
1281 /* GTGATE_SEL 7 Null Descriptor - Placeholder */
1282 { 0x0, /* segment base address */
1283 0x0, /* length - all address space */
1284 0, /* segment type */
1285 0, /* segment descriptor priority level */
1286 0, /* segment descriptor present */
1287 0, 0,
1288 0, /* default 32 vs 16 bit size */
1289 0 /* limit granularity (byte/page units)*/ },
1290 /* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */
1291 { 0x400, /* segment base address */
1292 0xfffff, /* length */
1293 SDT_MEMRWA, /* segment type */
1294 0, /* segment descriptor priority level */
1295 1, /* segment descriptor present */
1296 0, 0,
1297 1, /* default 32 vs 16 bit size */
1298 1 /* limit granularity (byte/page units)*/ },
1299 /* GPANIC_SEL 9 Panic Tss Descriptor */
1300 { (int) &dblfault_tss, /* segment base address */
1301 sizeof(struct i386tss)-1,/* length - all address space */
1302 SDT_SYS386TSS, /* segment type */
1303 0, /* segment descriptor priority level */
1304 1, /* segment descriptor present */
1305 0, 0,
1306 0, /* unused - default 32 vs 16 bit size */
1307 0 /* limit granularity (byte/page units)*/ },
1308 /* GBIOSCODE32_SEL 10 BIOS 32-bit interface (32bit Code) */
1309 { 0, /* segment base address (overwritten) */
1310 0xfffff, /* length */
1311 SDT_MEMERA, /* segment type */
1312 0, /* segment descriptor priority level */
1313 1, /* segment descriptor present */
1314 0, 0,
1315 0, /* default 32 vs 16 bit size */
1316 1 /* limit granularity (byte/page units)*/ },
1317 /* GBIOSCODE16_SEL 11 BIOS 32-bit interface (16bit Code) */
1318 { 0, /* segment base address (overwritten) */
1319 0xfffff, /* length */
1320 SDT_MEMERA, /* segment type */
1321 0, /* segment descriptor priority level */
1322 1, /* segment descriptor present */
1323 0, 0,
1324 0, /* default 32 vs 16 bit size */
1325 1 /* limit granularity (byte/page units)*/ },
1326 /* GBIOSDATA_SEL 12 BIOS 32-bit interface (Data) */
1327 { 0, /* segment base address (overwritten) */
1328 0xfffff, /* length */
1329 SDT_MEMRWA, /* segment type */
1330 0, /* segment descriptor priority level */
1331 1, /* segment descriptor present */
1332 0, 0,
1333 1, /* default 32 vs 16 bit size */
1334 1 /* limit granularity (byte/page units)*/ },
1335 /* GBIOSUTIL_SEL 13 BIOS 16-bit interface (Utility) */
1336 { 0, /* segment base address (overwritten) */
1337 0xfffff, /* length */
1338 SDT_MEMRWA, /* segment type */
1339 0, /* segment descriptor priority level */
1340 1, /* segment descriptor present */
1341 0, 0,
1342 0, /* default 32 vs 16 bit size */
1343 1 /* limit granularity (byte/page units)*/ },
1344 /* GBIOSARGS_SEL 14 BIOS 16-bit interface (Arguments) */
1345 { 0, /* segment base address (overwritten) */
1346 0xfffff, /* length */
1347 SDT_MEMRWA, /* segment type */
1348 0, /* segment descriptor priority level */
1349 1, /* segment descriptor present */
1350 0, 0,
1351 0, /* default 32 vs 16 bit size */
1352 1 /* limit granularity (byte/page units)*/ },
1353 };
1354
1355 static struct soft_segment_descriptor ldt_segs[] = {
1356 /* Null Descriptor - overwritten by call gate */
1357 { 0x0, /* segment base address */
1358 0x0, /* length - all address space */
1359 0, /* segment type */
1360 0, /* segment descriptor priority level */
1361 0, /* segment descriptor present */
1362 0, 0,
1363 0, /* default 32 vs 16 bit size */
1364 0 /* limit granularity (byte/page units)*/ },
1365 /* Null Descriptor - overwritten by call gate */
1366 { 0x0, /* segment base address */
1367 0x0, /* length - all address space */
1368 0, /* segment type */
1369 0, /* segment descriptor priority level */
1370 0, /* segment descriptor present */
1371 0, 0,
1372 0, /* default 32 vs 16 bit size */
1373 0 /* limit granularity (byte/page units)*/ },
1374 /* Null Descriptor - overwritten by call gate */
1375 { 0x0, /* segment base address */
1376 0x0, /* length - all address space */
1377 0, /* segment type */
1378 0, /* segment descriptor priority level */
1379 0, /* segment descriptor present */
1380 0, 0,
1381 0, /* default 32 vs 16 bit size */
1382 0 /* limit granularity (byte/page units)*/ },
1383 /* Code Descriptor for user */
1384 { 0x0, /* segment base address */
1385 0xfffff, /* length - all address space */
1386 SDT_MEMERA, /* segment type */
1387 SEL_UPL, /* segment descriptor priority level */
1388 1, /* segment descriptor present */
1389 0, 0,
1390 1, /* default 32 vs 16 bit size */
1391 1 /* limit granularity (byte/page units)*/ },
1392 /* Null Descriptor - overwritten by call gate */
1393 { 0x0, /* segment base address */
1394 0x0, /* length - all address space */
1395 0, /* segment type */
1396 0, /* segment descriptor priority level */
1397 0, /* segment descriptor present */
1398 0, 0,
1399 0, /* default 32 vs 16 bit size */
1400 0 /* limit granularity (byte/page units)*/ },
1401 /* Data Descriptor for user */
1402 { 0x0, /* segment base address */
1403 0xfffff, /* length - all address space */
1404 SDT_MEMRWA, /* segment type */
1405 SEL_UPL, /* segment descriptor priority level */
1406 1, /* segment descriptor present */
1407 0, 0,
1408 1, /* default 32 vs 16 bit size */
1409 1 /* limit granularity (byte/page units)*/ },
1410 };
1411
1412 void
1413 setidt(idx, func, typ, dpl, selec)
1414 int idx;
1415 inthand_t *func;
1416 int typ;
1417 int dpl;
1418 int selec;
1419 {
1420 struct gate_descriptor *ip;
1421
1422 ip = idt + idx;
1423 ip->gd_looffset = (int)func;
1424 ip->gd_selector = selec;
1425 ip->gd_stkcpy = 0;
1426 ip->gd_xx = 0;
1427 ip->gd_type = typ;
1428 ip->gd_dpl = dpl;
1429 ip->gd_p = 1;
1430 ip->gd_hioffset = ((int)func)>>16 ;
1431 }
1432
1433 #define IDTVEC(name) __CONCAT(X,name)
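/* IDTVEC(div) expands to the assembler entry symbol Xdiv, and so on. */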
1434
1435 extern inthand_t
1436 IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
1437 IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
1438 IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
1439 IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
1440 IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);
1441
1442 void
1443 sdtossd(sd, ssd)
1444 struct segment_descriptor *sd;
1445 struct soft_segment_descriptor *ssd;
1446 {
1447 ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase;
1448 ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
1449 ssd->ssd_type = sd->sd_type;
1450 ssd->ssd_dpl = sd->sd_dpl;
1451 ssd->ssd_p = sd->sd_p;
1452 ssd->ssd_def32 = sd->sd_def32;
1453 ssd->ssd_gran = sd->sd_gran;
1454 }
1455
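/* Room in physmap[] for 8 base/bound pairs gathered from the BIOS. */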
1456 #define PHYSMAP_SIZE (2 * 8)
1457
1458 /*
1459 * Populate the (physmap) array with base/bound pairs describing the
1460 * available physical memory in the system, then test this memory and
1461 * build the phys_avail array describing the actually-available memory.
1462 *
1463 * If we cannot accurately determine the physical memory map, then use
1464 * the value from the 0xE801 call, and failing that, the RTC.
1465 *
1466 * Total memory size may be set by the kernel environment variable
1467 * hw.physmem or the compile-time define MAXMEM.
1468 */
1469 static void
1470 getmemsize(int first)
1471 {
1472 int i, physmap_idx, pa_indx;
1473 int hasbrokenint12;
1474 u_int basemem, extmem;
1475 struct vm86frame vmf;
1476 struct vm86context vmc;
1477 vm_offset_t pa, physmap[PHYSMAP_SIZE];
1478 pt_entry_t *pte;
1479 char *cp;
1480 struct bios_smap *smap;
1481
1482 hasbrokenint12 = 0;
1483 TUNABLE_INT_FETCH("hw.hasbrokenint12", &hasbrokenint12);
1484 bzero(&vmf, sizeof(struct vm86frame));
1485 bzero(physmap, sizeof(physmap));
1486 basemem = 0;
1487
1488 /*
1489 * Some newer BIOSes have a broken INT 12H implementation that causes
1490 * an immediate kernel panic.  In this case, we need to scan the SMAP
1491 * with INT 15:E820 first, then determine the base memory size.
1492 */
1493 if (hasbrokenint12) {
1494 goto int15e820;
1495 }
1496
1497 /*
1498 * Perform "base memory" related probes & setup
1499 */
1500 vm86_intcall(0x12, &vmf);
1501 basemem = vmf.vmf_ax;
1502 if (basemem > 640) {
1503 printf("Preposterous BIOS basemem of %uK, truncating to 640K\n",
1504 basemem);
1505 basemem = 640;
1506 }
1507
1508 /*
1509 * XXX if basemem is now < 640, there is a `hole'
1510 * between the end of base memory and the start of
1511 * ISA memory. The hole may be empty or it may
1512 * contain BIOS code or data. Map it read/write so
1513 * that the BIOS can write to it. (Memory from 0 to
1514 * the physical end of the kernel is mapped read-only
1515 * to begin with and then parts of it are remapped.
1516 * The parts that aren't remapped form holes that
1517 * remain read-only and are unused by the kernel.
1518 * The base memory area is below the physical end of
1519 * the kernel and right now forms a read-only hole.
1520 * The part of it from PAGE_SIZE to
1521 * (trunc_page(basemem * 1024) - 1) will be
1522 * remapped and used by the kernel later.)
1523 *
1524 * This code is similar to the code used in
1525 * pmap_mapdev, but since no memory needs to be
1526 * allocated we simply change the mapping.
1527 */
1528 for (pa = trunc_page(basemem * 1024);
1529 pa < ISA_HOLE_START; pa += PAGE_SIZE)
1530 pmap_kenter(KERNBASE + pa, pa);
1531
1532 /*
1533 * if basemem != 640, map pages r/w into vm86 page table so
1534 * that the bios can scribble on it.
1535 */
1536 pte = (pt_entry_t *)vm86paddr;
1537 for (i = basemem / 4; i < 160; i++)
1538 pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U;
1539
1540 int15e820:
1541 /*
1542 * map page 1 R/W into the kernel page table so we can use it
1543 * as a buffer. The kernel will unmap this page later.
1544 */
1545 pmap_kenter(KERNBASE + (1 << PAGE_SHIFT), 1);
1546
1547 /*
1548 * get memory map with INT 15:E820
1549 */
1550 vmc.npages = 0;
1551 smap = (void *)vm86_addpage(&vmc, 1, KERNBASE + (1 << PAGE_SHIFT));
1552 vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di);
1553
1554 physmap_idx = 0;
1555 vmf.vmf_ebx = 0;
1556 do {
1557 vmf.vmf_eax = 0xE820;
1558 vmf.vmf_edx = SMAP_SIG;
1559 vmf.vmf_ecx = sizeof(struct bios_smap);
1560 i = vm86_datacall(0x15, &vmf, &vmc);
1561 if (i || vmf.vmf_eax != SMAP_SIG)
1562 break;
1563 if (boothowto & RB_VERBOSE)
1564 printf("SMAP type=%02x base=%08x %08x len=%08x %08x\n",
1565 smap->type,
1566 *(u_int32_t *)((char *)&smap->base + 4),
1567 (u_int32_t)smap->base,
1568 *(u_int32_t *)((char *)&smap->length + 4),
1569 (u_int32_t)smap->length);
1570
1571 if (smap->type != 0x01)
1572 goto next_run;
1573
1574 if (smap->length == 0)
1575 goto next_run;
1576
1577 if (smap->base >= 0xffffffff) {
1578 printf("%uK of memory above 4GB ignored\n",
1579 (u_int)(smap->length / 1024));
1580 goto next_run;
1581 }
1582
1583 for (i = 0; i <= physmap_idx; i += 2) {
1584 if (smap->base < physmap[i + 1]) {
1585 if (boothowto & RB_VERBOSE)
1586 printf(
1587 "Overlapping or non-montonic memory region, ignoring second region\n");
1588 goto next_run;
1589 }
1590 }
1591
1592 if (smap->base == physmap[physmap_idx + 1]) {
1593 physmap[physmap_idx + 1] += smap->length;
1594 goto next_run;
1595 }
1596
1597 physmap_idx += 2;
1598 if (physmap_idx == PHYSMAP_SIZE) {
1599 printf(
1600 "Too many segments in the physical address map, giving up\n");
1601 break;
1602 }
1603 physmap[physmap_idx] = smap->base;
1604 physmap[physmap_idx + 1] = smap->base + smap->length;
1605 next_run: ;
1606 } while (vmf.vmf_ebx != 0);
1607
1608 /*
1609 * Perform "base memory" related probes & setup based on SMAP
1610 */
1611 if (basemem == 0) {
1612 for (i = 0; i <= physmap_idx; i += 2) {
1613 if (physmap[i] == 0x00000000) {
1614 basemem = physmap[i + 1] / 1024;
1615 break;
1616 }
1617 }
1618
1619 if (basemem == 0) {
1620 basemem = 640;
1621 }
1622
1623 if (basemem > 640) {
1624 printf("Preposterous BIOS basemem of %uK, truncating to 640K\n",
1625 basemem);
1626 basemem = 640;
1627 }
1628
1629 for (pa = trunc_page(basemem * 1024);
1630 pa < ISA_HOLE_START; pa += PAGE_SIZE)
1631 pmap_kenter(KERNBASE + pa, pa);
1632
1633 pte = (pt_entry_t *)vm86paddr;
1634 for (i = basemem / 4; i < 160; i++)
1635 pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U;
1636 }
1637
1638 if (physmap[1] != 0)
1639 goto physmap_done;
1640
1641 /*
1642 * If we failed above, try memory map with INT 15:E801
1643 */
1644 vmf.vmf_ax = 0xE801;
1645 if (vm86_intcall(0x15, &vmf) == 0) {
1646 extmem = vmf.vmf_cx + vmf.vmf_dx * 64;
1647 } else {
1648 #if 0
1649 vmf.vmf_ah = 0x88;
1650 vm86_intcall(0x15, &vmf);
1651 extmem = vmf.vmf_ax;
1652 #else
1653 /*
1654 * Prefer the RTC value for extended memory.
1655 */
1656 extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8);
1657 #endif
1658 }
1659
1660 /*
1661 * Special hack for chipsets that still remap the 384k hole when
1662 * there's 16MB of memory - this really confuses people who are
1663 * trying to use bus-mastering ISA controllers with the
1664 * "16MB limit"; they only have 16MB, but the remapping puts
1665 * them beyond the limit.
1666 *
1667 * If extended memory is between 15-16MB (16-17MB phys address range),
1668 * chop it to 15MB.
1669 */
1670 if ((extmem > 15 * 1024) && (extmem < 16 * 1024))
1671 extmem = 15 * 1024;
1672
1673 physmap[0] = 0;
1674 physmap[1] = basemem * 1024;
1675 physmap_idx = 2;
1676 physmap[physmap_idx] = 0x100000;
1677 physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024;
1678
1679 physmap_done:
1680 /*
1681 * Now, physmap contains a map of physical memory.
1682 */
1683
1684 #ifdef SMP
1685 /* make hole for AP bootstrap code */
1686 physmap[1] = mp_bootaddress(physmap[1] / 1024);
1687
1688 /* look for the MP hardware - needed for apic addresses */
1689 i386_mp_probe();
1690 #endif
1691
1692 /*
1693 * Maxmem isn't the "maximum memory", it's one larger than the
1694 * highest page of the physical address space. It should be
1695 * called something like "Maxphyspage". We may adjust this
1696 * based on ``hw.physmem'' and the results of the memory test.
1697 */
1698 Maxmem = atop(physmap[physmap_idx + 1]);
1699
1700 #ifdef MAXMEM
1701 Maxmem = MAXMEM / 4;
1702 #endif
1703
1704 /*
1705 * hw.physmem is a size in bytes; we also allow k, m, and g suffixes
1706 * for the appropriate modifiers. This overrides MAXMEM.
1707 */
1708 if ((cp = getenv("hw.physmem")) != NULL) {
1709 u_int64_t AllowMem, sanity;
1710 char *ep;
1711
1712 sanity = AllowMem = strtouq(cp, &ep, 0);
1713 if ((ep != cp) && (*ep != 0)) {
1714 switch(*ep) {
1715 case 'g':
1716 case 'G':
1717 AllowMem <<= 10;
1718 case 'm':
1719 case 'M':
1720 AllowMem <<= 10;
1721 case 'k':
1722 case 'K':
1723 AllowMem <<= 10;
1724 break;
1725 default:
1726 AllowMem = sanity = 0;
1727 }
1728 if (AllowMem < sanity)
1729 AllowMem = 0;
1730 }
1731 if (AllowMem == 0)
1732 printf("Ignoring invalid memory size of '%s'\n", cp);
1733 else
1734 Maxmem = atop(AllowMem);
1735 freeenv(cp);
1736 }
1737
1738 if (atop(physmap[physmap_idx + 1]) != Maxmem &&
1739 (boothowto & RB_VERBOSE))
1740 printf("Physical memory use set to %ldK\n", Maxmem * 4);
1741
1742 /*
1743 * If Maxmem has been increased beyond what the system has detected,
1744 * extend the last memory segment to the new limit.
1745 */
1746 if (atop(physmap[physmap_idx + 1]) < Maxmem)
1747 physmap[physmap_idx + 1] = ptoa(Maxmem);
1748
1749 /* call pmap initialization to make new kernel address space */
1750 pmap_bootstrap(first, 0);
1751
1752 /*
1753 * Size up each available chunk of physical memory.
1754 */
1755 physmap[0] = PAGE_SIZE; /* mask off page 0 */
1756 pa_indx = 0;
1757 phys_avail[pa_indx++] = physmap[0];
1758 phys_avail[pa_indx] = physmap[0];
1759 pte = CMAP1;
1760
1761 /*
1762 * physmap is in bytes, so when converting to page boundaries,
1763 * round up the start address and round down the end address.
1764 */
1765 for (i = 0; i <= physmap_idx; i += 2) {
1766 vm_offset_t end;
1767
1768 end = ptoa(Maxmem);
1769 if (physmap[i + 1] < end)
1770 end = trunc_page(physmap[i + 1]);
1771 for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
1772 int tmp, page_bad;
1773 int *ptr = (int *)CADDR1;
1774
1775 /*
1776 * block out kernel memory as not available.
1777 */
1778 if (pa >= 0x100000 && pa < first)
1779 continue;
1780
1781 page_bad = FALSE;
1782
1783 /*
1784 * map page into kernel: valid, read/write, non-cacheable
1785 */
1786 *pte = pa | PG_V | PG_RW | PG_N;
1787 invltlb();
1788
1789 tmp = *(int *)ptr;
1790 /*
1791 * Test for alternating 1's and 0's
1792 */
1793 *(volatile int *)ptr = 0xaaaaaaaa;
1794 if (*(volatile int *)ptr != 0xaaaaaaaa) {
1795 page_bad = TRUE;
1796 }
1797 /*
1798 * Test for alternating 0's and 1's
1799 */
1800 *(volatile int *)ptr = 0x55555555;
1801 if (*(volatile int *)ptr != 0x55555555) {
1802 page_bad = TRUE;
1803 }
1804 /*
1805 * Test for all 1's
1806 */
1807 *(volatile int *)ptr = 0xffffffff;
1808 if (*(volatile int *)ptr != 0xffffffff) {
1809 page_bad = TRUE;
1810 }
1811 /*
1812 * Test for all 0's
1813 */
1814 *(volatile int *)ptr = 0x0;
1815 if (*(volatile int *)ptr != 0x0) {
1816 page_bad = TRUE;
1817 }
1818 /*
1819 * Restore original value.
1820 */
1821 *(int *)ptr = tmp;
1822
1823 /*
1824 * Adjust array of valid/good pages.
1825 */
1826 if (page_bad == TRUE) {
1827 continue;
1828 }
1829 /*
1830 * If this good page is a continuation of the
1831 * previous set of good pages, then just increase
1832 * the end pointer. Otherwise start a new chunk.
1833			 * Note that the "end" entry points one past the
1834			 * last valid byte, making the range >= start and < end.
1835			 * If we're also doing a speculative memory
1836			 * test and we are at or past the end, bump up Maxmem
1837			 * so that we keep going. The first bad page
1838			 * will terminate the loop.
1839 */
1840 if (phys_avail[pa_indx] == pa) {
1841 phys_avail[pa_indx] += PAGE_SIZE;
1842 } else {
1843 pa_indx++;
1844 if (pa_indx == PHYS_AVAIL_ARRAY_END) {
1845 printf(
1846 "Too many holes in the physical address space, giving up\n");
1847 pa_indx--;
1848 break;
1849 }
1850 phys_avail[pa_indx++] = pa; /* start */
1851 phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */
1852 }
1853 physmem++;
1854 }
1855 }
1856 *pte = 0;
1857 invltlb();
1858
1859 /*
1860 * XXX
1861 * The last chunk must contain at least one page plus the message
1862 * buffer to avoid complicating other code (message buffer address
1863 * calculation, etc.).
1864 */
1865 while (phys_avail[pa_indx - 1] + PAGE_SIZE +
1866 round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) {
1867 physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
1868 phys_avail[pa_indx--] = 0;
1869 phys_avail[pa_indx--] = 0;
1870 }
1871
1872 Maxmem = atop(phys_avail[pa_indx]);
1873
1874 /* Trim off space for the message buffer. */
1875 phys_avail[pa_indx] -= round_page(MSGBUF_SIZE);
1876
1877 avail_end = phys_avail[pa_indx];
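	/*
	 * avail_end now marks the end of usable RAM; init386() maps the
	 * message buffer at this address once the new page tables are up.
	 */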
1878 }
1879
1880 void
1881 init386(first)
1882 int first;
1883 {
1884 struct gate_descriptor *gdp;
1885 int gsel_tss, metadata_missing, off, x;
1886 #ifndef SMP
1887	/* table descriptors - used to load tables by the microprocessor */
1888 struct region_descriptor r_gdt, r_idt;
1889 #endif
1890 struct pcpu *pc;
1891
1892 proc0.p_uarea = proc0uarea;
1893 thread0.td_kstack = proc0kstack;
1894 thread0.td_pcb = (struct pcb *)
1895 (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1;
1896 atdevbase = ISA_HOLE_START + KERNBASE;
1897
1898 /*
1899 * This may be done better later if it gets more high level
1900	 * components in it.  If so, just link td->td_proc here.
1901 */
1902 proc_linkup(&proc0, &ksegrp0, &kse0, &thread0);
1903
1904 metadata_missing = 0;
1905 if (bootinfo.bi_modulep) {
1906 preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE;
1907 preload_bootstrap_relocate(KERNBASE);
1908 } else {
1909 metadata_missing = 1;
1910 }
1911 if (envmode == 1)
1912 kern_envp = static_env;
1913 else if (bootinfo.bi_envp)
1914 kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE;
1915
1916 /* Init basic tunables, hz etc */
1917 init_param1();
1918
1919 /*
1920	 * Make GDT memory segments.  The code segment goes up to the end
1921	 * of the page containing etext; the data segment goes to the end
1922	 * of the address space.
1923 */
1924 /*
1925 * XXX text protection is temporarily (?) disabled. The limit was
1926 * i386_btop(round_page(etext)) - 1.
1927 */
1928 gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1);
1929 gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1);
1930 #ifdef SMP
1931 pc = &SMP_prvspace[0].pcpu;
1932 gdt_segs[GPRIV_SEL].ssd_limit =
1933 atop(sizeof(struct privatespace) - 1);
1934 #else
1935 pc = &__pcpu;
1936 gdt_segs[GPRIV_SEL].ssd_limit =
1937 atop(sizeof(struct pcpu) - 1);
1938 #endif
1939 gdt_segs[GPRIV_SEL].ssd_base = (int) pc;
1940 gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;
1941
1942 for (x = 0; x < NGDT; x++) {
1943 #ifdef BDE_DEBUGGER
1944 /* avoid overwriting db entries with APM ones */
1945 if (x >= GAPMCODE32_SEL && x <= GAPMDATA_SEL)
1946 continue;
1947 #endif
1948 ssdtosd(&gdt_segs[x], &gdt[x].sd);
1949 }
1950
1951 r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
1952 r_gdt.rd_base = (int) gdt;
1953 lgdt(&r_gdt);
1954
1955 pcpu_init(pc, 0, sizeof(struct pcpu));
1956 PCPU_SET(prvspace, pc);
1957 PCPU_SET(curthread, &thread0);
1958
1959 /*
1960 * Initialize mutexes.
1961 *
1962 * icu_lock: in order to allow an interrupt to occur in a critical
1963 * section, to set pcpu->ipending (etc...) properly, we
1964 * must be able to get the icu lock, so it can't be
1965 * under witness.
1966 */
1967 mutex_init();
1968 mtx_init(&clock_lock, "clk", NULL, MTX_SPIN | MTX_RECURSE);
1969 mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS);
1970
1971 /* make ldt memory segments */
1972 /*
1973 * XXX - VM_MAXUSER_ADDRESS is an end address, not a max. And it
1974 * should be spelled ...MAX_USER...
1975 */
1976 ldt_segs[LUCODE_SEL].ssd_limit = atop(VM_MAXUSER_ADDRESS - 1);
1977 ldt_segs[LUDATA_SEL].ssd_limit = atop(VM_MAXUSER_ADDRESS - 1);
1978 for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++)
1979 ssdtosd(&ldt_segs[x], &ldt[x].sd);
1980
1981 _default_ldt = GSEL(GLDT_SEL, SEL_KPL);
1982 lldt(_default_ldt);
1983 PCPU_SET(currentldt, _default_ldt);
1984
1985 /* exceptions */
1986 for (x = 0; x < NIDT; x++)
1987 setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL,
1988 GSEL(GCODE_SEL, SEL_KPL));
1989 setidt(0, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL,
1990 GSEL(GCODE_SEL, SEL_KPL));
1991 setidt(1, &IDTVEC(dbg), SDT_SYS386IGT, SEL_KPL,
1992 GSEL(GCODE_SEL, SEL_KPL));
1993 setidt(2, &IDTVEC(nmi), SDT_SYS386TGT, SEL_KPL,
1994 GSEL(GCODE_SEL, SEL_KPL));
1995 setidt(3, &IDTVEC(bpt), SDT_SYS386IGT, SEL_UPL,
1996 GSEL(GCODE_SEL, SEL_KPL));
1997 setidt(4, &IDTVEC(ofl), SDT_SYS386TGT, SEL_UPL,
1998 GSEL(GCODE_SEL, SEL_KPL));
1999 setidt(5, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL,
2000 GSEL(GCODE_SEL, SEL_KPL));
2001 setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL,
2002 GSEL(GCODE_SEL, SEL_KPL));
2003 	setidt(7, &IDTVEC(dna),  SDT_SYS386TGT, SEL_KPL,
2004	    GSEL(GCODE_SEL, SEL_KPL));
2005 setidt(8, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL));
2006 setidt(9, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL,
2007 GSEL(GCODE_SEL, SEL_KPL));
2008 setidt(10, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL,
2009 GSEL(GCODE_SEL, SEL_KPL));
2010 setidt(11, &IDTVEC(missing), SDT_SYS386TGT, SEL_KPL,
2011 GSEL(GCODE_SEL, SEL_KPL));
2012 setidt(12, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL,
2013 GSEL(GCODE_SEL, SEL_KPL));
2014 setidt(13, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL,
2015 GSEL(GCODE_SEL, SEL_KPL));
2016 setidt(14, &IDTVEC(page), SDT_SYS386IGT, SEL_KPL,
2017 GSEL(GCODE_SEL, SEL_KPL));
2018 setidt(15, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL,
2019 GSEL(GCODE_SEL, SEL_KPL));
2020 setidt(16, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL,
2021 GSEL(GCODE_SEL, SEL_KPL));
2022 setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL,
2023 GSEL(GCODE_SEL, SEL_KPL));
2024 setidt(18, &IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL,
2025 GSEL(GCODE_SEL, SEL_KPL));
2026 setidt(19, &IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL,
2027 GSEL(GCODE_SEL, SEL_KPL));
2028 setidt(0x80, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL,
2029 GSEL(GCODE_SEL, SEL_KPL));
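	/*
	 * Gates installed with SEL_UPL (vectors 3, 4 and 0x80 above) may
	 * be invoked from user mode via the int instruction; the others
	 * are reachable only through processor-generated exceptions.
	 * SDT_SYS386IGT entries are interrupt gates, which clear the
	 * interrupt flag on entry, while SDT_SYS386TGT trap gates do not.
	 */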
2030
2031 r_idt.rd_limit = sizeof(idt0) - 1;
2032 r_idt.rd_base = (int) idt;
2033 lidt(&r_idt);
2034
2035 /*
2036 * Initialize the console before we print anything out.
2037 */
2038 cninit();
2039
2040 if (metadata_missing)
2041 printf("WARNING: loader(8) metadata is missing!\n");
2042
2043 #ifdef DEV_ISA
2044 isa_defaultirq();
2045 #endif
2046
2047 #ifdef DDB
2048 kdb_init();
2049 if (boothowto & RB_KDB)
2050 Debugger("Boot flags requested debugger");
2051 #endif
2052
2053 finishidentcpu(); /* Final stage of CPU initialization */
2054 setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL,
2055 GSEL(GCODE_SEL, SEL_KPL));
2056 setidt(13, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL,
2057 GSEL(GCODE_SEL, SEL_KPL));
2058 initializecpu(); /* Initialize CPU registers */
2059
2060	/* Make an initial TSS so the cpu can get an interrupt stack on a syscall! */
2061 /* Note: -16 is so we can grow the trapframe if we came from vm86 */
2062 PCPU_SET(common_tss.tss_esp0, thread0.td_kstack +
2063 KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb) - 16);
2064 PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
2065 gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
2066 private_tss = 0;
2067 PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd);
2068 PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
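	/*
	 * Point the I/O permission bitmap base (kept in the upper half of
	 * tss_ioopt) past the end of the TSS, so user-mode port access
	 * always faults unless a per-process TSS extension later installs
	 * a real bitmap.
	 */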
2069 PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
2070 ltr(gsel_tss);
2071
2072 dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 =
2073 dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)];
2074 dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
2075 dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
2076 dblfault_tss.tss_cr3 = (int)IdlePTD;
2077 dblfault_tss.tss_eip = (int)dblfault_handler;
2078 dblfault_tss.tss_eflags = PSL_KERNEL;
2079 dblfault_tss.tss_ds = dblfault_tss.tss_es =
2080 dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL);
2081 dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL);
2082 dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
2083 dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
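	/*
	 * The double fault handler runs through this separate TSS (via
	 * the task gate at vector 8 above) so that it gets a known-good
	 * stack and register state even when the kernel stack itself is
	 * what went bad.
	 */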
2084
2085 vm86_initialize();
2086 getmemsize(first);
2087 init_param2(physmem);
2088
2089	/* now running on new page tables, configured, and u/iom is accessible */
2090
2091 /* Map the message buffer. */
2092 for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE)
2093 pmap_kenter((vm_offset_t)msgbufp + off, avail_end + off);
2094
2095 msgbufinit(msgbufp, MSGBUF_SIZE);
2096
2097 /* make a call gate to reenter kernel with */
2098 gdp = &ldt[LSYS5CALLS_SEL].gd;
2099
2100 x = (int) &IDTVEC(lcall_syscall);
2101 gdp->gd_looffset = x;
2102 gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL);
2103 gdp->gd_stkcpy = 1;
2104 gdp->gd_type = SDT_SYS386CGT;
2105 gdp->gd_dpl = SEL_UPL;
2106 gdp->gd_p = 1;
2107 gdp->gd_hioffset = x >> 16;
2108
2109 /* XXX does this work? */
2110 ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL];
2111 ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL];
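	/*
	 * The call gate above implements the old ``lcall $7,$0'' system
	 * call convention; gd_stkcpy copies one 32-bit argument word
	 * across the privilege transition.  The BSD/OS and Solaris LDT
	 * slots are simply aliased to the same gate for binary
	 * compatibility.
	 */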
2112
2113 /* transfer to user mode */
2114
2115 _ucodesel = LSEL(LUCODE_SEL, SEL_UPL);
2116 _udatasel = LSEL(LUDATA_SEL, SEL_UPL);
2117
2118 /* setup proc 0's pcb */
2119 thread0.td_pcb->pcb_flags = 0; /* XXXKSE */
2120 thread0.td_pcb->pcb_cr3 = (int)IdlePTD;
2121 thread0.td_pcb->pcb_ext = 0;
2122 thread0.td_frame = &proc0_tf;
2123 }
2124
2125 void
2126 cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
2127 {
2128 }
2129
2130 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
2131 static void f00f_hack(void *unused);
2132 SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL);
2133
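/*
 * Pentium ``F00F'' erratum workaround: an unprivileged, locked
 * cmpxchg8b with a register operand (byte sequence F0 0F C7 C8) can
 * hang the processor.  Relocating the first seven IDT entries onto a
 * write-protected page lets the kernel turn the otherwise fatal
 * lockup into a page fault that the trap handler can recognise and
 * recover from.
 */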
2134 static void
2135 f00f_hack(void *unused) {
2136 struct gate_descriptor *new_idt;
2137 #ifndef SMP
2138 struct region_descriptor r_idt;
2139 #endif
2140 vm_offset_t tmp;
2141
2142 if (!has_f00f_bug)
2143 return;
2144
2145 GIANT_REQUIRED;
2146
2147 printf("Intel Pentium detected, installing workaround for F00F bug\n");
2148
2149 r_idt.rd_limit = sizeof(idt0) - 1;
2150
2151 tmp = kmem_alloc(kernel_map, PAGE_SIZE * 2);
2152 if (tmp == 0)
2153 panic("kmem_alloc returned 0");
2154 if (((unsigned int)tmp & (PAGE_SIZE-1)) != 0)
2155 panic("kmem_alloc returned non-page-aligned memory");
2156 /* Put the first seven entries in the lower page */
2157 new_idt = (struct gate_descriptor*)(tmp + PAGE_SIZE - (7*8));
2158 bcopy(idt, new_idt, sizeof(idt0));
2159 r_idt.rd_base = (int)new_idt;
2160 lidt(&r_idt);
2161 idt = new_idt;
2162 if (vm_map_protect(kernel_map, tmp, tmp + PAGE_SIZE,
2163 VM_PROT_READ, FALSE) != KERN_SUCCESS)
2164 panic("vm_map_protect failed");
2165 return;
2166 }
2167 #endif /* defined(I586_CPU) && !NO_F00F_HACK */
2168
2169 int
2170 ptrace_set_pc(struct thread *td, unsigned long addr)
2171 {
2172 td->td_frame->tf_eip = addr;
2173 return (0);
2174 }
2175
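/*
 * Request a single-step trap: PSL_T is the EFLAGS trace flag, which
 * raises a debug exception after the next user instruction retires.
 */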
2176 int
2177 ptrace_single_step(struct thread *td)
2178 {
2179 td->td_frame->tf_eflags |= PSL_T;
2180 return (0);
2181 }
2182
2183 int
2184 fill_regs(struct thread *td, struct reg *regs)
2185 {
2186 struct pcb *pcb;
2187 struct trapframe *tp;
2188
2189 tp = td->td_frame;
2190 regs->r_fs = tp->tf_fs;
2191 regs->r_es = tp->tf_es;
2192 regs->r_ds = tp->tf_ds;
2193 regs->r_edi = tp->tf_edi;
2194 regs->r_esi = tp->tf_esi;
2195 regs->r_ebp = tp->tf_ebp;
2196 regs->r_ebx = tp->tf_ebx;
2197 regs->r_edx = tp->tf_edx;
2198 regs->r_ecx = tp->tf_ecx;
2199 regs->r_eax = tp->tf_eax;
2200 regs->r_eip = tp->tf_eip;
2201 regs->r_cs = tp->tf_cs;
2202 regs->r_eflags = tp->tf_eflags;
2203 regs->r_esp = tp->tf_esp;
2204 regs->r_ss = tp->tf_ss;
2205 pcb = td->td_pcb;
2206 regs->r_gs = pcb->pcb_gs;
2207 return (0);
2208 }
2209
2210 int
2211 set_regs(struct thread *td, struct reg *regs)
2212 {
2213 struct pcb *pcb;
2214 struct trapframe *tp;
2215
2216 tp = td->td_frame;
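	/*
	 * Refuse eflags or %cs values that would grant extra privilege
	 * (e.g. raising IOPL or pointing %cs at a kernel selector); these
	 * are the same CS_SECURE/EFL_SECURE checks applied to
	 * user-supplied signal contexts elsewhere in this file.
	 */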
2217 if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) ||
2218 !CS_SECURE(regs->r_cs))
2219 return (EINVAL);
2220 tp->tf_fs = regs->r_fs;
2221 tp->tf_es = regs->r_es;
2222 tp->tf_ds = regs->r_ds;
2223 tp->tf_edi = regs->r_edi;
2224 tp->tf_esi = regs->r_esi;
2225 tp->tf_ebp = regs->r_ebp;
2226 tp->tf_ebx = regs->r_ebx;
2227 tp->tf_edx = regs->r_edx;
2228 tp->tf_ecx = regs->r_ecx;
2229 tp->tf_eax = regs->r_eax;
2230 tp->tf_eip = regs->r_eip;
2231 tp->tf_cs = regs->r_cs;
2232 tp->tf_eflags = regs->r_eflags;
2233 tp->tf_esp = regs->r_esp;
2234 tp->tf_ss = regs->r_ss;
2235 pcb = td->td_pcb;
2236 pcb->pcb_gs = regs->r_gs;
2237 return (0);
2238 }
2239
2240 #ifdef CPU_ENABLE_SSE
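/*
 * On FXSR-capable CPUs the kernel keeps FPU state in the 512-byte
 * FXSAVE layout (struct savexmm).  The two converters below translate
 * between that layout and the legacy FNSAVE image (struct save87) so
 * that consumers of struct fpreg always see the old format.
 */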
2241 static void
2242 fill_fpregs_xmm(sv_xmm, sv_87)
2243 struct savexmm *sv_xmm;
2244 struct save87 *sv_87;
2245 {
2246 register struct env87 *penv_87 = &sv_87->sv_env;
2247 register struct envxmm *penv_xmm = &sv_xmm->sv_env;
2248 int i;
2249
2250 bzero(sv_87, sizeof(*sv_87));
2251
2252 /* FPU control/status */
2253 penv_87->en_cw = penv_xmm->en_cw;
2254 penv_87->en_sw = penv_xmm->en_sw;
2255 penv_87->en_tw = penv_xmm->en_tw;
2256 penv_87->en_fip = penv_xmm->en_fip;
2257 penv_87->en_fcs = penv_xmm->en_fcs;
2258 penv_87->en_opcode = penv_xmm->en_opcode;
2259 penv_87->en_foo = penv_xmm->en_foo;
2260 penv_87->en_fos = penv_xmm->en_fos;
2261
2262 /* FPU registers */
2263 for (i = 0; i < 8; ++i)
2264 sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc;
2265 }
2266
2267 static void
2268 set_fpregs_xmm(sv_87, sv_xmm)
2269 struct save87 *sv_87;
2270 struct savexmm *sv_xmm;
2271 {
2272 register struct env87 *penv_87 = &sv_87->sv_env;
2273 register struct envxmm *penv_xmm = &sv_xmm->sv_env;
2274 int i;
2275
2276 /* FPU control/status */
2277 penv_xmm->en_cw = penv_87->en_cw;
2278 penv_xmm->en_sw = penv_87->en_sw;
2279 penv_xmm->en_tw = penv_87->en_tw;
2280 penv_xmm->en_fip = penv_87->en_fip;
2281 penv_xmm->en_fcs = penv_87->en_fcs;
2282 penv_xmm->en_opcode = penv_87->en_opcode;
2283 penv_xmm->en_foo = penv_87->en_foo;
2284 penv_xmm->en_fos = penv_87->en_fos;
2285
2286 /* FPU registers */
2287 for (i = 0; i < 8; ++i)
2288 sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i];
2289 }
2290 #endif /* CPU_ENABLE_SSE */
2291
2292 int
2293 fill_fpregs(struct thread *td, struct fpreg *fpregs)
2294 {
2295 #ifdef CPU_ENABLE_SSE
2296 if (cpu_fxsr) {
2297 fill_fpregs_xmm(&td->td_pcb->pcb_save.sv_xmm,
2298 (struct save87 *)fpregs);
2299 return (0);
2300 }
2301 #endif /* CPU_ENABLE_SSE */
2302 bcopy(&td->td_pcb->pcb_save.sv_87, fpregs, sizeof *fpregs);
2303 return (0);
2304 }
2305
2306 int
2307 set_fpregs(struct thread *td, struct fpreg *fpregs)
2308 {
2309 #ifdef CPU_ENABLE_SSE
2310 if (cpu_fxsr) {
2311 set_fpregs_xmm((struct save87 *)fpregs,
2312 &td->td_pcb->pcb_save.sv_xmm);
2313 return (0);
2314 }
2315 #endif /* CPU_ENABLE_SSE */
2316 bcopy(fpregs, &td->td_pcb->pcb_save.sv_87, sizeof *fpregs);
2317 return (0);
2318 }
2319
2320 /*
2321 * Get machine context.
2322 */
2323 int
2324 get_mcontext(struct thread *td, mcontext_t *mcp)
2325 {
2326 struct trapframe *tp;
2327
2328 tp = td->td_frame;
2329
2330 mcp->mc_onstack = sigonstack(tp->tf_esp);
2331 mcp->mc_gs = td->td_pcb->pcb_gs;
2332 mcp->mc_fs = tp->tf_fs;
2333 mcp->mc_es = tp->tf_es;
2334 mcp->mc_ds = tp->tf_ds;
2335 mcp->mc_edi = tp->tf_edi;
2336 mcp->mc_esi = tp->tf_esi;
2337 mcp->mc_ebp = tp->tf_ebp;
2338 mcp->mc_isp = tp->tf_isp;
2339 mcp->mc_ebx = tp->tf_ebx;
2340 mcp->mc_edx = tp->tf_edx;
2341 mcp->mc_ecx = tp->tf_ecx;
2342 mcp->mc_eax = tp->tf_eax;
2343 mcp->mc_eip = tp->tf_eip;
2344 mcp->mc_cs = tp->tf_cs;
2345 mcp->mc_eflags = tp->tf_eflags;
2346 mcp->mc_esp = tp->tf_esp;
2347 mcp->mc_ss = tp->tf_ss;
2348 mcp->mc_len = sizeof(*mcp);
2349 get_fpcontext(td, mcp);
2350 return (0);
2351 }
2352
2353 /*
2354 * Set machine context.
2355 *
2356 * However, we don't set any but the user modifiable flags, and we won't
2357 * touch the cs selector.
2358 */
2359 int
2360 set_mcontext(struct thread *td, const mcontext_t *mcp)
2361 {
2362 struct trapframe *tp;
2363 int eflags, ret;
2364
2365 tp = td->td_frame;
2366 if (mcp->mc_len != sizeof(*mcp))
2367 return (EINVAL);
2368 eflags = (mcp->mc_eflags & PSL_USERCHANGE) |
2369 (tp->tf_eflags & ~PSL_USERCHANGE);
2370 if ((ret = set_fpcontext(td, mcp)) == 0) {
2371 tp->tf_fs = mcp->mc_fs;
2372 tp->tf_es = mcp->mc_es;
2373 tp->tf_ds = mcp->mc_ds;
2374 tp->tf_edi = mcp->mc_edi;
2375 tp->tf_esi = mcp->mc_esi;
2376 tp->tf_ebp = mcp->mc_ebp;
2377 tp->tf_ebx = mcp->mc_ebx;
2378 tp->tf_edx = mcp->mc_edx;
2379 tp->tf_ecx = mcp->mc_ecx;
2380 tp->tf_eax = mcp->mc_eax;
2381 tp->tf_eip = mcp->mc_eip;
2382 tp->tf_eflags = eflags;
2383 tp->tf_esp = mcp->mc_esp;
2384 tp->tf_ss = mcp->mc_ss;
2385 td->td_pcb->pcb_gs = mcp->mc_gs;
2386 ret = 0;
2387 }
2388 return (ret);
2389 }
2390
2391 static void
2392 get_fpcontext(struct thread *td, mcontext_t *mcp)
2393 {
2394 #ifndef DEV_NPX
2395 mcp->mc_fpformat = _MC_FPFMT_NODEV;
2396 mcp->mc_ownedfp = _MC_FPOWNED_NONE;
2397 #else
2398 union savefpu *addr;
2399
2400 /*
2401 * XXX mc_fpstate might be misaligned, since its declaration is not
2402 * unportabilized using __attribute__((aligned(16))) like the
2403 * declaration of struct savemm, and anyway, alignment doesn't work
2404 * for auto variables since we don't use gcc's pessimal stack
2405 * alignment. Work around this by abusing the spare fields after
2406 * mcp->mc_fpstate.
2407 *
2408 * XXX unpessimize most cases by only aligning when fxsave might be
2409 * called, although this requires knowing too much about
2410 * npxgetregs()'s internals.
2411 */
2412 addr = (union savefpu *)&mcp->mc_fpstate;
2413 if (td == PCPU_GET(fpcurthread) &&
2414 #ifdef CPU_ENABLE_SSE
2415 cpu_fxsr &&
2416 #endif
2417 ((uintptr_t)(void *)addr & 0xF)) {
2418 do
2419 addr = (void *)((char *)addr + 4);
2420 while ((uintptr_t)(void *)addr & 0xF);
2421 }
2422 mcp->mc_ownedfp = npxgetregs(td, addr);
2423 if (addr != (union savefpu *)&mcp->mc_fpstate) {
2424 bcopy(addr, &mcp->mc_fpstate, sizeof(mcp->mc_fpstate));
2425 bzero(&mcp->mc_spare2, sizeof(mcp->mc_spare2));
2426 }
2427 mcp->mc_fpformat = npxformat();
2428 #endif
2429 }
2430
2431 static int
2432 set_fpcontext(struct thread *td, const mcontext_t *mcp)
2433 {
2434 union savefpu *addr;
2435
2436 if (mcp->mc_fpformat == _MC_FPFMT_NODEV)
2437 return (0);
2438 else if (mcp->mc_fpformat != _MC_FPFMT_387 &&
2439 mcp->mc_fpformat != _MC_FPFMT_XMM)
2440 return (EINVAL);
2441 else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE)
2442 /* We don't care what state is left in the FPU or PCB. */
2443 fpstate_drop(td);
2444 else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU ||
2445 mcp->mc_ownedfp == _MC_FPOWNED_PCB) {
2446 /* XXX align as above. */
2447 addr = (union savefpu *)&mcp->mc_fpstate;
2448 if (td == PCPU_GET(fpcurthread) &&
2449 #ifdef CPU_ENABLE_SSE
2450 cpu_fxsr &&
2451 #endif
2452 ((uintptr_t)(void *)addr & 0xF)) {
2453 do
2454 addr = (void *)((char *)addr + 4);
2455 while ((uintptr_t)(void *)addr & 0xF);
2456 bcopy(&mcp->mc_fpstate, addr, sizeof(mcp->mc_fpstate));
2457 }
2458 #ifdef DEV_NPX
2459 /*
2460 * XXX we violate the dubious requirement that npxsetregs()
2461 * be called with interrupts disabled.
2462 */
2463 npxsetregs(td, addr);
2464 #endif
2465 /*
2466 * Don't bother putting things back where they were in the
2467 * misaligned case, since we know that the caller won't use
2468 * them again.
2469 */
2470 } else
2471 return (EINVAL);
2472 return (0);
2473 }
2474
2475 static void
2476 fpstate_drop(struct thread *td)
2477 {
2478 register_t s;
2479
2480 s = intr_disable();
2481 #ifdef DEV_NPX
2482 if (PCPU_GET(fpcurthread) == td)
2483 npxdrop();
2484 #endif
2485 /*
2486 * XXX force a full drop of the npx. The above only drops it if we
2487 * owned it. npxgetregs() has the same bug in the !cpu_fxsr case.
2488 *
2489 * XXX I don't much like npxgetregs()'s semantics of doing a full
2490 * drop. Dropping only to the pcb matches fnsave's behaviour.
2491 * We only need to drop to !PCB_INITDONE in sendsig(). But
2492 * sendsig() is the only caller of npxgetregs()... perhaps we just
2493 * have too many layers.
2494 */
2495 curthread->td_pcb->pcb_flags &= ~PCB_NPXINITDONE;
2496 intr_restore(s);
2497 }
2498
2499 int
2500 fill_dbregs(struct thread *td, struct dbreg *dbregs)
2501 {
2502 struct pcb *pcb;
2503
2504 if (td == NULL) {
2505 dbregs->dr[0] = rdr0();
2506 dbregs->dr[1] = rdr1();
2507 dbregs->dr[2] = rdr2();
2508 dbregs->dr[3] = rdr3();
2509 dbregs->dr[4] = rdr4();
2510 dbregs->dr[5] = rdr5();
2511 dbregs->dr[6] = rdr6();
2512 dbregs->dr[7] = rdr7();
2513 } else {
2514 pcb = td->td_pcb;
2515 dbregs->dr[0] = pcb->pcb_dr0;
2516 dbregs->dr[1] = pcb->pcb_dr1;
2517 dbregs->dr[2] = pcb->pcb_dr2;
2518 dbregs->dr[3] = pcb->pcb_dr3;
2519 dbregs->dr[4] = 0;
2520 dbregs->dr[5] = 0;
2521 dbregs->dr[6] = pcb->pcb_dr6;
2522 dbregs->dr[7] = pcb->pcb_dr7;
2523 }
2524 return (0);
2525 }
2526
2527 int
2528 set_dbregs(struct thread *td, struct dbreg *dbregs)
2529 {
2530 struct pcb *pcb;
2531 int i;
2532 u_int32_t mask1, mask2;
2533
2534 if (td == NULL) {
2535 load_dr0(dbregs->dr[0]);
2536 load_dr1(dbregs->dr[1]);
2537 load_dr2(dbregs->dr[2]);
2538 load_dr3(dbregs->dr[3]);
2539 load_dr4(dbregs->dr[4]);
2540 load_dr5(dbregs->dr[5]);
2541 load_dr6(dbregs->dr[6]);
2542 load_dr7(dbregs->dr[7]);
2543 } else {
2544 /*
2545 * Don't let an illegal value for dr7 get set. Specifically,
2546 * check for undefined settings. Setting these bit patterns
2547		 * results in undefined behaviour and can lead to an unexpected
2548 * TRCTRAP.
2549 */
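		/*
		 * Each breakpoint has a two-bit R/W field in dr7; the
		 * value 10b (break on I/O) is only defined when CR4.DE
		 * is set, so it is rejected here.
		 */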
2550 for (i = 0, mask1 = 0x3<<16, mask2 = 0x2<<16; i < 8;
2551 i++, mask1 <<= 2, mask2 <<= 2)
2552 if ((dbregs->dr[7] & mask1) == mask2)
2553 return (EINVAL);
2554
2555 pcb = td->td_pcb;
2556
2557 /*
2558 * Don't let a process set a breakpoint that is not within the
2559 * process's address space. If a process could do this, it
2560 * could halt the system by setting a breakpoint in the kernel
2561 * (if ddb was enabled). Thus, we need to check to make sure
2562 * that no breakpoints are being enabled for addresses outside
2563		 * the process's address space, unless, perhaps, we were called by
2564 * uid 0.
2565 *
2566 * XXX - what about when the watched area of the user's
2567 * address space is written into from within the kernel
2568 * ... wouldn't that still cause a breakpoint to be generated
2569 * from within kernel mode?
2570 */
2571
2572 if (suser(td) != 0) {
2573 if (dbregs->dr[7] & 0x3) {
2574 /* dr0 is enabled */
2575 if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS)
2576 return (EINVAL);
2577 }
2578
2579 if (dbregs->dr[7] & (0x3<<2)) {
2580 /* dr1 is enabled */
2581 if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS)
2582 return (EINVAL);
2583 }
2584
2585 if (dbregs->dr[7] & (0x3<<4)) {
2586 /* dr2 is enabled */
2587 if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS)
2588 return (EINVAL);
2589 }
2590
2591 if (dbregs->dr[7] & (0x3<<6)) {
2592 /* dr3 is enabled */
2593 if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS)
2594 return (EINVAL);
2595 }
2596 }
2597
2598 pcb->pcb_dr0 = dbregs->dr[0];
2599 pcb->pcb_dr1 = dbregs->dr[1];
2600 pcb->pcb_dr2 = dbregs->dr[2];
2601 pcb->pcb_dr3 = dbregs->dr[3];
2602 pcb->pcb_dr6 = dbregs->dr[6];
2603 pcb->pcb_dr7 = dbregs->dr[7];
2604
2605 pcb->pcb_flags |= PCB_DBREGS;
2606 }
2607
2608 return (0);
2609 }
2610
2611 /*
2612 * Return > 0 if a hardware breakpoint has been hit, and the
2613  * breakpoint was in user space.  Return 0 otherwise.
2614 */
2615 int
2616 user_dbreg_trap(void)
2617 {
2618 u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */
2619 u_int32_t bp; /* breakpoint bits extracted from dr6 */
2620 int nbp; /* number of breakpoints that triggered */
2621 caddr_t addr[4]; /* breakpoint addresses */
2622 int i;
2623
2624 dr7 = rdr7();
2625 if ((dr7 & 0x000000ff) == 0) {
2626 /*
2627		 * none of the breakpoint enable bits in the dr7 register are set,
2628 * thus the trap couldn't have been caused by the
2629 * hardware debug registers
2630 */
2631 return 0;
2632 }
2633
2634 nbp = 0;
2635 dr6 = rdr6();
2636 bp = dr6 & 0x0000000f;
2637
2638 if (!bp) {
2639 /*
2640		 * None of the breakpoint bits are set, meaning this
2641 * trap was not caused by any of the debug registers
2642 */
2643 return 0;
2644 }
2645
2646 /*
2647	 * At least one of the breakpoints was hit; check to see
2648 * which ones and if any of them are user space addresses
2649 */
2650
2651 if (bp & 0x01) {
2652 addr[nbp++] = (caddr_t)rdr0();
2653 }
2654 if (bp & 0x02) {
2655 addr[nbp++] = (caddr_t)rdr1();
2656 }
2657 if (bp & 0x04) {
2658 addr[nbp++] = (caddr_t)rdr2();
2659 }
2660 if (bp & 0x08) {
2661 addr[nbp++] = (caddr_t)rdr3();
2662 }
2663
2664 for (i=0; i<nbp; i++) {
2665 if (addr[i] <
2666 (caddr_t)VM_MAXUSER_ADDRESS) {
2667 /*
2668 * addr[i] is in user space
2669 */
2670 return nbp;
2671 }
2672 }
2673
2674 /*
2675 * None of the breakpoints are in user space.
2676 */
2677 return 0;
2678 }
2679
2680
2681 #ifndef DDB
2682 void
2683 Debugger(const char *msg)
2684 {
2685 printf("Debugger(\"%s\") called.\n", msg);
2686 }
2687 #endif /* no DDB */
2688
2689 #ifdef DDB
2690
2691 /*
2692 * Provide inb() and outb() as functions. They are normally only
2693 * available as macros calling inlined functions, thus cannot be
2694 * called inside DDB.
2695 *
2696 * The actual code is stolen from <machine/cpufunc.h>, and de-inlined.
2697 */
2698
2699 #undef inb
2700 #undef outb
2701
2702 /* silence compiler warnings */
2703 u_char inb(u_int);
2704 void outb(u_int, u_char);
2705
2706 u_char
2707 inb(u_int port)
2708 {
2709 u_char data;
2710 /*
2711 * We use %%dx and not %1 here because i/o is done at %dx and not at
2712 * %edx, while gcc generates inferior code (movw instead of movl)
2713 * if we tell it to load (u_short) port.
2714 */
2715 __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
2716 return (data);
2717 }
2718
2719 void
2720 outb(u_int port, u_char data)
2721 {
2722 u_char al;
2723 /*
2724 * Use an unnecessary assignment to help gcc's register allocator.
2725	 * This makes a large difference for gcc-1.40 and a tiny difference
2726 * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for
2727 * best results. gcc-2.6.0 can't handle this.
2728 */
2729 al = data;
2730 __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
2731 }
2732
2733 #endif /* DDB */