/*-
 * Copyright (c) 1992 Terrence R. Lambert.
 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: releng/6.0/sys/pc98/pc98/machdep.c 148486 2005-07-28 17:22:39Z nyan $");

#include "opt_atalk.h"
#include "opt_compat.h"
#include "opt_cpu.h"
#include "opt_ddb.h"
#include "opt_inet.h"
#include "opt_ipx.h"
#include "opt_isa.h"
#include "opt_kstack_pages.h"
#include "opt_maxmem.h"
#include "opt_msgbuf.h"
#include "opt_npx.h"
#include "opt_perfmon.h"

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/bus.h>
#include <sys/callout.h>
#include <sys/cons.h>
#include <sys/cpu.h>
#include <sys/eventhandler.h>
#include <sys/exec.h>
#include <sys/imgact.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/memrange.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/ptrace.h>
#include <sys/reboot.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/ucontext.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_pager.h>
#include <vm/vm_param.h>

#ifdef DDB
#ifndef KDB
#error KDB must be enabled in order for DDB to work!
#endif
#include <ddb/ddb.h>
#include <ddb/db_sym.h>
#endif

#include <pc98/pc98/pc98_machdep.h>
#include <pc98/cbus/cbus.h>

#include <net/netisr.h>

#include <machine/bootinfo.h>
#include <machine/clock.h>
#include <machine/cpu.h>
#include <machine/cputypes.h>
#include <machine/intr_machdep.h>
#include <machine/md_var.h>
#include <machine/pc/bios.h>
#include <machine/pcb.h>
#include <machine/pcb_ext.h>
#include <machine/proc.h>
#include <machine/reg.h>
#include <machine/sigframe.h>
#include <machine/specialreg.h>
#include <machine/vm86.h>
#ifdef PERFMON
#include <machine/perfmon.h>
#endif
#ifdef SMP
#include <machine/privatespace.h>
#include <machine/smp.h>
#endif

#ifdef DEV_ISA
#include <i386/isa/icu.h>
#endif

/* Sanity check for __curthread() */
CTASSERT(offsetof(struct pcpu, pc_curthread) == 0);

extern void init386(int first);
extern void dblfault_handler(void);

extern void printcpuinfo(void);	/* XXX header file */
extern void finishidentcpu(void);
extern void panicifcpuunsupported(void);
extern void initializecpu(void);

#define	CS_SECURE(cs)		(ISPL(cs) == SEL_UPL)
#define	EFL_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
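
/*
 * Illustrative note (not from the original source): EFL_SECURE() XORs the
 * proposed eflags against the current ones and masks off the bits a user
 * is permitted to change (PSL_USERCHANGE).  Any bit left standing means a
 * privileged or reserved flag would be altered, so the check fails.  For
 * example, toggling only PSL_C (carry) passes, while an attempt to raise
 * the PSL_IOPL field leaves ((ef ^ oef) & ~PSL_USERCHANGE) non-zero.
 */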

#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
#define CPU_ENABLE_SSE
#endif

static void cpu_startup(void *);
static void fpstate_drop(struct thread *td);
static void get_fpcontext(struct thread *td, mcontext_t *mcp);
static int  set_fpcontext(struct thread *td, const mcontext_t *mcp);
#ifdef CPU_ENABLE_SSE
static void set_fpregs_xmm(struct save87 *, struct savexmm *);
static void fill_fpregs_xmm(struct savexmm *, struct save87 *);
#endif /* CPU_ENABLE_SSE */
SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)
int	need_pre_dma_flush;	/* If 1, use wbinvd before a DMA transfer. */
int	need_post_dma_flush;	/* If 1, use invd after a DMA transfer. */

#ifdef DDB
extern vm_offset_t ksym_start, ksym_end;
#endif

int	_udatasel, _ucodesel;
u_int	basemem;

static int	ispc98 = 1;
SYSCTL_INT(_machdep, OID_AUTO, ispc98, CTLFLAG_RD, &ispc98, 0, "");

int cold = 1;

#ifdef COMPAT_43
static void osendsig(sig_t catcher, int sig, sigset_t *mask, u_long code);
#endif
#ifdef COMPAT_FREEBSD4
static void freebsd4_sendsig(sig_t catcher, int sig, sigset_t *mask,
    u_long code);
#endif

long Maxmem = 0;
long realmem = 0;

vm_paddr_t phys_avail[10];
vm_paddr_t dump_avail[10];

/* must be 2 less so a 0,0 pair can signal the end of chunks */
#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2)
#define DUMP_AVAIL_ARRAY_END ((sizeof(dump_avail) / sizeof(dump_avail[0])) - 2)
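
/*
 * Example layout (illustrative only): with two usable chunks, phys_avail[]
 * might hold { 0x1000, 0x9f000, 0x100000, 0x3ff0000, 0, 0, ... }; the
 * trailing 0,0 pair is the end-of-chunks sentinel that the _ARRAY_END
 * limits above reserve room for.
 */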

struct kva_md_info kmi;

static struct trapframe proc0_tf;
#ifndef SMP
static struct pcpu __pcpu;
#endif

struct mtx icu_lock;

struct mem_range_softc mem_range_softc;

static void
cpu_startup(dummy)
	void *dummy;
{
	/*
	 * Good {morning,afternoon,evening,night}.
	 */
	startrtclock();
	printcpuinfo();
	panicifcpuunsupported();
#ifdef PERFMON
	perfmon_init();
#endif
	printf("real memory = %ju (%ju MB)\n", ptoa((uintmax_t)Maxmem),
	    ptoa((uintmax_t)Maxmem) / 1048576);
	realmem = Maxmem;
	/*
	 * Display any holes after the first chunk of extended memory.
	 */
	if (bootverbose) {
		int indx;

		printf("Physical memory chunk(s):\n");
		for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
			vm_paddr_t size;

			size = phys_avail[indx + 1] - phys_avail[indx];
			printf(
			    "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n",
			    (uintmax_t)phys_avail[indx],
			    (uintmax_t)phys_avail[indx + 1] - 1,
			    (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
		}
	}

	vm_ksubmap_init(&kmi);

	printf("avail memory = %ju (%ju MB)\n",
	    ptoa((uintmax_t)cnt.v_free_count),
	    ptoa((uintmax_t)cnt.v_free_count) / 1048576);

	/*
	 * Set up buffers, so they can be used to read disk labels.
	 */
	bufinit();
	vm_pager_bufferinit();

	cpu_setregs();
}

/*
 * Send an interrupt to a process.
 *
 * The stack is set up to allow the sigcode stored at its top to call the
 * handler, followed by a kcall to the sigreturn routine below.  After
 * sigreturn resets the signal mask, the stack, and the frame pointer, it
 * returns to the user-specified pc and psl.
 */
#ifdef COMPAT_43
static void
osendsig(catcher, sig, mask, code)
	sig_t catcher;
	int sig;
	sigset_t *mask;
	u_long code;
{
	struct osigframe sf, *fp;
	struct proc *p;
	struct thread *td;
	struct sigacts *psp;
	struct trapframe *regs;
	int oonstack;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	regs = td->td_frame;
	oonstack = sigonstack(regs->tf_esp);

	/* Allocate space for the signal handler context. */
	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		fp = (struct osigframe *)(td->td_sigstk.ss_sp +
		    td->td_sigstk.ss_size - sizeof(struct osigframe));
#if defined(COMPAT_43)
		td->td_sigstk.ss_flags |= SS_ONSTACK;
#endif
	} else
		fp = (struct osigframe *)regs->tf_esp - 1;

	/* Translate the signal if appropriate. */
	if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
		sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	/* Build the argument list for the signal handler. */
	sf.sf_signum = sig;
	sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc;
	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
		/* Signal handler installed with SA_SIGINFO. */
		sf.sf_arg2 = (register_t)&fp->sf_siginfo;
		sf.sf_siginfo.si_signo = sig;
		sf.sf_siginfo.si_code = code;
		sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher;
	} else {
		/* Old FreeBSD-style arguments. */
		sf.sf_arg2 = code;
		sf.sf_addr = regs->tf_err;
		sf.sf_ahu.sf_handler = catcher;
	}
	mtx_unlock(&psp->ps_mtx);
	PROC_UNLOCK(p);

	/* Save most if not all of trap frame. */
	sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax;
	sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx;
	sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx;
	sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx;
	sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi;
	sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi;
	sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs;
	sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds;
	sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss;
	sf.sf_siginfo.si_sc.sc_es = regs->tf_es;
	sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs;
	sf.sf_siginfo.si_sc.sc_gs = rgs();
	sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp;

	/* Build the signal context to be used by osigreturn(). */
	sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0;
	SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask);
	sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp;
	sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp;
	sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip;
	sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags;
	sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno;
	sf.sf_siginfo.si_sc.sc_err = regs->tf_err;

	/*
	 * If we're a vm86 process, we want to save the segment registers.
	 * We also change eflags to be our emulated eflags, not the actual
	 * eflags.
	 */
	if (regs->tf_eflags & PSL_VM) {
		/* XXX confusing names: `tf' isn't a trapframe; `regs' is. */
		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
		struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86;

		sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs;
		sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs;
		sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es;
		sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds;

		if (vm86->vm86_has_vme == 0)
			sf.sf_siginfo.si_sc.sc_ps =
			    (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
			    (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));

		/* See sendsig() for comments. */
		tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
	}

	/*
	 * Copy the sigframe out to the user's stack.
	 */
	if (copyout(&sf, fp, sizeof(*fp)) != 0) {
#ifdef DEBUG
		printf("process %ld has trashed its stack\n", (long)p->p_pid);
#endif
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	regs->tf_esp = (int)fp;
	regs->tf_eip = PS_STRINGS - szosigcode;
	regs->tf_eflags &= ~PSL_T;
	regs->tf_cs = _ucodesel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_fs = _udatasel;
	load_gs(_udatasel);
	regs->tf_ss = _udatasel;
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
#endif /* COMPAT_43 */

#ifdef COMPAT_FREEBSD4
static void
freebsd4_sendsig(catcher, sig, mask, code)
	sig_t catcher;
	int sig;
	sigset_t *mask;
	u_long code;
{
	struct sigframe4 sf, *sfp;
	struct proc *p;
	struct thread *td;
	struct sigacts *psp;
	struct trapframe *regs;
	int oonstack;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	regs = td->td_frame;
	oonstack = sigonstack(regs->tf_esp);

	/* Save user context. */
	bzero(&sf, sizeof(sf));
	sf.sf_uc.uc_sigmask = *mask;
	sf.sf_uc.uc_stack = td->td_sigstk;
	sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
	    ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
	sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
	sf.sf_uc.uc_mcontext.mc_gs = rgs();
	bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs));

	/* Allocate space for the signal handler context. */
	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		sfp = (struct sigframe4 *)(td->td_sigstk.ss_sp +
		    td->td_sigstk.ss_size - sizeof(struct sigframe4));
#if defined(COMPAT_43)
		td->td_sigstk.ss_flags |= SS_ONSTACK;
#endif
	} else
		sfp = (struct sigframe4 *)regs->tf_esp - 1;

	/* Translate the signal if appropriate. */
	if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
		sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	/* Build the argument list for the signal handler. */
	sf.sf_signum = sig;
	sf.sf_ucontext = (register_t)&sfp->sf_uc;
	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
		/* Signal handler installed with SA_SIGINFO. */
		sf.sf_siginfo = (register_t)&sfp->sf_si;
		sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;

		/* Fill in POSIX parts */
		sf.sf_si.si_signo = sig;
		sf.sf_si.si_code = code;
		sf.sf_si.si_addr = (void *)regs->tf_err;
	} else {
		/* Old FreeBSD-style arguments. */
		sf.sf_siginfo = code;
		sf.sf_addr = regs->tf_err;
		sf.sf_ahu.sf_handler = catcher;
	}
	mtx_unlock(&psp->ps_mtx);
	PROC_UNLOCK(p);

	/*
	 * If we're a vm86 process, we want to save the segment registers.
	 * We also change eflags to be our emulated eflags, not the actual
	 * eflags.
	 */
	if (regs->tf_eflags & PSL_VM) {
		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
		struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86;

		sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs;
		sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs;
		sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es;
		sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds;

		if (vm86->vm86_has_vme == 0)
			sf.sf_uc.uc_mcontext.mc_eflags =
			    (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
			    (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));

		/*
		 * Clear PSL_NT to inhibit T_TSSFLT faults on return from
		 * syscalls made by the signal handler.  This just avoids
		 * wasting time for our lazy fixup of such faults.  PSL_NT
		 * does nothing in vm86 mode, but vm86 programs can set it
		 * almost legitimately in probes for old cpu types.
		 */
		tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
	}

	/*
	 * Copy the sigframe out to the user's stack.
	 */
	if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
#ifdef DEBUG
		printf("process %ld has trashed its stack\n", (long)p->p_pid);
#endif
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	regs->tf_esp = (int)sfp;
	regs->tf_eip = PS_STRINGS - szfreebsd4_sigcode;
	regs->tf_eflags &= ~PSL_T;
	regs->tf_cs = _ucodesel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_fs = _udatasel;
	regs->tf_ss = _udatasel;
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
#endif	/* COMPAT_FREEBSD4 */

void
sendsig(catcher, sig, mask, code)
	sig_t catcher;
	int sig;
	sigset_t *mask;
	u_long code;
{
	struct sigframe sf, *sfp;
	struct proc *p;
	struct thread *td;
	struct sigacts *psp;
	char *sp;
	struct trapframe *regs;
	int oonstack;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
#ifdef COMPAT_FREEBSD4
	if (SIGISMEMBER(psp->ps_freebsd4, sig)) {
		freebsd4_sendsig(catcher, sig, mask, code);
		return;
	}
#endif
#ifdef COMPAT_43
	if (SIGISMEMBER(psp->ps_osigset, sig)) {
		osendsig(catcher, sig, mask, code);
		return;
	}
#endif
	regs = td->td_frame;
	oonstack = sigonstack(regs->tf_esp);

	/* Save user context. */
	bzero(&sf, sizeof(sf));
	sf.sf_uc.uc_sigmask = *mask;
	sf.sf_uc.uc_stack = td->td_sigstk;
	sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
	    ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
	sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
	sf.sf_uc.uc_mcontext.mc_gs = rgs();
	bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs));
	sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
	get_fpcontext(td, &sf.sf_uc.uc_mcontext);
	fpstate_drop(td);

	/* Allocate space for the signal handler context. */
	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		sp = td->td_sigstk.ss_sp +
		    td->td_sigstk.ss_size - sizeof(struct sigframe);
#if defined(COMPAT_43)
		td->td_sigstk.ss_flags |= SS_ONSTACK;
#endif
	} else
		sp = (char *)regs->tf_esp - sizeof(struct sigframe);
	/* Align to 16 bytes. */
	sfp = (struct sigframe *)((unsigned int)sp & ~0xF);

	/* Translate the signal if appropriate. */
	if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
		sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	/* Build the argument list for the signal handler. */
	sf.sf_signum = sig;
	sf.sf_ucontext = (register_t)&sfp->sf_uc;
	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
		/* Signal handler installed with SA_SIGINFO. */
		sf.sf_siginfo = (register_t)&sfp->sf_si;
		sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;

		/* Fill in POSIX parts */
		sf.sf_si.si_signo = sig;
		sf.sf_si.si_code = code;
		sf.sf_si.si_addr = (void *)regs->tf_err;
	} else {
		/* Old FreeBSD-style arguments. */
		sf.sf_siginfo = code;
		sf.sf_addr = regs->tf_err;
		sf.sf_ahu.sf_handler = catcher;
	}
	mtx_unlock(&psp->ps_mtx);
	PROC_UNLOCK(p);

	/*
	 * If we're a vm86 process, we want to save the segment registers.
	 * We also change eflags to be our emulated eflags, not the actual
	 * eflags.
	 */
	if (regs->tf_eflags & PSL_VM) {
		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
		struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86;

		sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs;
		sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs;
		sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es;
		sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds;

		if (vm86->vm86_has_vme == 0)
			sf.sf_uc.uc_mcontext.mc_eflags =
			    (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
			    (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));

		/*
		 * Clear PSL_NT to inhibit T_TSSFLT faults on return from
		 * syscalls made by the signal handler.  This just avoids
		 * wasting time for our lazy fixup of such faults.  PSL_NT
		 * does nothing in vm86 mode, but vm86 programs can set it
		 * almost legitimately in probes for old cpu types.
		 */
		tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
	}

	/*
	 * Copy the sigframe out to the user's stack.
	 */
	if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
#ifdef DEBUG
		printf("process %ld has trashed its stack\n", (long)p->p_pid);
#endif
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	regs->tf_esp = (int)sfp;
	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
	regs->tf_eflags &= ~PSL_T;
	regs->tf_cs = _ucodesel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_fs = _udatasel;
	regs->tf_ss = _udatasel;
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
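
/*
 * Illustrative sketch (not from the original source) of the user stack
 * after sendsig(), growing down from the 16-byte-aligned sfp:
 *
 *	regs->tf_esp -> struct sigframe {
 *		sf_signum	signal number (handler argument 1)
 *		sf_siginfo	siginfo pointer or code (argument 2)
 *		sf_ucontext	pointer to sf_uc below (argument 3)
 *		sf_addr, sf_ahu	fault address and handler union
 *		sf_uc, sf_si	saved ucontext and siginfo
 *	}
 *	regs->tf_eip -> signal trampoline just below PS_STRINGS, which
 *	calls the handler and then enters sigreturn() with &sf_uc.
 */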

/*
 * Build siginfo_t for SA thread
 */
void
cpu_thread_siginfo(int sig, u_long code, siginfo_t *si)
{
	struct proc *p;
	struct thread *td;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);

	bzero(si, sizeof(*si));
	si->si_signo = sig;
	si->si_code = code;
	si->si_addr = (void *)td->td_frame->tf_err;
	/* XXXKSE fill other fields */
}

/*
 * System call to cleanup state after a signal
 * has been taken.  Reset signal mask and
 * stack state from context left by sendsig (above).
 * Return to previous pc and psl as specified by
 * context left by sendsig.  Check carefully to
 * make sure that the user has not modified the
 * state to gain improper privileges.
 *
 * MPSAFE
 */
#ifdef COMPAT_43
int
osigreturn(td, uap)
	struct thread *td;
	struct osigreturn_args /* {
		struct osigcontext *sigcntxp;
	} */ *uap;
{
	struct osigcontext sc;
	struct trapframe *regs;
	struct osigcontext *scp;
	struct proc *p = td->td_proc;
	int eflags, error;

	regs = td->td_frame;
	error = copyin(uap->sigcntxp, &sc, sizeof(sc));
	if (error != 0)
		return (error);
	scp = &sc;
	eflags = scp->sc_ps;
	if (eflags & PSL_VM) {
		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
		struct vm86_kernel *vm86;

		/*
		 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
		 * set up the vm86 area, and we can't enter vm86 mode.
		 */
		if (td->td_pcb->pcb_ext == 0)
			return (EINVAL);
		vm86 = &td->td_pcb->pcb_ext->ext_vm86;
		if (vm86->vm86_inited == 0)
			return (EINVAL);

		/* Go back to user mode if both flags are set. */
		if ((eflags & PSL_VIP) && (eflags & PSL_VIF))
			trapsignal(td, SIGBUS, 0);

		if (vm86->vm86_has_vme) {
			eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
			    (eflags & VME_USERCHANGE) | PSL_VM;
		} else {
			vm86->vm86_eflags = eflags;	/* save VIF, VIP */
			eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
			    (eflags & VM_USERCHANGE) | PSL_VM;
		}
		tf->tf_vm86_ds = scp->sc_ds;
		tf->tf_vm86_es = scp->sc_es;
		tf->tf_vm86_fs = scp->sc_fs;
		tf->tf_vm86_gs = scp->sc_gs;
		tf->tf_ds = _udatasel;
		tf->tf_es = _udatasel;
		tf->tf_fs = _udatasel;
	} else {
		/*
		 * Don't allow users to change privileged or reserved flags.
		 */
		/*
		 * XXX do allow users to change the privileged flag PSL_RF.
		 * The cpu sets PSL_RF in tf_eflags for faults.  Debuggers
		 * should sometimes set it there too.  tf_eflags is kept in
		 * the signal context during signal handling and there is no
		 * other place to remember it, so the PSL_RF bit may be
		 * corrupted by the signal handler without us knowing.
		 * Corruption of the PSL_RF bit at worst causes one more or
		 * one less debugger trap, so allowing it is fairly harmless.
		 */
		if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
			return (EINVAL);
		}

		/*
		 * Don't allow users to load a valid privileged %cs.  Let the
		 * hardware check for invalid selectors, excess privilege in
		 * other selectors, invalid %eip's and invalid %esp's.
		 */
		if (!CS_SECURE(scp->sc_cs)) {
			trapsignal(td, SIGBUS, T_PROTFLT);
			return (EINVAL);
		}
		regs->tf_ds = scp->sc_ds;
		regs->tf_es = scp->sc_es;
		regs->tf_fs = scp->sc_fs;
	}

	/* Restore remaining registers. */
	regs->tf_eax = scp->sc_eax;
	regs->tf_ebx = scp->sc_ebx;
	regs->tf_ecx = scp->sc_ecx;
	regs->tf_edx = scp->sc_edx;
	regs->tf_esi = scp->sc_esi;
	regs->tf_edi = scp->sc_edi;
	regs->tf_cs = scp->sc_cs;
	regs->tf_ss = scp->sc_ss;
	regs->tf_isp = scp->sc_isp;
	regs->tf_ebp = scp->sc_fp;
	regs->tf_esp = scp->sc_sp;
	regs->tf_eip = scp->sc_pc;
	regs->tf_eflags = eflags;

	PROC_LOCK(p);
#if defined(COMPAT_43)
	if (scp->sc_onstack & 1)
		td->td_sigstk.ss_flags |= SS_ONSTACK;
	else
		td->td_sigstk.ss_flags &= ~SS_ONSTACK;
#endif
	SIGSETOLD(td->td_sigmask, scp->sc_mask);
	SIG_CANTMASK(td->td_sigmask);
	signotify(td);
	PROC_UNLOCK(p);
	return (EJUSTRETURN);
}
#endif /* COMPAT_43 */

#ifdef COMPAT_FREEBSD4
/*
 * MPSAFE
 */
int
freebsd4_sigreturn(td, uap)
	struct thread *td;
	struct freebsd4_sigreturn_args /* {
		const ucontext4 *sigcntxp;
	} */ *uap;
{
	struct ucontext4 uc;
	struct proc *p = td->td_proc;
	struct trapframe *regs;
	const struct ucontext4 *ucp;
	int cs, eflags, error;

	error = copyin(uap->sigcntxp, &uc, sizeof(uc));
	if (error != 0)
		return (error);
	ucp = &uc;
	regs = td->td_frame;
	eflags = ucp->uc_mcontext.mc_eflags;
	if (eflags & PSL_VM) {
		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
		struct vm86_kernel *vm86;

		/*
		 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
		 * set up the vm86 area, and we can't enter vm86 mode.
		 */
		if (td->td_pcb->pcb_ext == 0)
			return (EINVAL);
		vm86 = &td->td_pcb->pcb_ext->ext_vm86;
		if (vm86->vm86_inited == 0)
			return (EINVAL);

		/* Go back to user mode if both flags are set. */
		if ((eflags & PSL_VIP) && (eflags & PSL_VIF))
			trapsignal(td, SIGBUS, 0);

		if (vm86->vm86_has_vme) {
			eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
			    (eflags & VME_USERCHANGE) | PSL_VM;
		} else {
			vm86->vm86_eflags = eflags;	/* save VIF, VIP */
			eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
			    (eflags & VM_USERCHANGE) | PSL_VM;
		}
		bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe));
		tf->tf_eflags = eflags;
		tf->tf_vm86_ds = tf->tf_ds;
		tf->tf_vm86_es = tf->tf_es;
		tf->tf_vm86_fs = tf->tf_fs;
		tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs;
		tf->tf_ds = _udatasel;
		tf->tf_es = _udatasel;
		tf->tf_fs = _udatasel;
	} else {
		/*
		 * Don't allow users to change privileged or reserved flags.
		 */
		/*
		 * XXX do allow users to change the privileged flag PSL_RF.
		 * The cpu sets PSL_RF in tf_eflags for faults.  Debuggers
		 * should sometimes set it there too.  tf_eflags is kept in
		 * the signal context during signal handling and there is no
		 * other place to remember it, so the PSL_RF bit may be
		 * corrupted by the signal handler without us knowing.
		 * Corruption of the PSL_RF bit at worst causes one more or
		 * one less debugger trap, so allowing it is fairly harmless.
		 */
		if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
			printf("freebsd4_sigreturn: eflags = 0x%x\n", eflags);
			return (EINVAL);
		}

		/*
		 * Don't allow users to load a valid privileged %cs.  Let the
		 * hardware check for invalid selectors, excess privilege in
		 * other selectors, invalid %eip's and invalid %esp's.
		 */
		cs = ucp->uc_mcontext.mc_cs;
		if (!CS_SECURE(cs)) {
			printf("freebsd4_sigreturn: cs = 0x%x\n", cs);
			trapsignal(td, SIGBUS, T_PROTFLT);
			return (EINVAL);
		}

		bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
	}

	PROC_LOCK(p);
#if defined(COMPAT_43)
	if (ucp->uc_mcontext.mc_onstack & 1)
		td->td_sigstk.ss_flags |= SS_ONSTACK;
	else
		td->td_sigstk.ss_flags &= ~SS_ONSTACK;
#endif

	td->td_sigmask = ucp->uc_sigmask;
	SIG_CANTMASK(td->td_sigmask);
	signotify(td);
	PROC_UNLOCK(p);
	return (EJUSTRETURN);
}
#endif	/* COMPAT_FREEBSD4 */

/*
 * MPSAFE
 */
int
sigreturn(td, uap)
	struct thread *td;
	struct sigreturn_args /* {
		const __ucontext *sigcntxp;
	} */ *uap;
{
	ucontext_t uc;
	struct proc *p = td->td_proc;
	struct trapframe *regs;
	const ucontext_t *ucp;
	int cs, eflags, error, ret;

	error = copyin(uap->sigcntxp, &uc, sizeof(uc));
	if (error != 0)
		return (error);
	ucp = &uc;
	regs = td->td_frame;
	eflags = ucp->uc_mcontext.mc_eflags;
	if (eflags & PSL_VM) {
		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
		struct vm86_kernel *vm86;

		/*
		 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
		 * set up the vm86 area, and we can't enter vm86 mode.
		 */
		if (td->td_pcb->pcb_ext == 0)
			return (EINVAL);
		vm86 = &td->td_pcb->pcb_ext->ext_vm86;
		if (vm86->vm86_inited == 0)
			return (EINVAL);

		/* Go back to user mode if both flags are set. */
		if ((eflags & PSL_VIP) && (eflags & PSL_VIF))
			trapsignal(td, SIGBUS, 0);

		if (vm86->vm86_has_vme) {
			eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
			    (eflags & VME_USERCHANGE) | PSL_VM;
		} else {
			vm86->vm86_eflags = eflags;	/* save VIF, VIP */
			eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
			    (eflags & VM_USERCHANGE) | PSL_VM;
		}
		bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe));
		tf->tf_eflags = eflags;
		tf->tf_vm86_ds = tf->tf_ds;
		tf->tf_vm86_es = tf->tf_es;
		tf->tf_vm86_fs = tf->tf_fs;
		tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs;
		tf->tf_ds = _udatasel;
		tf->tf_es = _udatasel;
		tf->tf_fs = _udatasel;
	} else {
		/*
		 * Don't allow users to change privileged or reserved flags.
		 */
		/*
		 * XXX do allow users to change the privileged flag PSL_RF.
		 * The cpu sets PSL_RF in tf_eflags for faults.  Debuggers
		 * should sometimes set it there too.  tf_eflags is kept in
		 * the signal context during signal handling and there is no
		 * other place to remember it, so the PSL_RF bit may be
		 * corrupted by the signal handler without us knowing.
		 * Corruption of the PSL_RF bit at worst causes one more or
		 * one less debugger trap, so allowing it is fairly harmless.
		 */
		if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
			printf("sigreturn: eflags = 0x%x\n", eflags);
			return (EINVAL);
		}

		/*
		 * Don't allow users to load a valid privileged %cs.  Let the
		 * hardware check for invalid selectors, excess privilege in
		 * other selectors, invalid %eip's and invalid %esp's.
		 */
		cs = ucp->uc_mcontext.mc_cs;
		if (!CS_SECURE(cs)) {
			printf("sigreturn: cs = 0x%x\n", cs);
			trapsignal(td, SIGBUS, T_PROTFLT);
			return (EINVAL);
		}

		ret = set_fpcontext(td, &ucp->uc_mcontext);
		if (ret != 0)
			return (ret);
		bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
	}

	PROC_LOCK(p);
#if defined(COMPAT_43)
	if (ucp->uc_mcontext.mc_onstack & 1)
		td->td_sigstk.ss_flags |= SS_ONSTACK;
	else
		td->td_sigstk.ss_flags &= ~SS_ONSTACK;
#endif

	td->td_sigmask = ucp->uc_sigmask;
	SIG_CANTMASK(td->td_sigmask);
	signotify(td);
	PROC_UNLOCK(p);
	return (EJUSTRETURN);
}

/*
 * Machine dependent boot() routine
 *
 * I haven't seen anything to put here yet
 * Possibly some stuff might be grafted back here from boot()
 */
void
cpu_boot(int howto)
{
}

/* Get current clock frequency for the given cpu id. */
int
cpu_est_clockrate(int cpu_id, uint64_t *rate)
{
	register_t reg;
	uint64_t tsc1, tsc2;

	if (pcpu_find(cpu_id) == NULL || rate == NULL)
		return (EINVAL);
	if (!tsc_present)
		return (EOPNOTSUPP);

	/* If we're booting, trust the rate calibrated moments ago. */
	if (cold) {
		*rate = tsc_freq;
		return (0);
	}

#ifdef SMP
	/* Schedule ourselves on the indicated cpu. */
	mtx_lock_spin(&sched_lock);
	sched_bind(curthread, cpu_id);
	mtx_unlock_spin(&sched_lock);
#endif

	/* Calibrate by measuring a short delay. */
	reg = intr_disable();
	tsc1 = rdtsc();
	DELAY(1000);
	tsc2 = rdtsc();
	intr_restore(reg);

#ifdef SMP
	mtx_lock_spin(&sched_lock);
	sched_unbind(curthread);
	mtx_unlock_spin(&sched_lock);
#endif

	/*
	 * Calculate the difference in readings, convert to a frequency in
	 * Hz, and subtract 0.5% of the total.  Empirical testing has shown
	 * that overhead in DELAY() works out to approximately this value.
	 */
	tsc2 -= tsc1;
	*rate = tsc2 * 1000 - tsc2 * 5;
	return (0);
}
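
/*
 * Worked example (illustrative): on a 1 GHz CPU the DELAY(1000) above
 * spans roughly 1,000,000 TSC ticks, so tsc2 * 1000 = 1,000,000,000 Hz,
 * and the 0.5% correction (tsc2 * 5 = 5,000,000) gives an estimate of
 * about 995 MHz once the measured DELAY() overhead is absorbed.
 */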

/*
 * Shutdown the CPU as much as possible
 */
void
cpu_halt(void)
{
	for (;;)
		__asm__ ("hlt");
}

/*
 * Hook to idle the CPU when possible.  In the SMP case we default to
 * off because a halted cpu will not currently pick up a new thread in the
 * run queue until the next timer tick.  If turned on this will result in
 * approximately a 4.2% loss in real time performance in buildworld tests
 * (but improves user and sys times oddly enough), and saves approximately
 * 5% in power consumption on an idle machine (tests w/2xCPU 1.1GHz P3).
 *
 * XXX we need to have a cpu mask of idle cpus and generate an IPI or
 * otherwise generate some sort of interrupt to wake up cpus sitting in HLT.
 * Then we can have our cake and eat it too.
 *
 * XXX I'm turning it on for SMP as well by default for now.  It seems to
 * help lock contention somewhat, and this is critical for HTT. -Peter
 */
static int	cpu_idle_hlt = 1;
SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW,
    &cpu_idle_hlt, 0, "Idle loop HLT enable");

static void
cpu_idle_default(void)
{
	/*
	 * We must guarantee that hlt is the very next instruction after
	 * sti, or else we introduce a timing window.
	 */
	__asm __volatile("sti; hlt");
}

/*
 * Note that we have to be careful here to avoid a race between checking
 * sched_runnable() and actually halting.  If we don't do this, we may waste
 * the time between calling hlt and the next interrupt even though there
 * is a runnable process.
 */
void
cpu_idle(void)
{

#ifdef SMP
	if (mp_grab_cpu_hlt())
		return;
#endif

	if (cpu_idle_hlt) {
		disable_intr();
		if (sched_runnable())
			enable_intr();
		else
			(*cpu_idle_hook)();
	}
}
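
/*
 * Illustrative timeline of the race avoided above: disable_intr();
 * sched_runnable() sees nothing to run; an interrupt that would make a
 * thread runnable is held pending; the default hook's "sti; hlt" then
 * enables interrupts only at the hlt boundary, so the pending interrupt
 * wakes the CPU immediately instead of being missed until the next tick.
 */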

/* Other subsystems (e.g., ACPI) can hook this later. */
void (*cpu_idle_hook)(void) = cpu_idle_default;

/*
 * Clear registers on exec
 */
void
exec_setregs(td, entry, stack, ps_strings)
	struct thread *td;
	u_long entry;
	u_long stack;
	u_long ps_strings;
{
	struct trapframe *regs = td->td_frame;
	struct pcb *pcb = td->td_pcb;

	/* Reset pcb->pcb_gs and %gs before possibly invalidating it. */
	pcb->pcb_gs = _udatasel;
	load_gs(_udatasel);

	if (td->td_proc->p_md.md_ldt)
		user_ldt_free(td);

	bzero((char *)regs, sizeof(struct trapframe));
	regs->tf_eip = entry;
	regs->tf_esp = stack;
	regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T);
	regs->tf_ss = _udatasel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_fs = _udatasel;
	regs->tf_cs = _ucodesel;

	/* PS_STRINGS value for BSD/OS binaries.  It is 0 for non-BSD/OS. */
	regs->tf_ebx = ps_strings;

	/*
	 * Reset the hardware debug registers if they were in use.
	 * They won't have any meaning for the newly exec'd process.
	 */
	if (pcb->pcb_flags & PCB_DBREGS) {
		pcb->pcb_dr0 = 0;
		pcb->pcb_dr1 = 0;
		pcb->pcb_dr2 = 0;
		pcb->pcb_dr3 = 0;
		pcb->pcb_dr6 = 0;
		pcb->pcb_dr7 = 0;
		if (pcb == PCPU_GET(curpcb)) {
			/*
			 * Clear the debug registers on the running
			 * CPU, otherwise they will end up affecting
			 * the next process we switch to.
			 */
			reset_dbregs();
		}
		pcb->pcb_flags &= ~PCB_DBREGS;
	}

	/*
	 * Initialize the math emulator (if any) for the current process.
	 * Actually, just clear the bit that says that the emulator has
	 * been initialized.  Initialization is delayed until the process
	 * traps to the emulator (if it is done at all) mainly because
	 * emulators don't provide an entry point for initialization.
	 */
	td->td_pcb->pcb_flags &= ~FP_SOFTFP;

	/*
	 * Drop the FP state if we hold it, so that the process gets a
	 * clean FP state if it uses the FPU again.
	 */
	fpstate_drop(td);

	/*
	 * XXX - Linux emulator
	 * Make sure edx is 0x0 on entry.  Linux binaries depend on it.
	 */
	td->td_retval[1] = 0;
}

void
cpu_setregs(void)
{
	unsigned int cr0;

	cr0 = rcr0();
	/*
	 * CR0_MP, CR0_NE and CR0_TS are also set by npx_probe() for the
	 * BSP.  See the comments there about why we set them.
	 */
	cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM;
	load_cr0(cr0);
	load_gs(_udatasel);
}
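
/*
 * Bit summary (illustrative; see <machine/specialreg.h>): CR0_MP pairs
 * with CR0_TS so the first FPU use traps and npx state can be restored
 * lazily, CR0_NE reports FPU errors via exception 16 rather than external
 * IRQ 13, CR0_WP makes the kernel honor page write protection, and CR0_AM
 * permits EFLAGS.AC alignment checking for user code.
 */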

static int
sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS)
{
	int error;

	error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
	    req);
	if (!error && req->newptr)
		resettodr();
	return (error);
}

SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW,
    &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "");

SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
    CTLFLAG_RW, &disable_rtc_set, 0, "");

SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo,
    CTLFLAG_RD, &bootinfo, bootinfo, "");

SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock,
    CTLFLAG_RW, &wall_cmos_clock, 0, "");

u_long bootdev;		/* not a struct cdev * - encoding is different */
SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev,
    CTLFLAG_RD, &bootdev, 0, "Maybe the Boot device (not in struct cdev * format)");

/*
 * Initialize 386 and configure to run kernel
 */

/*
 * Initialize segments & interrupt table
 */

int _default_ldt;
union descriptor gdt[NGDT * MAXCPU];	/* global descriptor table */
static struct gate_descriptor idt0[NIDT];
struct gate_descriptor *idt = &idt0[0];	/* interrupt descriptor table */
union descriptor ldt[NLDT];		/* local descriptor table */
struct region_descriptor r_gdt, r_idt;	/* table descriptors */

int private_tss;			/* flag indicating private tss */

#if defined(I586_CPU) && !defined(NO_F00F_HACK)
extern int has_f00f_bug;
#endif

static struct i386tss dblfault_tss;
static char dblfault_stack[PAGE_SIZE];

extern vm_offset_t proc0kstack;


/*
 * software prototypes -- in more palatable form.
 *
 * GCODE_SEL through GUDATA_SEL must be in this order for syscall/sysret
 * GUFS_SEL and GUGS_SEL must be in this order (swtch.s knows it)
 */
struct soft_segment_descriptor gdt_segs[] = {
/* GNULL_SEL	0 Null Descriptor */
{	0x0,			/* segment base address */
	0x0,			/* length */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0			/* limit granularity (byte/page units)*/ },
/* GPRIV_SEL	1 SMP Per-Processor Private Data Descriptor */
{	0x0,			/* segment base address */
	0xfffff,		/* length - all address space */
	SDT_MEMRWA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
/* GUFS_SEL	2 %fs Descriptor for user */
{	0x0,			/* segment base address */
	0xfffff,		/* length - all address space */
	SDT_MEMRWA,		/* segment type */
	SEL_UPL,		/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
/* GUGS_SEL	3 %gs Descriptor for user */
{	0x0,			/* segment base address */
	0xfffff,		/* length - all address space */
	SDT_MEMRWA,		/* segment type */
	SEL_UPL,		/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
/* GCODE_SEL	4 Code Descriptor for kernel */
{	0x0,			/* segment base address */
	0xfffff,		/* length - all address space */
	SDT_MEMERA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
/* GDATA_SEL	5 Data Descriptor for kernel */
{	0x0,			/* segment base address */
	0xfffff,		/* length - all address space */
	SDT_MEMRWA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
/* GUCODE_SEL	6 Code Descriptor for user */
{	0x0,			/* segment base address */
	0xfffff,		/* length - all address space */
	SDT_MEMERA,		/* segment type */
	SEL_UPL,		/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
/* GUDATA_SEL	7 Data Descriptor for user */
{	0x0,			/* segment base address */
	0xfffff,		/* length - all address space */
	SDT_MEMRWA,		/* segment type */
	SEL_UPL,		/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
/* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */
{	0x400,			/* segment base address */
	0xfffff,		/* length */
	SDT_MEMRWA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
/* GPROC0_SEL	9 Proc 0 Tss Descriptor */
{
	0x0,			/* segment base address */
	sizeof(struct i386tss)-1,/* length */
	SDT_SYS386TSS,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0			/* limit granularity (byte/page units)*/ },
/* GLDT_SEL	10 LDT Descriptor */
{	(int) ldt,		/* segment base address */
	sizeof(ldt)-1,		/* length - all address space */
	SDT_SYSLDT,		/* segment type */
	SEL_UPL,		/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0			/* limit granularity (byte/page units)*/ },
/* GUSERLDT_SEL	11 User LDT Descriptor per process */
{	(int) ldt,		/* segment base address */
	(512 * sizeof(union descriptor)-1),	/* length */
	SDT_SYSLDT,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0			/* limit granularity (byte/page units)*/ },
/* GPANIC_SEL	12 Panic Tss Descriptor */
{	(int) &dblfault_tss,	/* segment base address */
	sizeof(struct i386tss)-1,/* length - all address space */
	SDT_SYS386TSS,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0			/* limit granularity (byte/page units)*/ },
/* GBIOSCODE32_SEL 13 BIOS 32-bit interface (32bit Code) */
{	0,			/* segment base address (overwritten) */
	0xfffff,		/* length */
	SDT_MEMERA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
/* GBIOSCODE16_SEL 14 BIOS 32-bit interface (16bit Code) */
{	0,			/* segment base address (overwritten) */
	0xfffff,		/* length */
	SDT_MEMERA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
/* GBIOSDATA_SEL 15 BIOS 32-bit interface (Data) */
{	0,			/* segment base address (overwritten) */
	0xfffff,		/* length */
	SDT_MEMRWA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
/* GBIOSUTIL_SEL 16 BIOS 16-bit interface (Utility) */
{	0,			/* segment base address (overwritten) */
	0xfffff,		/* length */
	SDT_MEMRWA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
/* GBIOSARGS_SEL 17 BIOS 16-bit interface (Arguments) */
{	0,			/* segment base address (overwritten) */
	0xfffff,		/* length */
	SDT_MEMRWA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
/* GNDIS_SEL	18 NDIS Descriptor */
{	0x0,			/* segment base address */
	0x0,			/* length */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0			/* limit granularity (byte/page units)*/ },
};

static struct soft_segment_descriptor ldt_segs[] = {
/* Null Descriptor - overwritten by call gate */
{	0x0,			/* segment base address */
	0x0,			/* length - all address space */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0			/* limit granularity (byte/page units)*/ },
/* Null Descriptor - overwritten by call gate */
{	0x0,			/* segment base address */
	0x0,			/* length - all address space */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0			/* limit granularity (byte/page units)*/ },
/* Null Descriptor - overwritten by call gate */
{	0x0,			/* segment base address */
	0x0,			/* length - all address space */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0			/* limit granularity (byte/page units)*/ },
/* Code Descriptor for user */
{	0x0,			/* segment base address */
	0xfffff,		/* length - all address space */
	SDT_MEMERA,		/* segment type */
	SEL_UPL,		/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
/* Null Descriptor - overwritten by call gate */
{	0x0,			/* segment base address */
	0x0,			/* length - all address space */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0			/* limit granularity (byte/page units)*/ },
/* Data Descriptor for user */
{	0x0,			/* segment base address */
	0xfffff,		/* length - all address space */
	SDT_MEMRWA,		/* segment type */
	SEL_UPL,		/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
};

void
setidt(idx, func, typ, dpl, selec)
	int idx;
	inthand_t *func;
	int typ;
	int dpl;
	int selec;
{
	struct gate_descriptor *ip;

	ip = idt + idx;
	ip->gd_looffset = (int)func;
	ip->gd_selector = selec;
	ip->gd_stkcpy = 0;
	ip->gd_xx = 0;
	ip->gd_type = typ;
	ip->gd_dpl = dpl;
	ip->gd_p = 1;
	ip->gd_hioffset = ((int)func)>>16;
}
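
/*
 * Usage sketch (illustrative only; the real vectors are wired up in
 * init386()), showing a divide-error trap gate in the kernel code segment:
 *
 *	setidt(0, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL,
 *	    GSEL(GCODE_SEL, SEL_KPL));
 *
 * The 32-bit handler address is split across gd_looffset (low 16 bits)
 * and gd_hioffset (high 16 bits), as the 386 gate layout requires.
 */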

#define	IDTVEC(name)	__CONCAT(X,name)

extern inthand_t
	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
	IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
	IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);

#ifdef DDB
/*
 * Display the index and function name of any IDT entries that don't use
 * the default 'rsvd' entry point.
 */
DB_SHOW_COMMAND(idt, db_show_idt)
{
	struct gate_descriptor *ip;
	int idx, quit;
	uintptr_t func;

	ip = idt;
	db_setup_paging(db_simple_pager, &quit, db_lines_per_page);
	for (idx = 0, quit = 0; idx < NIDT; idx++) {
		func = (ip->gd_hioffset << 16 | ip->gd_looffset);
		if (func != (uintptr_t)&IDTVEC(rsvd)) {
			db_printf("%3d\t", idx);
			db_printsym(func, DB_STGY_PROC);
			db_printf("\n");
		}
		ip++;
	}
}
#endif

void
sdtossd(sd, ssd)
	struct segment_descriptor *sd;
	struct soft_segment_descriptor *ssd;
{

	ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
	ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
	ssd->ssd_type  = sd->sd_type;
	ssd->ssd_dpl   = sd->sd_dpl;
	ssd->ssd_p     = sd->sd_p;
	ssd->ssd_def32 = sd->sd_def32;
	ssd->ssd_gran  = sd->sd_gran;
}

#define PHYSMAP_SIZE	(2 * 8)

/*
 * Populate the (physmap) array with base/bound pairs describing the
 * available physical memory in the system, then test this memory and
 * build the phys_avail array describing the actually-available memory.
 *
 * If we cannot accurately determine the physical memory map, then use
 * the value from the 0xE801 call, and failing that, the RTC.
 *
 * Total memory size may be set by the kernel environment variable
 * hw.physmem or the compile-time define MAXMEM.
 *
 * XXX first should be vm_paddr_t.
 */
static void
getmemsize(int first)
{
	int i, physmap_idx, pa_indx, da_indx;
	int pg_n;
	u_long physmem_tunable;
	u_int extmem, under16;
	vm_paddr_t pa, physmap[PHYSMAP_SIZE];
	pt_entry_t *pte;
	quad_t dcons_addr, dcons_size;

	bzero(physmap, sizeof(physmap));

	/* XXX - some EPSON machines can't use PG_N */
	pg_n = PG_N;
	if (pc98_machine_type & M_EPSON_PC98) {
		switch (epson_machine_id) {
#ifdef WB_CACHE
		default:
#endif
		case 0x34:		/* PC-486HX */
		case 0x35:		/* PC-486HG */
		case 0x3B:		/* PC-486HA */
			pg_n = 0;
			break;
		}
	}

	/*
	 * Perform "base memory" related probes & setup
	 */
	under16 = pc98_getmemsize(&basemem, &extmem);
	if (basemem > 640) {
		printf("Preposterous BIOS basemem of %uK, truncating to 640K\n",
		    basemem);
		basemem = 640;
	}

	/*
	 * XXX if biosbasemem is now < 640, there is a `hole'
	 * between the end of base memory and the start of
	 * ISA memory.  The hole may be empty or it may
	 * contain BIOS code or data.  Map it read/write so
	 * that the BIOS can write to it.  (Memory from 0 to
	 * the physical end of the kernel is mapped read-only
	 * to begin with and then parts of it are remapped.
	 * The parts that aren't remapped form holes that
	 * remain read-only and are unused by the kernel.
	 * The base memory area is below the physical end of
	 * the kernel and right now forms a read-only hole.
	 * The part of it from PAGE_SIZE to
	 * (trunc_page(biosbasemem * 1024) - 1) will be
	 * remapped and used by the kernel later.)
	 *
	 * This code is similar to the code used in
	 * pmap_mapdev, but since no memory needs to be
	 * allocated we simply change the mapping.
	 */
	for (pa = trunc_page(basemem * 1024);
	     pa < ISA_HOLE_START; pa += PAGE_SIZE)
		pmap_kenter(KERNBASE + pa, pa);

	/*
	 * if basemem != 640, map pages r/w into vm86 page table so
	 * that the bios can scribble on it.
	 */
	pte = (pt_entry_t *)vm86paddr;
	for (i = basemem / 4; i < 160; i++)
		pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U;

	physmap[0] = 0;
	physmap[1] = basemem * 1024;
	physmap_idx = 2;
	physmap[physmap_idx] = 0x100000;
	physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024;

	/*
	 * Now, physmap contains a map of physical memory.
	 */

#ifdef SMP
	/* make hole for AP bootstrap code */
	physmap[1] = mp_bootaddress(physmap[1]);
#endif

	/*
	 * Maxmem isn't the "maximum memory", it's one larger than the
	 * highest page of the physical address space.  It should be
	 * called something like "Maxphyspage".  We may adjust this
	 * based on ``hw.physmem'' and the results of the memory test.
	 */
	Maxmem = atop(physmap[physmap_idx + 1]);

#ifdef MAXMEM
	Maxmem = MAXMEM / 4;
#endif

	if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable))
		Maxmem = atop(physmem_tunable);

	if (atop(physmap[physmap_idx + 1]) != Maxmem &&
	    (boothowto & RB_VERBOSE))
		printf("Physical memory use set to %ldK\n", Maxmem * 4);

	/*
	 * If Maxmem has been increased beyond what the system has detected,
	 * extend the last memory segment to the new limit.
	 */
	if (atop(physmap[physmap_idx + 1]) < Maxmem)
		physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem);

	/*
	 * The chunk must be divided if Maxmem is larger than 16MB and
	 * the area under 16MB is not entirely memory:
	 * (1) the system area (15-16MB region) is cut off
	 * (2) extended memory lies only above the 16MB mark (e.g., Melco
	 *     "HYPERMEMORY")
	 */
	if ((under16 != 16 * 1024) && (extmem > 15 * 1024)) {
		/* The 15M-16M region is cut off, so the chunk must be divided. */
		physmap[physmap_idx + 1] = under16 * 1024;
		physmap_idx += 2;
		physmap[physmap_idx] = 0x1000000;
		physmap[physmap_idx + 1] = physmap[2] + extmem * 1024;
	}

	/* call pmap initialization to make new kernel address space */
	pmap_bootstrap(first, 0);

	/*
	 * Size up each available chunk of physical memory.
	 */
	physmap[0] = PAGE_SIZE;		/* mask off page 0 */
	pa_indx = 0;
	da_indx = 1;
	phys_avail[pa_indx++] = physmap[0];
	phys_avail[pa_indx] = physmap[0];
	dump_avail[da_indx] = physmap[0];
	pte = CMAP1;

	/*
	 * Get dcons buffer address
	 */
	if (getenv_quad("dcons.addr", &dcons_addr) == 0 ||
	    getenv_quad("dcons.size", &dcons_size) == 0)
		dcons_addr = 0;

	/*
	 * physmap is in bytes, so when converting to page boundaries,
	 * round up the start address and round down the end address.
	 */
	for (i = 0; i <= physmap_idx; i += 2) {
		vm_paddr_t end;

		end = ptoa((vm_paddr_t)Maxmem);
		if (physmap[i + 1] < end)
			end = trunc_page(physmap[i + 1]);
		for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
			int tmp, page_bad, full;
			int *ptr = (int *)CADDR1;

			full = FALSE;
			/*
			 * block out kernel memory as not available.
			 */
			if (pa >= KERNLOAD && pa < first)
				goto do_dump_avail;

			/*
			 * block out dcons buffer
			 */
			if (dcons_addr > 0
			    && pa >= trunc_page(dcons_addr)
			    && pa < dcons_addr + dcons_size)
				goto do_dump_avail;

			page_bad = FALSE;

			/*
			 * map page into kernel: valid, read/write,non-cacheable
			 */
			*pte = pa | PG_V | PG_RW | pg_n;
			invltlb();

			tmp = *(int *)ptr;
			/*
			 * Test for alternating 1's and 0's
			 */
			*(volatile int *)ptr = 0xaaaaaaaa;
			if (*(volatile int *)ptr != 0xaaaaaaaa)
				page_bad = TRUE;
			/*
			 * Test for alternating 0's and 1's
			 */
			*(volatile int *)ptr = 0x55555555;
			if (*(volatile int *)ptr != 0x55555555)
				page_bad = TRUE;
			/*
			 * Test for all 1's
			 */
			*(volatile int *)ptr = 0xffffffff;
			if (*(volatile int *)ptr != 0xffffffff)
				page_bad = TRUE;
			/*
			 * Test for all 0's
			 */
			*(volatile int *)ptr = 0x0;
			if (*(volatile int *)ptr != 0x0)
				page_bad = TRUE;
			/*
			 * Restore original value.
			 */
			*(int *)ptr = tmp;

			/*
			 * Adjust array of valid/good pages.
			 */
			if (page_bad == TRUE)
				continue;
1834 			/*
1835 			 * If this good page is a continuation of the
1836 			 * previous set of good pages, then just increase
1837 			 * the end pointer. Otherwise start a new chunk.
1838 			 * Note that "end" points one past the last valid
1839 			 * page, making the range >= start and < end.
1840 			 * If we're also doing a speculative memory
1841 			 * test and we are at or past the end, bump up
1842 			 * Maxmem so that we keep going. The first bad
1843 			 * page will terminate the loop.
1844 			 */
1845 if (phys_avail[pa_indx] == pa) {
1846 phys_avail[pa_indx] += PAGE_SIZE;
1847 } else {
1848 pa_indx++;
1849 if (pa_indx == PHYS_AVAIL_ARRAY_END) {
1850 printf(
1851 "Too many holes in the physical address space, giving up\n");
1852 pa_indx--;
1853 full = TRUE;
1854 goto do_dump_avail;
1855 }
1856 phys_avail[pa_indx++] = pa; /* start */
1857 phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */
1858 }
1859 physmem++;
1860 do_dump_avail:
1861 if (dump_avail[da_indx] == pa) {
1862 dump_avail[da_indx] += PAGE_SIZE;
1863 } else {
1864 da_indx++;
1865 if (da_indx == DUMP_AVAIL_ARRAY_END) {
1866 da_indx--;
1867 goto do_next;
1868 }
1869 dump_avail[da_indx++] = pa; /* start */
1870 dump_avail[da_indx] = pa + PAGE_SIZE; /* end */
1871 }
1872 do_next:
1873 if (full)
1874 break;
1875 }
1876 }
1877 *pte = 0;
1878 invltlb();
1879
1880 /*
1881 * XXX
1882 * The last chunk must contain at least one page plus the message
1883 * buffer to avoid complicating other code (message buffer address
1884 * calculation, etc.).
1885 */
1886 while (phys_avail[pa_indx - 1] + PAGE_SIZE +
1887 round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) {
1888 physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
1889 phys_avail[pa_indx--] = 0;
1890 phys_avail[pa_indx--] = 0;
1891 }
1892
1893 Maxmem = atop(phys_avail[pa_indx]);
1894
1895 /* Trim off space for the message buffer. */
1896 phys_avail[pa_indx] -= round_page(MSGBUF_SIZE);
1897
1898 avail_end = phys_avail[pa_indx];
1899 }
1900
1901 void
1902 init386(int first)
1903 {
1905 struct gate_descriptor *gdp;
1906 int gsel_tss, metadata_missing, off, x;
1907 struct pcpu *pc;
1908
1909 thread0.td_kstack = proc0kstack;
1910 thread0.td_pcb = (struct pcb *)
1911 (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1;
1912
1913 	/*
1914 	 * This may be done better later if it gets more high-level
1915 	 * components in it. If so, just link td->td_proc here.
1916 	 */
1917 proc_linkup(&proc0, &ksegrp0, &thread0);
1918
1919 /*
1920 * Initialize DMAC
1921 */
1922 pc98_init_dmac();
1923
1924 metadata_missing = 0;
1925 if (bootinfo.bi_modulep) {
1926 preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE;
1927 preload_bootstrap_relocate(KERNBASE);
1928 } else {
1929 metadata_missing = 1;
1930 }
1931 if (envmode == 1)
1932 kern_envp = static_env;
1933 else if (bootinfo.bi_envp)
1934 kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE;
1935
1936 /* Init basic tunables, hz etc */
1937 init_param1();
1938
1939 /*
1940 * Make gdt memory segments. All segments cover the full 4GB
1941 * of address space and permissions are enforced at page level.
1942 */
1943 gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1);
1944 gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1);
1945 gdt_segs[GUCODE_SEL].ssd_limit = atop(0 - 1);
1946 gdt_segs[GUDATA_SEL].ssd_limit = atop(0 - 1);
1947 gdt_segs[GUFS_SEL].ssd_limit = atop(0 - 1);
1948 gdt_segs[GUGS_SEL].ssd_limit = atop(0 - 1);
1949
1950 #ifdef SMP
1951 pc = &SMP_prvspace[0].pcpu;
1952 #else
1953 pc = &__pcpu;
1954 #endif
1955 gdt_segs[GPRIV_SEL].ssd_limit = atop(0 - 1);
1956 gdt_segs[GPRIV_SEL].ssd_base = (int) pc;
1957 gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;
1958
1959 for (x = 0; x < NGDT; x++)
1960 ssdtosd(&gdt_segs[x], &gdt[x].sd);
1961
1962 r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
1963 r_gdt.rd_base = (int) gdt;
1964 lgdt(&r_gdt);
1965
1966 pcpu_init(pc, 0, sizeof(struct pcpu));
1967 PCPU_SET(prvspace, pc);
1968 PCPU_SET(curthread, &thread0);
1969 PCPU_SET(curpcb, thread0.td_pcb);
1970
1971 /*
1972 * Initialize mutexes.
1973 *
1974 * icu_lock: in order to allow an interrupt to occur in a critical
1975 * section, to set pcpu->ipending (etc...) properly, we
1976 * must be able to get the icu lock, so it can't be
1977 * under witness.
1978 */
1979 mutex_init();
1980 mtx_init(&clock_lock, "clk", NULL, MTX_SPIN);
1981 mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS);
1982
1983 /* make ldt memory segments */
1984 ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1);
1985 ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1);
1986 for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++)
1987 ssdtosd(&ldt_segs[x], &ldt[x].sd);
1988
1989 _default_ldt = GSEL(GLDT_SEL, SEL_KPL);
1990 lldt(_default_ldt);
1991 PCPU_SET(currentldt, _default_ldt);
1992
1993 /* exceptions */
1994 for (x = 0; x < NIDT; x++)
1995 setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL,
1996 GSEL(GCODE_SEL, SEL_KPL));
1997 setidt(IDT_DE, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL,
1998 GSEL(GCODE_SEL, SEL_KPL));
1999 setidt(IDT_DB, &IDTVEC(dbg), SDT_SYS386IGT, SEL_KPL,
2000 GSEL(GCODE_SEL, SEL_KPL));
2001 setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYS386IGT, SEL_KPL,
2002 GSEL(GCODE_SEL, SEL_KPL));
2003 setidt(IDT_BP, &IDTVEC(bpt), SDT_SYS386IGT, SEL_UPL,
2004 GSEL(GCODE_SEL, SEL_KPL));
2005 setidt(IDT_OF, &IDTVEC(ofl), SDT_SYS386TGT, SEL_UPL,
2006 GSEL(GCODE_SEL, SEL_KPL));
2007 setidt(IDT_BR, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL,
2008 GSEL(GCODE_SEL, SEL_KPL));
2009 setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL,
2010 GSEL(GCODE_SEL, SEL_KPL));
2011 	setidt(IDT_NM, &IDTVEC(dna), SDT_SYS386TGT, SEL_KPL,
2012 	    GSEL(GCODE_SEL, SEL_KPL));
2013 setidt(IDT_DF, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL));
2014 setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL,
2015 GSEL(GCODE_SEL, SEL_KPL));
2016 setidt(IDT_TS, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL,
2017 GSEL(GCODE_SEL, SEL_KPL));
2018 setidt(IDT_NP, &IDTVEC(missing), SDT_SYS386TGT, SEL_KPL,
2019 GSEL(GCODE_SEL, SEL_KPL));
2020 setidt(IDT_SS, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL,
2021 GSEL(GCODE_SEL, SEL_KPL));
2022 setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL,
2023 GSEL(GCODE_SEL, SEL_KPL));
2024 setidt(IDT_PF, &IDTVEC(page), SDT_SYS386IGT, SEL_KPL,
2025 GSEL(GCODE_SEL, SEL_KPL));
2026 setidt(IDT_MF, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL,
2027 GSEL(GCODE_SEL, SEL_KPL));
2028 setidt(IDT_AC, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL,
2029 GSEL(GCODE_SEL, SEL_KPL));
2030 setidt(IDT_MC, &IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL,
2031 GSEL(GCODE_SEL, SEL_KPL));
2032 setidt(IDT_XF, &IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL,
2033 GSEL(GCODE_SEL, SEL_KPL));
2034 setidt(IDT_SYSCALL, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL,
2035 GSEL(GCODE_SEL, SEL_KPL));
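	/*
	 * The breakpoint (IDT_BP), overflow (IDT_OF) and syscall
	 * (IDT_SYSCALL) gates above are installed with SEL_UPL so the
	 * corresponding int instructions can be issued from user mode.
	 */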
2036
2037 r_idt.rd_limit = sizeof(idt0) - 1;
2038 r_idt.rd_base = (int) idt;
2039 lidt(&r_idt);
2040
2041 /*
2042 * Initialize the console before we print anything out.
2043 */
2044 cninit();
2045
2046 if (metadata_missing)
2047 printf("WARNING: loader(8) metadata is missing!\n");
2048
2049 #ifdef DEV_ISA
2050 atpic_startup();
2051 #endif
2052
2053 #ifdef DDB
2054 ksym_start = bootinfo.bi_symtab;
2055 ksym_end = bootinfo.bi_esymtab;
2056 #endif
2057
2058 kdb_init();
2059
2060 #ifdef KDB
2061 if (boothowto & RB_KDB)
2062 kdb_enter("Boot flags requested debugger");
2063 #endif
2064
2065 finishidentcpu(); /* Final stage of CPU initialization */
2066 setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL,
2067 GSEL(GCODE_SEL, SEL_KPL));
2068 setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL,
2069 GSEL(GCODE_SEL, SEL_KPL));
2070 initializecpu(); /* Initialize CPU registers */
2071
2072 /* make an initial tss so cpu can get interrupt stack on syscall! */
2073 /* Note: -16 is so we can grow the trapframe if we came from vm86 */
2074 PCPU_SET(common_tss.tss_esp0, thread0.td_kstack +
2075 KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb) - 16);
2076 PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
2077 gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
2078 private_tss = 0;
2079 PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd);
2080 PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
2081 PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
2082 ltr(gsel_tss);
2083
2084 /* pointer to selector slot for %fs/%gs */
2085 PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);
2086
2087 dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 =
2088 dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)];
2089 dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
2090 dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
2091 dblfault_tss.tss_cr3 = (int)IdlePTD;
2092 dblfault_tss.tss_eip = (int)dblfault_handler;
2093 dblfault_tss.tss_eflags = PSL_KERNEL;
2094 dblfault_tss.tss_ds = dblfault_tss.tss_es =
2095 dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL);
2096 dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL);
2097 dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
2098 dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
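	/*
	 * IDT_DF was installed above as a task gate through GPANIC_SEL, so
	 * a double fault switches to this TSS and runs dblfault_handler on
	 * its own stack even if the kernel stack is no longer usable.
	 */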
2099
2100 vm86_initialize();
2101 getmemsize(first);
2102 init_param2(physmem);
2103
2104 	/* now running on new page tables, configured, and u/iom is accessible */
2105
2106 /* Map the message buffer. */
2107 for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE)
2108 pmap_kenter((vm_offset_t)msgbufp + off, avail_end + off);
2109
2110 msgbufinit(msgbufp, MSGBUF_SIZE);
2111
2112 /* make a call gate to reenter kernel with */
2113 gdp = &ldt[LSYS5CALLS_SEL].gd;
2114
2115 x = (int) &IDTVEC(lcall_syscall);
2116 gdp->gd_looffset = x;
2117 gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL);
2118 gdp->gd_stkcpy = 1;
2119 gdp->gd_type = SDT_SYS386CGT;
2120 gdp->gd_dpl = SEL_UPL;
2121 gdp->gd_p = 1;
2122 gdp->gd_hioffset = x >> 16;
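	/*
	 * The 32-bit handler offset is split across the low and high halves
	 * of the descriptor (gd_looffset/gd_hioffset); together with
	 * SDT_SYS386CGT and SEL_UPL this lets user code enter the kernel
	 * via lcall.
	 */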
2123
2124 /* XXX does this work? */
2125 /* XXX yes! */
2126 ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL];
2127 ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL];
2128
2129 /* transfer to user mode */
2130
2131 _ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
2132 _udatasel = GSEL(GUDATA_SEL, SEL_UPL);
2133
2134 /* setup proc 0's pcb */
2135 thread0.td_pcb->pcb_flags = 0; /* XXXKSE */
2136 thread0.td_pcb->pcb_cr3 = (int)IdlePTD;
2137 thread0.td_pcb->pcb_ext = 0;
2138 thread0.td_frame = &proc0_tf;
2139 }
2140
2141 void
2142 cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
2143 {
2144
2145 }
2146
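/*
 * Spinlock sections nest: interrupts are disabled only by the outermost
 * spinlock_enter() and the saved flags are restored only by the matching
 * outermost spinlock_exit().
 */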
2147 void
2148 spinlock_enter(void)
2149 {
2150 struct thread *td;
2151
2152 td = curthread;
2153 if (td->td_md.md_spinlock_count == 0)
2154 td->td_md.md_saved_flags = intr_disable();
2155 td->td_md.md_spinlock_count++;
2156 critical_enter();
2157 }
2158
2159 void
2160 spinlock_exit(void)
2161 {
2162 struct thread *td;
2163
2164 td = curthread;
2165 critical_exit();
2166 td->td_md.md_spinlock_count--;
2167 if (td->td_md.md_spinlock_count == 0)
2168 intr_restore(td->td_md.md_saved_flags);
2169 }
2170
2171 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
2172 static void f00f_hack(void *unused);
2173 SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL)
2174
2175 static void
2176 f00f_hack(void *unused)
2177 {
2178 struct gate_descriptor *new_idt;
2179 vm_offset_t tmp;
2180
2181 if (!has_f00f_bug)
2182 return;
2183
2184 GIANT_REQUIRED;
2185
2186 printf("Intel Pentium detected, installing workaround for F00F bug\n");
2187
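	/*
	 * The workaround relocates the IDT so that the problematic gate is
	 * the last entry in a page that is then mapped read-only.
	 */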
2188 tmp = kmem_alloc(kernel_map, PAGE_SIZE * 2);
2189 if (tmp == 0)
2190 panic("kmem_alloc returned 0");
2191
2192 /* Put the problematic entry (#6) at the end of the lower page. */
2193 new_idt = (struct gate_descriptor*)
2194 (tmp + PAGE_SIZE - 7 * sizeof(struct gate_descriptor));
2195 bcopy(idt, new_idt, sizeof(idt0));
2196 r_idt.rd_base = (u_int)new_idt;
2197 lidt(&r_idt);
2198 idt = new_idt;
2199 if (vm_map_protect(kernel_map, tmp, tmp + PAGE_SIZE,
2200 VM_PROT_READ, FALSE) != KERN_SUCCESS)
2201 panic("vm_map_protect failed");
2202 }
2203 #endif /* defined(I586_CPU) && !defined(NO_F00F_HACK) */
2204
2205 /*
2206 * Construct a PCB from a trapframe. This is called from kdb_trap() where
2207 * we want to start a backtrace from the function that caused us to enter
2208 * the debugger. We have the context in the trapframe, but base the trace
2209 * on the PCB. The PCB doesn't have to be perfect, as long as it contains
2210 * enough for a backtrace.
2211 */
2212 void
2213 makectx(struct trapframe *tf, struct pcb *pcb)
2214 {
2215
2216 pcb->pcb_edi = tf->tf_edi;
2217 pcb->pcb_esi = tf->tf_esi;
2218 pcb->pcb_ebp = tf->tf_ebp;
2219 pcb->pcb_ebx = tf->tf_ebx;
2220 pcb->pcb_eip = tf->tf_eip;
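	/*
	 * Traps from kernel mode do not push esp/ss, so when ISPL() is 0
	 * the stack pointer is recovered from the frame address itself,
	 * backing off the 8 bytes the absent esp/ss words would occupy.
	 */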
2221 pcb->pcb_esp = (ISPL(tf->tf_cs)) ? tf->tf_esp : (int)(tf + 1) - 8;
2222 }
2223
2224 int
2225 ptrace_set_pc(struct thread *td, u_long addr)
2226 {
2227
2228 td->td_frame->tf_eip = addr;
2229 return (0);
2230 }
2231
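/*
 * Single-stepping works by toggling PSL_T, the x86 trap flag, which raises
 * a debug trap after each instruction the thread executes.
 */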
2232 int
2233 ptrace_single_step(struct thread *td)
2234 {
2235 td->td_frame->tf_eflags |= PSL_T;
2236 return (0);
2237 }
2238
2239 int
2240 ptrace_clear_single_step(struct thread *td)
2241 {
2242 td->td_frame->tf_eflags &= ~PSL_T;
2243 return (0);
2244 }
2245
2246 int
2247 fill_regs(struct thread *td, struct reg *regs)
2248 {
2249 struct pcb *pcb;
2250 struct trapframe *tp;
2251
2252 tp = td->td_frame;
2253 pcb = td->td_pcb;
2254 regs->r_fs = tp->tf_fs;
2255 regs->r_es = tp->tf_es;
2256 regs->r_ds = tp->tf_ds;
2257 regs->r_edi = tp->tf_edi;
2258 regs->r_esi = tp->tf_esi;
2259 regs->r_ebp = tp->tf_ebp;
2260 regs->r_ebx = tp->tf_ebx;
2261 regs->r_edx = tp->tf_edx;
2262 regs->r_ecx = tp->tf_ecx;
2263 regs->r_eax = tp->tf_eax;
2264 regs->r_eip = tp->tf_eip;
2265 regs->r_cs = tp->tf_cs;
2266 regs->r_eflags = tp->tf_eflags;
2267 regs->r_esp = tp->tf_esp;
2268 regs->r_ss = tp->tf_ss;
2269 regs->r_gs = pcb->pcb_gs;
2270 return (0);
2271 }
2272
2273 int
2274 set_regs(struct thread *td, struct reg *regs)
2275 {
2276 struct pcb *pcb;
2277 struct trapframe *tp;
2278
2279 tp = td->td_frame;
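	/*
	 * EFL_SECURE() and CS_SECURE() reject eflags changes outside the
	 * user-modifiable bits and any non-user %cs selector, so this path
	 * cannot be used to gain privilege.
	 */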
2280 if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) ||
2281 !CS_SECURE(regs->r_cs))
2282 return (EINVAL);
2283 pcb = td->td_pcb;
2284 tp->tf_fs = regs->r_fs;
2285 tp->tf_es = regs->r_es;
2286 tp->tf_ds = regs->r_ds;
2287 tp->tf_edi = regs->r_edi;
2288 tp->tf_esi = regs->r_esi;
2289 tp->tf_ebp = regs->r_ebp;
2290 tp->tf_ebx = regs->r_ebx;
2291 tp->tf_edx = regs->r_edx;
2292 tp->tf_ecx = regs->r_ecx;
2293 tp->tf_eax = regs->r_eax;
2294 tp->tf_eip = regs->r_eip;
2295 tp->tf_cs = regs->r_cs;
2296 tp->tf_eflags = regs->r_eflags;
2297 tp->tf_esp = regs->r_esp;
2298 tp->tf_ss = regs->r_ss;
2299 pcb->pcb_gs = regs->r_gs;
2300 return (0);
2301 }
2302
2303 #ifdef CPU_ENABLE_SSE
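/*
 * Convert between the FXSAVE-style savexmm layout and the legacy FNSAVE
 * save87 layout so that fpreg consumers always see the 387 format even
 * when the CPU saves FPU state with fxsave.
 */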
2304 static void
2305 fill_fpregs_xmm(struct savexmm *sv_xmm, struct save87 *sv_87)
2306 {
2309 register struct env87 *penv_87 = &sv_87->sv_env;
2310 register struct envxmm *penv_xmm = &sv_xmm->sv_env;
2311 int i;
2312
2313 bzero(sv_87, sizeof(*sv_87));
2314
2315 /* FPU control/status */
2316 penv_87->en_cw = penv_xmm->en_cw;
2317 penv_87->en_sw = penv_xmm->en_sw;
2318 penv_87->en_tw = penv_xmm->en_tw;
2319 penv_87->en_fip = penv_xmm->en_fip;
2320 penv_87->en_fcs = penv_xmm->en_fcs;
2321 penv_87->en_opcode = penv_xmm->en_opcode;
2322 penv_87->en_foo = penv_xmm->en_foo;
2323 penv_87->en_fos = penv_xmm->en_fos;
2324
2325 /* FPU registers */
2326 for (i = 0; i < 8; ++i)
2327 sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc;
2328 }
2329
2330 static void
2331 set_fpregs_xmm(struct save87 *sv_87, struct savexmm *sv_xmm)
2332 {
2335 register struct env87 *penv_87 = &sv_87->sv_env;
2336 register struct envxmm *penv_xmm = &sv_xmm->sv_env;
2337 int i;
2338
2339 /* FPU control/status */
2340 penv_xmm->en_cw = penv_87->en_cw;
2341 penv_xmm->en_sw = penv_87->en_sw;
2342 penv_xmm->en_tw = penv_87->en_tw;
2343 penv_xmm->en_fip = penv_87->en_fip;
2344 penv_xmm->en_fcs = penv_87->en_fcs;
2345 penv_xmm->en_opcode = penv_87->en_opcode;
2346 penv_xmm->en_foo = penv_87->en_foo;
2347 penv_xmm->en_fos = penv_87->en_fos;
2348
2349 /* FPU registers */
2350 for (i = 0; i < 8; ++i)
2351 sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i];
2352 }
2353 #endif /* CPU_ENABLE_SSE */
2354
2355 int
2356 fill_fpregs(struct thread *td, struct fpreg *fpregs)
2357 {
2358 #ifdef CPU_ENABLE_SSE
2359 if (cpu_fxsr) {
2360 fill_fpregs_xmm(&td->td_pcb->pcb_save.sv_xmm,
2361 (struct save87 *)fpregs);
2362 return (0);
2363 }
2364 #endif /* CPU_ENABLE_SSE */
2365 bcopy(&td->td_pcb->pcb_save.sv_87, fpregs, sizeof *fpregs);
2366 return (0);
2367 }
2368
2369 int
2370 set_fpregs(struct thread *td, struct fpreg *fpregs)
2371 {
2372 #ifdef CPU_ENABLE_SSE
2373 if (cpu_fxsr) {
2374 set_fpregs_xmm((struct save87 *)fpregs,
2375 &td->td_pcb->pcb_save.sv_xmm);
2376 return (0);
2377 }
2378 #endif /* CPU_ENABLE_SSE */
2379 bcopy(fpregs, &td->td_pcb->pcb_save.sv_87, sizeof *fpregs);
2380 return (0);
2381 }
2382
2383 /*
2384 * Get machine context.
2385 */
2386 int
2387 get_mcontext(struct thread *td, mcontext_t *mcp, int flags)
2388 {
2389 struct trapframe *tp;
2390
2391 tp = td->td_frame;
2392
2393 PROC_LOCK(curthread->td_proc);
2394 mcp->mc_onstack = sigonstack(tp->tf_esp);
2395 PROC_UNLOCK(curthread->td_proc);
2396 mcp->mc_gs = td->td_pcb->pcb_gs;
2397 mcp->mc_fs = tp->tf_fs;
2398 mcp->mc_es = tp->tf_es;
2399 mcp->mc_ds = tp->tf_ds;
2400 mcp->mc_edi = tp->tf_edi;
2401 mcp->mc_esi = tp->tf_esi;
2402 mcp->mc_ebp = tp->tf_ebp;
2403 mcp->mc_isp = tp->tf_isp;
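	/*
	 * GET_MC_CLEAR_RET zeroes the return registers, e.g. so a context
	 * captured by getcontext(2) reads as a successful return when it
	 * is resumed later.
	 */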
2404 if (flags & GET_MC_CLEAR_RET) {
2405 mcp->mc_eax = 0;
2406 mcp->mc_edx = 0;
2407 } else {
2408 mcp->mc_eax = tp->tf_eax;
2409 mcp->mc_edx = tp->tf_edx;
2410 }
2411 mcp->mc_ebx = tp->tf_ebx;
2412 mcp->mc_ecx = tp->tf_ecx;
2413 mcp->mc_eip = tp->tf_eip;
2414 mcp->mc_cs = tp->tf_cs;
2415 mcp->mc_eflags = tp->tf_eflags;
2416 mcp->mc_esp = tp->tf_esp;
2417 mcp->mc_ss = tp->tf_ss;
2418 mcp->mc_len = sizeof(*mcp);
2419 get_fpcontext(td, mcp);
2420 return (0);
2421 }
2422
2423 /*
2424 * Set machine context.
2425 *
2426 * However, we don't set any but the user modifiable flags, and we won't
2427 * touch the cs selector.
2428 */
2429 int
2430 set_mcontext(struct thread *td, const mcontext_t *mcp)
2431 {
2432 struct trapframe *tp;
2433 int eflags, ret;
2434
2435 tp = td->td_frame;
2436 if (mcp->mc_len != sizeof(*mcp))
2437 return (EINVAL);
2438 eflags = (mcp->mc_eflags & PSL_USERCHANGE) |
2439 (tp->tf_eflags & ~PSL_USERCHANGE);
2440 if ((ret = set_fpcontext(td, mcp)) == 0) {
2441 tp->tf_fs = mcp->mc_fs;
2442 tp->tf_es = mcp->mc_es;
2443 tp->tf_ds = mcp->mc_ds;
2444 tp->tf_edi = mcp->mc_edi;
2445 tp->tf_esi = mcp->mc_esi;
2446 tp->tf_ebp = mcp->mc_ebp;
2447 tp->tf_ebx = mcp->mc_ebx;
2448 tp->tf_edx = mcp->mc_edx;
2449 tp->tf_ecx = mcp->mc_ecx;
2450 tp->tf_eax = mcp->mc_eax;
2451 tp->tf_eip = mcp->mc_eip;
2452 tp->tf_eflags = eflags;
2453 tp->tf_esp = mcp->mc_esp;
2454 tp->tf_ss = mcp->mc_ss;
2455 td->td_pcb->pcb_gs = mcp->mc_gs;
2456 ret = 0;
2457 }
2458 return (ret);
2459 }
2460
2461 static void
2462 get_fpcontext(struct thread *td, mcontext_t *mcp)
2463 {
2464 #ifndef DEV_NPX
2465 mcp->mc_fpformat = _MC_FPFMT_NODEV;
2466 mcp->mc_ownedfp = _MC_FPOWNED_NONE;
2467 #else
2468 union savefpu *addr;
2469
2470 /*
2471 * XXX mc_fpstate might be misaligned, since its declaration is not
2472 * unportabilized using __attribute__((aligned(16))) like the
2473 * declaration of struct savemm, and anyway, alignment doesn't work
2474 * for auto variables since we don't use gcc's pessimal stack
2475 * alignment. Work around this by abusing the spare fields after
2476 * mcp->mc_fpstate.
2477 *
2478 * XXX unpessimize most cases by only aligning when fxsave might be
2479 * called, although this requires knowing too much about
2480 * npxgetregs()'s internals.
2481 */
2482 addr = (union savefpu *)&mcp->mc_fpstate;
2483 if (td == PCPU_GET(fpcurthread) &&
2484 #ifdef CPU_ENABLE_SSE
2485 cpu_fxsr &&
2486 #endif
2487 ((uintptr_t)(void *)addr & 0xF)) {
2488 do
2489 addr = (void *)((char *)addr + 4);
2490 while ((uintptr_t)(void *)addr & 0xF);
2491 }
2492 mcp->mc_ownedfp = npxgetregs(td, addr);
2493 if (addr != (union savefpu *)&mcp->mc_fpstate) {
2494 bcopy(addr, &mcp->mc_fpstate, sizeof(mcp->mc_fpstate));
2495 bzero(&mcp->mc_spare2, sizeof(mcp->mc_spare2));
2496 }
2497 mcp->mc_fpformat = npxformat();
2498 #endif
2499 }
2500
2501 static int
2502 set_fpcontext(struct thread *td, const mcontext_t *mcp)
2503 {
2504 union savefpu *addr;
2505
2506 if (mcp->mc_fpformat == _MC_FPFMT_NODEV)
2507 return (0);
2508 else if (mcp->mc_fpformat != _MC_FPFMT_387 &&
2509 mcp->mc_fpformat != _MC_FPFMT_XMM)
2510 return (EINVAL);
2511 else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE)
2512 /* We don't care what state is left in the FPU or PCB. */
2513 fpstate_drop(td);
2514 else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU ||
2515 mcp->mc_ownedfp == _MC_FPOWNED_PCB) {
2516 /* XXX align as above. */
2517 addr = (union savefpu *)&mcp->mc_fpstate;
2518 if (td == PCPU_GET(fpcurthread) &&
2519 #ifdef CPU_ENABLE_SSE
2520 cpu_fxsr &&
2521 #endif
2522 ((uintptr_t)(void *)addr & 0xF)) {
2523 do
2524 addr = (void *)((char *)addr + 4);
2525 while ((uintptr_t)(void *)addr & 0xF);
2526 bcopy(&mcp->mc_fpstate, addr, sizeof(mcp->mc_fpstate));
2527 }
2528 #ifdef DEV_NPX
2529 /*
2530 * XXX we violate the dubious requirement that npxsetregs()
2531 * be called with interrupts disabled.
2532 */
2533 npxsetregs(td, addr);
2534 #endif
2535 /*
2536 * Don't bother putting things back where they were in the
2537 * misaligned case, since we know that the caller won't use
2538 * them again.
2539 */
2540 } else
2541 return (EINVAL);
2542 return (0);
2543 }
2544
2545 static void
2546 fpstate_drop(struct thread *td)
2547 {
2548 register_t s;
2549
2550 s = intr_disable();
2551 #ifdef DEV_NPX
2552 if (PCPU_GET(fpcurthread) == td)
2553 npxdrop();
2554 #endif
2555 /*
2556 * XXX force a full drop of the npx. The above only drops it if we
2557 * owned it. npxgetregs() has the same bug in the !cpu_fxsr case.
2558 *
2559 * XXX I don't much like npxgetregs()'s semantics of doing a full
2560 * drop. Dropping only to the pcb matches fnsave's behaviour.
2561 * We only need to drop to !PCB_INITDONE in sendsig(). But
2562 * sendsig() is the only caller of npxgetregs()... perhaps we just
2563 * have too many layers.
2564 */
2565 curthread->td_pcb->pcb_flags &= ~PCB_NPXINITDONE;
2566 intr_restore(s);
2567 }
2568
2569 int
2570 fill_dbregs(struct thread *td, struct dbreg *dbregs)
2571 {
2572 struct pcb *pcb;
2573
2574 if (td == NULL) {
2575 dbregs->dr[0] = rdr0();
2576 dbregs->dr[1] = rdr1();
2577 dbregs->dr[2] = rdr2();
2578 dbregs->dr[3] = rdr3();
2579 dbregs->dr[4] = rdr4();
2580 dbregs->dr[5] = rdr5();
2581 dbregs->dr[6] = rdr6();
2582 dbregs->dr[7] = rdr7();
2583 } else {
2584 pcb = td->td_pcb;
2585 dbregs->dr[0] = pcb->pcb_dr0;
2586 dbregs->dr[1] = pcb->pcb_dr1;
2587 dbregs->dr[2] = pcb->pcb_dr2;
2588 dbregs->dr[3] = pcb->pcb_dr3;
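		/* dr4 and dr5 are reserved on the i386; report them as 0. */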
2589 dbregs->dr[4] = 0;
2590 dbregs->dr[5] = 0;
2591 dbregs->dr[6] = pcb->pcb_dr6;
2592 dbregs->dr[7] = pcb->pcb_dr7;
2593 }
2594 return (0);
2595 }
2596
2597 int
2598 set_dbregs(struct thread *td, struct dbreg *dbregs)
2599 {
2600 struct pcb *pcb;
2601 int i;
2602 u_int32_t mask1, mask2;
2603
2604 if (td == NULL) {
2605 load_dr0(dbregs->dr[0]);
2606 load_dr1(dbregs->dr[1]);
2607 load_dr2(dbregs->dr[2]);
2608 load_dr3(dbregs->dr[3]);
2609 load_dr4(dbregs->dr[4]);
2610 load_dr5(dbregs->dr[5]);
2611 load_dr6(dbregs->dr[6]);
2612 load_dr7(dbregs->dr[7]);
2613 } else {
2614 		/*
2615 		 * Don't let an illegal value for dr7 get set: each 2-bit R/W
2616 		 * and LEN field in its upper half is checked for the undefined
2617 		 * setting binary 10, which results in undefined behaviour and
2618 		 * can lead to an unexpected TRCTRAP.
2619 		 */
2620 for (i = 0, mask1 = 0x3<<16, mask2 = 0x2<<16; i < 8;
2621 i++, mask1 <<= 2, mask2 <<= 2)
2622 if ((dbregs->dr[7] & mask1) == mask2)
2623 return (EINVAL);
2624
2625 pcb = td->td_pcb;
2626
2627 /*
2628 * Don't let a process set a breakpoint that is not within the
2629 * process's address space. If a process could do this, it
2630 * could halt the system by setting a breakpoint in the kernel
2631 * (if ddb was enabled). Thus, we need to check to make sure
2632 * that no breakpoints are being enabled for addresses outside
2633 		 * the process's address space, unless, perhaps, we were called by
2634 * uid 0.
2635 *
2636 * XXX - what about when the watched area of the user's
2637 * address space is written into from within the kernel
2638 * ... wouldn't that still cause a breakpoint to be generated
2639 * from within kernel mode?
2640 */
2641
2642 if (suser(td) != 0) {
2643 if (dbregs->dr[7] & 0x3) {
2644 /* dr0 is enabled */
2645 if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS)
2646 return (EINVAL);
2647 }
2648
2649 if (dbregs->dr[7] & (0x3<<2)) {
2650 /* dr1 is enabled */
2651 if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS)
2652 return (EINVAL);
2653 }
2654
2655 if (dbregs->dr[7] & (0x3<<4)) {
2656 /* dr2 is enabled */
2657 if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS)
2658 return (EINVAL);
2659 }
2660
2661 if (dbregs->dr[7] & (0x3<<6)) {
2662 /* dr3 is enabled */
2663 if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS)
2664 return (EINVAL);
2665 }
2666 }
2667
2668 pcb->pcb_dr0 = dbregs->dr[0];
2669 pcb->pcb_dr1 = dbregs->dr[1];
2670 pcb->pcb_dr2 = dbregs->dr[2];
2671 pcb->pcb_dr3 = dbregs->dr[3];
2672 pcb->pcb_dr6 = dbregs->dr[6];
2673 pcb->pcb_dr7 = dbregs->dr[7];
2674
2675 pcb->pcb_flags |= PCB_DBREGS;
2676 }
2677
2678 return (0);
2679 }
2680
2681 /*
2682 * Return > 0 if a hardware breakpoint has been hit, and the
2683 * breakpoint was in user space. Return 0, otherwise.
2684 */
2685 int
2686 user_dbreg_trap(void)
2687 {
2688 u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */
2689 u_int32_t bp; /* breakpoint bits extracted from dr6 */
2690 int nbp; /* number of breakpoints that triggered */
2691 caddr_t addr[4]; /* breakpoint addresses */
2692 int i;
2693
2694 dr7 = rdr7();
2695 if ((dr7 & 0x000000ff) == 0) {
2696 /*
2697 		 * none of the L0-L3/G0-G3 enable bits in the dr7
2698 		 * register are set, thus the trap couldn't have been
2699 		 * caused by the hardware debug registers
2700 */
2701 return 0;
2702 }
2703
2704 nbp = 0;
2705 dr6 = rdr6();
2706 bp = dr6 & 0x0000000f;
2707
2708 if (!bp) {
2709 /*
2710 		 * None of the breakpoint bits are set, meaning this
2711 		 * trap was not caused by any of the debug registers
2712 */
2713 return 0;
2714 }
2715
2716 	/*
2717 	 * at least one of the breakpoints was hit; check to see which
2718 	 * ones, and whether any of them are user space addresses
2719 	 */
2720
2721 if (bp & 0x01) {
2722 addr[nbp++] = (caddr_t)rdr0();
2723 }
2724 if (bp & 0x02) {
2725 addr[nbp++] = (caddr_t)rdr1();
2726 }
2727 if (bp & 0x04) {
2728 addr[nbp++] = (caddr_t)rdr2();
2729 }
2730 if (bp & 0x08) {
2731 addr[nbp++] = (caddr_t)rdr3();
2732 }
2733
2734 	for (i = 0; i < nbp; i++) {
2735 		if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) {
2737 /*
2738 * addr[i] is in user space
2739 */
2740 return nbp;
2741 }
2742 }
2743
2744 /*
2745 * None of the breakpoints are in user space.
2746 */
2747 return 0;
2748 }
2749
2750 #ifdef KDB
2751
2752 /*
2753 * Provide inb() and outb() as functions. They are normally only
2754 * available as macros calling inlined functions, thus cannot be
2755 * called from the debugger.
2756 *
2757 * The actual code is stolen from <machine/cpufunc.h>, and de-inlined.
2758 */
2759
2760 #undef inb
2761 #undef outb
2762
2763 /* silence compiler warnings */
2764 u_char inb(u_int);
2765 void outb(u_int, u_char);
2766
2767 u_char
2768 inb(u_int port)
2769 {
2770 u_char data;
2771 /*
2772 * We use %%dx and not %1 here because i/o is done at %dx and not at
2773 * %edx, while gcc generates inferior code (movw instead of movl)
2774 * if we tell it to load (u_short) port.
2775 */
2776 __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
2777 return (data);
2778 }
2779
2780 void
2781 outb(u_int port, u_char data)
2782 {
2783 u_char al;
2784 /*
2785 * Use an unnecessary assignment to help gcc's register allocator.
2786 	 * This makes a large difference for gcc-1.40 and a tiny difference
2787 * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for
2788 * best results. gcc-2.6.0 can't handle this.
2789 */
2790 al = data;
2791 __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
2792 }
2793
2794 #endif /* KDB */