FreeBSD/Linux Kernel Cross Reference
sys/i386/isa/npx.c
1 /*-
2 * Copyright (c) 1990 William Jolitz.
3 * Copyright (c) 1991 The Regents of the University of California.
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 4. Neither the name of the University nor the names of its contributors
15 * may be used to endorse or promote products derived from this software
16 * without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 *
30 * from: @(#)npx.c 7.2 (Berkeley) 5/12/91
31 */
32
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35
36 #include "opt_cpu.h"
37 #include "opt_isa.h"
38 #include "opt_npx.h"
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/bus.h>
43 #include <sys/kernel.h>
44 #include <sys/lock.h>
45 #include <sys/malloc.h>
46 #include <sys/module.h>
47 #include <sys/mutex.h>
48 #include <sys/mutex.h>
49 #include <sys/proc.h>
50 #include <sys/smp.h>
51 #include <sys/sysctl.h>
52 #include <machine/bus.h>
53 #include <sys/rman.h>
54 #ifdef NPX_DEBUG
55 #include <sys/syslog.h>
56 #endif
57 #include <sys/signalvar.h>
58
59 #include <machine/asmacros.h>
60 #include <machine/cputypes.h>
61 #include <machine/frame.h>
62 #include <machine/md_var.h>
63 #include <machine/pcb.h>
64 #include <machine/psl.h>
65 #include <machine/resource.h>
66 #include <machine/specialreg.h>
67 #include <machine/segments.h>
68 #include <machine/ucontext.h>
69
70 #include <machine/intr_machdep.h>
71 #ifdef XEN
72 #include <machine/xen/xen-os.h>
73 #include <xen/hypervisor.h>
74 #endif
75
76 #ifdef DEV_ISA
77 #include <isa/isavar.h>
78 #endif
79
80 #if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
81 #define CPU_ENABLE_SSE
82 #endif
83
84 /*
85 * 387 and 287 Numeric Coprocessor Extension (NPX) Driver.
86 */
87
88 #if defined(__GNUCLIKE_ASM) && !defined(lint)
89
90 #define fldcw(cw) __asm __volatile("fldcw %0" : : "m" (cw))
91 #define fnclex() __asm __volatile("fnclex")
92 #define fninit() __asm __volatile("fninit")
93 #define fnsave(addr) __asm __volatile("fnsave %0" : "=m" (*(addr)))
94 #define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr)))
95 #define fnstsw(addr) __asm __volatile("fnstsw %0" : "=am" (*(addr)))
96 #define fp_divide_by_0() __asm __volatile( \
97 "fldz; fld1; fdiv %st,%st(1); fnop")
98 #define frstor(addr) __asm __volatile("frstor %0" : : "m" (*(addr)))
99 #ifdef CPU_ENABLE_SSE
100 #define fxrstor(addr) __asm __volatile("fxrstor %0" : : "m" (*(addr)))
101 #define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr)))
102 #define stmxcsr(addr) __asm __volatile("stmxcsr %0" : : "m" (*(addr)))
103 #endif
104 #else /* !(__GNUCLIKE_ASM && !lint) */
105
106 void fldcw(u_short cw);
107 void fnclex(void);
108 void fninit(void);
109 void fnsave(caddr_t addr);
110 void fnstcw(caddr_t addr);
111 void fnstsw(caddr_t addr);
112 void fp_divide_by_0(void);
113 void frstor(caddr_t addr);
114 #ifdef CPU_ENABLE_SSE
115 void fxsave(caddr_t addr);
116 void fxrstor(caddr_t addr);
117 void stmxcsr(u_int *csr);
118 #endif
119
120 #endif /* __GNUCLIKE_ASM && !lint */
121
122 #ifdef XEN
123 #define start_emulating() (HYPERVISOR_fpu_taskswitch(1))
124 #define stop_emulating() (HYPERVISOR_fpu_taskswitch(0))
125 #else
126 #define start_emulating() load_cr0(rcr0() | CR0_TS)
127 #define stop_emulating() clts()
128 #endif
129
130 #ifdef CPU_ENABLE_SSE
131 #define GET_FPU_CW(thread) \
132 (cpu_fxsr ? \
133 (thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_cw : \
134 (thread)->td_pcb->pcb_save->sv_87.sv_env.en_cw)
135 #define GET_FPU_SW(thread) \
136 (cpu_fxsr ? \
137 (thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_sw : \
138 (thread)->td_pcb->pcb_save->sv_87.sv_env.en_sw)
139 #define SET_FPU_CW(savefpu, value) do { \
140 if (cpu_fxsr) \
141 (savefpu)->sv_xmm.sv_env.en_cw = (value); \
142 else \
143 (savefpu)->sv_87.sv_env.en_cw = (value); \
144 } while (0)
145 #else /* CPU_ENABLE_SSE */
146 #define GET_FPU_CW(thread) \
147 (thread->td_pcb->pcb_save->sv_87.sv_env.en_cw)
148 #define GET_FPU_SW(thread) \
149 (thread->td_pcb->pcb_save->sv_87.sv_env.en_sw)
150 #define SET_FPU_CW(savefpu, value) \
151 (savefpu)->sv_87.sv_env.en_cw = (value)
152 #endif /* CPU_ENABLE_SSE */
153
154 typedef u_char bool_t;
155
156 #ifdef CPU_ENABLE_SSE
157 static void fpu_clean_state(void);
158 #endif
159
160 static void fpusave(union savefpu *);
161 static void fpurstor(union savefpu *);
162 static int npx_attach(device_t dev);
163 static void npx_identify(driver_t *driver, device_t parent);
164 static int npx_probe(device_t dev);
165
166 int hw_float;
167
168 SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD,
169 &hw_float, 0, "Floating point instructions executed in hardware");
170
171 static volatile u_int npx_traps_while_probing;
172 static union savefpu npx_initialstate;
173
/*
 * Minimal trap handler temporarily installed on the x87 fault vector
 * (IDT_MF) by npx_probe(): it counts the trap in
 * npx_traps_while_probing, clears the FPU exception state with fnclex,
 * and returns.  The "ss" byte is the stack-segment override prefix
 * applied to the following incl (NOTE(review): presumably so the
 * increment works before the data segment is trusted this early —
 * confirm against boot-time segment setup).
 */
alias_for_inthand_t probetrap;
__asm(" \n\
	.text \n\
	.p2align 2,0x90 \n\
	.type " __XSTRING(CNAME(probetrap)) ",@function \n\
" __XSTRING(CNAME(probetrap)) ": \n\
	ss \n\
	incl " __XSTRING(CNAME(npx_traps_while_probing)) " \n\
	fnclex \n\
	iret \n\
");
185
186 /*
187 * Identify routine. Create a connection point on our parent for probing.
188 */
189 static void
190 npx_identify(driver, parent)
191 driver_t *driver;
192 device_t parent;
193 {
194 device_t child;
195
196 child = BUS_ADD_CHILD(parent, 0, "npx", 0);
197 if (child == NULL)
198 panic("npx_identify");
199 }
200
201 /*
202 * Probe routine. Set flags to tell npxattach() what to do. Set up an
203 * interrupt handler if npx needs to use interrupts.
204 */
/*
 * Probe for an FPU.  Returns 0 when hardware floating point is present
 * and ENXIO otherwise; sets the global hw_float as a side effect.  On
 * CPUs that advertise CPUID_FPU this is a trivial success; otherwise we
 * poke the hardware directly, temporarily redirecting the x87 fault
 * vector to probetrap so probe-induced traps only bump a counter.
 */
static int
npx_probe(device_t dev)
{
	struct gate_descriptor save_idt_npxtrap;
	u_short control, status;

	device_set_desc(dev, "math processor");

	/*
	 * Modern CPUs all have an FPU that uses the INT16 interface
	 * and provide a simple way to verify that, so handle the
	 * common case right away.
	 */
	if (cpu_feature & CPUID_FPU) {
		hw_float = 1;
		device_quiet(dev);
		return (0);
	}

	/* Save the real #MF gate and install the counting probe handler. */
	save_idt_npxtrap = idt[IDT_MF];
	setidt(IDT_MF, probetrap, SDT_SYS386TGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));

	/*
	 * Don't trap while we're probing.
	 */
	stop_emulating();

	/*
	 * Finish resetting the coprocessor, if any.  If there is an error
	 * pending, then we may get a bogus IRQ13, but npx_intr() will handle
	 * it OK.  Bogus halts have never been observed, but we enabled
	 * IRQ13 and cleared the BUSY# latch early to handle them anyway.
	 */
	fninit();

	/*
	 * Don't use fwait here because it might hang.
	 * Don't use fnop here because it usually hangs if there is no FPU.
	 */
	DELAY(1000);		/* wait for any IRQ13 */
#ifdef DIAGNOSTIC
	if (npx_traps_while_probing != 0)
		printf("fninit caused %u bogus npx trap(s)\n",
		    npx_traps_while_probing);
#endif
	/*
	 * Check for a status of mostly zero.
	 */
	status = 0x5a5a;
	fnstsw(&status);
	if ((status & 0xb8ff) == 0) {
		/*
		 * Good, now check for a proper control word.
		 */
		control = 0x5a5a;
		fnstcw(&control);
		if ((control & 0x1f3f) == 0x033f) {
			/*
			 * We have an npx, now divide by 0 to see if exception
			 * 16 works.
			 */
			control &= ~(1 << 2);	/* enable divide by 0 trap */
			fldcw(control);
#ifdef FPU_ERROR_BROKEN
			/*
			 * FPU error signal doesn't work on some CPU
			 * accelerator board.
			 *
			 * NOTE(review): this early return skips the
			 * "cleanup:" label, leaving probetrap installed
			 * on IDT_MF — verify this is intentional.
			 */
			hw_float = 1;
			return (0);
#endif
			npx_traps_while_probing = 0;
			fp_divide_by_0();
			if (npx_traps_while_probing != 0) {
				/*
				 * Good, exception 16 works.
				 */
				hw_float = 1;
				goto cleanup;
			}
			device_printf(dev,
	"FPU does not use exception 16 for error reporting\n");
			goto cleanup;
		}
	}

	/*
	 * Probe failed.  Floating point simply won't work.
	 * Notify user and disable FPU/MMX/SSE instruction execution.
	 * (Sets CR0_EM|CR0_MP via smsw/lmsw so FP opcodes trap.)
	 */
	device_printf(dev, "WARNING: no FPU!\n");
	__asm __volatile("smsw %%ax; orb %0,%%al; lmsw %%ax" : :
	    "n" (CR0_EM | CR0_MP) : "ax");

cleanup:
	/* Restore the real x87 fault handler. */
	idt[IDT_MF] = save_idt_npxtrap;
	return (hw_float ? 0 : ENXIO);
}
304
305 /*
306 * Attach routine - announce which it is, and wire into system
307 */
/*
 * Attach routine.  Initializes the FPU via npxinit(), then captures the
 * post-init hardware state into npx_initialstate; this snapshot is what
 * npxdna()/npxgetregs() later use as the pristine per-thread FPU state.
 * For fxsr-capable CPUs it also derives cpu_mxcsr_mask from the saved
 * image (0xFFBF is the documented default when the saved mask is 0).
 */
static int
npx_attach(device_t dev)
{

	npxinit();
	critical_enter();
	/* Briefly own the FPU to snapshot its freshly initialized state. */
	stop_emulating();
	fpusave(&npx_initialstate);
	start_emulating();
#ifdef CPU_ENABLE_SSE
	if (cpu_fxsr) {
		if (npx_initialstate.sv_xmm.sv_env.en_mxcsr_mask)
			cpu_mxcsr_mask =
			    npx_initialstate.sv_xmm.sv_env.en_mxcsr_mask;
		else
			cpu_mxcsr_mask = 0xFFBF;
		/* Scrub register areas so no boot-time junk leaks to users. */
		bzero(npx_initialstate.sv_xmm.sv_fp,
		    sizeof(npx_initialstate.sv_xmm.sv_fp));
		bzero(npx_initialstate.sv_xmm.sv_xmm,
		    sizeof(npx_initialstate.sv_xmm.sv_xmm));
		/* XXX might need even more zeroing. */
	} else
#endif
		bzero(npx_initialstate.sv_87.sv_ac,
		    sizeof(npx_initialstate.sv_87.sv_ac));
	critical_exit();

	return (0);
}
337
338 /*
339 * Initialize floating point unit.
340 */
/*
 * Initialize floating point unit: reset the FPU and load
 * __INITIAL_NPXCW__ as the control word.  Leaves emulation enabled
 * (start_emulating() at the end), so the first FP use faults into
 * npxdna().  No-op when no hardware FPU was detected.
 */
void
npxinit(void)
{
	static union savefpu dummy;
	register_t saveintr;
	u_short control;

	if (!hw_float)
		return;
	/*
	 * fninit has the same h/w bugs as fnsave.  Use the detoxified
	 * fnsave to throw away any junk in the fpu.  npxsave() initializes
	 * the fpu and sets fpcurthread = NULL as important side effects.
	 *
	 * It is too early for critical_enter() to work on AP.
	 */
	saveintr = intr_disable();
	npxsave(&dummy);
	stop_emulating();
#ifdef CPU_ENABLE_SSE
	/* XXX npxsave() doesn't actually initialize the fpu in the SSE case. */
	if (cpu_fxsr)
		fninit();
#endif
	control = __INITIAL_NPXCW__;
	fldcw(control);
	start_emulating();
	intr_restore(saveintr);
}
370
371 /*
372 * Free coprocessor (if we have it).
373 */
374 void
375 npxexit(td)
376 struct thread *td;
377 {
378
379 critical_enter();
380 if (curthread == PCPU_GET(fpcurthread))
381 npxsave(curpcb->pcb_save);
382 critical_exit();
383 #ifdef NPX_DEBUG
384 if (hw_float) {
385 u_int masked_exceptions;
386
387 masked_exceptions = GET_FPU_CW(td) & GET_FPU_SW(td) & 0x7f;
388 /*
389 * Log exceptions that would have trapped with the old
390 * control word (overflow, divide by 0, and invalid operand).
391 */
392 if (masked_exceptions & 0x0d)
393 log(LOG_ERR,
394 "pid %d (%s) exited with masked floating point exceptions 0x%02x\n",
395 td->td_proc->p_pid, td->td_proc->p_comm,
396 masked_exceptions);
397 }
398 #endif
399 }
400
401 int
402 npxformat()
403 {
404
405 if (!hw_float)
406 return (_MC_FPFMT_NODEV);
407 #ifdef CPU_ENABLE_SSE
408 if (cpu_fxsr)
409 return (_MC_FPFMT_XMM);
410 #endif
411 return (_MC_FPFMT_387);
412 }
413
414 /*
415 * The following mechanism is used to ensure that the FPE_... value
416 * that is passed as a trapcode to the signal handler of the user
417 * process does not have more than one bit set.
418 *
419 * Multiple bits may be set if the user process modifies the control
420 * word while a status word bit is already set. While this is a sign
421 * of bad coding, we have no choice other than to narrow them down to one
422 * bit, since we must not send a trapcode that is not exactly one of
423 * the FPE_ macros.
424 *
425 * The mechanism has a static table with 127 entries. Each combination
426 * of the 7 FPU status word exception bits directly translates to a
427 * position in this table, where a single FPE_... value is stored.
428 * This FPE_... value stored there is considered the "most important"
429 * of the exception bits and will be sent as the signal code. The
430 * precedence of the bits is based upon Intel Document "Numerical
431 * Applications", Chapter "Special Computational Situations".
432 *
433 * The macro to choose one of these values does these steps: 1) Throw
434 * away status word bits that cannot be masked. 2) Throw away the bits
435 * currently masked in the control word, assuming the user isn't
436 * interested in them anymore. 3) Reinsert status word bit 7 (stack
437 * fault) if it is set, which cannot be masked but must be preserved.
438 * 4) Use the remaining bits to point into the trapcode table.
439 *
440 * The 6 maskable bits in order of their preference, as stated in the
441 * above referenced Intel manual:
442 * 1 Invalid operation (FP_X_INV)
443 * 1a Stack underflow
444 * 1b Stack overflow
445 * 1c Operand of unsupported format
446 * 1d SNaN operand.
447 * 2 QNaN operand (not an exception, irrelevant here)
448 * 3 Any other invalid-operation not mentioned above or zero divide
449 * (FP_X_INV, FP_X_DZ)
450 * 4 Denormal operand (FP_X_DNML)
451 * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL)
452 * 6 Inexact result (FP_X_IMP)
453 */
/*
 * Exception-bits -> FPE_* si_code lookup table described in the comment
 * above.  Index bits: 0x01 INV, 0x02 DNML, 0x04 DZ, 0x08 OFL, 0x10 UFL,
 * 0x20 IMP, 0x40 STK (stack fault).  Indexed by npxtrap_x87()/
 * npxtrap_sse() with the masked-out status bits already stripped.
 */
static char fpetable[128] = {
	0,
	FPE_FLTINV,	/*  1 - INV */
	FPE_FLTUND,	/*  2 - DNML */
	FPE_FLTINV,	/*  3 - INV | DNML */
	FPE_FLTDIV,	/*  4 - DZ */
	FPE_FLTINV,	/*  5 - INV | DZ */
	FPE_FLTDIV,	/*  6 - DNML | DZ */
	FPE_FLTINV,	/*  7 - INV | DNML | DZ */
	FPE_FLTOVF,	/*  8 - OFL */
	FPE_FLTINV,	/*  9 - INV | OFL */
	FPE_FLTUND,	/*  A - DNML | OFL */
	FPE_FLTINV,	/*  B - INV | DNML | OFL */
	FPE_FLTDIV,	/*  C - DZ | OFL */
	FPE_FLTINV,	/*  D - INV | DZ | OFL */
	FPE_FLTDIV,	/*  E - DNML | DZ | OFL */
	FPE_FLTINV,	/*  F - INV | DNML | DZ | OFL */
	FPE_FLTUND,	/* 10 - UFL */
	FPE_FLTINV,	/* 11 - INV | UFL */
	FPE_FLTUND,	/* 12 - DNML | UFL */
	FPE_FLTINV,	/* 13 - INV | DNML | UFL */
	FPE_FLTDIV,	/* 14 - DZ | UFL */
	FPE_FLTINV,	/* 15 - INV | DZ | UFL */
	FPE_FLTDIV,	/* 16 - DNML | DZ | UFL */
	FPE_FLTINV,	/* 17 - INV | DNML | DZ | UFL */
	FPE_FLTOVF,	/* 18 - OFL | UFL */
	FPE_FLTINV,	/* 19 - INV | OFL | UFL */
	FPE_FLTUND,	/* 1A - DNML | OFL | UFL */
	FPE_FLTINV,	/* 1B - INV | DNML | OFL | UFL */
	FPE_FLTDIV,	/* 1C - DZ | OFL | UFL */
	FPE_FLTINV,	/* 1D - INV | DZ | OFL | UFL */
	FPE_FLTDIV,	/* 1E - DNML | DZ | OFL | UFL */
	FPE_FLTINV,	/* 1F - INV | DNML | DZ | OFL | UFL */
	FPE_FLTRES,	/* 20 - IMP */
	FPE_FLTINV,	/* 21 - INV | IMP */
	FPE_FLTUND,	/* 22 - DNML | IMP */
	FPE_FLTINV,	/* 23 - INV | DNML | IMP */
	FPE_FLTDIV,	/* 24 - DZ | IMP */
	FPE_FLTINV,	/* 25 - INV | DZ | IMP */
	FPE_FLTDIV,	/* 26 - DNML | DZ | IMP */
	FPE_FLTINV,	/* 27 - INV | DNML | DZ | IMP */
	FPE_FLTOVF,	/* 28 - OFL | IMP */
	FPE_FLTINV,	/* 29 - INV | OFL | IMP */
	FPE_FLTUND,	/* 2A - DNML | OFL | IMP */
	FPE_FLTINV,	/* 2B - INV | DNML | OFL | IMP */
	FPE_FLTDIV,	/* 2C - DZ | OFL | IMP */
	FPE_FLTINV,	/* 2D - INV | DZ | OFL | IMP */
	FPE_FLTDIV,	/* 2E - DNML | DZ | OFL | IMP */
	FPE_FLTINV,	/* 2F - INV | DNML | DZ | OFL | IMP */
	FPE_FLTUND,	/* 30 - UFL | IMP */
	FPE_FLTINV,	/* 31 - INV | UFL | IMP */
	FPE_FLTUND,	/* 32 - DNML | UFL | IMP */
	FPE_FLTINV,	/* 33 - INV | DNML | UFL | IMP */
	FPE_FLTDIV,	/* 34 - DZ | UFL | IMP */
	FPE_FLTINV,	/* 35 - INV | DZ | UFL | IMP */
	FPE_FLTDIV,	/* 36 - DNML | DZ | UFL | IMP */
	FPE_FLTINV,	/* 37 - INV | DNML | DZ | UFL | IMP */
	FPE_FLTOVF,	/* 38 - OFL | UFL | IMP */
	FPE_FLTINV,	/* 39 - INV | OFL | UFL | IMP */
	FPE_FLTUND,	/* 3A - DNML | OFL | UFL | IMP */
	FPE_FLTINV,	/* 3B - INV | DNML | OFL | UFL | IMP */
	FPE_FLTDIV,	/* 3C - DZ | OFL | UFL | IMP */
	FPE_FLTINV,	/* 3D - INV | DZ | OFL | UFL | IMP */
	FPE_FLTDIV,	/* 3E - DNML | DZ | OFL | UFL | IMP */
	FPE_FLTINV,	/* 3F - INV | DNML | DZ | OFL | UFL | IMP */
	FPE_FLTSUB,	/* 40 - STK */
	FPE_FLTSUB,	/* 41 - INV | STK */
	FPE_FLTUND,	/* 42 - DNML | STK */
	FPE_FLTSUB,	/* 43 - INV | DNML | STK */
	FPE_FLTDIV,	/* 44 - DZ | STK */
	FPE_FLTSUB,	/* 45 - INV | DZ | STK */
	FPE_FLTDIV,	/* 46 - DNML | DZ | STK */
	FPE_FLTSUB,	/* 47 - INV | DNML | DZ | STK */
	FPE_FLTOVF,	/* 48 - OFL | STK */
	FPE_FLTSUB,	/* 49 - INV | OFL | STK */
	FPE_FLTUND,	/* 4A - DNML | OFL | STK */
	FPE_FLTSUB,	/* 4B - INV | DNML | OFL | STK */
	FPE_FLTDIV,	/* 4C - DZ | OFL | STK */
	FPE_FLTSUB,	/* 4D - INV | DZ | OFL | STK */
	FPE_FLTDIV,	/* 4E - DNML | DZ | OFL | STK */
	FPE_FLTSUB,	/* 4F - INV | DNML | DZ | OFL | STK */
	FPE_FLTUND,	/* 50 - UFL | STK */
	FPE_FLTSUB,	/* 51 - INV | UFL | STK */
	FPE_FLTUND,	/* 52 - DNML | UFL | STK */
	FPE_FLTSUB,	/* 53 - INV | DNML | UFL | STK */
	FPE_FLTDIV,	/* 54 - DZ | UFL | STK */
	FPE_FLTSUB,	/* 55 - INV | DZ | UFL | STK */
	FPE_FLTDIV,	/* 56 - DNML | DZ | UFL | STK */
	FPE_FLTSUB,	/* 57 - INV | DNML | DZ | UFL | STK */
	FPE_FLTOVF,	/* 58 - OFL | UFL | STK */
	FPE_FLTSUB,	/* 59 - INV | OFL | UFL | STK */
	FPE_FLTUND,	/* 5A - DNML | OFL | UFL | STK */
	FPE_FLTSUB,	/* 5B - INV | DNML | OFL | UFL | STK */
	FPE_FLTDIV,	/* 5C - DZ | OFL | UFL | STK */
	FPE_FLTSUB,	/* 5D - INV | DZ | OFL | UFL | STK */
	FPE_FLTDIV,	/* 5E - DNML | DZ | OFL | UFL | STK */
	FPE_FLTSUB,	/* 5F - INV | DNML | DZ | OFL | UFL | STK */
	FPE_FLTRES,	/* 60 - IMP | STK */
	FPE_FLTSUB,	/* 61 - INV | IMP | STK */
	FPE_FLTUND,	/* 62 - DNML | IMP | STK */
	FPE_FLTSUB,	/* 63 - INV | DNML | IMP | STK */
	FPE_FLTDIV,	/* 64 - DZ | IMP | STK */
	FPE_FLTSUB,	/* 65 - INV | DZ | IMP | STK */
	FPE_FLTDIV,	/* 66 - DNML | DZ | IMP | STK */
	FPE_FLTSUB,	/* 67 - INV | DNML | DZ | IMP | STK */
	FPE_FLTOVF,	/* 68 - OFL | IMP | STK */
	FPE_FLTSUB,	/* 69 - INV | OFL | IMP | STK */
	FPE_FLTUND,	/* 6A - DNML | OFL | IMP | STK */
	FPE_FLTSUB,	/* 6B - INV | DNML | OFL | IMP | STK */
	FPE_FLTDIV,	/* 6C - DZ | OFL | IMP | STK */
	FPE_FLTSUB,	/* 6D - INV | DZ | OFL | IMP | STK */
	FPE_FLTDIV,	/* 6E - DNML | DZ | OFL | IMP | STK */
	FPE_FLTSUB,	/* 6F - INV | DNML | DZ | OFL | IMP | STK */
	FPE_FLTUND,	/* 70 - UFL | IMP | STK */
	FPE_FLTSUB,	/* 71 - INV | UFL | IMP | STK */
	FPE_FLTUND,	/* 72 - DNML | UFL | IMP | STK */
	FPE_FLTSUB,	/* 73 - INV | DNML | UFL | IMP | STK */
	FPE_FLTDIV,	/* 74 - DZ | UFL | IMP | STK */
	FPE_FLTSUB,	/* 75 - INV | DZ | UFL | IMP | STK */
	FPE_FLTDIV,	/* 76 - DNML | DZ | UFL | IMP | STK */
	FPE_FLTSUB,	/* 77 - INV | DNML | DZ | UFL | IMP | STK */
	FPE_FLTOVF,	/* 78 - OFL | UFL | IMP | STK */
	FPE_FLTSUB,	/* 79 - INV | OFL | UFL | IMP | STK */
	FPE_FLTUND,	/* 7A - DNML | OFL | UFL | IMP | STK */
	FPE_FLTSUB,	/* 7B - INV | DNML | OFL | UFL | IMP | STK */
	FPE_FLTDIV,	/* 7C - DZ | OFL | UFL | IMP | STK */
	FPE_FLTSUB,	/* 7D - INV | DZ | OFL | UFL | IMP | STK */
	FPE_FLTDIV,	/* 7E - DNML | DZ | OFL | UFL | IMP | STK */
	FPE_FLTSUB,	/* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */
};
584
585 /*
586 * Read the FP status and control words, then generate si_code value
587 * for SIGFPE. The error code chosen will be one of the
588 * FPE_... macros. It will be sent as the second argument to old
589 * BSD-style signal handlers and as "siginfo_t->si_code" (second
590 * argument) to SA_SIGINFO signal handlers.
591 *
592 * Some time ago, we cleared the x87 exceptions with FNCLEX there.
593 * Clearing exceptions was necessary mainly to avoid IRQ13 bugs. The
594 * usermode code which understands the FPU hardware enough to enable
595 * the exceptions, can also handle clearing the exception state in the
596 * handler. The only consequence of not clearing the exception is the
597 * rethrow of the SIGFPE on return from the signal handler and
598 * reexecution of the corresponding instruction.
599 *
600 * For XMM traps, the exceptions were never cleared.
601 */
602 int
603 npxtrap_x87(void)
604 {
605 u_short control, status;
606
607 if (!hw_float) {
608 printf(
609 "npxtrap_x87: fpcurthread = %p, curthread = %p, hw_float = %d\n",
610 PCPU_GET(fpcurthread), curthread, hw_float);
611 panic("npxtrap from nowhere");
612 }
613 critical_enter();
614
615 /*
616 * Interrupt handling (for another interrupt) may have pushed the
617 * state to memory. Fetch the relevant parts of the state from
618 * wherever they are.
619 */
620 if (PCPU_GET(fpcurthread) != curthread) {
621 control = GET_FPU_CW(curthread);
622 status = GET_FPU_SW(curthread);
623 } else {
624 fnstcw(&control);
625 fnstsw(&status);
626 }
627 critical_exit();
628 return (fpetable[status & ((~control & 0x3f) | 0x40)]);
629 }
630
#ifdef CPU_ENABLE_SSE
/*
 * SSE analogue of npxtrap_x87(): derive the FPE_* si_code from MXCSR.
 * The low 6 bits are the exception flags and bits 7..12 the masks, so
 * (mxcsr & (~mxcsr >> 7)) keeps only unmasked, raised exceptions.
 */
int
npxtrap_sse(void)
{
	u_int mxcsr;

	if (!hw_float) {
		printf(
	"npxtrap_sse: fpcurthread = %p, curthread = %p, hw_float = %d\n",
		    PCPU_GET(fpcurthread), curthread, hw_float);
		panic("npxtrap from nowhere");
	}
	critical_enter();
	if (PCPU_GET(fpcurthread) == curthread)
		stmxcsr(&mxcsr);
	else
		mxcsr = curthread->td_pcb->pcb_save->sv_xmm.sv_env.en_mxcsr;
	critical_exit();
	return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]);
}
#endif
652
653 /*
654 * Implement device not available (DNA) exception
655 *
656 * It would be better to switch FP context here (if curthread != fpcurthread)
657 * and not necessarily for every context switch, but it is too hard to
658 * access foreign pcb's.
659 */
660
661 static int err_count = 0;
662
/*
 * Device-not-available (DNA, #NM) handler: claim the FPU for curthread
 * and restore its context.  Returns 1 when the fault was handled (the
 * faulting FP instruction should be retried) and 0 when there is no
 * FPU.  Must not be entered while another thread's state is still live
 * in the hardware — that indicates a missed npxsave() and panics.
 */
int
npxdna(void)
{

	if (!hw_float)
		return (0);
	critical_enter();
	if (PCPU_GET(fpcurthread) == curthread) {
		/* Spurious DNA: we already own the FPU; just clear CR0_TS. */
		printf("npxdna: fpcurthread == curthread %d times\n",
		    ++err_count);
		stop_emulating();
		critical_exit();
		return (1);
	}
	if (PCPU_GET(fpcurthread) != NULL) {
		printf("npxdna: fpcurthread = %p (%d), curthread = %p (%d)\n",
		    PCPU_GET(fpcurthread),
		    PCPU_GET(fpcurthread)->td_proc->p_pid,
		    curthread, curthread->td_proc->p_pid);
		panic("npxdna");
	}
	stop_emulating();
	/*
	 * Record new context early in case frstor causes an IRQ13.
	 */
	PCPU_SET(fpcurthread, curthread);

#ifdef CPU_ENABLE_SSE
	if (cpu_fxsr)
		fpu_clean_state();
#endif

	if ((curpcb->pcb_flags & PCB_NPXINITDONE) == 0) {
		/*
		 * This is the first time this thread has used the FPU or
		 * the PCB doesn't contain a clean FPU state.  Explicitly
		 * load an initial state.
		 */
		fpurstor(&npx_initialstate);
		if (curpcb->pcb_initial_npxcw != __INITIAL_NPXCW__)
			fldcw(curpcb->pcb_initial_npxcw);
		curpcb->pcb_flags |= PCB_NPXINITDONE;
		if (PCB_USER_FPU(curpcb))
			curpcb->pcb_flags |= PCB_NPXUSERINITDONE;
	} else {
		/*
		 * The following fpurstor() may cause an IRQ13 when the
		 * state being restored has a pending error.  The error will
		 * appear to have been triggered by the current (npx) user
		 * instruction even when that instruction is a no-wait
		 * instruction that should not trigger an error (e.g.,
		 * fnclex).  On at least one 486 system all of the no-wait
		 * instructions are broken the same as frstor, so our
		 * treatment does not amplify the breakage.  On at least
		 * one 386/Cyrix 387 system, fnclex works correctly while
		 * frstor and fnsave are broken, so our treatment breaks
		 * fnclex if it is the first FPU instruction after a context
		 * switch.
		 */
		fpurstor(curpcb->pcb_save);
	}
	critical_exit();

	return (1);
}
728
729 /*
730 * Wrapper for fnsave instruction, partly to handle hardware bugs. When npx
731 * exceptions are reported via IRQ13, spurious IRQ13's may be triggered by
732 * no-wait npx instructions. See the Intel application note AP-578 for
733 * details. This doesn't cause any additional complications here. IRQ13's
734 * are inherently asynchronous unless the CPU is frozen to deliver them --
735 * one that started in userland may be delivered many instructions later,
736 * after the process has entered the kernel. It may even be delivered after
737 * the fnsave here completes. A spurious IRQ13 for the fnsave is handled in
738 * the same way as a very-late-arriving non-spurious IRQ13 from user mode:
739 * it is normally ignored at first because we set fpcurthread to NULL; it is
740 * normally retriggered in npxdna() after return to user mode.
741 *
742 * npxsave() must be called with interrupts disabled, so that it clears
743 * fpcurthread atomically with saving the state. We require callers to do the
744 * disabling, since most callers need to disable interrupts anyway to call
745 * npxsave() atomically with checking fpcurthread.
746 *
747 * A previous version of npxsave() went to great lengths to execute fnsave
748 * with interrupts enabled in case executing it froze the CPU. This case
749 * can't happen, at least for Intel CPU/NPX's. Spurious IRQ13's don't imply
750 * spurious freezes.
751 */
752 void
753 npxsave(addr)
754 union savefpu *addr;
755 {
756
757 stop_emulating();
758 fpusave(addr);
759
760 start_emulating();
761 PCPU_SET(fpcurthread, NULL);
762 }
763
/*
 * Discard the current thread's live FPU state without saving it.
 * Caller must be in a critical section and be the FPU owner; clears
 * PCB_NPXINITDONE so the next use reloads a clean state via npxdna().
 */
void
npxdrop()
{
	struct thread *td;

	/*
	 * Discard pending exceptions in the !cpu_fxsr case so that unmasked
	 * ones don't cause a panic on the next frstor.
	 * (With CPU_ENABLE_SSE, the #ifdef makes the fnclex conditional on
	 * !cpu_fxsr; without it, fnclex runs unconditionally.)
	 */
#ifdef CPU_ENABLE_SSE
	if (!cpu_fxsr)
#endif
		fnclex();

	td = PCPU_GET(fpcurthread);
	KASSERT(td == curthread, ("fpudrop: fpcurthread != curthread"));
	CRITICAL_ASSERT(td);
	PCPU_SET(fpcurthread, NULL);
	td->td_pcb->pcb_flags &= ~PCB_NPXINITDONE;
	start_emulating();
}
785
786 /*
787 * Get the user state of the FPU into pcb->pcb_user_save without
788 * dropping ownership (if possible). It returns the FPU ownership
789 * status.
790 */
/*
 * Get the user state of the FPU into pcb->pcb_user_save without
 * dropping ownership (if possible).  It returns the FPU ownership
 * status: _MC_FPOWNED_NONE (no FPU), _MC_FPOWNED_FPU (state was live in
 * hardware) or _MC_FPOWNED_PCB (state taken from / synthesized in the
 * pcb).
 */
int
npxgetregs(struct thread *td)
{
	struct pcb *pcb;

	if (!hw_float)
		return (_MC_FPOWNED_NONE);

	pcb = td->td_pcb;
	if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) {
		/* Thread never used the FPU: hand back the pristine state. */
		bcopy(&npx_initialstate, &pcb->pcb_user_save,
		    sizeof(npx_initialstate));
		SET_FPU_CW(&pcb->pcb_user_save, pcb->pcb_initial_npxcw);
		npxuserinited(td);
		return (_MC_FPOWNED_PCB);
	}
	critical_enter();
	if (td == PCPU_GET(fpcurthread)) {
		fpusave(&pcb->pcb_user_save);
#ifdef CPU_ENABLE_SSE
		if (!cpu_fxsr)
#endif
			/*
			 * fnsave initializes the FPU and destroys whatever
			 * context it contains.  Make sure the FPU owner
			 * starts with a clean state next time.
			 */
			npxdrop();
		critical_exit();
		return (_MC_FPOWNED_FPU);
	} else {
		critical_exit();
		return (_MC_FPOWNED_PCB);
	}
}
826
827 void
828 npxuserinited(struct thread *td)
829 {
830 struct pcb *pcb;
831
832 pcb = td->td_pcb;
833 if (PCB_USER_FPU(pcb))
834 pcb->pcb_flags |= PCB_NPXINITDONE;
835 pcb->pcb_flags |= PCB_NPXUSERINITDONE;
836 }
837
838
/*
 * Install *addr as the thread's user FPU state.  If the thread owns the
 * hardware and is in user-FPU mode, load the state directly; otherwise
 * just stash it in pcb_user_save for the next npxdna().  No-op without
 * an FPU.
 */
void
npxsetregs(struct thread *td, union savefpu *addr)
{
	struct pcb *pcb;

	if (!hw_float)
		return;

	pcb = td->td_pcb;
	critical_enter();
	if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) {
#ifdef CPU_ENABLE_SSE
		if (!cpu_fxsr)
#endif
			fnclex();	/* As in npxdrop(). */
		/*
		 * Unaligned source: bounce through pcb_user_save
		 * (NOTE(review): presumably because fxrstor requires a
		 * 16-byte-aligned operand — confirm).
		 */
		if (((uintptr_t)addr & 0xf) != 0) {
			bcopy(addr, &pcb->pcb_user_save, sizeof(*addr));
			fpurstor(&pcb->pcb_user_save);
		} else
			fpurstor(addr);
		critical_exit();
		pcb->pcb_flags |= PCB_NPXUSERINITDONE | PCB_NPXINITDONE;
	} else {
		critical_exit();
		bcopy(addr, &pcb->pcb_user_save, sizeof(*addr));
		npxuserinited(td);
	}
}
867
/*
 * Save the live FPU state into *addr using fxsave when available,
 * falling back to the classic fnsave.
 */
static void
fpusave(union savefpu *addr)
{

#ifdef CPU_ENABLE_SSE
	if (cpu_fxsr) {
		fxsave(addr);
		return;
	}
#endif
	fnsave(addr);
}
880
881 #ifdef CPU_ENABLE_SSE
882 /*
883 * On AuthenticAMD processors, the fxrstor instruction does not restore
884 * the x87's stored last instruction pointer, last data pointer, and last
885 * opcode values, except in the rare case in which the exception summary
886 * (ES) bit in the x87 status word is set to 1.
887 *
888 * In order to avoid leaking this information across processes, we clean
889 * these values by performing a dummy load before executing fxrstor().
890 */
/*
 * Scrub the x87 "last instruction/data pointer/opcode" values before an
 * fxrstor (see the AMD-specific rationale in the comment above) by
 * executing a dummy load.
 */
static void
fpu_clean_state(void)
{
	static float dummy_variable = 0.0;
	u_short status;

	/*
	 * Clear the ES bit in the x87 status word if it is currently
	 * set, in order to avoid causing a fault in the upcoming load.
	 */
	fnstsw(&status);
	if (status & 0x80)
		fnclex();

	/*
	 * Load the dummy variable into the x87 stack.  This mangles
	 * the x87 stack, but we don't care since we're about to call
	 * fxrstor() anyway.  (ffree %st(7) guarantees a free slot for
	 * the flds.)
	 */
	__asm __volatile("ffree %%st(7); flds %0" : : "m" (dummy_variable));
}
912 #endif /* CPU_ENABLE_SSE */
913
/*
 * Restore the FPU state from *addr, preferring fxrstor when the CPU
 * supports it and using frstor otherwise.
 */
static void
fpurstor(union savefpu *addr)
{

#ifdef CPU_ENABLE_SSE
	if (cpu_fxsr) {
		fxrstor(addr);
		return;
	}
#endif
	frstor(addr);
}
926
/*
 * newbus method table for the npx device; npx_identify() creates the
 * child on the parent bus, everything else is generic.
 */
static device_method_t npx_methods[] = {
	/* Device interface */
	DEVMETHOD(device_identify,	npx_identify),
	DEVMETHOD(device_probe,		npx_probe),
	DEVMETHOD(device_attach,	npx_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	bus_generic_suspend),
	DEVMETHOD(device_resume,	bus_generic_resume),

	{ 0, 0 }
};
939
940 static driver_t npx_driver = {
941 "npx",
942 npx_methods,
943 1, /* no softc */
944 };
945
946 static devclass_t npx_devclass;
947
948 /*
949 * We prefer to attach to the root nexus so that the usual case (exception 16)
950 * doesn't describe the processor as being `on isa'.
951 */
952 DRIVER_MODULE(npx, nexus, npx_driver, npx_devclass, 0, 0);
953
954 #ifdef DEV_ISA
955 /*
956 * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI.
957 */
958 static struct isa_pnp_id npxisa_ids[] = {
959 { 0x040cd041, "Legacy ISA coprocessor support" }, /* PNP0C04 */
960 { 0 }
961 };
962
963 static int
964 npxisa_probe(device_t dev)
965 {
966 int result;
967 if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, npxisa_ids)) <= 0) {
968 device_quiet(dev);
969 }
970 return(result);
971 }
972
/*
 * Attach for the legacy ISA placeholder.  The real FPU work is done by
 * the nexus-attached npx device; this node only claims the PNP0C04
 * resources, so simply succeed.
 */
static int
npxisa_attach(device_t dev)
{
	return (0);
}
978
979 static device_method_t npxisa_methods[] = {
980 /* Device interface */
981 DEVMETHOD(device_probe, npxisa_probe),
982 DEVMETHOD(device_attach, npxisa_attach),
983 DEVMETHOD(device_detach, bus_generic_detach),
984 DEVMETHOD(device_shutdown, bus_generic_shutdown),
985 DEVMETHOD(device_suspend, bus_generic_suspend),
986 DEVMETHOD(device_resume, bus_generic_resume),
987
988 { 0, 0 }
989 };
990
991 static driver_t npxisa_driver = {
992 "npxisa",
993 npxisa_methods,
994 1, /* no softc */
995 };
996
997 static devclass_t npxisa_devclass;
998
999 DRIVER_MODULE(npxisa, isa, npxisa_driver, npxisa_devclass, 0, 0);
1000 #ifndef PC98
1001 DRIVER_MODULE(npxisa, acpi, npxisa_driver, npxisa_devclass, 0, 0);
1002 #endif
1003 #endif /* DEV_ISA */
1004
1005 static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx",
1006 "Kernel contexts for FPU state");
1007
1008 #define XSAVE_AREA_ALIGN 64
1009
1010 #define FPU_KERN_CTX_NPXINITDONE 0x01
1011
/*
 * Kernel FPU context handed out by fpu_kern_alloc_ctx().  hwstate1 is
 * deliberately unaligned raw space; fpu_kern_ctx_savefpu() rounds it up
 * to XSAVE_AREA_ALIGN, which is why the allocator over-allocates by
 * that amount.
 */
struct fpu_kern_ctx {
	union savefpu *prev;	/* pcb_save to restore at fpu_kern_leave() */
	uint32_t flags;		/* FPU_KERN_CTX_* (NPXINITDONE snapshot) */
	char hwstate1[];	/* backing store for the aligned savefpu */
};
1017
1018 struct fpu_kern_ctx *
1019 fpu_kern_alloc_ctx(u_int flags)
1020 {
1021 struct fpu_kern_ctx *res;
1022 size_t sz;
1023
1024 sz = sizeof(struct fpu_kern_ctx) + XSAVE_AREA_ALIGN +
1025 sizeof(union savefpu);
1026 res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ?
1027 M_NOWAIT : M_WAITOK) | M_ZERO);
1028 return (res);
1029 }
1030
/*
 * Release a context obtained from fpu_kern_alloc_ctx().  The saved FPU
 * image is not scrubbed before freeing.
 */
void
fpu_kern_free_ctx(struct fpu_kern_ctx *ctx)
{

	/* XXXKIB clear the memory ? */
	free(ctx, M_FPUKERN_CTX);
}
1038
1039 static union savefpu *
1040 fpu_kern_ctx_savefpu(struct fpu_kern_ctx *ctx)
1041 {
1042 vm_offset_t p;
1043
1044 p = (vm_offset_t)&ctx->hwstate1;
1045 p = roundup2(p, XSAVE_AREA_ALIGN);
1046 return ((union savefpu *)p);
1047 }
1048
/*
 * Switch td onto the kernel FPU context `ctx`, remembering the previous
 * save area and whether the user state was initialized so
 * fpu_kern_leave() can restore both.  `flags` is unused here.  Always
 * returns 0.  NOTE(review): npxexit() operates on curthread/curpcb, so
 * this presumably requires td == curthread — confirm with callers.
 */
int
fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags)
{
	struct pcb *pcb;

	pcb = td->td_pcb;
	KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == &pcb->pcb_user_save,
	    ("mangled pcb_save"));
	/* Snapshot the init flag so leave() can reinstate it. */
	ctx->flags = 0;
	if ((pcb->pcb_flags & PCB_NPXINITDONE) != 0)
		ctx->flags |= FPU_KERN_CTX_NPXINITDONE;
	/* Flush any live FPU state into the current save area. */
	npxexit(td);
	ctx->prev = pcb->pcb_save;
	pcb->pcb_save = fpu_kern_ctx_savefpu(ctx);
	pcb->pcb_flags |= PCB_KERNNPX;
	pcb->pcb_flags &= ~PCB_NPXINITDONE;
	return (0);
}
1067
/*
 * Undo fpu_kern_enter(): drop any live kernel FPU state, restore the
 * previous save area, and reinstate the PCB_NPXINITDONE flag recorded
 * at enter time.  Nested contexts are supported — PCB_KERNNPX is only
 * cleared when we unwind back to the user save area.  Always returns 0.
 */
int
fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx)
{
	struct pcb *pcb;

	pcb = td->td_pcb;
	critical_enter();
	if (curthread == PCPU_GET(fpcurthread))
		npxdrop();
	critical_exit();
	pcb->pcb_save = ctx->prev;
	if (pcb->pcb_save == &pcb->pcb_user_save) {
		/* Back to user FPU mode: init flag tracks the user state. */
		if ((pcb->pcb_flags & PCB_NPXUSERINITDONE) != 0)
			pcb->pcb_flags |= PCB_NPXINITDONE;
		else
			pcb->pcb_flags &= ~PCB_NPXINITDONE;
		pcb->pcb_flags &= ~PCB_KERNNPX;
	} else {
		/* Still inside an outer kernel FPU context. */
		if ((ctx->flags & FPU_KERN_CTX_NPXINITDONE) != 0)
			pcb->pcb_flags |= PCB_NPXINITDONE;
		else
			pcb->pcb_flags &= ~PCB_NPXINITDONE;
		KASSERT(!PCB_USER_FPU(pcb), ("unpaired fpu_kern_leave"));
	}
	return (0);
}
1094
1095 int
1096 fpu_kern_thread(u_int flags)
1097 {
1098 struct pcb *pcb;
1099
1100 pcb = curpcb;
1101 KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0,
1102 ("Only kthread may use fpu_kern_thread"));
1103 KASSERT(curpcb->pcb_save == &curpcb->pcb_user_save,
1104 ("mangled pcb_save"));
1105 KASSERT(PCB_USER_FPU(curpcb), ("recursive call"));
1106
1107 curpcb->pcb_flags |= PCB_KERNNPX;
1108 return (0);
1109 }
1110
1111 int
1112 is_fpu_kern_thread(u_int flags)
1113 {
1114
1115 if ((curthread->td_pflags & TDP_KTHREAD) == 0)
1116 return (0);
1117 return ((curpcb->pcb_flags & PCB_KERNNPX) != 0);
1118 }
Cache object: c0ced8d739996a0954dceae2718c20f1
|