The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/amd64/amd64/fpu.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1990 William Jolitz.
    3  * Copyright (c) 1991 The Regents of the University of California.
    4  * All rights reserved.
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  * 4. Neither the name of the University nor the names of its contributors
   15  *    may be used to endorse or promote products derived from this software
   16  *    without specific prior written permission.
   17  *
   18  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   28  * SUCH DAMAGE.
   29  *
   30  *      from: @(#)npx.c 7.2 (Berkeley) 5/12/91
   31  */
   32 
   33 #include <sys/cdefs.h>
   34 __FBSDID("$FreeBSD$");
   35 
   36 #include <sys/param.h>
   37 #include <sys/systm.h>
   38 #include <sys/bus.h>
   39 #include <sys/kernel.h>
   40 #include <sys/lock.h>
   41 #include <sys/malloc.h>
   42 #include <sys/module.h>
   43 #include <sys/mutex.h>
   44 #include <sys/mutex.h>
   45 #include <sys/proc.h>
   46 #include <sys/sysctl.h>
   47 #include <machine/bus.h>
   48 #include <sys/rman.h>
   49 #include <sys/signalvar.h>
   50 
   51 #include <machine/cputypes.h>
   52 #include <machine/frame.h>
   53 #include <machine/intr_machdep.h>
   54 #include <machine/md_var.h>
   55 #include <machine/pcb.h>
   56 #include <machine/psl.h>
   57 #include <machine/resource.h>
   58 #include <machine/specialreg.h>
   59 #include <machine/segments.h>
   60 #include <machine/ucontext.h>
   61 
   62 /*
   63  * Floating point support.
   64  */
   65 
   66 #if defined(__GNUCLIKE_ASM) && !defined(lint)
   67 
   68 #define fldcw(cw)               __asm __volatile("fldcw %0" : : "m" (cw))
   69 #define fnclex()                __asm __volatile("fnclex")
   70 #define fninit()                __asm __volatile("fninit")
   71 #define fnstcw(addr)            __asm __volatile("fnstcw %0" : "=m" (*(addr)))
   72 #define fnstsw(addr)            __asm __volatile("fnstsw %0" : "=am" (*(addr)))
   73 #define fxrstor(addr)           __asm __volatile("fxrstor %0" : : "m" (*(addr)))
   74 #define fxsave(addr)            __asm __volatile("fxsave %0" : "=m" (*(addr)))
   75 #define ldmxcsr(csr)            __asm __volatile("ldmxcsr %0" : : "m" (csr))
   76 #define start_emulating()       __asm __volatile( \
   77                                     "smsw %%ax; orb %0,%%al; lmsw %%ax" \
   78                                     : : "n" (CR0_TS) : "ax")
   79 #define stop_emulating()        __asm __volatile("clts")
   80 
/*
 * Restore extended processor state from the save area at "addr".
 * "mask" selects which state components to restore and is split into
 * EDX:EAX as the XRSTOR instruction requires.  The opcode is emitted
 * as raw bytes (0f ae /5) so the file assembles even with toolchains
 * that do not know the mnemonic.
 */
static __inline void
xrstor(char *addr, uint64_t mask)
{
        uint32_t low, hi;

        low = mask;
        hi = mask >> 32;
        /* xrstor (%rdi) */
        __asm __volatile(".byte 0x0f,0xae,0x2f" : :
            "a" (low), "d" (hi), "D" (addr));
}
   92 
/*
 * Save extended processor state into the save area at "addr".
 * "mask" (split into EDX:EAX) selects which state components to save.
 * Raw opcode bytes (0f ae /4) are used for the same toolchain reason
 * as xrstor().  The "memory" clobber tells the compiler the save
 * area is written.
 */
static __inline void
xsave(char *addr, uint64_t mask)
{
        uint32_t low, hi;

        low = mask;
        hi = mask >> 32;
        /* xsave (%rdi) */
        __asm __volatile(".byte 0x0f,0xae,0x27" : :
            "a" (low), "d" (hi), "D" (addr) : "memory");
}
  104 
/*
 * Write extended control register "reg" (e.g. XCR0) with "val",
 * split into EDX:EAX per the XSETBV instruction's convention.
 * Raw opcode bytes: 0f 01 d1 (xsetbv).
 */
static __inline void
xsetbv(uint32_t reg, uint64_t val)
{
        uint32_t low, hi;

        low = val;
        hi = val >> 32;
        __asm __volatile(".byte 0x0f,0x01,0xd1" : :
            "c" (reg), "a" (low), "d" (hi));
}
  115 
#else   /* !(__GNUCLIKE_ASM && !lint) */

/*
 * Plain prototypes for lint or non-GNU-C toolchains; implementations
 * must be supplied elsewhere (not in this file).
 */
void    fldcw(u_short cw);
void    fnclex(void);
void    fninit(void);
void    fnstcw(caddr_t addr);
void    fnstsw(caddr_t addr);
void    fxsave(caddr_t addr);
void    fxrstor(caddr_t addr);
void    ldmxcsr(u_int csr);
void    start_emulating(void);
void    stop_emulating(void);
void    xrstor(char *addr, uint64_t mask);
void    xsave(char *addr, uint64_t mask);
void    xsetbv(uint32_t reg, uint64_t val);

#endif  /* __GNUCLIKE_ASM && !lint */
  133 
/*
 * Accessors for the x87 control and status words stored in a
 * thread's PCB save area.
 */
#define GET_FPU_CW(thread) ((thread)->td_pcb->pcb_save->sv_env.en_cw)
#define GET_FPU_SW(thread) ((thread)->td_pcb->pcb_save->sv_env.en_sw)

/* Sanity-check save-area structure sizes against the hardware layout. */
CTASSERT(sizeof(struct savefpu) == 512);
CTASSERT(sizeof(struct xstate_hdr) == 64);
CTASSERT(sizeof(struct savefpu_ymm) == 832);

/*
 * This requirement is to make it easier for asm code to calculate
 * offset of the fpu save area from the pcb address. FPU save area
 * must be 64-byte aligned.
 */
CTASSERT(sizeof(struct pcb) % XSAVE_AREA_ALIGN == 0);

static  void    fpu_clean_state(void);

/* Read-only sysctl: FP is always done in hardware on amd64. */
SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD,
    NULL, 1, "Floating point instructions executed in hardware");

int use_xsave;                  /* non-static for cpu_switch.S */
uint64_t xsave_mask;            /* the same */
/* Pristine FPU state copied into a thread's PCB on first FPU use. */
static  struct savefpu *fpu_initialstate;

/*
 * Per-component layout of the XSAVE extended area, indexed by the
 * component's bit number in xsave_mask; filled in by fpuinitstate().
 */
struct xsave_area_elm_descr {
        u_int   offset;
        u_int   size;
} *xsave_area_desc;
  161 
  162 void
  163 fpusave(void *addr)
  164 {
  165 
  166         if (use_xsave)
  167                 xsave((char *)addr, xsave_mask);
  168         else
  169                 fxsave((char *)addr);
  170 }
  171 
  172 static void
  173 fpurestore(void *addr)
  174 {
  175 
  176         if (use_xsave)
  177                 xrstor((char *)addr, xsave_mask);
  178         else
  179                 fxrstor((char *)addr);
  180 }
  181 
  182 void
  183 fpususpend(void *addr)
  184 {
  185         u_long cr0;
  186 
  187         cr0 = rcr0();
  188         stop_emulating();
  189         fpusave(addr);
  190         load_cr0(cr0);
  191 }
  192 
  193 /*
  194  * Enable XSAVE if supported and allowed by user.
  195  * Calculate the xsave_mask.
  196  */
/*
 * BSP-only, first stage of FPU init: decide whether to use XSAVE and
 * compute xsave_mask.
 *
 * XSAVE is enabled when the CPU advertises it (CPUID2_XSAVE), unless
 * overridden by the "hw.use_xsave" tunable.  The final xsave_mask is
 * the CPU-supported feature mask optionally narrowed by the
 * "hw.xsave_mask" tunable; x87 and SSE bits are always kept since
 * the hardware requires them.
 */
static void
fpuinit_bsp1(void)
{
        u_int cp[4];
        uint64_t xsave_mask_user;

        if ((cpu_feature2 & CPUID2_XSAVE) != 0) {
                use_xsave = 1;
                TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave);
        }
        if (!use_xsave)
                return;

        /* CPUID leaf 0xd sub-leaf 0: supported XSAVE feature bits. */
        cpuid_count(0xd, 0x0, cp);
        xsave_mask = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE;
        if ((cp[0] & xsave_mask) != xsave_mask)
                panic("CPU0 does not support X87 or SSE: %x", cp[0]);
        /* Full supported mask is EDX:EAX of the sub-leaf. */
        xsave_mask = ((uint64_t)cp[3] << 32) | cp[0];
        xsave_mask_user = xsave_mask;
        TUNABLE_ULONG_FETCH("hw.xsave_mask", &xsave_mask_user);
        /* The tunable may only narrow the mask, never drop x87/SSE. */
        xsave_mask_user |= XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE;
        xsave_mask &= xsave_mask_user;

        /* Sub-leaf 1 advertises XSAVEOPT support. */
        cpuid_count(0xd, 0x1, cp);
        if ((cp[0] & CPUID_EXTSTATE_XSAVEOPT) != 0) {
                /*
                 * Patch the XSAVE instruction in the cpu_switch code
                 * to XSAVEOPT.  We assume that XSAVE encoding used
                 * REX byte, and set the bit 4 of the r/m byte.
                 */
                ctx_switch_xsave[3] |= 0x10;
        }
}
  230 
  231 /*
  232  * Calculate the fpu save area size.
  233  */
  234 static void
  235 fpuinit_bsp2(void)
  236 {
  237         u_int cp[4];
  238 
  239         if (use_xsave) {
  240                 cpuid_count(0xd, 0x0, cp);
  241                 cpu_max_ext_state_size = cp[1];
  242 
  243                 /*
  244                  * Reload the cpu_feature2, since we enabled OSXSAVE.
  245                  */
  246                 do_cpuid(1, cp);
  247                 cpu_feature2 = cp[2];
  248         } else
  249                 cpu_max_ext_state_size = sizeof(struct savefpu);
  250 }
  251 
  252 /*
  253  * Initialize the floating point unit.
  254  */
/*
 * Initialize the floating point unit.  Runs on every CPU; the
 * fpuinit_bsp*() stages run only on the boot processor.  Ends with
 * CR0.TS set so the first FPU use on this CPU traps into fpudna().
 */
void
fpuinit(void)
{
        register_t saveintr;
        u_int mxcsr;
        u_short control;

        if (IS_BSP())
                fpuinit_bsp1();

        if (use_xsave) {
                load_cr4(rcr4() | CR4_XSAVE);
                xsetbv(XCR0, xsave_mask);
        }

        /*
         * XCR0 shall be set up before CPU can report the save area size.
         */
        if (IS_BSP())
                fpuinit_bsp2();

        /*
         * It is too early for critical_enter() to work on AP.
         */
        saveintr = intr_disable();
        stop_emulating();
        fninit();
        /* Program the default x87 control word and SSE MXCSR. */
        control = __INITIAL_FPUCW__;
        fldcw(control);
        mxcsr = __INITIAL_MXCSR__;
        ldmxcsr(mxcsr);
        start_emulating();
        intr_restore(saveintr);
}
  289 
  290 /*
  291  * On the boot CPU we generate a clean state that is used to
  292  * initialize the floating point unit when it is first used by a
  293  * process.
  294  */
/*
 * On the boot CPU we generate a clean state that is used to
 * initialize the floating point unit when it is first used by a
 * process.
 */
static void
fpuinitstate(void *arg __unused)
{
        register_t saveintr;
        int cp[4], i, max_ext_n;

        fpu_initialstate = malloc(cpu_max_ext_state_size, M_DEVBUF,
            M_WAITOK | M_ZERO);
        saveintr = intr_disable();
        stop_emulating();

        /* Capture the post-fninit hardware state as the template. */
        fpusave(fpu_initialstate);
        if (fpu_initialstate->sv_env.en_mxcsr_mask)
                cpu_mxcsr_mask = fpu_initialstate->sv_env.en_mxcsr_mask;
        else
                cpu_mxcsr_mask = 0xFFBF;

        /*
         * The fninit instruction does not modify XMM registers.  The
         * fpusave call dumped the garbage contained in the registers
         * after reset to the initial state saved.  Clear XMM
         * registers file image to make the startup program state and
         * signal handler XMM register content predictable.
         */
        bzero(&fpu_initialstate->sv_xmm[0], sizeof(struct xmmacc));

        /*
         * Create a table describing the layout of the CPU Extended
         * Save Area.
         */
        if (use_xsave) {
                max_ext_n = flsl(xsave_mask);
                xsave_area_desc = malloc(max_ext_n * sizeof(struct
                    xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO);
                /* x87 state */
                xsave_area_desc[0].offset = 0;
                xsave_area_desc[0].size = 160;
                /* XMM */
                xsave_area_desc[1].offset = 160;
                xsave_area_desc[1].size = 288 - 160;

                /* Components >= 2: layout comes from CPUID 0xd sub-leaves. */
                for (i = 2; i < max_ext_n; i++) {
                        cpuid_count(0xd, i, cp);
                        xsave_area_desc[i].offset = cp[1];
                        xsave_area_desc[i].size = cp[0];
                }
        }

        start_emulating();
        intr_restore(saveintr);
}
SYSINIT(fpuinitstate, SI_SUB_DRIVERS, SI_ORDER_ANY, fpuinitstate, NULL);
  347 
  348 /*
  349  * Free coprocessor (if we have it).
  350  */
  351 void
  352 fpuexit(struct thread *td)
  353 {
  354 
  355         critical_enter();
  356         if (curthread == PCPU_GET(fpcurthread)) {
  357                 stop_emulating();
  358                 fpusave(PCPU_GET(curpcb)->pcb_save);
  359                 start_emulating();
  360                 PCPU_SET(fpcurthread, 0);
  361         }
  362         critical_exit();
  363 }
  364 
/*
 * Report the format of the FPU state in the machine context: always
 * the FXSAVE/XMM layout on amd64.
 */
int
fpuformat()
{

        return (_MC_FPFMT_XMM);
}
  371 
  372 /* 
  373  * The following mechanism is used to ensure that the FPE_... value
  374  * that is passed as a trapcode to the signal handler of the user
  375  * process does not have more than one bit set.
  376  * 
  377  * Multiple bits may be set if the user process modifies the control
  378  * word while a status word bit is already set.  While this is a sign
   379  * of bad coding, we have no choice but to narrow them down to one
  380  * bit, since we must not send a trapcode that is not exactly one of
  381  * the FPE_ macros.
  382  *
   383  * The mechanism has a static table with 128 entries.  Each combination
  384  * of the 7 FPU status word exception bits directly translates to a
  385  * position in this table, where a single FPE_... value is stored.
  386  * This FPE_... value stored there is considered the "most important"
  387  * of the exception bits and will be sent as the signal code.  The
  388  * precedence of the bits is based upon Intel Document "Numerical
  389  * Applications", Chapter "Special Computational Situations".
  390  *
  391  * The macro to choose one of these values does these steps: 1) Throw
  392  * away status word bits that cannot be masked.  2) Throw away the bits
  393  * currently masked in the control word, assuming the user isn't
  394  * interested in them anymore.  3) Reinsert status word bit 7 (stack
   395  * fault) if it is set, which cannot be masked but must be preserved.
  396  * 4) Use the remaining bits to point into the trapcode table.
  397  *
  398  * The 6 maskable bits in order of their preference, as stated in the
  399  * above referenced Intel manual:
  400  * 1  Invalid operation (FP_X_INV)
  401  * 1a   Stack underflow
  402  * 1b   Stack overflow
  403  * 1c   Operand of unsupported format
  404  * 1d   SNaN operand.
   405  * 2  QNaN operand (not an exception, irrelevant here)
  406  * 3  Any other invalid-operation not mentioned above or zero divide
  407  *      (FP_X_INV, FP_X_DZ)
  408  * 4  Denormal operand (FP_X_DNML)
  409  * 5  Numeric over/underflow (FP_X_OFL, FP_X_UFL)
  410  * 6  Inexact result (FP_X_IMP) 
  411  */
/*
 * Trapcode table, indexed by the masked status-word value computed in
 * fputrap() (status & ((~control & 0x3f) | 0x40)); each entry is the
 * single FPE_* si_code chosen per the precedence scheme above.
 */
static char fpetable[128] = {
        0,
        FPE_FLTINV,     /*  1 - INV */
        FPE_FLTUND,     /*  2 - DNML */
        FPE_FLTINV,     /*  3 - INV | DNML */
        FPE_FLTDIV,     /*  4 - DZ */
        FPE_FLTINV,     /*  5 - INV | DZ */
        FPE_FLTDIV,     /*  6 - DNML | DZ */
        FPE_FLTINV,     /*  7 - INV | DNML | DZ */
        FPE_FLTOVF,     /*  8 - OFL */
        FPE_FLTINV,     /*  9 - INV | OFL */
        FPE_FLTUND,     /*  A - DNML | OFL */
        FPE_FLTINV,     /*  B - INV | DNML | OFL */
        FPE_FLTDIV,     /*  C - DZ | OFL */
        FPE_FLTINV,     /*  D - INV | DZ | OFL */
        FPE_FLTDIV,     /*  E - DNML | DZ | OFL */
        FPE_FLTINV,     /*  F - INV | DNML | DZ | OFL */
        FPE_FLTUND,     /* 10 - UFL */
        FPE_FLTINV,     /* 11 - INV | UFL */
        FPE_FLTUND,     /* 12 - DNML | UFL */
        FPE_FLTINV,     /* 13 - INV | DNML | UFL */
        FPE_FLTDIV,     /* 14 - DZ | UFL */
        FPE_FLTINV,     /* 15 - INV | DZ | UFL */
        FPE_FLTDIV,     /* 16 - DNML | DZ | UFL */
        FPE_FLTINV,     /* 17 - INV | DNML | DZ | UFL */
        FPE_FLTOVF,     /* 18 - OFL | UFL */
        FPE_FLTINV,     /* 19 - INV | OFL | UFL */
        FPE_FLTUND,     /* 1A - DNML | OFL | UFL */
        FPE_FLTINV,     /* 1B - INV | DNML | OFL | UFL */
        FPE_FLTDIV,     /* 1C - DZ | OFL | UFL */
        FPE_FLTINV,     /* 1D - INV | DZ | OFL | UFL */
        FPE_FLTDIV,     /* 1E - DNML | DZ | OFL | UFL */
        FPE_FLTINV,     /* 1F - INV | DNML | DZ | OFL | UFL */
        FPE_FLTRES,     /* 20 - IMP */
        FPE_FLTINV,     /* 21 - INV | IMP */
        FPE_FLTUND,     /* 22 - DNML | IMP */
        FPE_FLTINV,     /* 23 - INV | DNML | IMP */
        FPE_FLTDIV,     /* 24 - DZ | IMP */
        FPE_FLTINV,     /* 25 - INV | DZ | IMP */
        FPE_FLTDIV,     /* 26 - DNML | DZ | IMP */
        FPE_FLTINV,     /* 27 - INV | DNML | DZ | IMP */
        FPE_FLTOVF,     /* 28 - OFL | IMP */
        FPE_FLTINV,     /* 29 - INV | OFL | IMP */
        FPE_FLTUND,     /* 2A - DNML | OFL | IMP */
        FPE_FLTINV,     /* 2B - INV | DNML | OFL | IMP */
        FPE_FLTDIV,     /* 2C - DZ | OFL | IMP */
        FPE_FLTINV,     /* 2D - INV | DZ | OFL | IMP */
        FPE_FLTDIV,     /* 2E - DNML | DZ | OFL | IMP */
        FPE_FLTINV,     /* 2F - INV | DNML | DZ | OFL | IMP */
        FPE_FLTUND,     /* 30 - UFL | IMP */
        FPE_FLTINV,     /* 31 - INV | UFL | IMP */
        FPE_FLTUND,     /* 32 - DNML | UFL | IMP */
        FPE_FLTINV,     /* 33 - INV | DNML | UFL | IMP */
        FPE_FLTDIV,     /* 34 - DZ | UFL | IMP */
        FPE_FLTINV,     /* 35 - INV | DZ | UFL | IMP */
        FPE_FLTDIV,     /* 36 - DNML | DZ | UFL | IMP */
        FPE_FLTINV,     /* 37 - INV | DNML | DZ | UFL | IMP */
        FPE_FLTOVF,     /* 38 - OFL | UFL | IMP */
        FPE_FLTINV,     /* 39 - INV | OFL | UFL | IMP */
        FPE_FLTUND,     /* 3A - DNML | OFL | UFL | IMP */
        FPE_FLTINV,     /* 3B - INV | DNML | OFL | UFL | IMP */
        FPE_FLTDIV,     /* 3C - DZ | OFL | UFL | IMP */
        FPE_FLTINV,     /* 3D - INV | DZ | OFL | UFL | IMP */
        FPE_FLTDIV,     /* 3E - DNML | DZ | OFL | UFL | IMP */
        FPE_FLTINV,     /* 3F - INV | DNML | DZ | OFL | UFL | IMP */
        FPE_FLTSUB,     /* 40 - STK */
        FPE_FLTSUB,     /* 41 - INV | STK */
        FPE_FLTUND,     /* 42 - DNML | STK */
        FPE_FLTSUB,     /* 43 - INV | DNML | STK */
        FPE_FLTDIV,     /* 44 - DZ | STK */
        FPE_FLTSUB,     /* 45 - INV | DZ | STK */
        FPE_FLTDIV,     /* 46 - DNML | DZ | STK */
        FPE_FLTSUB,     /* 47 - INV | DNML | DZ | STK */
        FPE_FLTOVF,     /* 48 - OFL | STK */
        FPE_FLTSUB,     /* 49 - INV | OFL | STK */
        FPE_FLTUND,     /* 4A - DNML | OFL | STK */
        FPE_FLTSUB,     /* 4B - INV | DNML | OFL | STK */
        FPE_FLTDIV,     /* 4C - DZ | OFL | STK */
        FPE_FLTSUB,     /* 4D - INV | DZ | OFL | STK */
        FPE_FLTDIV,     /* 4E - DNML | DZ | OFL | STK */
        FPE_FLTSUB,     /* 4F - INV | DNML | DZ | OFL | STK */
        FPE_FLTUND,     /* 50 - UFL | STK */
        FPE_FLTSUB,     /* 51 - INV | UFL | STK */
        FPE_FLTUND,     /* 52 - DNML | UFL | STK */
        FPE_FLTSUB,     /* 53 - INV | DNML | UFL | STK */
        FPE_FLTDIV,     /* 54 - DZ | UFL | STK */
        FPE_FLTSUB,     /* 55 - INV | DZ | UFL | STK */
        FPE_FLTDIV,     /* 56 - DNML | DZ | UFL | STK */
        FPE_FLTSUB,     /* 57 - INV | DNML | DZ | UFL | STK */
        FPE_FLTOVF,     /* 58 - OFL | UFL | STK */
        FPE_FLTSUB,     /* 59 - INV | OFL | UFL | STK */
        FPE_FLTUND,     /* 5A - DNML | OFL | UFL | STK */
        FPE_FLTSUB,     /* 5B - INV | DNML | OFL | UFL | STK */
        FPE_FLTDIV,     /* 5C - DZ | OFL | UFL | STK */
        FPE_FLTSUB,     /* 5D - INV | DZ | OFL | UFL | STK */
        FPE_FLTDIV,     /* 5E - DNML | DZ | OFL | UFL | STK */
        FPE_FLTSUB,     /* 5F - INV | DNML | DZ | OFL | UFL | STK */
        FPE_FLTRES,     /* 60 - IMP | STK */
        FPE_FLTSUB,     /* 61 - INV | IMP | STK */
        FPE_FLTUND,     /* 62 - DNML | IMP | STK */
        FPE_FLTSUB,     /* 63 - INV | DNML | IMP | STK */
        FPE_FLTDIV,     /* 64 - DZ | IMP | STK */
        FPE_FLTSUB,     /* 65 - INV | DZ | IMP | STK */
        FPE_FLTDIV,     /* 66 - DNML | DZ | IMP | STK */
        FPE_FLTSUB,     /* 67 - INV | DNML | DZ | IMP | STK */
        FPE_FLTOVF,     /* 68 - OFL | IMP | STK */
        FPE_FLTSUB,     /* 69 - INV | OFL | IMP | STK */
        FPE_FLTUND,     /* 6A - DNML | OFL | IMP | STK */
        FPE_FLTSUB,     /* 6B - INV | DNML | OFL | IMP | STK */
        FPE_FLTDIV,     /* 6C - DZ | OFL | IMP | STK */
        FPE_FLTSUB,     /* 6D - INV | DZ | OFL | IMP | STK */
        FPE_FLTDIV,     /* 6E - DNML | DZ | OFL | IMP | STK */
        FPE_FLTSUB,     /* 6F - INV | DNML | DZ | OFL | IMP | STK */
        FPE_FLTUND,     /* 70 - UFL | IMP | STK */
        FPE_FLTSUB,     /* 71 - INV | UFL | IMP | STK */
        FPE_FLTUND,     /* 72 - DNML | UFL | IMP | STK */
        FPE_FLTSUB,     /* 73 - INV | DNML | UFL | IMP | STK */
        FPE_FLTDIV,     /* 74 - DZ | UFL | IMP | STK */
        FPE_FLTSUB,     /* 75 - INV | DZ | UFL | IMP | STK */
        FPE_FLTDIV,     /* 76 - DNML | DZ | UFL | IMP | STK */
        FPE_FLTSUB,     /* 77 - INV | DNML | DZ | UFL | IMP | STK */
        FPE_FLTOVF,     /* 78 - OFL | UFL | IMP | STK */
        FPE_FLTSUB,     /* 79 - INV | OFL | UFL | IMP | STK */
        FPE_FLTUND,     /* 7A - DNML | OFL | UFL | IMP | STK */
        FPE_FLTSUB,     /* 7B - INV | DNML | OFL | UFL | IMP | STK */
        FPE_FLTDIV,     /* 7C - DZ | OFL | UFL | IMP | STK */
        FPE_FLTSUB,     /* 7D - INV | DZ | OFL | UFL | IMP | STK */
        FPE_FLTDIV,     /* 7E - DNML | DZ | OFL | UFL | IMP | STK */
        FPE_FLTSUB,     /* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */
};
  542 
  543 /*
  544  * Preserve the FP status word, clear FP exceptions, then generate a SIGFPE.
  545  *
  546  * Clearing exceptions is necessary mainly to avoid IRQ13 bugs.  We now
  547  * depend on longjmp() restoring a usable state.  Restoring the state
  548  * or examining it might fail if we didn't clear exceptions.
  549  *
  550  * The error code chosen will be one of the FPE_... macros. It will be
  551  * sent as the second argument to old BSD-style signal handlers and as
  552  * "siginfo_t->si_code" (second argument) to SA_SIGINFO signal handlers.
  553  *
  554  * XXX the FP state is not preserved across signal handlers.  So signal
  555  * handlers cannot afford to do FP unless they preserve the state or
  556  * longjmp() out.  Both preserving the state and longjmp()ing may be
  557  * destroyed by IRQ13 bugs.  Clearing FP exceptions is not an acceptable
  558  * solution for signals other than SIGFPE.
  559  */
/*
 * Translate a pending FPU exception into a single FPE_* si_code.
 * The control/status words are fetched from the live FPU when this
 * thread owns it, otherwise from the PCB save area; exceptions are
 * cleared with fnclex only when the live FPU belongs to us.
 */
int
fputrap()
{
        u_short control, status;

        critical_enter();

        /*
         * Interrupt handling (for another interrupt) may have pushed the
         * state to memory.  Fetch the relevant parts of the state from
         * wherever they are.
         */
        if (PCPU_GET(fpcurthread) != curthread) {
                control = GET_FPU_CW(curthread);
                status = GET_FPU_SW(curthread);
        } else {
                fnstcw(&control);
                fnstsw(&status);
        }

        if (PCPU_GET(fpcurthread) == curthread)
                fnclex();
        critical_exit();
        /*
         * Mask status by the unmasked exception bits; 0x40 forces the
         * stack-fault bit so the table lookup lands in the STK rows.
         */
        return (fpetable[status & ((~control & 0x3f) | 0x40)]);
}
  585 
  586 /*
  587  * Implement device not available (DNA) exception
  588  *
  589  * It would be better to switch FP context here (if curthread != fpcurthread)
  590  * and not necessarily for every context switch, but it is too hard to
  591  * access foreign pcb's.
  592  */
  593 
/* Diagnostic counter for spurious DNA traps (owner already current). */
static int err_count = 0;

/*
 * Device-not-available (#NM) trap handler: claim the FPU for the
 * current thread.  On first use (or after fpudrop) the PCB is seeded
 * from fpu_initialstate; otherwise the thread's saved context is
 * reloaded.
 */
void
fpudna(void)
{
        struct pcb *pcb;

        critical_enter();
        if (PCPU_GET(fpcurthread) == curthread) {
                /* Spurious trap: we already own the FPU on this CPU. */
                printf("fpudna: fpcurthread == curthread %d times\n",
                    ++err_count);
                stop_emulating();
                critical_exit();
                return;
        }
        if (PCPU_GET(fpcurthread) != NULL) {
                /* Another thread's context is live here; should never be. */
                printf("fpudna: fpcurthread = %p (%d), curthread = %p (%d)\n",
                       PCPU_GET(fpcurthread),
                       PCPU_GET(fpcurthread)->td_proc->p_pid,
                       curthread, curthread->td_proc->p_pid);
                panic("fpudna");
        }
        stop_emulating();
        /*
         * Record new context early in case frstor causes a trap.
         */
        PCPU_SET(fpcurthread, curthread);
        pcb = PCPU_GET(curpcb);

        fpu_clean_state();

        if ((pcb->pcb_flags & PCB_FPUINITDONE) == 0) {
                /*
                 * This is the first time this thread has used the FPU or
                 * the PCB doesn't contain a clean FPU state.  Explicitly
                 * load an initial state.
                 *
                 * We prefer to restore the state from the actual save
                 * area in PCB instead of directly loading from
                 * fpu_initialstate, to ignite the XSAVEOPT
                 * tracking engine.
                 */
                bcopy(fpu_initialstate, pcb->pcb_save, cpu_max_ext_state_size);
                fpurestore(pcb->pcb_save);
                if (pcb->pcb_initial_fpucw != __INITIAL_FPUCW__)
                        fldcw(pcb->pcb_initial_fpucw);
                if (PCB_USER_FPU(pcb))
                        set_pcb_flags(pcb,
                            PCB_FPUINITDONE | PCB_USERFPUINITDONE);
                else
                        set_pcb_flags(pcb, PCB_FPUINITDONE);
        } else
                fpurestore(pcb->pcb_save);
        critical_exit();
}
  649 
/*
 * Discard the current thread's live FPU context without saving it:
 * drop ownership, mark the PCB state uninitialized and re-arm the
 * DNA trap.  Must be called in a critical section by the owner.
 */
void
fpudrop()
{
        struct thread *td;

        td = PCPU_GET(fpcurthread);
        KASSERT(td == curthread, ("fpudrop: fpcurthread != curthread"));
        CRITICAL_ASSERT(td);
        PCPU_SET(fpcurthread, NULL);
        clear_pcb_flags(td->td_pcb, PCB_FPUINITDONE);
        start_emulating();
}
  662 
  663 /*
  664  * Get the user state of the FPU into pcb->pcb_user_save without
  665  * dropping ownership (if possible).  It returns the FPU ownership
  666  * status.
  667  */
/*
 * Get the user state of the FPU into pcb->pcb_user_save without
 * dropping ownership (if possible).  Returns _MC_FPOWNED_FPU when
 * the live registers were flushed into the PCB, _MC_FPOWNED_PCB when
 * the PCB copy was already authoritative.
 */
int
fpugetregs(struct thread *td)
{
        struct pcb *pcb;
        uint64_t *xstate_bv, bit;
        char *sa;
        int max_ext_n, i, owned;

        pcb = td->td_pcb;
        if ((pcb->pcb_flags & PCB_USERFPUINITDONE) == 0) {
                /* Never used: hand back the pristine initial state. */
                bcopy(fpu_initialstate, get_pcb_user_save_pcb(pcb),
                    cpu_max_ext_state_size);
                get_pcb_user_save_pcb(pcb)->sv_env.en_cw =
                    pcb->pcb_initial_fpucw;
                fpuuserinited(td);
                return (_MC_FPOWNED_PCB);
        }
        critical_enter();
        if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) {
                fpusave(get_pcb_user_save_pcb(pcb));
                owned = _MC_FPOWNED_FPU;
        } else {
                owned = _MC_FPOWNED_PCB;
        }
        critical_exit();
        if (use_xsave) {
                /*
                 * Handle partially saved state.
                 */
                sa = (char *)get_pcb_user_save_pcb(pcb);
                xstate_bv = (uint64_t *)(sa + sizeof(struct savefpu) +
                    offsetof(struct xstate_hdr, xstate_bv));
                max_ext_n = flsl(xsave_mask);
                /*
                 * For each enabled component XSAVE left in its init
                 * state (bit clear in xstate_bv), copy the initial
                 * image in and mark the component valid.
                 */
                for (i = 0; i < max_ext_n; i++) {
                        bit = 1ULL << i;
                        if ((xsave_mask & bit) == 0 || (*xstate_bv & bit) != 0)
                                continue;
                        bcopy((char *)fpu_initialstate +
                            xsave_area_desc[i].offset,
                            sa + xsave_area_desc[i].offset,
                            xsave_area_desc[i].size);
                        *xstate_bv |= bit;
                }
        }
        return (owned);
}
  714 
  715 void
  716 fpuuserinited(struct thread *td)
  717 {
  718         struct pcb *pcb;
  719 
  720         pcb = td->td_pcb;
  721         if (PCB_USER_FPU(pcb))
  722                 set_pcb_flags(pcb,
  723                     PCB_FPUINITDONE | PCB_USERFPUINITDONE);
  724         else
  725                 set_pcb_flags(pcb, PCB_FPUINITDONE);
  726 }
  727 
/*
 * Copy a user-supplied extended (XSAVE) state image into the
 * thread's user save area.  Returns 0 on success, EOPNOTSUPP when
 * XSAVE is not in use, EINVAL on a malformed image or a feature mask
 * not enabled in xsave_mask.
 */
int
fpusetxstate(struct thread *td, char *xfpustate, size_t xfpustate_size)
{
        struct xstate_hdr *hdr, *ehdr;
        size_t len, max_len;
        uint64_t bv;

        /* XXXKIB should we clear all extended state in xstate_bv instead ? */
        if (xfpustate == NULL)
                return (0);
        if (!use_xsave)
                return (EOPNOTSUPP);

        len = xfpustate_size;
        if (len < sizeof(struct xstate_hdr))
                return (EINVAL);
        max_len = cpu_max_ext_state_size - sizeof(struct savefpu);
        if (len > max_len)
                return (EINVAL);

        ehdr = (struct xstate_hdr *)xfpustate;
        bv = ehdr->xstate_bv;

        /*
         * Avoid #gp.
         */
        if (bv & ~xsave_mask)
                return (EINVAL);

        /* Extended header lives immediately after the legacy savefpu. */
        hdr = (struct xstate_hdr *)(get_pcb_user_save_td(td) + 1);

        hdr->xstate_bv = bv;
        bcopy(xfpustate + sizeof(struct xstate_hdr),
            (char *)(hdr + 1), len - sizeof(struct xstate_hdr));

        return (0);
}
  765 
  766 /*
  767  * Set the state of the FPU.
  768  */
  769 int
  770 fpusetregs(struct thread *td, struct savefpu *addr, char *xfpustate,
  771     size_t xfpustate_size)
  772 {
  773         struct pcb *pcb;
  774         int error;
  775 
  776         pcb = td->td_pcb;
  777         critical_enter();
  778         if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) {
  779                 error = fpusetxstate(td, xfpustate, xfpustate_size);
  780                 if (error != 0) {
  781                         critical_exit();
  782                         return (error);
  783                 }
  784                 bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr));
  785                 fpurestore(get_pcb_user_save_td(td));
  786                 critical_exit();
  787                 set_pcb_flags(pcb, PCB_FPUINITDONE | PCB_USERFPUINITDONE);
  788         } else {
  789                 critical_exit();
  790                 error = fpusetxstate(td, xfpustate, xfpustate_size);
  791                 if (error != 0)
  792                         return (error);
  793                 bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr));
  794                 fpuuserinited(td);
  795         }
  796         return (0);
  797 }
  798 
  799 /*
  800  * On AuthenticAMD processors, the fxrstor instruction does not restore
  801  * the x87's stored last instruction pointer, last data pointer, and last
  802  * opcode values, except in the rare case in which the exception summary
  803  * (ES) bit in the x87 status word is set to 1.
  804  *
  805  * In order to avoid leaking this information across processes, we clean
  806  * these values by performing a dummy load before executing fxrstor().
  807  */
  808 static void
  809 fpu_clean_state(void)
  810 {
  811         static float dummy_variable = 0.0;
  812         u_short status;
  813 
  814         /*
  815          * Clear the ES bit in the x87 status word if it is currently
  816          * set, in order to avoid causing a fault in the upcoming load.
  817          */
  818         fnstsw(&status);
  819         if (status & 0x80)
  820                 fnclex();
  821 
  822         /*
  823          * Load the dummy variable into the x87 stack.  This mangles
  824          * the x87 stack, but we don't care since we're about to call
  825          * fxrstor() anyway.
  826          */
  827         __asm __volatile("ffree %%st(7); fld %0" : : "m" (dummy_variable));
  828 }
  829 
  830 /*
  831  * This really sucks.  We want the acpi version only, but it requires
  832  * the isa_if.h file in order to get the definitions.
  833  */
  834 #include "opt_isa.h"
  835 #ifdef DEV_ISA
  836 #include <isa/isavar.h>
  837 /*
  838  * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI.
  839  */
/* PNP IDs matched by the stub driver below; table is zero-terminated. */
static struct isa_pnp_id fpupnp_ids[] = {
        { 0x040cd041, "Legacy ISA coprocessor support" }, /* PNP0C04 */
        { 0 }
};
  844 
  845 static int
  846 fpupnp_probe(device_t dev)
  847 {
  848         int result;
  849 
  850         result = ISA_PNP_PROBE(device_get_parent(dev), dev, fpupnp_ids);
  851         if (result <= 0)
  852                 device_quiet(dev);
  853         return (result);
  854 }
  855 
/* Intentionally empty attach: the node is claimed only to consume it. */
static int
fpupnp_attach(device_t dev)
{

        return (0);
}
  862 
/* Newbus glue for the stub fpupnp driver. */
static device_method_t fpupnp_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe,         fpupnp_probe),
        DEVMETHOD(device_attach,        fpupnp_attach),
        DEVMETHOD(device_detach,        bus_generic_detach),
        DEVMETHOD(device_shutdown,      bus_generic_shutdown),
        DEVMETHOD(device_suspend,       bus_generic_suspend),
        DEVMETHOD(device_resume,        bus_generic_resume),

        { 0, 0 }
};

static driver_t fpupnp_driver = {
        "fpupnp",
        fpupnp_methods,
        1,                      /* no softc */
};

static devclass_t fpupnp_devclass;

/* Attach under acpi so the PNPBIOS/ACPI-enumerated node is consumed. */
DRIVER_MODULE(fpupnp, acpi, fpupnp_driver, fpupnp_devclass, 0, 0);
  884 #endif  /* DEV_ISA */
  885 
/* Allocation tag for in-kernel FPU context save areas. */
static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx",
    "Kernel contexts for FPU state");

/* The context that was entered had PCB_FPUINITDONE set. */
#define FPU_KERN_CTX_FPUINITDONE 0x01

/*
 * Backing store for one fpu_kern_enter()/fpu_kern_leave() nesting
 * level.  hwstate1[] holds the hardware save area; it is allocated
 * with XSAVE_AREA_ALIGN bytes of slack so fpu_kern_ctx_savefpu() can
 * return an aligned pointer into it.
 */
struct fpu_kern_ctx {
        struct savefpu *prev;   /* pcb_save active before enter */
        uint32_t flags;         /* FPU_KERN_CTX_* */
        char hwstate1[];        /* unaligned hardware state storage */
};
  896 
  897 struct fpu_kern_ctx *
  898 fpu_kern_alloc_ctx(u_int flags)
  899 {
  900         struct fpu_kern_ctx *res;
  901         size_t sz;
  902 
  903         sz = sizeof(struct fpu_kern_ctx) + XSAVE_AREA_ALIGN +
  904             cpu_max_ext_state_size;
  905         res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ?
  906             M_NOWAIT : M_WAITOK) | M_ZERO);
  907         return (res);
  908 }
  909 
/* Release a context obtained from fpu_kern_alloc_ctx(). */
void
fpu_kern_free_ctx(struct fpu_kern_ctx *ctx)
{

        /* XXXKIB clear the memory ? */
        free(ctx, M_FPUKERN_CTX);
}
  917 
  918 static struct savefpu *
  919 fpu_kern_ctx_savefpu(struct fpu_kern_ctx *ctx)
  920 {
  921         vm_offset_t p;
  922 
  923         p = (vm_offset_t)&ctx->hwstate1;
  924         p = roundup2(p, XSAVE_AREA_ALIGN);
  925         return ((struct savefpu *)p);
  926 }
  927 
/*
 * Redirect the thread's FPU save area to the private context @ctx,
 * allowing FPU use inside the kernel.  Must be paired with a later
 * fpu_kern_leave() on the same context.  The flags argument is
 * currently unused here.  Always returns 0.
 */
int
fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags)
{
        struct pcb *pcb;

        pcb = td->td_pcb;
        KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save ==
            get_pcb_user_save_pcb(pcb), ("mangled pcb_save"));
        ctx->flags = 0;
        /* Record whether the outgoing context was initialized. */
        if ((pcb->pcb_flags & PCB_FPUINITDONE) != 0)
                ctx->flags |= FPU_KERN_CTX_FPUINITDONE;
        /*
         * NOTE(review): fpuexit() presumably spills any live hardware
         * state into the current pcb_save; it must run before pcb_save
         * is replaced below — confirm against fpuexit().
         */
        fpuexit(td);
        ctx->prev = pcb->pcb_save;
        pcb->pcb_save = fpu_kern_ctx_savefpu(ctx);
        set_pcb_flags(pcb, PCB_KERNFPU);
        /* The new context starts uninitialized. */
        clear_pcb_flags(pcb, PCB_FPUINITDONE);
        return (0);
}
  946 
/*
 * End a fpu_kern_enter() section: drop any live hardware state,
 * restore the previous save area and re-establish the PCB flags
 * that describe it.  Always returns 0.
 */
int
fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx)
{
        struct pcb *pcb;

        pcb = td->td_pcb;
        /* fpudrop() must not race with a context switch. */
        critical_enter();
        if (curthread == PCPU_GET(fpcurthread))
                fpudrop();
        critical_exit();
        pcb->pcb_save = ctx->prev;
        if (pcb->pcb_save == get_pcb_user_save_pcb(pcb)) {
                /* Outermost leave: back on the user FPU context. */
                if ((pcb->pcb_flags & PCB_USERFPUINITDONE) != 0) {
                        set_pcb_flags(pcb, PCB_FPUINITDONE);
                        clear_pcb_flags(pcb, PCB_KERNFPU);
                } else
                        clear_pcb_flags(pcb, PCB_FPUINITDONE | PCB_KERNFPU);
        } else {
                /* Nested leave: restore the flag saved at enter time. */
                if ((ctx->flags & FPU_KERN_CTX_FPUINITDONE) != 0)
                        set_pcb_flags(pcb, PCB_FPUINITDONE);
                else
                        clear_pcb_flags(pcb, PCB_FPUINITDONE);
                KASSERT(!PCB_USER_FPU(pcb), ("unpaired fpu_kern_leave"));
        }
        return (0);
}
  973 
/*
 * Mark the calling kernel thread as an FPU user by setting PCB_KERNFPU
 * on its pcb.  Only valid for kthreads whose pcb_save still points at
 * the user save area and which are not already inside an FPU section.
 * The flags argument is currently unused.  Always returns 0.
 */
int
fpu_kern_thread(u_int flags)
{
        struct pcb *pcb;

        pcb = PCPU_GET(curpcb);
        KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0,
            ("Only kthread may use fpu_kern_thread"));
        KASSERT(pcb->pcb_save == get_pcb_user_save_pcb(pcb),
            ("mangled pcb_save"));
        KASSERT(PCB_USER_FPU(pcb), ("recursive call"));

        set_pcb_flags(pcb, PCB_KERNFPU);
        return (0);
}
  989 
  990 int
  991 is_fpu_kern_thread(u_int flags)
  992 {
  993 
  994         if ((curthread->td_pflags & TDP_KTHREAD) == 0)
  995                 return (0);
  996         return ((PCPU_GET(curpcb)->pcb_flags & PCB_KERNFPU) != 0);
  997 }

Cache object: c8961e6a636128d2a9048ea382492a54


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.