The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/arch/i386/math-emu/poly_sin.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*---------------------------------------------------------------------------+
    2  |  poly_sin.c                                                               |
    3  |                                                                           |
    4  |  Computation of an approximation of the sin function and the cosine       |
    5  |  function by a polynomial.                                                |
    6  |                                                                           |
    7  | Copyright (C) 1992,1993,1994,1997,1999                                    |
    8  |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
    9  |                  E-mail   billm@melbpc.org.au                             |
   10  |                                                                           |
   11  |                                                                           |
   12  +---------------------------------------------------------------------------*/
   13 
   14 
   15 #include "exception.h"
   16 #include "reg_constant.h"
   17 #include "fpu_emu.h"
   18 #include "fpu_system.h"
   19 #include "control_w.h"
   20 #include "poly.h"
   21 
   22 
   23 #define N_COEFF_P       4
   24 #define N_COEFF_N       4
   25 
   26 static const unsigned long long pos_terms_l[N_COEFF_P] =
   27 {
   28   0xaaaaaaaaaaaaaaabLL,
   29   0x00d00d00d00cf906LL,
   30   0x000006b99159a8bbLL,
   31   0x000000000d7392e6LL
   32 };
   33 
   34 static const unsigned long long neg_terms_l[N_COEFF_N] =
   35 {
   36   0x2222222222222167LL,
   37   0x0002e3bc74aab624LL,
   38   0x0000000b09229062LL,
   39   0x00000000000c7973LL
   40 };
   41 
   42 
   43 
   44 #define N_COEFF_PH      4
   45 #define N_COEFF_NH      4
   46 static const unsigned long long pos_terms_h[N_COEFF_PH] =
   47 {
   48   0x0000000000000000LL,
   49   0x05b05b05b05b0406LL,
   50   0x000049f93edd91a9LL,
   51   0x00000000c9c9ed62LL
   52 };
   53 
   54 static const unsigned long long neg_terms_h[N_COEFF_NH] =
   55 {
   56   0xaaaaaaaaaaaaaa98LL,
   57   0x001a01a01a019064LL,
   58   0x0000008f76c68a77LL,
   59   0x0000000000d58f5eLL
   60 };
   61 
   62 
   63 /*--- poly_sine() -----------------------------------------------------------+
   64  |                                                                           |
   65  +---------------------------------------------------------------------------*/
   66 void    poly_sine(FPU_REG *st0_ptr)
   67 {
   68   int                 exponent, echange;
   69   Xsig                accumulator, argSqrd, argTo4;
   70   unsigned long       fix_up, adj;
   71   unsigned long long  fixed_arg;
   72   FPU_REG             result;
   73 
   74   exponent = exponent(st0_ptr);
   75 
   76   accumulator.lsw = accumulator.midw = accumulator.msw = 0;
   77 
   78   /* Split into two ranges, for arguments below and above 1.0 */
   79   /* The boundary between upper and lower is approx 0.88309101259 */
   80   if ( (exponent < -1) || ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa)) )
   81     {
   82       /* The argument is <= 0.88309101259 */
   83 
   84       argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl; argSqrd.lsw = 0;
   85       mul64_Xsig(&argSqrd, &significand(st0_ptr));
   86       shr_Xsig(&argSqrd, 2*(-1-exponent));
   87       argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
   88       argTo4.lsw = argSqrd.lsw;
   89       mul_Xsig_Xsig(&argTo4, &argTo4);
   90 
   91       polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
   92                       N_COEFF_N-1);
   93       mul_Xsig_Xsig(&accumulator, &argSqrd);
   94       negate_Xsig(&accumulator);
   95 
   96       polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
   97                       N_COEFF_P-1);
   98 
   99       shr_Xsig(&accumulator, 2);    /* Divide by four */
  100       accumulator.msw |= 0x80000000;  /* Add 1.0 */
  101 
  102       mul64_Xsig(&accumulator, &significand(st0_ptr));
  103       mul64_Xsig(&accumulator, &significand(st0_ptr));
  104       mul64_Xsig(&accumulator, &significand(st0_ptr));
  105 
  106       /* Divide by four, FPU_REG compatible, etc */
  107       exponent = 3*exponent;
  108 
  109       /* The minimum exponent difference is 3 */
  110       shr_Xsig(&accumulator, exponent(st0_ptr) - exponent);
  111 
  112       negate_Xsig(&accumulator);
  113       XSIG_LL(accumulator) += significand(st0_ptr);
  114 
  115       echange = round_Xsig(&accumulator);
  116 
  117       setexponentpos(&result, exponent(st0_ptr) + echange);
  118     }
  119   else
  120     {
  121       /* The argument is > 0.88309101259 */
  122       /* We use sin(st(0)) = cos(pi/2-st(0)) */
  123 
  124       fixed_arg = significand(st0_ptr);
  125 
  126       if ( exponent == 0 )
  127         {
  128           /* The argument is >= 1.0 */
  129 
  130           /* Put the binary point at the left. */
  131           fixed_arg <<= 1;
  132         }
  133       /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
  134       fixed_arg = 0x921fb54442d18469LL - fixed_arg;
  135       /* There is a special case which arises due to rounding, to fix here. */
  136       if ( fixed_arg == 0xffffffffffffffffLL )
  137         fixed_arg = 0;
  138 
  139       XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0;
  140       mul64_Xsig(&argSqrd, &fixed_arg);
  141 
  142       XSIG_LL(argTo4) = XSIG_LL(argSqrd); argTo4.lsw = argSqrd.lsw;
  143       mul_Xsig_Xsig(&argTo4, &argTo4);
  144 
  145       polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
  146                       N_COEFF_NH-1);
  147       mul_Xsig_Xsig(&accumulator, &argSqrd);
  148       negate_Xsig(&accumulator);
  149 
  150       polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
  151                       N_COEFF_PH-1);
  152       negate_Xsig(&accumulator);
  153 
  154       mul64_Xsig(&accumulator, &fixed_arg);
  155       mul64_Xsig(&accumulator, &fixed_arg);
  156 
  157       shr_Xsig(&accumulator, 3);
  158       negate_Xsig(&accumulator);
  159 
  160       add_Xsig_Xsig(&accumulator, &argSqrd);
  161 
  162       shr_Xsig(&accumulator, 1);
  163 
  164       accumulator.lsw |= 1;  /* A zero accumulator here would cause problems */
  165       negate_Xsig(&accumulator);
  166 
  167       /* The basic computation is complete. Now fix the answer to
  168          compensate for the error due to the approximation used for
  169          pi/2
  170          */
  171 
  172       /* This has an exponent of -65 */
  173       fix_up = 0x898cc517;
  174       /* The fix-up needs to be improved for larger args */
  175       if ( argSqrd.msw & 0xffc00000 )
  176         {
  177           /* Get about 32 bit precision in these: */
  178           fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6;
  179         }
  180       fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg));
  181 
  182       adj = accumulator.lsw;    /* temp save */
  183       accumulator.lsw -= fix_up;
  184       if ( accumulator.lsw > adj )
  185         XSIG_LL(accumulator) --;
  186 
  187       echange = round_Xsig(&accumulator);
  188 
  189       setexponentpos(&result, echange - 1);
  190     }
  191 
  192   significand(&result) = XSIG_LL(accumulator);
  193   setsign(&result, getsign(st0_ptr));
  194   FPU_copy_to_reg0(&result, TAG_Valid);
  195 
  196 #ifdef PARANOID
  197   if ( (exponent(&result) >= 0)
  198       && (significand(&result) > 0x8000000000000000LL) )
  199     {
  200       EXCEPTION(EX_INTERNAL|0x150);
  201     }
  202 #endif /* PARANOID */
  203 
  204 }
  205 
  206 
  207 
  208 /*--- poly_cos() ------------------------------------------------------------+
  209  |                                                                           |
  210  +---------------------------------------------------------------------------*/
  211 void    poly_cos(FPU_REG *st0_ptr)
  212 {
  213   FPU_REG             result;
  214   long int            exponent, exp2, echange;
  215   Xsig                accumulator, argSqrd, fix_up, argTo4;
  216   unsigned long long  fixed_arg;
  217 
  218 #ifdef PARANOID
  219   if ( (exponent(st0_ptr) > 0)
  220       || ((exponent(st0_ptr) == 0)
  221           && (significand(st0_ptr) > 0xc90fdaa22168c234LL)) )
  222     {
  223       EXCEPTION(EX_Invalid);
  224       FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
  225       return;
  226     }
  227 #endif /* PARANOID */
  228 
  229   exponent = exponent(st0_ptr);
  230 
  231   accumulator.lsw = accumulator.midw = accumulator.msw = 0;
  232 
  233   if ( (exponent < -1) || ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54)) )
  234     {
  235       /* arg is < 0.687705 */
  236 
  237       argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl;
  238       argSqrd.lsw = 0;
  239       mul64_Xsig(&argSqrd, &significand(st0_ptr));
  240 
  241       if ( exponent < -1 )
  242         {
  243           /* shift the argument right by the required places */
  244           shr_Xsig(&argSqrd, 2*(-1-exponent));
  245         }
  246 
  247       argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
  248       argTo4.lsw = argSqrd.lsw;
  249       mul_Xsig_Xsig(&argTo4, &argTo4);
  250 
  251       polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
  252                       N_COEFF_NH-1);
  253       mul_Xsig_Xsig(&accumulator, &argSqrd);
  254       negate_Xsig(&accumulator);
  255 
  256       polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
  257                       N_COEFF_PH-1);
  258       negate_Xsig(&accumulator);
  259 
  260       mul64_Xsig(&accumulator, &significand(st0_ptr));
  261       mul64_Xsig(&accumulator, &significand(st0_ptr));
  262       shr_Xsig(&accumulator, -2*(1+exponent));
  263 
  264       shr_Xsig(&accumulator, 3);
  265       negate_Xsig(&accumulator);
  266 
  267       add_Xsig_Xsig(&accumulator, &argSqrd);
  268 
  269       shr_Xsig(&accumulator, 1);
  270 
  271       /* It doesn't matter if accumulator is all zero here, the
  272          following code will work ok */
  273       negate_Xsig(&accumulator);
  274 
  275       if ( accumulator.lsw & 0x80000000 )
  276         XSIG_LL(accumulator) ++;
  277       if ( accumulator.msw == 0 )
  278         {
  279           /* The result is 1.0 */
  280           FPU_copy_to_reg0(&CONST_1, TAG_Valid);
  281           return;
  282         }
  283       else
  284         {
  285           significand(&result) = XSIG_LL(accumulator);
  286       
  287           /* will be a valid positive nr with expon = -1 */
  288           setexponentpos(&result, -1);
  289         }
  290     }
  291   else
  292     {
  293       fixed_arg = significand(st0_ptr);
  294 
  295       if ( exponent == 0 )
  296         {
  297           /* The argument is >= 1.0 */
  298 
  299           /* Put the binary point at the left. */
  300           fixed_arg <<= 1;
  301         }
  302       /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
  303       fixed_arg = 0x921fb54442d18469LL - fixed_arg;
  304       /* There is a special case which arises due to rounding, to fix here. */
  305       if ( fixed_arg == 0xffffffffffffffffLL )
  306         fixed_arg = 0;
  307 
  308       exponent = -1;
  309       exp2 = -1;
  310 
  311       /* A shift is needed here only for a narrow range of arguments,
  312          i.e. for fixed_arg approx 2^-32, but we pick up more... */
  313       if ( !(LL_MSW(fixed_arg) & 0xffff0000) )
  314         {
  315           fixed_arg <<= 16;
  316           exponent -= 16;
  317           exp2 -= 16;
  318         }
  319 
  320       XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0;
  321       mul64_Xsig(&argSqrd, &fixed_arg);
  322 
  323       if ( exponent < -1 )
  324         {
  325           /* shift the argument right by the required places */
  326           shr_Xsig(&argSqrd, 2*(-1-exponent));
  327         }
  328 
  329       argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
  330       argTo4.lsw = argSqrd.lsw;
  331       mul_Xsig_Xsig(&argTo4, &argTo4);
  332 
  333       polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
  334                       N_COEFF_N-1);
  335       mul_Xsig_Xsig(&accumulator, &argSqrd);
  336       negate_Xsig(&accumulator);
  337 
  338       polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
  339                       N_COEFF_P-1);
  340 
  341       shr_Xsig(&accumulator, 2);    /* Divide by four */
  342       accumulator.msw |= 0x80000000;  /* Add 1.0 */
  343 
  344       mul64_Xsig(&accumulator, &fixed_arg);
  345       mul64_Xsig(&accumulator, &fixed_arg);
  346       mul64_Xsig(&accumulator, &fixed_arg);
  347 
  348       /* Divide by four, FPU_REG compatible, etc */
  349       exponent = 3*exponent;
  350 
  351       /* The minimum exponent difference is 3 */
  352       shr_Xsig(&accumulator, exp2 - exponent);
  353 
  354       negate_Xsig(&accumulator);
  355       XSIG_LL(accumulator) += fixed_arg;
  356 
  357       /* The basic computation is complete. Now fix the answer to
  358          compensate for the error due to the approximation used for
  359          pi/2
  360          */
  361 
  362       /* This has an exponent of -65 */
  363       XSIG_LL(fix_up) = 0x898cc51701b839a2ll;
  364       fix_up.lsw = 0;
  365 
  366       /* The fix-up needs to be improved for larger args */
  367       if ( argSqrd.msw & 0xffc00000 )
  368         {
  369           /* Get about 32 bit precision in these: */
  370           fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2;
  371           fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24;
  372         }
  373 
  374       exp2 += norm_Xsig(&accumulator);
  375       shr_Xsig(&accumulator, 1); /* Prevent overflow */
  376       exp2++;
  377       shr_Xsig(&fix_up, 65 + exp2);
  378 
  379       add_Xsig_Xsig(&accumulator, &fix_up);
  380 
  381       echange = round_Xsig(&accumulator);
  382 
  383       setexponentpos(&result, exp2 + echange);
  384       significand(&result) = XSIG_LL(accumulator);
  385     }
  386 
  387   FPU_copy_to_reg0(&result, TAG_Valid);
  388 
  389 #ifdef PARANOID
  390   if ( (exponent(&result) >= 0)
  391       && (significand(&result) > 0x8000000000000000LL) )
  392     {
  393       EXCEPTION(EX_INTERNAL|0x151);
  394     }
  395 #endif /* PARANOID */
  396 
  397 }

Cache object: bcb24bb77e3efefeaeabf4ccf6212492


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.