The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/crypto/skein/skein_block.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /***********************************************************************
    2 **
    3 ** Implementation of the Skein block functions.
    4 **
    5 ** Source code author: Doug Whiting, 2008.
    6 **
    7 ** This algorithm and source code is released to the public domain.
    8 **
    9 ** Compile-time switches:
   10 **
   11 **  SKEIN_USE_ASM  -- set bits (256/512/1024) to select which
   12 **                    versions use ASM code for block processing
   13 **                    [default: use C for all block sizes]
   14 **
   15 ************************************************************************/
   16 
   17 #include <sys/cdefs.h>
   18 __FBSDID("$FreeBSD$");
   19 
   20 #include <sys/endian.h>
   21 #include <sys/types.h>
   22 
   23 #ifdef _KERNEL
   24 #include <sys/systm.h>
   25 #else
   26 #include <string.h>
   27 #endif
   28 
   29 #include "skein.h"
   30 
   31 #ifndef SKEIN_USE_ASM
   32 #define SKEIN_USE_ASM   (0)                     /* default is all C code (no ASM) */
   33 #endif
   34 
      /*
       * SKEIN_LOOP is decoded one decimal digit per block size by the
       * SKEIN_UNROLL_* macros in this file: hundreds digit -> Skein_256,
       * tens digit -> Skein_512, ones digit -> Skein1024.  A digit of 0
       * selects the fully unrolled code path; a nonzero digit selects the
       * looping path with that many 8-round groups per loop iteration.
       * NOTE: a C integer constant with a leading 0 is octal.  001 is still 1,
       * but an override written as e.g. 010 would be read as 8, not ten.
       */
   35 #ifndef SKEIN_LOOP
   36 #define SKEIN_LOOP 001                          /* default: unroll 256 and 512, but not 1024 */
   37 #endif
   38 
      /*
       * The macros below expand to expressions over WCNT and kw[], which are
       * not defined at file scope: each Process_Block function declares its
       * own local WCNT enum constant and kw[] array before using them.
       * ts (tweak words) starts at kw[0]; ks (key schedule words) starts at kw[3].
       */
   39 #define BLK_BITS        (WCNT*64)               /* some useful definitions for code here */
   40 #define KW_TWK_BASE     (0)
   41 #define KW_KEY_BASE     (3)
   42 #define ks              (kw + KW_KEY_BASE)                
   43 #define ts              (kw + KW_TWK_BASE)
   44 
      /*
       * With SKEIN_DEBUG defined, DebugSaveTweak copies the working tweak words
       * ts[0], ts[1] back into ctx->h.T[]; otherwise it expands to nothing.
       */
   45 #ifdef SKEIN_DEBUG
   46 #define DebugSaveTweak(ctx) { ctx->h.T[0] = ts[0]; ctx->h.T[1] = ts[1]; }
   47 #else
   48 #define DebugSaveTweak(ctx)
   49 #endif
   50 
   51 /*****************************************************************/
   52 /* functions to process blkCnt (nonzero) full block(s) of data. */
      /*
       * Common parameters:
       *   ctx        - context whose chaining variables X[] and tweak h.T[] are updated
       *   blkPtr     - start of the input; blkCnt whole blocks are read from it
       *   blkCnt     - number of blocks to process (each function asserts it is nonzero)
       *   byteCntAdd - amount added to tweak word T[0] for every block processed
       */
   53 void    Skein_256_Process_Block(Skein_256_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd);
   54 void    Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd);
   55 void    Skein1024_Process_Block(Skein1024_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd);
   56 
   57 /*****************************  Skein_256 ******************************/
   58 #if !(SKEIN_USE_ASM & 256)
   59 void Skein_256_Process_Block(Skein_256_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd)
   60     { /* do it in C */
          /*
           * Process blkCnt consecutive input blocks of SKEIN_256_BLOCK_BYTES bytes
           * starting at blkPtr.
           *
           * For every block: byteCntAdd is added to tweak word ts[0], the key
           * schedule is built from ctx->X[] plus one parity word, the rounds are
           * run with a key injection after every four rounds, and the resulting
           * state XORed with the input words is stored back into ctx->X[].
           *
           *   ctx        - X[0..3] is read and rewritten once per block; h.T[0..1]
           *                is loaded before the loop and stored after it (and also
           *                per block when SKEIN_DEBUG is defined)
           *   blkPtr     - input data, advanced by one block per iteration
           *   blkCnt     - number of blocks; must be nonzero (do/while loop)
           *   byteCntAdd - added to ts[0] once per block
           */
   61     enum
   62         {
   63         WCNT = SKEIN_256_STATE_WORDS
   64         };
          /* RCNT = number of 8-round groups in one block computation */
   65 #undef  RCNT
   66 #define RCNT  (SKEIN_256_ROUNDS_TOTAL/8)
   67 
          /* Skein_256 uses the hundreds digit of SKEIN_LOOP; 0 means fully unrolled */
   68 #ifdef  SKEIN_LOOP                              /* configure how much to unroll the loop */
   69 #define SKEIN_UNROLL_256 (((SKEIN_LOOP)/100)%10)
   70 #else
   71 #define SKEIN_UNROLL_256 (0)
   72 #endif
   73 
   74 #if SKEIN_UNROLL_256
   75 #if (RCNT % SKEIN_UNROLL_256)
   76 #error "Invalid SKEIN_UNROLL_256"               /* sanity check on unroll count */
   77 #endif
   78     size_t  r;
          /* looping version: 2*RCNT extra words receive the entries copied forward by I256 */
   79     u64b_t  kw[WCNT+4+RCNT*2];                  /* key schedule words : chaining vars + tweak + "rotation"*/
   80 #else
   81     u64b_t  kw[WCNT+4];                         /* key schedule words : chaining vars + tweak */
   82 #endif
   83     u64b_t  X0,X1,X2,X3;                        /* local copy of context vars, for speed */
   84     u64b_t  w [WCNT];                           /* local copy of input block */
          /*
           * Xptr is declared only under SKEIN_DEBUG but is named in every
           * Skein_Show_R_Ptr() use below; presumably that macro (defined outside
           * this file) expands to nothing in non-debug builds -- TODO confirm.
           */
   85 #ifdef SKEIN_DEBUG
   86     const u64b_t *Xptr[4];                      /* use for debugging (help compiler put Xn in registers) */
   87     Xptr[0] = &X0;  Xptr[1] = &X1;  Xptr[2] = &X2;  Xptr[3] = &X3;
   88 #endif
   89     Skein_assert(blkCnt != 0);                  /* never call with blkCnt == 0! */
          /* work on a local copy of the tweak; it is written back after the loop */
   90     ts[0] = ctx->h.T[0];
   91     ts[1] = ctx->h.T[1];
   92     do  {
   93         /* this implementation only supports 2**64 input bytes (no carry out here) */
   94         ts[0] += byteCntAdd;                    /* update processed length */
   95 
   96         /* precompute the key schedule for this block */
   97         ks[0] = ctx->X[0];     
   98         ks[1] = ctx->X[1];
   99         ks[2] = ctx->X[2];
  100         ks[3] = ctx->X[3];
              /* extra key word: XOR of the four chaining words and SKEIN_KS_PARITY */
  101         ks[4] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ SKEIN_KS_PARITY;
  102 
              /* third tweak word is the XOR of the first two */
  103         ts[2] = ts[0] ^ ts[1];
  104 
  105         Skein_Get64_LSB_First(w,blkPtr,WCNT);   /* get input block in little-endian format */
  106         DebugSaveTweak(ctx);
  107         Skein_Show_Block(BLK_BITS,&ctx->h,ctx->X,blkPtr,w,ks,ts);
  108 
  109         X0 = w[0] + ks[0];                      /* do the first full key injection */
  110         X1 = w[1] + ks[1] + ts[0];
  111         X2 = w[2] + ks[2] + ts[1];
  112         X3 = w[3] + ks[3];
  113 
  114         Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INITIAL,Xptr);    /* show starting state values */
  115 
              /* input words are already in w[], so the pointer can move to the next block now */
  116         blkPtr += SKEIN_256_BLOCK_BYTES;
  117 
  118         /* run the rounds */
  119 
              /*
               * Round256: one round = two add / rotate-left / xor steps, on the word
               * pairs (p0,p1) and (p2,p3), using rotation constants ROT_0 and ROT_1.
               * rNum is unused here; the R256 wrappers pass it to Skein_Show_R_Ptr.
               */
  120 #define Round256(p0,p1,p2,p3,ROT,rNum)                              \
  121     X##p0 += X##p1; X##p1 = RotL_64(X##p1,ROT##_0); X##p1 ^= X##p0; \
  122     X##p2 += X##p3; X##p3 = RotL_64(X##p3,ROT##_1); X##p3 ^= X##p2; \
  123 
  124 #if SKEIN_UNROLL_256 == 0                       
  125 #define R256(p0,p1,p2,p3,ROT,rNum)           /* fully unrolled */   \
  126     Round256(p0,p1,p2,p3,ROT,rNum)                                  \
  127     Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rNum,Xptr);
  128 
              /* unrolled key injection: R is a constant, so ks/ts are indexed modulo 5 and 3 */
  129 #define I256(R)                                                     \
  130     X0   += ks[((R)+1) % 5];    /* inject the key schedule value */ \
  131     X1   += ks[((R)+2) % 5] + ts[((R)+1) % 3];                      \
  132     X2   += ks[((R)+3) % 5] + ts[((R)+2) % 3];                      \
  133     X3   += ks[((R)+4) % 5] +     (R)+1;                            \
  134     Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);
  135 #else                                       /* looping version */
  136 #define R256(p0,p1,p2,p3,ROT,rNum)                                  \
  137     Round256(p0,p1,p2,p3,ROT,rNum)                                  \
  138     Skein_Show_R_Ptr(BLK_BITS,&ctx->h,4*(r-1)+rNum,Xptr);
  139 
              /*
               * Looping key injection: instead of modulo indexing, ks[] and ts[] are
               * indexed linearly by r+(R) and the entry that just fell out of the
               * window is copied forward.  ts and ks share kw[] (ks == kw+3), so the
               * store to ts[r+(R)+2] lands on ks[r+(R)-1], the slot that the
               * preceding statement has just copied to ks[r+(R)+4].
               */
  140 #define I256(R)                                                     \
  141     X0   += ks[r+(R)+0];        /* inject the key schedule value */ \
  142     X1   += ks[r+(R)+1] + ts[r+(R)+0];                              \
  143     X2   += ks[r+(R)+2] + ts[r+(R)+1];                              \
  144     X3   += ks[r+(R)+3] +    r+(R)   ;                              \
  145     ks[r + (R)+4    ]   = ks[r+(R)-1];     /* rotate key schedule */\
  146     ts[r + (R)+2    ]   = ts[r+(R)-1];                              \
  147     Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);
  148 
  149     for (r=1;r < 2*RCNT;r+=2*SKEIN_UNROLL_256)  /* loop thru it */
  150 #endif  
  151         {    
              /* one group: 4 rounds, key injection 2*(R), 4 rounds, key injection 2*(R)+1 */
  152 #define R256_8_rounds(R)                  \
  153         R256(0,1,2,3,R_256_0,8*(R) + 1);  \
  154         R256(0,3,2,1,R_256_1,8*(R) + 2);  \
  155         R256(0,1,2,3,R_256_2,8*(R) + 3);  \
  156         R256(0,3,2,1,R_256_3,8*(R) + 4);  \
  157         I256(2*(R));                      \
  158         R256(0,1,2,3,R_256_4,8*(R) + 5);  \
  159         R256(0,3,2,1,R_256_5,8*(R) + 6);  \
  160         R256(0,1,2,3,R_256_6,8*(R) + 7);  \
  161         R256(0,3,2,1,R_256_7,8*(R) + 8);  \
  162         I256(2*(R)+1);
  163 
  164         R256_8_rounds( 0);
  165 
              /*
               * R256_Unroll_R(NN) is true when group NN must be emitted: for every
               * NN below SKEIN_256_ROUNDS_TOTAL/8 in fully unrolled mode, or for
               * every NN below SKEIN_UNROLL_256 inside the loop body otherwise.
               */
  166 #define R256_Unroll_R(NN) ((SKEIN_UNROLL_256 == 0 && SKEIN_256_ROUNDS_TOTAL/8 > (NN)) || (SKEIN_UNROLL_256 > (NN)))
  167 
  168   #if   R256_Unroll_R( 1)
  169         R256_8_rounds( 1);
  170   #endif
  171   #if   R256_Unroll_R( 2)
  172         R256_8_rounds( 2);
  173   #endif
  174   #if   R256_Unroll_R( 3)
  175         R256_8_rounds( 3);
  176   #endif
  177   #if   R256_Unroll_R( 4)
  178         R256_8_rounds( 4);
  179   #endif
  180   #if   R256_Unroll_R( 5)
  181         R256_8_rounds( 5);
  182   #endif
  183   #if   R256_Unroll_R( 6)
  184         R256_8_rounds( 6);
  185   #endif
  186   #if   R256_Unroll_R( 7)
  187         R256_8_rounds( 7);
  188   #endif
  189   #if   R256_Unroll_R( 8)
  190         R256_8_rounds( 8);
  191   #endif
  192   #if   R256_Unroll_R( 9)
  193         R256_8_rounds( 9);
  194   #endif
  195   #if   R256_Unroll_R(10)
  196         R256_8_rounds(10);
  197   #endif
  198   #if   R256_Unroll_R(11)
  199         R256_8_rounds(11);
  200   #endif
  201   #if   R256_Unroll_R(12)
  202         R256_8_rounds(12);
  203   #endif
  204   #if   R256_Unroll_R(13)
  205         R256_8_rounds(13);
  206   #endif
  207   #if   R256_Unroll_R(14)
  208         R256_8_rounds(14);
  209   #endif
  210   #if  (SKEIN_UNROLL_256 > 14)
  211 #error  "need more unrolling in Skein_256_Process_Block"
  212   #endif
  213         }
  214         /* do the final "feedforward" xor, update context chaining vars */
  215         ctx->X[0] = X0 ^ w[0];
  216         ctx->X[1] = X1 ^ w[1];
  217         ctx->X[2] = X2 ^ w[2];
  218         ctx->X[3] = X3 ^ w[3];
  219 
  220         Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_FEED_FWD,ctx->X);
  221 
              /* clear SKEIN_T1_FLAG_FIRST so it is not set for any block after this one */
  222         ts[1] &= ~SKEIN_T1_FLAG_FIRST;
  223         }
  224     while (--blkCnt);
          /* publish the updated tweak back to the context */
  225     ctx->h.T[0] = ts[0];
  226     ctx->h.T[1] = ts[1];
  227     }
  228 
  229 #if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
  230 size_t Skein_256_Process_Block_CodeSize(void)
  231     {
          /*
           * Returns the byte distance from the address of Skein_256_Process_Block
           * to the address of this function.
           * NOTE(review): this presumably approximates the code size of
           * Skein_256_Process_Block, which only holds if the toolchain places the
           * two functions back to back in definition order; C does not guarantee
           * that, nor the function-pointer-to-object-pointer casts -- confirm.
           */
  232     return ((u08b_t *) Skein_256_Process_Block_CodeSize) -
  233            ((u08b_t *) Skein_256_Process_Block);
  234     }
  235 uint_t Skein_256_Unroll_Cnt(void)
  236     {
          /* Reports the compile-time unroll setting: 0 = fully unrolled, otherwise 8-round groups per loop pass */
  237     return SKEIN_UNROLL_256;
  238     }
  239 #endif
  240 #endif
  241 
  242 /*****************************  Skein_512 ******************************/
  243 #if !(SKEIN_USE_ASM & 512)
  244 void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd)
  245     { /* do it in C */
          /*
           * Process blkCnt consecutive input blocks of SKEIN_512_BLOCK_BYTES bytes
           * starting at blkPtr.
           *
           * For every block: byteCntAdd is added to tweak word ts[0], the key
           * schedule is built from ctx->X[] plus one parity word, the rounds are
           * run with a key injection after every four rounds, and the resulting
           * state XORed with the input words is stored back into ctx->X[].
           *
           *   ctx        - X[0..7] is read and rewritten once per block; h.T[0..1]
           *                is loaded before the loop and stored after it (and also
           *                per block when SKEIN_DEBUG is defined)
           *   blkPtr     - input data, advanced by one block per iteration
           *   blkCnt     - number of blocks; must be nonzero (do/while loop)
           *   byteCntAdd - added to ts[0] once per block
           */
  246     enum
  247         {
  248         WCNT = SKEIN_512_STATE_WORDS
  249         };
          /* RCNT = number of 8-round groups in one block computation */
  250 #undef  RCNT
  251 #define RCNT  (SKEIN_512_ROUNDS_TOTAL/8)
  252 
          /* Skein_512 uses the tens digit of SKEIN_LOOP; 0 means fully unrolled */
  253 #ifdef  SKEIN_LOOP                              /* configure how much to unroll the loop */
  254 #define SKEIN_UNROLL_512 (((SKEIN_LOOP)/10)%10)
  255 #else
  256 #define SKEIN_UNROLL_512 (0)
  257 #endif
  258 
  259 #if SKEIN_UNROLL_512
  260 #if (RCNT % SKEIN_UNROLL_512)
  261 #error "Invalid SKEIN_UNROLL_512"               /* sanity check on unroll count */
  262 #endif
  263     size_t  r;
          /* looping version: 2*RCNT extra words receive the entries copied forward by I512 */
  264     u64b_t  kw[WCNT+4+RCNT*2];                  /* key schedule words : chaining vars + tweak + "rotation"*/
  265 #else
  266     u64b_t  kw[WCNT+4];                         /* key schedule words : chaining vars + tweak */
  267 #endif
  268     u64b_t  X0,X1,X2,X3,X4,X5,X6,X7;            /* local copy of vars, for speed */
  269     u64b_t  w [WCNT];                           /* local copy of input block */
          /*
           * Xptr is declared only under SKEIN_DEBUG but is named in every
           * Skein_Show_R_Ptr() use below; presumably that macro (defined outside
           * this file) expands to nothing in non-debug builds -- TODO confirm.
           */
  270 #ifdef SKEIN_DEBUG
  271     const u64b_t *Xptr[8];                      /* use for debugging (help compiler put Xn in registers) */
  272     Xptr[0] = &X0;  Xptr[1] = &X1;  Xptr[2] = &X2;  Xptr[3] = &X3;
  273     Xptr[4] = &X4;  Xptr[5] = &X5;  Xptr[6] = &X6;  Xptr[7] = &X7;
  274 #endif
  275 
  276     Skein_assert(blkCnt != 0);                  /* never call with blkCnt == 0! */
          /* work on a local copy of the tweak; it is written back after the loop */
  277     ts[0] = ctx->h.T[0];
  278     ts[1] = ctx->h.T[1];
  279     do  {
  280         /* this implementation only supports 2**64 input bytes (no carry out here) */
  281         ts[0] += byteCntAdd;                    /* update processed length */
  282 
  283         /* precompute the key schedule for this block */
  284         ks[0] = ctx->X[0];
  285         ks[1] = ctx->X[1];
  286         ks[2] = ctx->X[2];
  287         ks[3] = ctx->X[3];
  288         ks[4] = ctx->X[4];
  289         ks[5] = ctx->X[5];
  290         ks[6] = ctx->X[6];
  291         ks[7] = ctx->X[7];
              /* extra key word: XOR of the eight chaining words and SKEIN_KS_PARITY */
  292         ks[8] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ 
  293                 ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^ SKEIN_KS_PARITY;
  294 
              /* third tweak word is the XOR of the first two */
  295         ts[2] = ts[0] ^ ts[1];
  296 
  297         Skein_Get64_LSB_First(w,blkPtr,WCNT); /* get input block in little-endian format */
  298         DebugSaveTweak(ctx);
  299         Skein_Show_Block(BLK_BITS,&ctx->h,ctx->X,blkPtr,w,ks,ts);
  300 
  301         X0   = w[0] + ks[0];                    /* do the first full key injection */
  302         X1   = w[1] + ks[1];
  303         X2   = w[2] + ks[2];
  304         X3   = w[3] + ks[3];
  305         X4   = w[4] + ks[4];
  306         X5   = w[5] + ks[5] + ts[0];
  307         X6   = w[6] + ks[6] + ts[1];
  308         X7   = w[7] + ks[7];
  309 
              /* input words are already in w[], so the pointer can move to the next block now */
  310         blkPtr += SKEIN_512_BLOCK_BYTES;
  311 
  312         Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INITIAL,Xptr);
  313         /* run the rounds */
              /*
               * Round512: one round = four add / rotate-left / xor steps, on the word
               * pairs (p0,p1) (p2,p3) (p4,p5) (p6,p7), using rotation constants
               * ROT_0..ROT_3.  rNum is unused here; the R512 wrappers pass it to
               * Skein_Show_R_Ptr.
               */
  314 #define Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum)                  \
  315     X##p0 += X##p1; X##p1 = RotL_64(X##p1,ROT##_0); X##p1 ^= X##p0; \
  316     X##p2 += X##p3; X##p3 = RotL_64(X##p3,ROT##_1); X##p3 ^= X##p2; \
  317     X##p4 += X##p5; X##p5 = RotL_64(X##p5,ROT##_2); X##p5 ^= X##p4; \
  318     X##p6 += X##p7; X##p7 = RotL_64(X##p7,ROT##_3); X##p7 ^= X##p6; \
  319 
  320 #if SKEIN_UNROLL_512 == 0                       
  321 #define R512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum)      /* unrolled */  \
  322     Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum)                      \
  323     Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rNum,Xptr);
  324 
              /* unrolled key injection: R is a constant, so ks/ts are indexed modulo 9 and 3 */
  325 #define I512(R)                                                     \
  326     X0   += ks[((R)+1) % 9];   /* inject the key schedule value */  \
  327     X1   += ks[((R)+2) % 9];                                        \
  328     X2   += ks[((R)+3) % 9];                                        \
  329     X3   += ks[((R)+4) % 9];                                        \
  330     X4   += ks[((R)+5) % 9];                                        \
  331     X5   += ks[((R)+6) % 9] + ts[((R)+1) % 3];                      \
  332     X6   += ks[((R)+7) % 9] + ts[((R)+2) % 3];                      \
  333     X7   += ks[((R)+8) % 9] +     (R)+1;                            \
  334     Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);
  335 #else                                       /* looping version */
  336 #define R512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum)                      \
  337     Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum)                      \
  338     Skein_Show_R_Ptr(BLK_BITS,&ctx->h,4*(r-1)+rNum,Xptr);
  339 
              /*
               * Looping key injection: instead of modulo indexing, ks[] and ts[] are
               * indexed linearly by r+(R) and the entry that just fell out of the
               * window is copied forward.  ts and ks share kw[] (ks == kw+3), so the
               * store to ts[r+(R)+2] lands on ks[r+(R)-1], the slot that the
               * preceding statement has just copied to ks[r+(R)+8].
               */
  340 #define I512(R)                                                     \
  341     X0   += ks[r+(R)+0];        /* inject the key schedule value */ \
  342     X1   += ks[r+(R)+1];                                            \
  343     X2   += ks[r+(R)+2];                                            \
  344     X3   += ks[r+(R)+3];                                            \
  345     X4   += ks[r+(R)+4];                                            \
  346     X5   += ks[r+(R)+5] + ts[r+(R)+0];                              \
  347     X6   += ks[r+(R)+6] + ts[r+(R)+1];                              \
  348     X7   += ks[r+(R)+7] +    r+(R)   ;                              \
  349     ks[r +       (R)+8] = ks[r+(R)-1];  /* rotate key schedule */   \
  350     ts[r +       (R)+2] = ts[r+(R)-1];                              \
  351     Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);
  352 
  353     for (r=1;r < 2*RCNT;r+=2*SKEIN_UNROLL_512)   /* loop thru it */
  354 #endif                         /* end of looped code definitions */
  355         {
              /* one group: 4 rounds, key injection 2*(R), 4 rounds, key injection 2*(R)+1 */
  356 #define R512_8_rounds(R)  /* do 8 full rounds */  \
  357         R512(0,1,2,3,4,5,6,7,R_512_0,8*(R)+ 1);   \
  358         R512(2,1,4,7,6,5,0,3,R_512_1,8*(R)+ 2);   \
  359         R512(4,1,6,3,0,5,2,7,R_512_2,8*(R)+ 3);   \
  360         R512(6,1,0,7,2,5,4,3,R_512_3,8*(R)+ 4);   \
  361         I512(2*(R));                              \
  362         R512(0,1,2,3,4,5,6,7,R_512_4,8*(R)+ 5);   \
  363         R512(2,1,4,7,6,5,0,3,R_512_5,8*(R)+ 6);   \
  364         R512(4,1,6,3,0,5,2,7,R_512_6,8*(R)+ 7);   \
  365         R512(6,1,0,7,2,5,4,3,R_512_7,8*(R)+ 8);   \
  366         I512(2*(R)+1);        /* and key injection */
  367 
  368         R512_8_rounds( 0);
  369 
              /*
               * R512_Unroll_R(NN) is true when group NN must be emitted: for every
               * NN below SKEIN_512_ROUNDS_TOTAL/8 in fully unrolled mode, or for
               * every NN below SKEIN_UNROLL_512 inside the loop body otherwise.
               */
  370 #define R512_Unroll_R(NN) ((SKEIN_UNROLL_512 == 0 && SKEIN_512_ROUNDS_TOTAL/8 > (NN)) || (SKEIN_UNROLL_512 > (NN)))
  371 
  372   #if   R512_Unroll_R( 1)
  373         R512_8_rounds( 1);
  374   #endif
  375   #if   R512_Unroll_R( 2)
  376         R512_8_rounds( 2);
  377   #endif
  378   #if   R512_Unroll_R( 3)
  379         R512_8_rounds( 3);
  380   #endif
  381   #if   R512_Unroll_R( 4)
  382         R512_8_rounds( 4);
  383   #endif
  384   #if   R512_Unroll_R( 5)
  385         R512_8_rounds( 5);
  386   #endif
  387   #if   R512_Unroll_R( 6)
  388         R512_8_rounds( 6);
  389   #endif
  390   #if   R512_Unroll_R( 7)
  391         R512_8_rounds( 7);
  392   #endif
  393   #if   R512_Unroll_R( 8)
  394         R512_8_rounds( 8);
  395   #endif
  396   #if   R512_Unroll_R( 9)
  397         R512_8_rounds( 9);
  398   #endif
  399   #if   R512_Unroll_R(10)
  400         R512_8_rounds(10);
  401   #endif
  402   #if   R512_Unroll_R(11)
  403         R512_8_rounds(11);
  404   #endif
  405   #if   R512_Unroll_R(12)
  406         R512_8_rounds(12);
  407   #endif
  408   #if   R512_Unroll_R(13)
  409         R512_8_rounds(13);
  410   #endif
  411   #if   R512_Unroll_R(14)
  412         R512_8_rounds(14);
  413   #endif
  414   #if  (SKEIN_UNROLL_512 > 14)
  415 #error  "need more unrolling in Skein_512_Process_Block"
  416   #endif
  417         }
  418 
  419         /* do the final "feedforward" xor, update context chaining vars */
  420         ctx->X[0] = X0 ^ w[0];
  421         ctx->X[1] = X1 ^ w[1];
  422         ctx->X[2] = X2 ^ w[2];
  423         ctx->X[3] = X3 ^ w[3];
  424         ctx->X[4] = X4 ^ w[4];
  425         ctx->X[5] = X5 ^ w[5];
  426         ctx->X[6] = X6 ^ w[6];
  427         ctx->X[7] = X7 ^ w[7];
  428         Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_FEED_FWD,ctx->X);
  429 
              /* clear SKEIN_T1_FLAG_FIRST so it is not set for any block after this one */
  430         ts[1] &= ~SKEIN_T1_FLAG_FIRST;
  431         }
  432     while (--blkCnt);
          /* publish the updated tweak back to the context */
  433     ctx->h.T[0] = ts[0];
  434     ctx->h.T[1] = ts[1];
  435     }
  436 
  437 #if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
  438 size_t Skein_512_Process_Block_CodeSize(void)
  439     {
          /*
           * Returns the byte distance from the address of Skein_512_Process_Block
           * to the address of this function.
           * NOTE(review): this presumably approximates the code size of
           * Skein_512_Process_Block, which only holds if the toolchain places the
           * two functions back to back in definition order; C does not guarantee
           * that, nor the function-pointer-to-object-pointer casts -- confirm.
           */
  440     return ((u08b_t *) Skein_512_Process_Block_CodeSize) -
  441            ((u08b_t *) Skein_512_Process_Block);
  442     }
  443 uint_t Skein_512_Unroll_Cnt(void)
  444     {
          /* Reports the compile-time unroll setting: 0 = fully unrolled, otherwise 8-round groups per loop pass */
  445     return SKEIN_UNROLL_512;
  446     }
  447 #endif
  448 #endif
  449 
  450 /*****************************  Skein1024 ******************************/
  451 #if !(SKEIN_USE_ASM & 1024)
  452 void Skein1024_Process_Block(Skein1024_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd)
  453     { /* do it in C, always looping (unrolled is bigger AND slower!) */
          /*
           * Process blkCnt consecutive input blocks of SKEIN1024_BLOCK_BYTES bytes
           * starting at blkPtr.
           *
           * For every block: byteCntAdd is added to tweak word ts[0], the key
           * schedule is built from ctx->X[] plus one parity word, the rounds are
           * run with a key injection after every four rounds, and the resulting
           * state XORed with the input words is stored back into ctx->X[].
           *
           *   ctx        - X[0..15] is read and rewritten once per block; h.T[0..1]
           *                is loaded before the loop and stored after it (and also
           *                per block when SKEIN_DEBUG is defined)
           *   blkPtr     - input data, advanced by one block per iteration
           *   blkCnt     - number of blocks; must be nonzero (do/while loop)
           *   byteCntAdd - added to ts[0] once per block
           *
           * Looping is what the file's default SKEIN_LOOP (ones digit 1) selects;
           * a ones digit of 0 still compiles the fully unrolled path below.
           */
  454     enum
  455         {
  456         WCNT = SKEIN1024_STATE_WORDS
  457         };
          /* RCNT = number of 8-round groups in one block computation */
  458 #undef  RCNT
  459 #define RCNT  (SKEIN1024_ROUNDS_TOTAL/8)
  460 
          /* Skein1024 uses the ones digit of SKEIN_LOOP; 0 means fully unrolled */
  461 #ifdef  SKEIN_LOOP                              /* configure how much to unroll the loop */
  462 #define SKEIN_UNROLL_1024 ((SKEIN_LOOP)%10)
  463 #else
  464 #define SKEIN_UNROLL_1024 (0)
  465 #endif
  466 
  467 #if (SKEIN_UNROLL_1024 != 0)
  468 #if (RCNT % SKEIN_UNROLL_1024)
  469 #error "Invalid SKEIN_UNROLL_1024"              /* sanity check on unroll count */
  470 #endif
  471     size_t  r;
          /* looping version: 2*RCNT extra words receive the entries copied forward by I1024 */
  472     u64b_t  kw[WCNT+4+RCNT*2];                  /* key schedule words : chaining vars + tweak + "rotation"*/
  473 #else
  474     u64b_t  kw[WCNT+4];                         /* key schedule words : chaining vars + tweak */
  475 #endif
  476 
  477     u64b_t  X00,X01,X02,X03,X04,X05,X06,X07,    /* local copy of vars, for speed */
  478             X08,X09,X10,X11,X12,X13,X14,X15;
  479     u64b_t  w [WCNT];                           /* local copy of input block */
          /*
           * Xptr is declared only under SKEIN_DEBUG but is named in every
           * Skein_Show_R_Ptr() use below; presumably that macro (defined outside
           * this file) expands to nothing in non-debug builds -- TODO confirm.
           */
  480 #ifdef SKEIN_DEBUG
  481     const u64b_t *Xptr[16];                     /* use for debugging (help compiler put Xn in registers) */
  482     Xptr[ 0] = &X00;  Xptr[ 1] = &X01;  Xptr[ 2] = &X02;  Xptr[ 3] = &X03;
  483     Xptr[ 4] = &X04;  Xptr[ 5] = &X05;  Xptr[ 6] = &X06;  Xptr[ 7] = &X07;
  484     Xptr[ 8] = &X08;  Xptr[ 9] = &X09;  Xptr[10] = &X10;  Xptr[11] = &X11;
  485     Xptr[12] = &X12;  Xptr[13] = &X13;  Xptr[14] = &X14;  Xptr[15] = &X15;
  486 #endif
  487 
  488     Skein_assert(blkCnt != 0);                  /* never call with blkCnt == 0! */
          /* work on a local copy of the tweak; it is written back after the loop */
  489     ts[0] = ctx->h.T[0];
  490     ts[1] = ctx->h.T[1];
  491     do  {
  492         /* this implementation only supports 2**64 input bytes (no carry out here) */
  493         ts[0] += byteCntAdd;                    /* update processed length */
  494 
  495         /* precompute the key schedule for this block */
  496         ks[ 0] = ctx->X[ 0];
  497         ks[ 1] = ctx->X[ 1];
  498         ks[ 2] = ctx->X[ 2];
  499         ks[ 3] = ctx->X[ 3];
  500         ks[ 4] = ctx->X[ 4];
  501         ks[ 5] = ctx->X[ 5];
  502         ks[ 6] = ctx->X[ 6];
  503         ks[ 7] = ctx->X[ 7];
  504         ks[ 8] = ctx->X[ 8];
  505         ks[ 9] = ctx->X[ 9];
  506         ks[10] = ctx->X[10];
  507         ks[11] = ctx->X[11];
  508         ks[12] = ctx->X[12];
  509         ks[13] = ctx->X[13];
  510         ks[14] = ctx->X[14];
  511         ks[15] = ctx->X[15];
              /* extra key word: XOR of the sixteen chaining words and SKEIN_KS_PARITY */
  512         ks[16] = ks[ 0] ^ ks[ 1] ^ ks[ 2] ^ ks[ 3] ^
  513                  ks[ 4] ^ ks[ 5] ^ ks[ 6] ^ ks[ 7] ^
  514                  ks[ 8] ^ ks[ 9] ^ ks[10] ^ ks[11] ^
  515                  ks[12] ^ ks[13] ^ ks[14] ^ ks[15] ^ SKEIN_KS_PARITY;
  516 
              /* third tweak word is the XOR of the first two */
  517         ts[2]  = ts[0] ^ ts[1];
  518 
  519         Skein_Get64_LSB_First(w,blkPtr,WCNT); /* get input block in little-endian format */
  520         DebugSaveTweak(ctx);
  521         Skein_Show_Block(BLK_BITS,&ctx->h,ctx->X,blkPtr,w,ks,ts);
  522 
  523         X00    = w[ 0] + ks[ 0];                 /* do the first full key injection */
  524         X01    = w[ 1] + ks[ 1];
  525         X02    = w[ 2] + ks[ 2];
  526         X03    = w[ 3] + ks[ 3];
  527         X04    = w[ 4] + ks[ 4];
  528         X05    = w[ 5] + ks[ 5];
  529         X06    = w[ 6] + ks[ 6];
  530         X07    = w[ 7] + ks[ 7];
  531         X08    = w[ 8] + ks[ 8];
  532         X09    = w[ 9] + ks[ 9];
  533         X10    = w[10] + ks[10];
  534         X11    = w[11] + ks[11];
  535         X12    = w[12] + ks[12];
  536         X13    = w[13] + ks[13] + ts[0];
  537         X14    = w[14] + ks[14] + ts[1];
  538         X15    = w[15] + ks[15];
  539 
  540         Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INITIAL,Xptr);
  541 
              /*
               * Round1024: one round = eight add / rotate-left / xor steps, on the
               * word pairs (p0,p1) ... (pE,pF), using rotation constants
               * ROT_0..ROT_7.  rNum is unused here; the R1024 wrappers pass their
               * round number to Skein_Show_R_Ptr.
               */
  542 #define Round1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rNum) \
  543     X##p0 += X##p1; X##p1 = RotL_64(X##p1,ROT##_0); X##p1 ^= X##p0;   \
  544     X##p2 += X##p3; X##p3 = RotL_64(X##p3,ROT##_1); X##p3 ^= X##p2;   \
  545     X##p4 += X##p5; X##p5 = RotL_64(X##p5,ROT##_2); X##p5 ^= X##p4;   \
  546     X##p6 += X##p7; X##p7 = RotL_64(X##p7,ROT##_3); X##p7 ^= X##p6;   \
  547     X##p8 += X##p9; X##p9 = RotL_64(X##p9,ROT##_4); X##p9 ^= X##p8;   \
  548     X##pA += X##pB; X##pB = RotL_64(X##pB,ROT##_5); X##pB ^= X##pA;   \
  549     X##pC += X##pD; X##pD = RotL_64(X##pD,ROT##_6); X##pD ^= X##pC;   \
  550     X##pE += X##pF; X##pF = RotL_64(X##pF,ROT##_7); X##pF ^= X##pE;   \
  551 
  552 #if SKEIN_UNROLL_1024 == 0                      
  553 #define R1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \
  554     Round1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \
  555     Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rn,Xptr);
  556 
              /* unrolled key injection: R is a constant, so ks/ts are indexed modulo 17 and 3 */
  557 #define I1024(R)                                                      \
  558     X00   += ks[((R)+ 1) % 17]; /* inject the key schedule value */   \
  559     X01   += ks[((R)+ 2) % 17];                                       \
  560     X02   += ks[((R)+ 3) % 17];                                       \
  561     X03   += ks[((R)+ 4) % 17];                                       \
  562     X04   += ks[((R)+ 5) % 17];                                       \
  563     X05   += ks[((R)+ 6) % 17];                                       \
  564     X06   += ks[((R)+ 7) % 17];                                       \
  565     X07   += ks[((R)+ 8) % 17];                                       \
  566     X08   += ks[((R)+ 9) % 17];                                       \
  567     X09   += ks[((R)+10) % 17];                                       \
  568     X10   += ks[((R)+11) % 17];                                       \
  569     X11   += ks[((R)+12) % 17];                                       \
  570     X12   += ks[((R)+13) % 17];                                       \
  571     X13   += ks[((R)+14) % 17] + ts[((R)+1) % 3];                     \
  572     X14   += ks[((R)+15) % 17] + ts[((R)+2) % 3];                     \
  573     X15   += ks[((R)+16) % 17] +     (R)+1;                           \
  574     Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr); 
  575 #else                                       /* looping version */
  576 #define R1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \
  577     Round1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \
  578     Skein_Show_R_Ptr(BLK_BITS,&ctx->h,4*(r-1)+rn,Xptr);
  579 
              /*
               * Looping key injection: instead of modulo indexing, ks[] and ts[] are
               * indexed linearly by r+(R) and the entry that just fell out of the
               * window is copied forward.  ts and ks share kw[] (ks == kw+3), so the
               * store to ts[r+(R)+2] lands on ks[r+(R)-1], the slot that the
               * preceding statement has just copied to ks[r+(R)+16].
               */
  580 #define I1024(R)                                                      \
  581     X00   += ks[r+(R)+ 0];    /* inject the key schedule value */     \
  582     X01   += ks[r+(R)+ 1];                                            \
  583     X02   += ks[r+(R)+ 2];                                            \
  584     X03   += ks[r+(R)+ 3];                                            \
  585     X04   += ks[r+(R)+ 4];                                            \
  586     X05   += ks[r+(R)+ 5];                                            \
  587     X06   += ks[r+(R)+ 6];                                            \
  588     X07   += ks[r+(R)+ 7];                                            \
  589     X08   += ks[r+(R)+ 8];                                            \
  590     X09   += ks[r+(R)+ 9];                                            \
  591     X10   += ks[r+(R)+10];                                            \
  592     X11   += ks[r+(R)+11];                                            \
  593     X12   += ks[r+(R)+12];                                            \
  594     X13   += ks[r+(R)+13] + ts[r+(R)+0];                              \
  595     X14   += ks[r+(R)+14] + ts[r+(R)+1];                              \
  596     X15   += ks[r+(R)+15] +    r+(R)   ;                              \
  597     ks[r  +       (R)+16] = ks[r+(R)-1];  /* rotate key schedule */   \
  598     ts[r  +       (R)+ 2] = ts[r+(R)-1];                              \
  599     Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);
  600 
          /*
           * r starts at 1 and advances by an even step, so it is always odd, while
           * 2*RCNT is even: "r <= 2*RCNT" therefore stops at the same point as
           * "r < 2*RCNT" would.
           */
  601     for (r=1;r <= 2*RCNT;r+=2*SKEIN_UNROLL_1024)    /* loop thru it */
  602 #endif  
  603         {
              /* one group: 4 rounds, key injection 2*(R), 4 rounds, key injection 2*(R)+1 */
  604 #define R1024_8_rounds(R)    /* do 8 full rounds */                               \
  605         R1024(00,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,R1024_0,8*(R) + 1); \
  606         R1024(00,09,02,13,06,11,04,15,10,07,12,03,14,05,08,01,R1024_1,8*(R) + 2); \
  607         R1024(00,07,02,05,04,03,06,01,12,15,14,13,08,11,10,09,R1024_2,8*(R) + 3); \
  608         R1024(00,15,02,11,06,13,04,09,14,01,08,05,10,03,12,07,R1024_3,8*(R) + 4); \
  609         I1024(2*(R));                                                             \
  610         R1024(00,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,R1024_4,8*(R) + 5); \
  611         R1024(00,09,02,13,06,11,04,15,10,07,12,03,14,05,08,01,R1024_5,8*(R) + 6); \
  612         R1024(00,07,02,05,04,03,06,01,12,15,14,13,08,11,10,09,R1024_6,8*(R) + 7); \
  613         R1024(00,15,02,11,06,13,04,09,14,01,08,05,10,03,12,07,R1024_7,8*(R) + 8); \
  614         I1024(2*(R)+1);
  615 
  616         R1024_8_rounds( 0);
  617 
              /*
               * R1024_Unroll_R(NN) is true when group NN must be emitted: for every
               * NN below SKEIN1024_ROUNDS_TOTAL/8 in fully unrolled mode, or for
               * every NN below SKEIN_UNROLL_1024 inside the loop body otherwise.
               */
  618 #define R1024_Unroll_R(NN) ((SKEIN_UNROLL_1024 == 0 && SKEIN1024_ROUNDS_TOTAL/8 > (NN)) || (SKEIN_UNROLL_1024 > (NN)))
  619 
  620   #if   R1024_Unroll_R( 1)
  621         R1024_8_rounds( 1);
  622   #endif
  623   #if   R1024_Unroll_R( 2)
  624         R1024_8_rounds( 2);
  625   #endif
  626   #if   R1024_Unroll_R( 3)
  627         R1024_8_rounds( 3);
  628   #endif
  629   #if   R1024_Unroll_R( 4)
  630         R1024_8_rounds( 4);
  631   #endif
  632   #if   R1024_Unroll_R( 5)
  633         R1024_8_rounds( 5);
  634   #endif
  635   #if   R1024_Unroll_R( 6)
  636         R1024_8_rounds( 6);
  637   #endif
  638   #if   R1024_Unroll_R( 7)
  639         R1024_8_rounds( 7);
  640   #endif
  641   #if   R1024_Unroll_R( 8)
  642         R1024_8_rounds( 8);
  643   #endif
  644   #if   R1024_Unroll_R( 9)
  645         R1024_8_rounds( 9);
  646   #endif
  647   #if   R1024_Unroll_R(10)
  648         R1024_8_rounds(10);
  649   #endif
  650   #if   R1024_Unroll_R(11)
  651         R1024_8_rounds(11);
  652   #endif
  653   #if   R1024_Unroll_R(12)
  654         R1024_8_rounds(12);
  655   #endif
  656   #if   R1024_Unroll_R(13)
  657         R1024_8_rounds(13);
  658   #endif
  659   #if   R1024_Unroll_R(14)
  660         R1024_8_rounds(14);
  661   #endif
              /* NOTE(review): the message below spells the name with an extra underscore; the function is Skein1024_Process_Block */
  662   #if  (SKEIN_UNROLL_1024 > 14)
  663 #error  "need more unrolling in Skein_1024_Process_Block"
  664   #endif
  665         }
  666         /* do the final "feedforward" xor, update context chaining vars */
  667 
  668         ctx->X[ 0] = X00 ^ w[ 0];
  669         ctx->X[ 1] = X01 ^ w[ 1];
  670         ctx->X[ 2] = X02 ^ w[ 2];
  671         ctx->X[ 3] = X03 ^ w[ 3];
  672         ctx->X[ 4] = X04 ^ w[ 4];
  673         ctx->X[ 5] = X05 ^ w[ 5];
  674         ctx->X[ 6] = X06 ^ w[ 6];
  675         ctx->X[ 7] = X07 ^ w[ 7];
  676         ctx->X[ 8] = X08 ^ w[ 8];
  677         ctx->X[ 9] = X09 ^ w[ 9];
  678         ctx->X[10] = X10 ^ w[10];
  679         ctx->X[11] = X11 ^ w[11];
  680         ctx->X[12] = X12 ^ w[12];
  681         ctx->X[13] = X13 ^ w[13];
  682         ctx->X[14] = X14 ^ w[14];
  683         ctx->X[15] = X15 ^ w[15];
  684 
  685         Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_FEED_FWD,ctx->X);
  686         
              /* clear SKEIN_T1_FLAG_FIRST so it is not set for any block after this one */
  687         ts[1] &= ~SKEIN_T1_FLAG_FIRST;
              /* advance to the next input block (done here, at the end of the iteration) */
  688         blkPtr += SKEIN1024_BLOCK_BYTES;
  689         }
  690     while (--blkCnt);
          /* publish the updated tweak back to the context */
  691     ctx->h.T[0] = ts[0];
  692     ctx->h.T[1] = ts[1];
  693     }
  694 
  695 #if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
  696 size_t Skein1024_Process_Block_CodeSize(void)
  697     {
          /*
           * Returns the byte distance from the address of Skein1024_Process_Block
           * to the address of this function.
           * NOTE(review): this presumably approximates the code size of
           * Skein1024_Process_Block, which only holds if the toolchain places the
           * two functions back to back in definition order; C does not guarantee
           * that, nor the function-pointer-to-object-pointer casts -- confirm.
           */
  698     return ((u08b_t *) Skein1024_Process_Block_CodeSize) -
  699            ((u08b_t *) Skein1024_Process_Block);
  700     }
  701 uint_t Skein1024_Unroll_Cnt(void)
  702     {
          /* Reports the compile-time unroll setting: 0 = fully unrolled, otherwise 8-round groups per loop pass */
  703     return SKEIN_UNROLL_1024;
  704     }
  705 #endif
  706 #endif

Cache object: 26127553ee2d25ffcd51afecd5fdc56b


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.