The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/openzfs/module/icp/algs/skein/skein_block.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Implementation of the Skein block functions.
    3  * Source code author: Doug Whiting, 2008.
    4  * This algorithm and source code is released to the public domain.
    5  * Compile-time switches:
    6  *  SKEIN_USE_ASM  -- set bits (256/512/1024) to select which
    7  *                    versions use ASM code for block processing
    8  *                    [default: use C for all block sizes]
    9  */
   10 /* Copyright 2013 Doug Whiting. This code is released to the public domain. */
   11 
   12 #include <sys/skein.h>
   13 #include "skein_impl.h"
   14 #include <sys/isa_defs.h>       /* for _ILP32 */
   15 
   16 #ifndef SKEIN_USE_ASM
   17 #define SKEIN_USE_ASM   (0)     /* default is all C code (no ASM) */
   18 #endif
   19 
   20 #ifndef SKEIN_LOOP
   21 /*
   22  * The low-level checksum routines use a lot of stack space. On systems where
   23  * small stack frames are enforced (like 32-bit kernel builds), do not unroll
   24  * checksum calculations to save stack space.
   25  *
   26  * Even with no loops unrolled, we still can exceed the 1k stack frame limit
   27  * in Skein1024_Process_Block() (it hits 1272 bytes on ARM32).  We can
   28  * safely ignore it though, since the checksum functions will be called
   29  * from a worker thread that won't be using much stack.  That's why we have
   30  * the #pragma here to ignore the warning.
   31  */
   32 #if defined(_ILP32) || defined(__powerpc)       /* Assume small stack */
   33 #if defined(__GNUC__) && !defined(__clang__)
   34 #pragma GCC diagnostic ignored "-Wframe-larger-than="
   35 #endif
   36 /*
   37  * We're running on 32-bit, don't unroll loops to save stack frame space
   38  *
   39  * Due to the ways the calculations on SKEIN_LOOP are done in
   40  * Skein_*_Process_Block(), a value of 111 disables unrolling loops
   41  * in any of those functions.
       *
       * SKEIN_LOOP is decoded one decimal digit per block size: the hundreds
       * digit configures Skein_256, the tens digit Skein_512 and the ones
       * digit Skein1024.  A digit of 0 selects the fully unrolled code; a
       * non-zero digit n selects the looping code with n groups of 8 rounds
       * per loop iteration.
   42  */
   43 #define SKEIN_LOOP 111
   44 #else
   45 /* We're compiling with large stacks */
      /* (the leading zeros make this an octal constant; its value is still 1) */
   46 #define SKEIN_LOOP 001          /* default: unroll 256 and 512, but not 1024 */
   47 #endif
   48 #endif
   49 
   50 /*
       * some useful definitions for code here
       *
       * These macros refer to names that only exist inside the
       * *_Process_Block() functions: the enum constant WCNT and the local
       * array kw[].  ts (tweak words) starts at kw[0] and ks (key schedule
       * words) starts at kw[3].
       *
       * In the looping builds the key-injection macros store ts[] entries
       * past index 2; those stores land on kw[] slots whose ks[] word has
       * just been copied further up the schedule and is not read again for
       * the current block, so the overlap is intentional.
       */
   51 #define BLK_BITS        (WCNT*64)
   52 #define KW_TWK_BASE     (0)
   53 #define KW_KEY_BASE     (3)
   54 #define ks              (kw + KW_KEY_BASE)
   55 #define ts              (kw + KW_TWK_BASE)
   56 
   57 /* no debugging in Illumos version */
      /* DebugSaveTweak() therefore expands to nothing */
   58 #define DebugSaveTweak(ctx)
   59 
   60 /* Skein_256 */
   61 #if     !(SKEIN_USE_ASM & 256)
   62 void
   63 Skein_256_Process_Block(Skein_256_Ctxt_t *ctx, const uint8_t *blkPtr,
   64     size_t blkCnt, size_t byteCntAdd)
   65 {
              /*
               * Run the Skein_256 block function over blkCnt consecutive
               * input blocks of SKEIN_256_BLOCK_BYTES bytes, starting at
               * blkPtr.
               *
               * ctx        - ctx->X[0..3] supplies the key for each block and
               *              is overwritten with that block's result;
               *              ctx->h.T[0..1] is read once on entry and written
               *              back once on exit.
               * blkPtr     - input bytes; advanced by one block per iteration.
               * blkCnt     - number of blocks; must be non-zero because the
               *              do/while below decrements before it tests.
               * byteCntAdd - added to tweak word T[0] before every block.
               */
   66         enum {
   67                 WCNT = SKEIN_256_STATE_WORDS
   68         };
              /* RCNT: number of 8-round groups in the full round count */
   69 #undef  RCNT
   70 #define RCNT  (SKEIN_256_ROUNDS_TOTAL / 8)
   71 
              /* the hundreds digit of SKEIN_LOOP selects the 256-bit unroll */
   72 #ifdef  SKEIN_LOOP              /* configure how much to unroll the loop */
   73 #define SKEIN_UNROLL_256 (((SKEIN_LOOP) / 100) % 10)
   74 #else
   75 #define SKEIN_UNROLL_256 (0)
   76 #endif
   77 
   78 #if     SKEIN_UNROLL_256
   79 #if     (RCNT % SKEIN_UNROLL_256)
   80 #error "Invalid SKEIN_UNROLL_256"       /* sanity check on unroll count */
   81 #endif
   82         size_t r;
   83         /* key schedule words : chaining vars + tweak + "rotation" */
              /* (2 * RCNT extra slots: the looping I256 appends one per call) */
   84         uint64_t kw[WCNT + 4 + RCNT * 2];
   85 #else
   86         uint64_t kw[WCNT + 4];  /* key schedule words : chaining vars + tweak */
   87 #endif
   88         /* local copy of context vars, for speed */
   89         uint64_t X0, X1, X2, X3;
   90         uint64_t w[WCNT];               /* local copy of input block */
   91 #ifdef  SKEIN_DEBUG
   92         /* use for debugging (help compiler put Xn in registers) */
   93         const uint64_t *Xptr[4];
   94         Xptr[0] = &X0;
   95         Xptr[1] = &X1;
   96         Xptr[2] = &X2;
   97         Xptr[3] = &X3;
   98 #endif
   99         Skein_assert(blkCnt != 0);      /* never call with blkCnt == 0! */
              /* work on a local copy of the tweak; stored back after the loop */
  100         ts[0] = ctx->h.T[0];
  101         ts[1] = ctx->h.T[1];
  102         do {
  103                 /*
  104                  * this implementation only supports 2**64 input bytes
  105                  * (no carry out here)
  106                  */
  107                 ts[0] += byteCntAdd;    /* update processed length */
  108 
  109                 /* precompute the key schedule for this block */
  110                 ks[0] = ctx->X[0];
  111                 ks[1] = ctx->X[1];
  112                 ks[2] = ctx->X[2];
  113                 ks[3] = ctx->X[3];
                      /* extra key word: xor of all key words and the parity const */
  114                 ks[4] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ SKEIN_KS_PARITY;
  115 
                      /* third tweak word is the xor of the first two */
  116                 ts[2] = ts[0] ^ ts[1];
  117 
  118                 /* get input block in little-endian format */
  119                 Skein_Get64_LSB_First(w, blkPtr, WCNT);
  120                 DebugSaveTweak(ctx);
  121                 Skein_Show_Block(BLK_BITS, &ctx->h, ctx->X, blkPtr, w, ks, ts);
  122 
  123                 X0 = w[0] + ks[0];      /* do the first full key injection */
  124                 X1 = w[1] + ks[1] + ts[0];
  125                 X2 = w[2] + ks[2] + ts[1];
  126                 X3 = w[3] + ks[3];
  127 
  128                 Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL,
  129                     Xptr);      /* show starting state values */
  130 
  131                 blkPtr += SKEIN_256_BLOCK_BYTES;
  132 
  133                 /* run the rounds */
  134 
                      /*
                       * Round256: one round, i.e. an add / rotate-left / xor
                       * step on the word pairs (p0,p1) and (p2,p3).  rNum is
                       * not used by Round256 itself; R256 passes it on to the
                       * Skein_Show_R_Ptr debug hook.
                       */
  135 #define Round256(p0, p1, p2, p3, ROT, rNum)                          \
  136         X##p0 += X##p1; X##p1 = RotL_64(X##p1, ROT##_0); X##p1 ^= X##p0; \
  137         X##p2 += X##p3; X##p3 = RotL_64(X##p3, ROT##_1); X##p3 ^= X##p2; \
  138 
                      /*
                       * I256(R) is the key injection.  The unrolled form
                       * indexes ks[] modulo 5 and ts[] modulo 3 with constant
                       * R; the looping form indexes linearly from the loop
                       * counter r and appends the next rotated ks[] and ts[]
                       * entries as it goes.
                       */
  139 #if     SKEIN_UNROLL_256 == 0
  140 #define R256(p0, p1, p2, p3, ROT, rNum)         /* fully unrolled */    \
  141         Round256(p0, p1, p2, p3, ROT, rNum)             \
  142         Skein_Show_R_Ptr(BLK_BITS, &ctx->h, rNum, Xptr);
  143 
  144 #define I256(R)                                                         \
  145         X0 += ks[((R) + 1) % 5]; /* inject the key schedule value */ \
  146         X1 += ks[((R) + 2) % 5] + ts[((R) + 1) % 3];                    \
  147         X2 += ks[((R) + 3) % 5] + ts[((R) + 2) % 3];                    \
  148         X3 += ks[((R) + 4) % 5] + (R) + 1;                      \
  149         Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
  150 #else                           /* looping version */
  151 #define R256(p0, p1, p2, p3, ROT, rNum)                             \
  152         Round256(p0, p1, p2, p3, ROT, rNum)                             \
  153         Skein_Show_R_Ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + rNum, Xptr);
  154 
  155 #define I256(R)                                                         \
  156         X0 += ks[r + (R) + 0];  /* inject the key schedule value */     \
  157         X1 += ks[r + (R) + 1] + ts[r + (R) + 0];                        \
  158         X2 += ks[r + (R) + 2] + ts[r + (R) + 1];                        \
  159         X3 += ks[r + (R) + 3] + r + (R);                                \
  160         ks[r + (R) + 4] = ks[r + (R) - 1];   /* rotate key schedule */  \
  161         ts[r + (R) + 2] = ts[r + (R) - 1];                      \
  162         Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
  163 
  164                 /* loop through it */
  165                 for (r = 1; r < 2 * RCNT; r += 2 * SKEIN_UNROLL_256)
  166 #endif
  167                 {
                      /* 8 rounds, with a key injection after each group of 4 */
  168 #define R256_8_rounds(R)                         \
  169         R256(0, 1, 2, 3, R_256_0, 8 * (R) + 1);  \
  170         R256(0, 3, 2, 1, R_256_1, 8 * (R) + 2);  \
  171         R256(0, 1, 2, 3, R_256_2, 8 * (R) + 3);  \
  172         R256(0, 3, 2, 1, R_256_3, 8 * (R) + 4);  \
  173         I256(2 * (R));                           \
  174         R256(0, 1, 2, 3, R_256_4, 8 * (R) + 5);  \
  175         R256(0, 3, 2, 1, R_256_5, 8 * (R) + 6);  \
  176         R256(0, 1, 2, 3, R_256_6, 8 * (R) + 7);  \
  177         R256(0, 3, 2, 1, R_256_7, 8 * (R) + 8);  \
  178         I256(2 * (R) + 1);
  179 
  180                         R256_8_rounds(0);
  181 
                      /*
                       * R256_Unroll_R(NN) is true when 8-round group NN must
                       * be emitted: for every group below the total when
                       * fully unrolled, otherwise only for groups below the
                       * configured per-iteration unroll count.
                       */
  182 #define R256_Unroll_R(NN) \
  183         ((SKEIN_UNROLL_256 == 0 && SKEIN_256_ROUNDS_TOTAL / 8 > (NN)) || \
  184         (SKEIN_UNROLL_256 > (NN)))
  185 
  186 #if     R256_Unroll_R(1)
  187                         R256_8_rounds(1);
  188 #endif
  189 #if     R256_Unroll_R(2)
  190                         R256_8_rounds(2);
  191 #endif
  192 #if     R256_Unroll_R(3)
  193                         R256_8_rounds(3);
  194 #endif
  195 #if     R256_Unroll_R(4)
  196                         R256_8_rounds(4);
  197 #endif
  198 #if     R256_Unroll_R(5)
  199                         R256_8_rounds(5);
  200 #endif
  201 #if     R256_Unroll_R(6)
  202                         R256_8_rounds(6);
  203 #endif
  204 #if     R256_Unroll_R(7)
  205                         R256_8_rounds(7);
  206 #endif
  207 #if     R256_Unroll_R(8)
  208                         R256_8_rounds(8);
  209 #endif
  210 #if     R256_Unroll_R(9)
  211                         R256_8_rounds(9);
  212 #endif
  213 #if     R256_Unroll_R(10)
  214                         R256_8_rounds(10);
  215 #endif
  216 #if     R256_Unroll_R(11)
  217                         R256_8_rounds(11);
  218 #endif
  219 #if     R256_Unroll_R(12)
  220                         R256_8_rounds(12);
  221 #endif
  222 #if     R256_Unroll_R(13)
  223                         R256_8_rounds(13);
  224 #endif
  225 #if     R256_Unroll_R(14)
  226                         R256_8_rounds(14);
  227 #endif
  228 #if     (SKEIN_UNROLL_256 > 14)
  229 #error  "need more unrolling in Skein_256_Process_Block"
  230 #endif
  231                 }
  232                 /*
  233                  * do the final "feedforward" xor, update context chaining vars
  234                  */
  235                 ctx->X[0] = X0 ^ w[0];
  236                 ctx->X[1] = X1 ^ w[1];
  237                 ctx->X[2] = X2 ^ w[2];
  238                 ctx->X[3] = X3 ^ w[3];
  239 
  240                 Skein_Show_Round(BLK_BITS, &ctx->h, SKEIN_RND_FEED_FWD, ctx->X);
  241 
                      /* blocks after the first run with the FIRST flag cleared */
  242                 ts[1] &= ~SKEIN_T1_FLAG_FIRST;
  243         } while (--blkCnt);
              /* publish the updated tweak back to the context */
  244         ctx->h.T[0] = ts[0];
  245         ctx->h.T[1] = ts[1];
  246 }
  247 
  248 #if     defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
  249 size_t
  250 Skein_256_Process_Block_CodeSize(void)
  251 {
              /*
               * Returns the byte distance from the address of
               * Skein_256_Process_Block() to the address of this function.
               * NOTE(review): this is only the size of the block function if
               * the toolchain places this function directly after it --
               * confirm for the compiler in use.
               */
  252         return ((uint8_t *)Skein_256_Process_Block_CodeSize) -
  253             ((uint8_t *)Skein_256_Process_Block);
  254 }
  255 
  256 uint_t
  257 Skein_256_Unroll_Cnt(void)
  258 {
              /*
               * Reports the compile-time SKEIN_UNROLL_256 setting: the
               * hundreds digit of SKEIN_LOOP, or 0 if SKEIN_LOOP is undefined.
               */
  259         return (SKEIN_UNROLL_256);
  260 }
  261 #endif
  262 #endif
  263 
  264 /* Skein_512 */
  265 #if     !(SKEIN_USE_ASM & 512)
  266 void
  267 Skein_512_Process_Block(Skein_512_Ctxt_t *ctx, const uint8_t *blkPtr,
  268     size_t blkCnt, size_t byteCntAdd)
  269 {
              /*
               * Run the Skein_512 block function over blkCnt consecutive
               * input blocks of SKEIN_512_BLOCK_BYTES bytes, starting at
               * blkPtr.
               *
               * ctx        - ctx->X[0..7] supplies the key for each block and
               *              is overwritten with that block's result;
               *              ctx->h.T[0..1] is read once on entry and written
               *              back once on exit.
               * blkPtr     - input bytes; advanced by one block per iteration.
               * blkCnt     - number of blocks; must be non-zero because the
               *              do/while below decrements before it tests.
               * byteCntAdd - added to tweak word T[0] before every block.
               */
  270         enum {
  271                 WCNT = SKEIN_512_STATE_WORDS
  272         };
              /* RCNT: number of 8-round groups in the full round count */
  273 #undef  RCNT
  274 #define RCNT  (SKEIN_512_ROUNDS_TOTAL / 8)
  275 
              /* the tens digit of SKEIN_LOOP selects the 512-bit unroll */
  276 #ifdef  SKEIN_LOOP              /* configure how much to unroll the loop */
  277 #define SKEIN_UNROLL_512 (((SKEIN_LOOP) / 10) % 10)
  278 #else
  279 #define SKEIN_UNROLL_512 (0)
  280 #endif
  281 
  282 #if     SKEIN_UNROLL_512
  283 #if     (RCNT % SKEIN_UNROLL_512)
  284 #error "Invalid SKEIN_UNROLL_512"       /* sanity check on unroll count */
  285 #endif
  286         size_t r;
  287         /* key schedule words : chaining vars + tweak + "rotation" */
              /* (2 * RCNT extra slots: the looping I512 appends one per call) */
  288         uint64_t kw[WCNT + 4 + RCNT * 2];
  289 #else
  290         uint64_t kw[WCNT + 4];  /* key schedule words : chaining vars + tweak */
  291 #endif
  292         /* local copy of vars, for speed */
  293         uint64_t X0, X1, X2, X3, X4, X5, X6, X7;
  294         uint64_t w[WCNT];               /* local copy of input block */
  295 #ifdef  SKEIN_DEBUG
  296         /* use for debugging (help compiler put Xn in registers) */
  297         const uint64_t *Xptr[8];
  298         Xptr[0] = &X0;
  299         Xptr[1] = &X1;
  300         Xptr[2] = &X2;
  301         Xptr[3] = &X3;
  302         Xptr[4] = &X4;
  303         Xptr[5] = &X5;
  304         Xptr[6] = &X6;
  305         Xptr[7] = &X7;
  306 #endif
  307 
  308         Skein_assert(blkCnt != 0);      /* never call with blkCnt == 0! */
              /* work on a local copy of the tweak; stored back after the loop */
  309         ts[0] = ctx->h.T[0];
  310         ts[1] = ctx->h.T[1];
  311         do {
  312                 /*
  313                  * this implementation only supports 2**64 input bytes
  314                  * (no carry out here)
  315                  */
  316                 ts[0] += byteCntAdd;    /* update processed length */
  317 
  318                 /* precompute the key schedule for this block */
  319                 ks[0] = ctx->X[0];
  320                 ks[1] = ctx->X[1];
  321                 ks[2] = ctx->X[2];
  322                 ks[3] = ctx->X[3];
  323                 ks[4] = ctx->X[4];
  324                 ks[5] = ctx->X[5];
  325                 ks[6] = ctx->X[6];
  326                 ks[7] = ctx->X[7];
                      /* extra key word: xor of all key words and the parity const */
  327                 ks[8] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^
  328                     ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^ SKEIN_KS_PARITY;
  329 
                      /* third tweak word is the xor of the first two */
  330                 ts[2] = ts[0] ^ ts[1];
  331 
  332                 /* get input block in little-endian format */
  333                 Skein_Get64_LSB_First(w, blkPtr, WCNT);
  334                 DebugSaveTweak(ctx);
  335                 Skein_Show_Block(BLK_BITS, &ctx->h, ctx->X, blkPtr, w, ks, ts);
  336 
  337                 X0 = w[0] + ks[0];      /* do the first full key injection */
  338                 X1 = w[1] + ks[1];
  339                 X2 = w[2] + ks[2];
  340                 X3 = w[3] + ks[3];
  341                 X4 = w[4] + ks[4];
  342                 X5 = w[5] + ks[5] + ts[0];
  343                 X6 = w[6] + ks[6] + ts[1];
  344                 X7 = w[7] + ks[7];
  345 
  346                 blkPtr += SKEIN_512_BLOCK_BYTES;
  347 
  348                 Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL,
  349                     Xptr);
  350                 /* run the rounds */
                      /*
                       * Round512: one round, i.e. an add / rotate-left / xor
                       * step on the word pairs (p0,p1), (p2,p3), (p4,p5) and
                       * (p6,p7).  rNum is not used by Round512 itself; R512
                       * passes it on to the Skein_Show_R_Ptr debug hook.
                       */
  351 #define Round512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum)             \
  352         X##p0 += X##p1; X##p1 = RotL_64(X##p1, ROT##_0); X##p1 ^= X##p0;\
  353         X##p2 += X##p3; X##p3 = RotL_64(X##p3, ROT##_1); X##p3 ^= X##p2;\
  354         X##p4 += X##p5; X##p5 = RotL_64(X##p5, ROT##_2); X##p5 ^= X##p4;\
  355         X##p6 += X##p7; X##p7 = RotL_64(X##p7, ROT##_3); X##p7 ^= X##p6;
  356 
                      /*
                       * I512(R) is the key injection.  The unrolled form
                       * indexes ks[] modulo 9 and ts[] modulo 3 with constant
                       * R; the looping form indexes linearly from the loop
                       * counter r and appends the next rotated ks[] and ts[]
                       * entries as it goes.
                       */
  357 #if     SKEIN_UNROLL_512 == 0
  358 #define R512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum) /* unrolled */  \
  359         Round512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum)             \
  360         Skein_Show_R_Ptr(BLK_BITS, &ctx->h, rNum, Xptr);
  361 
  362 #define I512(R)                                                         \
  363         X0 += ks[((R) + 1) % 9];        /* inject the key schedule value */\
  364         X1 += ks[((R) + 2) % 9];                                        \
  365         X2 += ks[((R) + 3) % 9];                                        \
  366         X3 += ks[((R) + 4) % 9];                                        \
  367         X4 += ks[((R) + 5) % 9];                                        \
  368         X5 += ks[((R) + 6) % 9] + ts[((R) + 1) % 3];                    \
  369         X6 += ks[((R) + 7) % 9] + ts[((R) + 2) % 3];                    \
  370         X7 += ks[((R) + 8) % 9] + (R) + 1;                              \
  371         Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
  372 #else                           /* looping version */
  373 #define R512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum)                 \
  374         Round512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum)             \
  375         Skein_Show_R_Ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + rNum, Xptr);
  376 
  377 #define I512(R)                                                         \
  378         X0 += ks[r + (R) + 0];  /* inject the key schedule value */     \
  379         X1 += ks[r + (R) + 1];                                          \
  380         X2 += ks[r + (R) + 2];                                          \
  381         X3 += ks[r + (R) + 3];                                          \
  382         X4 += ks[r + (R) + 4];                                          \
  383         X5 += ks[r + (R) + 5] + ts[r + (R) + 0];                        \
  384         X6 += ks[r + (R) + 6] + ts[r + (R) + 1];                        \
  385         X7 += ks[r + (R) + 7] + r + (R);                                \
  386         ks[r + (R)+8] = ks[r + (R) - 1];        /* rotate key schedule */\
  387         ts[r + (R)+2] = ts[r + (R) - 1];                                \
  388         Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
  389 
  390                 /* loop through it */
  391                 for (r = 1; r < 2 * RCNT; r += 2 * SKEIN_UNROLL_512)
  392 #endif                          /* end of looped code definitions */
  393                 {
                      /* a key injection follows each group of 4 rounds */
  394 #define R512_8_rounds(R)        /* do 8 full rounds */                  \
  395         R512(0, 1, 2, 3, 4, 5, 6, 7, R_512_0, 8 * (R) + 1);             \
  396         R512(2, 1, 4, 7, 6, 5, 0, 3, R_512_1, 8 * (R) + 2);             \
  397         R512(4, 1, 6, 3, 0, 5, 2, 7, R_512_2, 8 * (R) + 3);             \
  398         R512(6, 1, 0, 7, 2, 5, 4, 3, R_512_3, 8 * (R) + 4);             \
  399         I512(2 * (R));                                                  \
  400         R512(0, 1, 2, 3, 4, 5, 6, 7, R_512_4, 8 * (R) + 5);             \
  401         R512(2, 1, 4, 7, 6, 5, 0, 3, R_512_5, 8 * (R) + 6);             \
  402         R512(4, 1, 6, 3, 0, 5, 2, 7, R_512_6, 8 * (R) + 7);             \
  403         R512(6, 1, 0, 7, 2, 5, 4, 3, R_512_7, 8 * (R) + 8);             \
  404         I512(2*(R) + 1);                /* and key injection */
  405 
  406                         R512_8_rounds(0);
  407 
                      /*
                       * R512_Unroll_R(NN) is true when 8-round group NN must
                       * be emitted: for every group below the total when
                       * fully unrolled, otherwise only for groups below the
                       * configured per-iteration unroll count.
                       */
  408 #define R512_Unroll_R(NN) \
  409         ((SKEIN_UNROLL_512 == 0 && SKEIN_512_ROUNDS_TOTAL / 8 > (NN)) || \
  410         (SKEIN_UNROLL_512 > (NN)))
  411 
  412 #if     R512_Unroll_R(1)
  413                         R512_8_rounds(1);
  414 #endif
  415 #if     R512_Unroll_R(2)
  416                         R512_8_rounds(2);
  417 #endif
  418 #if     R512_Unroll_R(3)
  419                         R512_8_rounds(3);
  420 #endif
  421 #if     R512_Unroll_R(4)
  422                         R512_8_rounds(4);
  423 #endif
  424 #if     R512_Unroll_R(5)
  425                         R512_8_rounds(5);
  426 #endif
  427 #if     R512_Unroll_R(6)
  428                         R512_8_rounds(6);
  429 #endif
  430 #if     R512_Unroll_R(7)
  431                         R512_8_rounds(7);
  432 #endif
  433 #if     R512_Unroll_R(8)
  434                         R512_8_rounds(8);
  435 #endif
  436 #if     R512_Unroll_R(9)
  437                         R512_8_rounds(9);
  438 #endif
  439 #if     R512_Unroll_R(10)
  440                         R512_8_rounds(10);
  441 #endif
  442 #if     R512_Unroll_R(11)
  443                         R512_8_rounds(11);
  444 #endif
  445 #if     R512_Unroll_R(12)
  446                         R512_8_rounds(12);
  447 #endif
  448 #if     R512_Unroll_R(13)
  449                         R512_8_rounds(13);
  450 #endif
  451 #if     R512_Unroll_R(14)
  452                         R512_8_rounds(14);
  453 #endif
  454 #if     (SKEIN_UNROLL_512 > 14)
  455 #error "need more unrolling in Skein_512_Process_Block"
  456 #endif
  457                 }
  458 
  459                 /*
  460                  * do the final "feedforward" xor, update context chaining vars
  461                  */
  462                 ctx->X[0] = X0 ^ w[0];
  463                 ctx->X[1] = X1 ^ w[1];
  464                 ctx->X[2] = X2 ^ w[2];
  465                 ctx->X[3] = X3 ^ w[3];
  466                 ctx->X[4] = X4 ^ w[4];
  467                 ctx->X[5] = X5 ^ w[5];
  468                 ctx->X[6] = X6 ^ w[6];
  469                 ctx->X[7] = X7 ^ w[7];
  470                 Skein_Show_Round(BLK_BITS, &ctx->h, SKEIN_RND_FEED_FWD, ctx->X);
  471 
                      /* blocks after the first run with the FIRST flag cleared */
  472                 ts[1] &= ~SKEIN_T1_FLAG_FIRST;
  473         } while (--blkCnt);
              /* publish the updated tweak back to the context */
  474         ctx->h.T[0] = ts[0];
  475         ctx->h.T[1] = ts[1];
  476 }
  477 
  478 #if     defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
  479 size_t
  480 Skein_512_Process_Block_CodeSize(void)
  481 {
              /*
               * Returns the byte distance from the address of
               * Skein_512_Process_Block() to the address of this function.
               * NOTE(review): this is only the size of the block function if
               * the toolchain places this function directly after it --
               * confirm for the compiler in use.
               */
  482         return ((uint8_t *)Skein_512_Process_Block_CodeSize) -
  483             ((uint8_t *)Skein_512_Process_Block);
  484 }
  485 
  486 uint_t
  487 Skein_512_Unroll_Cnt(void)
  488 {
              /*
               * Reports the compile-time SKEIN_UNROLL_512 setting: the tens
               * digit of SKEIN_LOOP, or 0 if SKEIN_LOOP is undefined.
               */
  489         return (SKEIN_UNROLL_512);
  490 }
  491 #endif
  492 #endif
  493 
  494 /*  Skein1024 */
  495 #if     !(SKEIN_USE_ASM & 1024)
  496 void
  497 Skein1024_Process_Block(Skein1024_Ctxt_t *ctx, const uint8_t *blkPtr,
  498     size_t blkCnt, size_t byteCntAdd)
  499 {
  500         /* do it in C, always looping (unrolled is bigger AND slower!) */
  501         enum {
  502                 WCNT = SKEIN1024_STATE_WORDS
  503         };
  504 #undef  RCNT
  505 #define RCNT  (SKEIN1024_ROUNDS_TOTAL/8)
  506 
  507 #ifdef  SKEIN_LOOP              /* configure how much to unroll the loop */
  508 #define SKEIN_UNROLL_1024 ((SKEIN_LOOP)%10)
  509 #else
  510 #define SKEIN_UNROLL_1024 (0)
  511 #endif
  512 
  513 #if     (SKEIN_UNROLL_1024 != 0)
  514 #if     (RCNT % SKEIN_UNROLL_1024)
  515 #error "Invalid SKEIN_UNROLL_1024"      /* sanity check on unroll count */
  516 #endif
  517         size_t r;
  518         /* key schedule words : chaining vars + tweak + "rotation" */
  519         uint64_t kw[WCNT + 4 + RCNT * 2];
  520 #else
  521         uint64_t kw[WCNT + 4];  /* key schedule words : chaining vars + tweak */
  522 #endif
  523 
  524         /* local copy of vars, for speed */
  525         uint64_t X00, X01, X02, X03, X04, X05, X06, X07, X08, X09, X10, X11,
  526             X12, X13, X14, X15;
  527         uint64_t w[WCNT];               /* local copy of input block */
  528 #ifdef  SKEIN_DEBUG
  529         /* use for debugging (help compiler put Xn in registers) */
  530         const uint64_t *Xptr[16];
  531         Xptr[0] = &X00;
  532         Xptr[1] = &X01;
  533         Xptr[2] = &X02;
  534         Xptr[3] = &X03;
  535         Xptr[4] = &X04;
  536         Xptr[5] = &X05;
  537         Xptr[6] = &X06;
  538         Xptr[7] = &X07;
  539         Xptr[8] = &X08;
  540         Xptr[9] = &X09;
  541         Xptr[10] = &X10;
  542         Xptr[11] = &X11;
  543         Xptr[12] = &X12;
  544         Xptr[13] = &X13;
  545         Xptr[14] = &X14;
  546         Xptr[15] = &X15;
  547 #endif
  548 
  549         Skein_assert(blkCnt != 0);      /* never call with blkCnt == 0! */
  550         ts[0] = ctx->h.T[0];
  551         ts[1] = ctx->h.T[1];
  552         do {
  553                 /*
  554                  * this implementation only supports 2**64 input bytes
  555                  * (no carry out here)
  556                  */
  557                 ts[0] += byteCntAdd;    /* update processed length */
  558 
  559                 /* precompute the key schedule for this block */
  560                 ks[0] = ctx->X[0];
  561                 ks[1] = ctx->X[1];
  562                 ks[2] = ctx->X[2];
  563                 ks[3] = ctx->X[3];
  564                 ks[4] = ctx->X[4];
  565                 ks[5] = ctx->X[5];
  566                 ks[6] = ctx->X[6];
  567                 ks[7] = ctx->X[7];
  568                 ks[8] = ctx->X[8];
  569                 ks[9] = ctx->X[9];
  570                 ks[10] = ctx->X[10];
  571                 ks[11] = ctx->X[11];
  572                 ks[12] = ctx->X[12];
  573                 ks[13] = ctx->X[13];
  574                 ks[14] = ctx->X[14];
  575                 ks[15] = ctx->X[15];
  576                 ks[16] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^
  577                     ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^
  578                     ks[8] ^ ks[9] ^ ks[10] ^ ks[11] ^
  579                     ks[12] ^ ks[13] ^ ks[14] ^ ks[15] ^ SKEIN_KS_PARITY;
  580 
  581                 ts[2] = ts[0] ^ ts[1];
  582 
  583                 /* get input block in little-endian format */
  584                 Skein_Get64_LSB_First(w, blkPtr, WCNT);
  585                 DebugSaveTweak(ctx);
  586                 Skein_Show_Block(BLK_BITS, &ctx->h, ctx->X, blkPtr, w, ks, ts);
  587 
  588                 X00 = w[0] + ks[0];     /* do the first full key injection */
  589                 X01 = w[1] + ks[1];
  590                 X02 = w[2] + ks[2];
  591                 X03 = w[3] + ks[3];
  592                 X04 = w[4] + ks[4];
  593                 X05 = w[5] + ks[5];
  594                 X06 = w[6] + ks[6];
  595                 X07 = w[7] + ks[7];
  596                 X08 = w[8] + ks[8];
  597                 X09 = w[9] + ks[9];
  598                 X10 = w[10] + ks[10];
  599                 X11 = w[11] + ks[11];
  600                 X12 = w[12] + ks[12];
  601                 X13 = w[13] + ks[13] + ts[0];
  602                 X14 = w[14] + ks[14] + ts[1];
  603                 X15 = w[15] + ks[15];
  604 
  605                 Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL,
  606                     Xptr);
  607 
  608 #define Round1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC,   \
  609         pD, pE, pF, ROT, rNum)                                          \
  610         X##p0 += X##p1; X##p1 = RotL_64(X##p1, ROT##_0); X##p1 ^= X##p0;\
  611         X##p2 += X##p3; X##p3 = RotL_64(X##p3, ROT##_1); X##p3 ^= X##p2;\
  612         X##p4 += X##p5; X##p5 = RotL_64(X##p5, ROT##_2); X##p5 ^= X##p4;\
  613         X##p6 += X##p7; X##p7 = RotL_64(X##p7, ROT##_3); X##p7 ^= X##p6;\
  614         X##p8 += X##p9; X##p9 = RotL_64(X##p9, ROT##_4); X##p9 ^= X##p8;\
  615         X##pA += X##pB; X##pB = RotL_64(X##pB, ROT##_5); X##pB ^= X##pA;\
  616         X##pC += X##pD; X##pD = RotL_64(X##pD, ROT##_6); X##pD ^= X##pC;\
  617         X##pE += X##pF; X##pF = RotL_64(X##pF, ROT##_7); X##pF ^= X##pE;
  618 
  619 #if     SKEIN_UNROLL_1024 == 0
  620 #define R1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD,   \
  621         pE, pF, ROT, rn)                                                \
  622         Round1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC,   \
  623         pD, pE, pF, ROT, rn)                                            \
  624         Skein_Show_R_Ptr(BLK_BITS, &ctx->h, rn, Xptr);
  625 
  626 #define I1024(R)                                                        \
  627         X00 += ks[((R) + 1) % 17];      /* inject the key schedule value */\
  628         X01 += ks[((R) + 2) % 17];                                      \
  629         X02 += ks[((R) + 3) % 17];                                      \
  630         X03 += ks[((R) + 4) % 17];                                      \
  631         X04 += ks[((R) + 5) % 17];                                      \
  632         X05 += ks[((R) + 6) % 17];                                      \
  633         X06 += ks[((R) + 7) % 17];                                      \
  634         X07 += ks[((R) + 8) % 17];                                      \
  635         X08 += ks[((R) + 9) % 17];                                      \
  636         X09 += ks[((R) + 10) % 17];                                     \
  637         X10 += ks[((R) + 11) % 17];                                     \
  638         X11 += ks[((R) + 12) % 17];                                     \
  639         X12 += ks[((R) + 13) % 17];                                     \
  640         X13 += ks[((R) + 14) % 17] + ts[((R) + 1) % 3];                 \
  641         X14 += ks[((R) + 15) % 17] + ts[((R) + 2) % 3];                 \
  642         X15 += ks[((R) + 16) % 17] + (R) +1;                            \
  643         Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
  644 #else                           /* looping version */
  645 #define R1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD,   \
  646         pE, pF, ROT, rn)                                                \
  647         Round1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC,   \
  648         pD, pE, pF, ROT, rn)                                            \
  649         Skein_Show_R_Ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + rn, Xptr);
  650 
  651 #define I1024(R)                                                        \
  652         X00 += ks[r + (R) + 0]; /* inject the key schedule value */     \
  653         X01 += ks[r + (R) + 1];                                         \
  654         X02 += ks[r + (R) + 2];                                         \
  655         X03 += ks[r + (R) + 3];                                         \
  656         X04 += ks[r + (R) + 4];                                         \
  657         X05 += ks[r + (R) + 5];                                         \
  658         X06 += ks[r + (R) + 6];                                         \
  659         X07 += ks[r + (R) + 7];                                         \
  660         X08 += ks[r + (R) + 8];                                         \
  661         X09 += ks[r + (R) + 9];                                         \
  662         X10 += ks[r + (R) + 10];                                        \
  663         X11 += ks[r + (R) + 11];                                        \
  664         X12 += ks[r + (R) + 12];                                        \
  665         X13 += ks[r + (R) + 13] + ts[r + (R) + 0];                      \
  666         X14 += ks[r + (R) + 14] + ts[r + (R) + 1];                      \
  667         X15 += ks[r + (R) + 15] +  r + (R);                             \
  668         ks[r + (R) + 16] = ks[r + (R) - 1];     /* rotate key schedule */\
  669         ts[r + (R) + 2] = ts[r + (R) - 1];                              \
  670         Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
  671 
  672                 /* loop through it */
  673                 for (r = 1; r <= 2 * RCNT; r += 2 * SKEIN_UNROLL_1024)
  674 #endif
  675                 {
  676 #define R1024_8_rounds(R)       /* do 8 full rounds */                  \
  677         R1024(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13,   \
  678             14, 15, R1024_0, 8 * (R) + 1);                              \
  679         R1024(00, 09, 02, 13, 06, 11, 04, 15, 10, 07, 12, 03, 14, 05,   \
  680             08, 01, R1024_1, 8 * (R) + 2);                              \
  681         R1024(00, 07, 02, 05, 04, 03, 06, 01, 12, 15, 14, 13, 08, 11,   \
  682             10, 09, R1024_2, 8 * (R) + 3);                              \
  683         R1024(00, 15, 02, 11, 06, 13, 04, 09, 14, 01, 08, 05, 10, 03,   \
  684             12, 07, R1024_3, 8 * (R) + 4);                              \
  685         I1024(2 * (R));                                                 \
  686         R1024(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13,   \
  687             14, 15, R1024_4, 8 * (R) + 5);                              \
  688         R1024(00, 09, 02, 13, 06, 11, 04, 15, 10, 07, 12, 03, 14, 05,   \
  689             08, 01, R1024_5, 8 * (R) + 6);                              \
  690         R1024(00, 07, 02, 05, 04, 03, 06, 01, 12, 15, 14, 13, 08, 11,   \
  691             10, 09, R1024_6, 8 * (R) + 7);                              \
  692         R1024(00, 15, 02, 11, 06, 13, 04, 09, 14, 01, 08, 05, 10, 03,   \
  693             12, 07, R1024_7, 8 * (R) + 8);                              \
  694         I1024(2 * (R) + 1);
  695 
  696                         R1024_8_rounds(0);
  697 
  698 #define R1024_Unroll_R(NN)                                              \
  699         ((SKEIN_UNROLL_1024 == 0 && SKEIN1024_ROUNDS_TOTAL/8 > (NN)) || \
  700         (SKEIN_UNROLL_1024 > (NN)))
  701 
  702 #if     R1024_Unroll_R(1)
  703                         R1024_8_rounds(1);
  704 #endif
  705 #if     R1024_Unroll_R(2)
  706                         R1024_8_rounds(2);
  707 #endif
  708 #if     R1024_Unroll_R(3)
  709                         R1024_8_rounds(3);
  710 #endif
  711 #if     R1024_Unroll_R(4)
  712                         R1024_8_rounds(4);
  713 #endif
  714 #if     R1024_Unroll_R(5)
  715                         R1024_8_rounds(5);
  716 #endif
  717 #if     R1024_Unroll_R(6)
  718                         R1024_8_rounds(6);
  719 #endif
  720 #if     R1024_Unroll_R(7)
  721                         R1024_8_rounds(7);
  722 #endif
  723 #if     R1024_Unroll_R(8)
  724                         R1024_8_rounds(8);
  725 #endif
  726 #if     R1024_Unroll_R(9)
  727                         R1024_8_rounds(9);
  728 #endif
  729 #if     R1024_Unroll_R(10)
  730                         R1024_8_rounds(10);
  731 #endif
  732 #if     R1024_Unroll_R(11)
  733                         R1024_8_rounds(11);
  734 #endif
  735 #if     R1024_Unroll_R(12)
  736                         R1024_8_rounds(12);
  737 #endif
  738 #if     R1024_Unroll_R(13)
  739                         R1024_8_rounds(13);
  740 #endif
  741 #if     R1024_Unroll_R(14)
  742                         R1024_8_rounds(14);
  743 #endif
  744 #if     (SKEIN_UNROLL_1024 > 14)
  745 #error  "need more unrolling in Skein_1024_Process_Block"
  746 #endif
  747                 }
  748                 /*
  749                  * do the final "feedforward" xor, update context chaining vars
  750                  */
  751 
  752                 ctx->X[0] = X00 ^ w[0];
  753                 ctx->X[1] = X01 ^ w[1];
  754                 ctx->X[2] = X02 ^ w[2];
  755                 ctx->X[3] = X03 ^ w[3];
  756                 ctx->X[4] = X04 ^ w[4];
  757                 ctx->X[5] = X05 ^ w[5];
  758                 ctx->X[6] = X06 ^ w[6];
  759                 ctx->X[7] = X07 ^ w[7];
  760                 ctx->X[8] = X08 ^ w[8];
  761                 ctx->X[9] = X09 ^ w[9];
  762                 ctx->X[10] = X10 ^ w[10];
  763                 ctx->X[11] = X11 ^ w[11];
  764                 ctx->X[12] = X12 ^ w[12];
  765                 ctx->X[13] = X13 ^ w[13];
  766                 ctx->X[14] = X14 ^ w[14];
  767                 ctx->X[15] = X15 ^ w[15];
  768 
  769                 Skein_Show_Round(BLK_BITS, &ctx->h, SKEIN_RND_FEED_FWD, ctx->X);
  770 
  771                 ts[1] &= ~SKEIN_T1_FLAG_FIRST;
  772                 blkPtr += SKEIN1024_BLOCK_BYTES;
  773         } while (--blkCnt);
  774         ctx->h.T[0] = ts[0];
  775         ctx->h.T[1] = ts[1];
  776 }
  777 
  778 #if     defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
  779 size_t
  780 Skein1024_Process_Block_CodeSize(void)
  781 {
              /*
               * Diagnostic helper, compiled only when SKEIN_CODE_SIZE or
               * SKEIN_PERF is defined.
               *
               * Returns the byte distance from the address of
               * Skein1024_Process_Block to the address of this function, with
               * both function addresses viewed as uint8_t pointers.
               *
               * NOTE(review): the result equals the code size of
               * Skein1024_Process_Block only if the toolchain places this
               * function directly after it; the C language does not promise
               * that layout -- confirm for the compiler/linker in use.
               */
  782         return ((uint8_t *)Skein1024_Process_Block_CodeSize) -
  783             ((uint8_t *)Skein1024_Process_Block);
  784 }
  785 
  786 uint_t
  787 Skein1024_Unroll_Cnt(void)
  788 {
              /*
               * Diagnostic helper, compiled only when SKEIN_CODE_SIZE or
               * SKEIN_PERF is defined.
               *
               * Returns the compile-time value of SKEIN_UNROLL_1024 that this
               * file was built with.  In the R1024_Unroll_R() predicate above,
               * a value of 0 enables every R1024_8_rounds() group below
               * SKEIN1024_ROUNDS_TOTAL/8, while a non-zero value N enables
               * only groups 0 .. N-1.
               */
  789         return (SKEIN_UNROLL_1024);
  790 }
  791 #endif
  792 #endif

Cache object: 3f7026e19a1e95fc40117fe211700be5


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.