The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/libb2/blake2b-round.h

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2    BLAKE2 reference source code package - optimized C implementations
    3 
    4    Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
    5 
    6    To the extent possible under law, the author(s) have dedicated all copyright
    7    and related and neighboring rights to this software to the public domain
    8    worldwide. This software is distributed without any warranty.
    9 
   10    You should have received a copy of the CC0 Public Domain Dedication along with
   11    this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
   12 */
   13 #pragma once
   14 #ifndef __BLAKE2B_ROUND_H__
   15 #define __BLAKE2B_ROUND_H__
   16 
   17 #define LOAD(p)  _mm_load_si128( (__m128i *)(p) )
   18 #define STORE(p,r) _mm_store_si128((__m128i *)(p), r)
   19 
   20 #define LOADU(p)  _mm_loadu_si128( (__m128i *)(p) )
   21 #define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r)
   22 
   23 #define TOF(reg) _mm_castsi128_ps((reg))
   24 #define TOI(reg) _mm_castps_si128((reg))
   25 
   26 #define LIKELY(x) __builtin_expect((x),1)
   27 
   28 
   29 /* Microarchitecture-specific macros */
   30 #ifndef HAVE_XOP
   31 #ifdef HAVE_SSSE3
   32 #define _mm_roti_epi64(x, c) \
   33     (-(c) == 32) ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2,3,0,1))  \
   34     : (-(c) == 24) ? _mm_shuffle_epi8((x), r24) \
   35     : (-(c) == 16) ? _mm_shuffle_epi8((x), r16) \
   36     : (-(c) == 63) ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_add_epi64((x), (x)))  \
   37     : _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_slli_epi64((x), 64-(-(c))))
   38 #else
   39 #define _mm_roti_epi64(r, c) _mm_xor_si128(_mm_srli_epi64( (r), -(c) ),_mm_slli_epi64( (r), 64-(-(c)) ))
   40 #endif
   41 #else
   42 /* ... */
   43 #endif
   44 
   45 
   46 
   47 #define G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
   48   row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
   49   row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
   50   \
   51   row4l = _mm_xor_si128(row4l, row1l); \
   52   row4h = _mm_xor_si128(row4h, row1h); \
   53   \
   54   row4l = _mm_roti_epi64(row4l, -32); \
   55   row4h = _mm_roti_epi64(row4h, -32); \
   56   \
   57   row3l = _mm_add_epi64(row3l, row4l); \
   58   row3h = _mm_add_epi64(row3h, row4h); \
   59   \
   60   row2l = _mm_xor_si128(row2l, row3l); \
   61   row2h = _mm_xor_si128(row2h, row3h); \
   62   \
   63   row2l = _mm_roti_epi64(row2l, -24); \
   64   row2h = _mm_roti_epi64(row2h, -24); \
   65  
   66 #define G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
   67   row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
   68   row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
   69   \
   70   row4l = _mm_xor_si128(row4l, row1l); \
   71   row4h = _mm_xor_si128(row4h, row1h); \
   72   \
   73   row4l = _mm_roti_epi64(row4l, -16); \
   74   row4h = _mm_roti_epi64(row4h, -16); \
   75   \
   76   row3l = _mm_add_epi64(row3l, row4l); \
   77   row3h = _mm_add_epi64(row3h, row4h); \
   78   \
   79   row2l = _mm_xor_si128(row2l, row3l); \
   80   row2h = _mm_xor_si128(row2h, row3h); \
   81   \
   82   row2l = _mm_roti_epi64(row2l, -63); \
   83   row2h = _mm_roti_epi64(row2h, -63); \
   84  
   85 #if defined(HAVE_SSSE3)
   86 #define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
   87   t0 = _mm_alignr_epi8(row2h, row2l, 8); \
   88   t1 = _mm_alignr_epi8(row2l, row2h, 8); \
   89   row2l = t0; \
   90   row2h = t1; \
   91   \
   92   t0 = row3l; \
   93   row3l = row3h; \
   94   row3h = t0;    \
   95   \
   96   t0 = _mm_alignr_epi8(row4h, row4l, 8); \
   97   t1 = _mm_alignr_epi8(row4l, row4h, 8); \
   98   row4l = t1; \
   99   row4h = t0;
  100 
  101 #define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
  102   t0 = _mm_alignr_epi8(row2l, row2h, 8); \
  103   t1 = _mm_alignr_epi8(row2h, row2l, 8); \
  104   row2l = t0; \
  105   row2h = t1; \
  106   \
  107   t0 = row3l; \
  108   row3l = row3h; \
  109   row3h = t0; \
  110   \
  111   t0 = _mm_alignr_epi8(row4l, row4h, 8); \
  112   t1 = _mm_alignr_epi8(row4h, row4l, 8); \
  113   row4l = t1; \
  114   row4h = t0;
  115 #else
  116 
  117 #define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
  118   t0 = row4l;\
  119   t1 = row2l;\
  120   row4l = row3l;\
  121   row3l = row3h;\
  122   row3h = row4l;\
  123   row4l = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t0, t0)); \
  124   row4h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row4h, row4h)); \
  125   row2l = _mm_unpackhi_epi64(row2l, _mm_unpacklo_epi64(row2h, row2h)); \
  126   row2h = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(t1, t1))
  127 
  128 #define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
  129   t0 = row3l;\
  130   row3l = row3h;\
  131   row3h = t0;\
  132   t0 = row2l;\
  133   t1 = row4l;\
  134   row2l = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(row2l, row2l)); \
  135   row2h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row2h, row2h)); \
  136   row4l = _mm_unpackhi_epi64(row4l, _mm_unpacklo_epi64(row4h, row4h)); \
  137   row4h = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t1, t1))
  138 
  139 #endif
  140 
  141 #if defined(HAVE_SSE4_1)
  142 #include "blake2b-load-sse41.h"
  143 #else
  144 #include "blake2b-load-sse2.h"
  145 #endif
  146 
  147 #define ROUND(r) \
  148   LOAD_MSG_ ##r ##_1(b0, b1); \
  149   G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
  150   LOAD_MSG_ ##r ##_2(b0, b1); \
  151   G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
  152   DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \
  153   LOAD_MSG_ ##r ##_3(b0, b1); \
  154   G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
  155   LOAD_MSG_ ##r ##_4(b0, b1); \
  156   G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
  157   UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h);
  158 
  159 #endif
  160 

Cache object: 2811fa6f727c397f46ce73a83a91edda


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.