The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/libb2/blake2s-load-sse41.h

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2    BLAKE2 reference source code package - optimized C implementations
    3 
    4    Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
    5 
    6    To the extent possible under law, the author(s) have dedicated all copyright
    7    and related and neighboring rights to this software to the public domain
    8    worldwide. This software is distributed without any warranty.
    9 
   10    You should have received a copy of the CC0 Public Domain Dedication along with
   11    this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
   12 */
   /*
    * Multiple-inclusion protection: both #pragma once and a classic
    * #ifndef/#define guard are used.
    * NOTE(review): the guard name begins with a double underscore, a form C
    * reserves for the implementation; renaming it would first need a check
    * that nothing else tests this macro.
    */
   13 #pragma once
   14 #ifndef __BLAKE2S_LOAD_SSE41_H__
   15 #define __BLAKE2S_LOAD_SSE41_H__
   16 
   /*
    * LOAD_MSG_0_1(buf): sets buf = { m0[0], m0[2], m1[0], m1[2] } (32-bit
    * lanes, lane 0 first) with a single _mm_shuffle_ps.  The expansion
    * already ends in ';'.  TOF/TOI and m0, m1 are not defined in this header.
    * NOTE(review): TOF/TOI presumably cast between integer and float vector
    * types, and the result is message words 0, 2, 4, 6 if mN holds words
    * 4N..4N+3 -- confirm both against the including code.
    */
   17 #define LOAD_MSG_0_1(buf) \
   18 buf = TOI(_mm_shuffle_ps(TOF(m0), TOF(m1), _MM_SHUFFLE(2,0,2,0)));
   19 
   /*
    * LOAD_MSG_0_2(buf): sets buf = { m0[1], m0[3], m1[1], m1[3] } (32-bit
    * lanes, lane 0 first) with a single _mm_shuffle_ps.  The expansion
    * already ends in ';'.  TOF/TOI and m0, m1 are not defined in this header.
    * NOTE(review): TOF/TOI presumably cast between integer and float vector
    * types, and the result is message words 1, 3, 5, 7 if mN holds words
    * 4N..4N+3 -- confirm both against the including code.
    */
   20 #define LOAD_MSG_0_2(buf) \
   21 buf = TOI(_mm_shuffle_ps(TOF(m0), TOF(m1), _MM_SHUFFLE(3,1,3,1)));
   22 
   /*
    * LOAD_MSG_0_3(buf): sets buf = { m2[0], m2[2], m3[0], m3[2] } (32-bit
    * lanes, lane 0 first) with a single _mm_shuffle_ps.  The expansion
    * already ends in ';'.  TOF/TOI and m2, m3 are not defined in this header.
    * NOTE(review): TOF/TOI presumably cast between integer and float vector
    * types, and the result is message words 8, 10, 12, 14 if mN holds words
    * 4N..4N+3 -- confirm both against the including code.
    */
   23 #define LOAD_MSG_0_3(buf) \
   24 buf = TOI(_mm_shuffle_ps(TOF(m2), TOF(m3), _MM_SHUFFLE(2,0,2,0)));
   25 
   /*
    * LOAD_MSG_0_4(buf): sets buf = { m2[1], m2[3], m3[1], m3[3] } (32-bit
    * lanes, lane 0 first) with a single _mm_shuffle_ps.  The expansion
    * already ends in ';'.  TOF/TOI and m2, m3 are not defined in this header.
    * NOTE(review): TOF/TOI presumably cast between integer and float vector
    * types, and the result is message words 9, 11, 13, 15 if mN holds words
    * 4N..4N+3 -- confirm both against the including code.
    */
   26 #define LOAD_MSG_0_4(buf) \
   27 buf = TOI(_mm_shuffle_ps(TOF(m2), TOF(m3), _MM_SHUFFLE(3,1,3,1)));
   28 
   /*
    * LOAD_MSG_1_1(buf): sets buf = { m3[2], m1[0], m2[1], m3[1] } (32-bit
    * lanes, lane 0 first).  Expands to four unbraced statements, trailing ';'
    * included, and overwrites the scratch variables t0, t1 and t2; those and
    * m0..m3 are not declared in this header.
    * NOTE(review): that is message words 14, 4, 9, 13 if mN holds words
    * 4N..4N+3 -- confirm against the including code.
    */
   29 #define LOAD_MSG_1_1(buf) \
   30 t0 = _mm_blend_epi16(m1, m2, 0x0C); \
   31 t1 = _mm_slli_si128(m3, 4); \
   32 t2 = _mm_blend_epi16(t0, t1, 0xF0); \
   33 buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,1,0,3));
   34 
   /*
    * LOAD_MSG_1_2(buf): sets buf = { m2[2], m2[0], m3[3], m1[2] } (32-bit
    * lanes, lane 0 first).  Expands to four unbraced statements, trailing ';'
    * included, and overwrites the scratch variables t0, t1 and t2; those and
    * m0..m3 are not declared in this header.
    * NOTE(review): that is message words 10, 8, 15, 6 if mN holds words
    * 4N..4N+3 -- confirm against the including code.
    */
   35 #define LOAD_MSG_1_2(buf) \
   36 t0 = _mm_shuffle_epi32(m2,_MM_SHUFFLE(0,0,2,0)); \
   37 t1 = _mm_blend_epi16(m1,m3,0xC0); \
   38 t2 = _mm_blend_epi16(t0, t1, 0xF0); \
   39 buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,3,0,1));
   40 
   /*
    * LOAD_MSG_1_3(buf): sets buf = { m0[1], m0[0], m2[3], m1[1] } (32-bit
    * lanes, lane 0 first).  Expands to four unbraced statements, trailing ';'
    * included, and overwrites the scratch variables t0, t1 and t2; those and
    * m0..m3 are not declared in this header.
    * NOTE(review): that is message words 1, 0, 11, 5 if mN holds words
    * 4N..4N+3 -- confirm against the including code.
    */
   41 #define LOAD_MSG_1_3(buf) \
   42 t0 = _mm_slli_si128(m1, 4); \
   43 t1 = _mm_blend_epi16(m2, t0, 0x30); \
   44 t2 = _mm_blend_epi16(m0, t1, 0xF0); \
   45 buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,3,0,1));
   46 
   /*
    * LOAD_MSG_1_4(buf): sets buf = { m3[0], m0[2], m1[3], m0[3] } (32-bit
    * lanes, lane 0 first).  Expands to four unbraced statements, trailing ';'
    * included, and overwrites the scratch variables t0, t1 and t2; those and
    * m0..m3 are not declared in this header.
    * NOTE(review): that is message words 12, 2, 7, 3 if mN holds words
    * 4N..4N+3 -- confirm against the including code.
    */
   47 #define LOAD_MSG_1_4(buf) \
   48 t0 = _mm_unpackhi_epi32(m0,m1); \
   49 t1 = _mm_slli_si128(m3, 4); \
   50 t2 = _mm_blend_epi16(t0, t1, 0x0C); \
   51 buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,3,0,1));
   52 
   /*
    * LOAD_MSG_2_1(buf): sets buf = { m2[3], m3[0], m1[1], m3[3] } (32-bit
    * lanes, lane 0 first).  Expands to four unbraced statements, trailing ';'
    * included, and overwrites the scratch variables t0, t1 and t2; those and
    * m0..m3 are not declared in this header.
    * NOTE(review): that is message words 11, 12, 5, 15 if mN holds words
    * 4N..4N+3 -- confirm against the including code.
    */
   53 #define LOAD_MSG_2_1(buf) \
   54 t0 = _mm_unpackhi_epi32(m2,m3); \
   55 t1 = _mm_blend_epi16(m3,m1,0x0C); \
   56 t2 = _mm_blend_epi16(t0, t1, 0x0F); \
   57 buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(3,1,0,2));
   58 
   /*
    * LOAD_MSG_2_2(buf): sets buf = { m2[0], m0[0], m0[2], m3[1] } (32-bit
    * lanes, lane 0 first); the final step is a blend rather than a shuffle.
    * Expands to four unbraced statements, trailing ';' included, and
    * overwrites the scratch variables t0, t1 and t2; those and m0..m3 are not
    * declared in this header.
    * NOTE(review): that is message words 8, 0, 2, 13 if mN holds words
    * 4N..4N+3 -- confirm against the including code.
    */
   59 #define LOAD_MSG_2_2(buf) \
   60 t0 = _mm_unpacklo_epi32(m2,m0); \
   61 t1 = _mm_blend_epi16(t0, m0, 0xF0); \
   62 t2 = _mm_slli_si128(m3, 8); \
   63 buf = _mm_blend_epi16(t1, t2, 0xC0);
   64 
   /*
    * LOAD_MSG_2_3(buf): sets buf = { m2[2], m0[3], m1[3], m2[1] } (32-bit
    * lanes, lane 0 first).  Expands to four unbraced statements, trailing ';'
    * included, and overwrites the scratch variables t0, t1 and t2; those and
    * m0..m3 are not declared in this header.
    * NOTE(review): that is message words 10, 3, 7, 9 if mN holds words
    * 4N..4N+3 -- confirm against the including code.
    */
   65 #define LOAD_MSG_2_3(buf) \
   66 t0 = _mm_blend_epi16(m0, m2, 0x3C); \
   67 t1 = _mm_srli_si128(m1, 12); \
   68 t2 = _mm_blend_epi16(t0,t1,0x03); \
   69 buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,0,3,2));
   70 
   /*
    * LOAD_MSG_2_4(buf): sets buf = { m3[2], m1[2], m0[1], m1[0] } (32-bit
    * lanes, lane 0 first).  Expands to four unbraced statements, trailing ';'
    * included, and overwrites the scratch variables t0, t1 and t2; those and
    * m0..m3 are not declared in this header.
    * NOTE(review): that is message words 14, 6, 1, 4 if mN holds words
    * 4N..4N+3 -- confirm against the including code.
    */
   71 #define LOAD_MSG_2_4(buf) \
   72 t0 = _mm_slli_si128(m3, 4); \
   73 t1 = _mm_blend_epi16(m0, m1, 0x33); \
   74 t2 = _mm_blend_epi16(t1, t0, 0xC0); \
   75 buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(0,1,2,3));
   76 
   /*
    * LOAD_MSG_3_1(buf): sets buf = { m1[3], m0[3], m3[1], m2[3] } (32-bit
    * lanes, lane 0 first).  Expands to four unbraced statements, trailing ';'
    * included, and overwrites the scratch variables t0, t1 and t2; those and
    * m0..m3 are not declared in this header.
    * NOTE(review): that is message words 7, 3, 13, 11 if mN holds words
    * 4N..4N+3 -- confirm against the including code.
    */
   77 #define LOAD_MSG_3_1(buf) \
   78 t0 = _mm_unpackhi_epi32(m0,m1); \
   79 t1 = _mm_unpackhi_epi32(t0, m2); \
   80 t2 = _mm_blend_epi16(t1, m3, 0x0C); \
   81 buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(3,1,0,2));
   82 
   /*
    * LOAD_MSG_3_2(buf): sets buf = { m2[1], m0[1], m3[0], m3[2] } (32-bit
    * lanes, lane 0 first).  Expands to four unbraced statements, trailing ';'
    * included, and overwrites the scratch variables t0, t1 and t2; those and
    * m0..m3 are not declared in this header.
    * NOTE(review): that is message words 9, 1, 12, 14 if mN holds words
    * 4N..4N+3 -- confirm against the including code.
    */
   83 #define LOAD_MSG_3_2(buf) \
   84 t0 = _mm_slli_si128(m2, 8); \
   85 t1 = _mm_blend_epi16(m3,m0,0x0C); \
   86 t2 = _mm_blend_epi16(t1, t0, 0xC0); \
   87 buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,0,1,3));
   88 
   /*
    * LOAD_MSG_3_3(buf): sets buf = { m0[2], m1[1], m1[0], m3[3] } (32-bit
    * lanes, lane 0 first).  Expands to three unbraced statements, trailing
    * ';' included, and overwrites the scratch variables t0 and t1 (t2 is left
    * untouched); those and m0..m3 are not declared in this header.
    * NOTE(review): that is message words 2, 5, 4, 15 if mN holds words
    * 4N..4N+3 -- confirm against the including code.
    */
   89 #define LOAD_MSG_3_3(buf) \
   90 t0 = _mm_blend_epi16(m0,m1,0x0F); \
   91 t1 = _mm_blend_epi16(t0, m3, 0xC0); \
   92 buf = _mm_shuffle_epi32(t1, _MM_SHUFFLE(3,0,1,2));
   93 
   /*
    * LOAD_MSG_3_4(buf): sets buf = { m1[2], m2[2], m0[0], m2[0] } (32-bit
    * lanes, lane 0 first) using unpack operations only.  Expands to three
    * unbraced statements, trailing ';' included, and overwrites the scratch
    * variables t0 and t1 (t2 is left untouched); those and m0..m3 are not
    * declared in this header.
    * NOTE(review): that is message words 6, 10, 0, 8 if mN holds words
    * 4N..4N+3 -- confirm against the including code.
    */
   94 #define LOAD_MSG_3_4(buf) \
   95 t0 = _mm_unpacklo_epi32(m0,m2); \
   96 t1 = _mm_unpackhi_epi32(m1,m2); \
   97 buf = _mm_unpacklo_epi64(t1,t0);
   98 
   /*
    * LOAD_MSG_4_1(buf): sets buf = { m2[1], m1[1], m0[2], m2[2] } (32-bit
    * lanes, lane 0 first).  Expands to four unbraced statements, trailing ';'
    * included, and overwrites the scratch variables t0, t1 and t2; those and
    * m0..m3 are not declared in this header.
    * NOTE(review): that is message words 9, 5, 2, 10 if mN holds words
    * 4N..4N+3 -- confirm against the including code.
    */
   99 #define LOAD_MSG_4_1(buf) \
  100 t0 = _mm_unpacklo_epi64(m1,m2); \
  101 t1 = _mm_unpackhi_epi64(m0,m2); \
  102 t2 = _mm_blend_epi16(t0,t1,0x33); \
  103 buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,0,1,3));
  104 
  /*
   * LOAD_MSG_4_2(buf): sets buf = { m0[0], m1[3], m1[0], m3[3] } (32-bit
   * lanes, lane 0 first); the final step is a blend rather than a shuffle.
   * Expands to three unbraced statements, trailing ';' included, and
   * overwrites the scratch variables t0 and t1 (t2 is left untouched); those
   * and m0..m3 are not declared in this header.
   * NOTE(review): that is message words 0, 7, 4, 15 if mN holds words
   * 4N..4N+3 -- confirm against the including code.
   */
  105 #define LOAD_MSG_4_2(buf) \
  106 t0 = _mm_unpackhi_epi64(m1,m3); \
  107 t1 = _mm_unpacklo_epi64(m0,m1); \
  108 buf = _mm_blend_epi16(t0,t1,0x33);
  109 
  /*
   * LOAD_MSG_4_3(buf): sets buf = { m3[2], m2[3], m1[2], m0[3] } (32-bit
   * lanes, lane 0 first); the final step is a blend rather than a shuffle.
   * Expands to three unbraced statements, trailing ';' included, and
   * overwrites the scratch variables t0 and t1 (t2 is left untouched); those
   * and m0..m3 are not declared in this header.
   * NOTE(review): that is message words 14, 11, 6, 3 if mN holds words
   * 4N..4N+3 -- confirm against the including code.
   */
  110 #define LOAD_MSG_4_3(buf) \
  111 t0 = _mm_unpackhi_epi64(m3,m1); \
  112 t1 = _mm_unpackhi_epi64(m2,m0); \
  113 buf = _mm_blend_epi16(t1,t0,0x33);
  114 
  /*
   * LOAD_MSG_4_4(buf): sets buf = { m0[1], m3[0], m2[0], m3[1] } (32-bit
   * lanes, lane 0 first).  Expands to four unbraced statements, trailing ';'
   * included, and overwrites the scratch variables t0, t1 and t2; those and
   * m0..m3 are not declared in this header.
   * NOTE(review): that is message words 1, 12, 8, 13 if mN holds words
   * 4N..4N+3 -- confirm against the including code.
   */
  115 #define LOAD_MSG_4_4(buf) \
  116 t0 = _mm_blend_epi16(m0,m2,0x03); \
  117 t1 = _mm_slli_si128(t0, 8); \
  118 t2 = _mm_blend_epi16(t1,m3,0x0F); \
  119 buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,2,0,3));
  120 
  /*
   * LOAD_MSG_5_1(buf): sets buf = { m0[2], m1[2], m0[0], m2[0] } (32-bit
   * lanes, lane 0 first) using unpack operations only.  Expands to three
   * unbraced statements, trailing ';' included, and overwrites the scratch
   * variables t0 and t1 (t2 is left untouched); those and m0..m3 are not
   * declared in this header.
   * NOTE(review): that is message words 2, 6, 0, 8 if mN holds words
   * 4N..4N+3 -- confirm against the including code.
   */
  121 #define LOAD_MSG_5_1(buf) \
  122 t0 = _mm_unpackhi_epi32(m0,m1); \
  123 t1 = _mm_unpacklo_epi32(m0,m2); \
  124 buf = _mm_unpacklo_epi64(t0,t1);
  125 
  /*
   * LOAD_MSG_5_2(buf): sets buf = { m3[0], m2[2], m2[3], m0[3] } (32-bit
   * lanes, lane 0 first); the final step is a blend rather than a shuffle.
   * Expands to three unbraced statements, trailing ';' included, and
   * overwrites the scratch variables t0 and t1 (t2 is left untouched); those
   * and m0..m3 are not declared in this header.
   * NOTE(review): that is message words 12, 10, 11, 3 if mN holds words
   * 4N..4N+3 -- confirm against the including code.
   */
  126 #define LOAD_MSG_5_2(buf) \
  127 t0 = _mm_srli_si128(m2, 4); \
  128 t1 = _mm_blend_epi16(m0,m3,0x03); \
  129 buf = _mm_blend_epi16(t1,t0,0x3C);
  130 
  /*
   * LOAD_MSG_5_3(buf): sets buf = { m1[0], m1[3], m3[3], m0[1] } (32-bit
   * lanes, lane 0 first).  Expands to four unbraced statements, trailing ';'
   * included, and overwrites the scratch variables t0, t1 and t2; those and
   * m0..m3 are not declared in this header.
   * NOTE(review): that is message words 4, 7, 15, 1 if mN holds words
   * 4N..4N+3 -- confirm against the including code.
   */
  131 #define LOAD_MSG_5_3(buf) \
  132 t0 = _mm_blend_epi16(m1,m0,0x0C); \
  133 t1 = _mm_srli_si128(m3, 4); \
  134 t2 = _mm_blend_epi16(t0,t1,0x30); \
  135 buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,2,3,0));
  136 
  /*
   * LOAD_MSG_5_4(buf): sets buf = { m3[1], m1[1], m3[2], m2[1] } (32-bit
   * lanes, lane 0 first); the final step is a blend rather than a shuffle.
   * Expands to three unbraced statements, trailing ';' included, and
   * overwrites the scratch variables t0 and t1 (t2 is left untouched); those
   * and m0..m3 are not declared in this header.
   * NOTE(review): that is message words 13, 5, 14, 9 if mN holds words
   * 4N..4N+3 -- confirm against the including code.
   */
  137 #define LOAD_MSG_5_4(buf) \
  138 t0 = _mm_unpacklo_epi64(m1,m2); \
  139 t1= _mm_shuffle_epi32(m3, _MM_SHUFFLE(0,2,0,1)); \
  140 buf = _mm_blend_epi16(t0,t1,0x33);
  141 
  /*
   * LOAD_MSG_6_1(buf): sets buf = { m3[0], m0[1], m3[2], m1[0] } (32-bit
   * lanes, lane 0 first); the final step is a blend rather than a shuffle.
   * Expands to three unbraced statements, trailing ';' included, and
   * overwrites the scratch variables t0 and t1 (t2 is left untouched); those
   * and m0..m3 are not declared in this header.
   * NOTE(review): that is message words 12, 1, 14, 4 if mN holds words
   * 4N..4N+3 -- confirm against the including code.
   */
  142 #define LOAD_MSG_6_1(buf) \
  143 t0 = _mm_slli_si128(m1, 12); \
  144 t1 = _mm_blend_epi16(m0,m3,0x33); \
  145 buf = _mm_blend_epi16(t1,t0,0xC0);
  146 
  /*
   * LOAD_MSG_6_2(buf): sets buf = { m1[1], m3[3], m3[1], m2[2] } (32-bit
   * lanes, lane 0 first).  Expands to four unbraced statements, trailing ';'
   * included, and overwrites the scratch variables t0, t1 and t2; those and
   * m0..m3 are not declared in this header.
   * NOTE(review): that is message words 5, 15, 13, 10 if mN holds words
   * 4N..4N+3 -- confirm against the including code.
   */
  147 #define LOAD_MSG_6_2(buf) \
  148 t0 = _mm_blend_epi16(m3,m2,0x30); \
  149 t1 = _mm_srli_si128(m1, 4); \
  150 t2 = _mm_blend_epi16(t0,t1,0x03); \
  151 buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,3,1,0));
  152 
  /*
   * LOAD_MSG_6_3(buf): sets buf = { m0[0], m1[2], m2[1], m2[0] } (32-bit
   * lanes, lane 0 first).  The blend result is fed straight into the final
   * shuffle instead of being stored in t2, so only the scratch variables t0
   * and t1 are overwritten.  Expands to three unbraced statements, trailing
   * ';' included; t0, t1 and m0..m3 are not declared in this header.
   * NOTE(review): that is message words 0, 6, 9, 8 if mN holds words
   * 4N..4N+3 -- confirm against the including code.
   */
  153 #define LOAD_MSG_6_3(buf) \
  154 t0 = _mm_unpacklo_epi64(m0,m2); \
  155 t1 = _mm_srli_si128(m1, 4); \
  156 buf = _mm_shuffle_epi32(_mm_blend_epi16(t0,t1,0x0C), _MM_SHUFFLE(2,3,1,0));
  157 
  /*
   * LOAD_MSG_6_4(buf): sets buf = { m1[3], m0[3], m0[2], m2[3] } (32-bit
   * lanes, lane 0 first).  Expands to three unbraced statements, trailing
   * ';' included, and overwrites the scratch variables t0 and t1 (t2 is left
   * untouched); those and m0..m3 are not declared in this header.
   * NOTE(review): that is message words 7, 3, 2, 11 if mN holds words
   * 4N..4N+3 -- confirm against the including code.
   */
  158 #define LOAD_MSG_6_4(buf) \
  159 t0 = _mm_unpackhi_epi32(m1,m2); \
  160 t1 = _mm_unpackhi_epi64(m0,t0); \
  161 buf = _mm_shuffle_epi32(t1, _MM_SHUFFLE(3,0,1,2));
  162 
  /*
   * LOAD_MSG_7_1(buf): sets buf = { m3[1], m1[3], m3[0], m0[3] } (32-bit
   * lanes, lane 0 first).  Expands to three unbraced statements, trailing
   * ';' included, and overwrites the scratch variables t0 and t1 (t2 is left
   * untouched); those and m0..m3 are not declared in this header.
   * NOTE(review): that is message words 13, 7, 12, 3 if mN holds words
   * 4N..4N+3 -- confirm against the including code.
   */
  163 #define LOAD_MSG_7_1(buf) \
  164 t0 = _mm_unpackhi_epi32(m0,m1); \
  165 t1 = _mm_blend_epi16(t0,m3,0x0F); \
  166 buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(2,0,3,1));
  167 
  /*
   * LOAD_MSG_7_2(buf): sets buf = { m2[3], m3[2], m0[1], m2[1] } (32-bit
   * lanes, lane 0 first).  Expands to four unbraced statements, trailing ';'
   * included, and overwrites the scratch variables t0, t1 and t2; those and
   * m0..m3 are not declared in this header.
   * NOTE(review): that is message words 11, 14, 1, 9 if mN holds words
   * 4N..4N+3 -- confirm against the including code.
   */
  168 #define LOAD_MSG_7_2(buf) \
  169 t0 = _mm_blend_epi16(m2,m3,0x30); \
  170 t1 = _mm_srli_si128(m0,4); \
  171 t2 = _mm_blend_epi16(t0,t1,0x03); \
  172 buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,0,2,3));
  173 
  /*
   * LOAD_MSG_7_3(buf): sets buf = { m1[1], m3[3], m2[0], m0[2] } (32-bit
   * lanes, lane 0 first).  Expands to four unbraced statements, trailing ';'
   * included, and overwrites the scratch variables t0, t1 and t2; those and
   * m0..m3 are not declared in this header.
   * NOTE(review): that is message words 5, 15, 8, 2 if mN holds words
   * 4N..4N+3 -- confirm against the including code.
   */
  174 #define LOAD_MSG_7_3(buf) \
  175 t0 = _mm_unpackhi_epi64(m0,m3); \
  176 t1 = _mm_unpacklo_epi64(m1,m2); \
  177 t2 = _mm_blend_epi16(t0,t1,0x3C); \
  178 buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(0,2,3,1));
  179 
  /*
   * LOAD_MSG_7_4(buf): sets buf = { m0[0], m1[0], m1[2], m2[2] } (32-bit
   * lanes, lane 0 first) using unpack operations only.  Expands to three
   * unbraced statements, trailing ';' included, and overwrites the scratch
   * variables t0 and t1 (t2 is left untouched); those and m0..m3 are not
   * declared in this header.
   * NOTE(review): that is message words 0, 4, 6, 10 if mN holds words
   * 4N..4N+3 -- confirm against the including code.
   */
  180 #define LOAD_MSG_7_4(buf) \
  181 t0 = _mm_unpacklo_epi32(m0,m1); \
  182 t1 = _mm_unpackhi_epi32(m1,m2); \
  183 buf = _mm_unpacklo_epi64(t0,t1);
  184 
  /*
   * LOAD_MSG_8_1(buf): sets buf = { m1[2], m3[2], m2[3], m0[0] } (32-bit
   * lanes, lane 0 first).  The last step uses _mm_shufflehi_epi16, which
   * leaves the low 64 bits alone; with this immediate it swaps the two upper
   * 32-bit lanes of t2.  Expands to four unbraced statements, trailing ';'
   * included, and overwrites the scratch variables t0, t1 and t2; those and
   * m0..m3 are not declared in this header.
   * NOTE(review): that is message words 6, 14, 11, 0 if mN holds words
   * 4N..4N+3 -- confirm against the including code.
   */
  185 #define LOAD_MSG_8_1(buf) \
  186 t0 = _mm_unpackhi_epi32(m1,m3); \
  187 t1 = _mm_unpacklo_epi64(t0,m0); \
  188 t2 = _mm_blend_epi16(t1,m2,0xC0); \
  189 buf = _mm_shufflehi_epi16(t2,_MM_SHUFFLE(1,0,3,2));
  190 
  /*
   * LOAD_MSG_8_2(buf): sets buf = { m3[3], m2[1], m0[3], m2[0] } (32-bit
   * lanes, lane 0 first).  Expands to three unbraced statements, trailing
   * ';' included, and overwrites the scratch variables t0 and t1 (t2 is left
   * untouched); those and m0..m3 are not declared in this header.
   * NOTE(review): that is message words 15, 9, 3, 8 if mN holds words
   * 4N..4N+3 -- confirm against the including code.
   */
  191 #define LOAD_MSG_8_2(buf) \
  192 t0 = _mm_unpackhi_epi32(m0,m3); \
  193 t1 = _mm_blend_epi16(m2,t0,0xF0); \
  194 buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(0,2,1,3));
  195 
  /*
   * LOAD_MSG_8_3(buf): sets buf = { m3[0], m3[1], m0[1], m2[2] } (32-bit
   * lanes, lane 0 first); the final step is a blend rather than a shuffle.
   * Expands to three unbraced statements, trailing ';' included, and
   * overwrites the scratch variables t0 and t1 (t2 is left untouched); those
   * and m0..m3 are not declared in this header.
   * NOTE(review): that is message words 12, 13, 1, 10 if mN holds words
   * 4N..4N+3 -- confirm against the including code.
   */
  196 #define LOAD_MSG_8_3(buf) \
  197 t0 = _mm_blend_epi16(m2,m0,0x0C); \
  198 t1 = _mm_slli_si128(t0,4); \
  199 buf = _mm_blend_epi16(t1,m3,0x0F);
  200 
  /*
   * LOAD_MSG_8_4(buf): sets buf = { m0[2], m1[3], m1[0], m1[1] } (32-bit
   * lanes, lane 0 first).  Expands to two unbraced statements, trailing ';'
   * included, and overwrites only the scratch variable t0; t0 and m0..m3 are
   * not declared in this header.
   * NOTE(review): that is message words 2, 7, 4, 5 if mN holds words
   * 4N..4N+3 -- confirm against the including code.
   */
  201 #define LOAD_MSG_8_4(buf) \
  202 t0 = _mm_blend_epi16(m1,m0,0x30); \
  203 buf = _mm_shuffle_epi32(t0,_MM_SHUFFLE(1,0,3,2));
  204 
  /*
   * LOAD_MSG_9_1(buf): sets buf = { m2[2], m2[0], m1[3], m0[1] } (32-bit
   * lanes, lane 0 first).  Expands to four unbraced statements, trailing ';'
   * included, and overwrites the scratch variables t0, t1 and t2; those and
   * m0..m3 are not declared in this header.
   * NOTE(review): that is message words 10, 8, 7, 1 if mN holds words
   * 4N..4N+3 -- confirm against the including code.
   */
  205 #define LOAD_MSG_9_1(buf) \
  206 t0 = _mm_blend_epi16(m0,m2,0x03); \
  207 t1 = _mm_blend_epi16(m1,m2,0x30); \
  208 t2 = _mm_blend_epi16(t1,t0,0x0F); \
  209 buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(1,3,0,2));
  210 
  /*
   * LOAD_MSG_9_2(buf): sets buf = { m0[2], m1[0], m1[2], m1[1] } (32-bit
   * lanes, lane 0 first).  Expands to three unbraced statements, trailing
   * ';' included, and overwrites the scratch variables t0 and t1 (t2 is left
   * untouched); those and m0..m3 are not declared in this header.
   * NOTE(review): that is message words 2, 4, 6, 5 if mN holds words
   * 4N..4N+3 -- confirm against the including code.
   */
  211 #define LOAD_MSG_9_2(buf) \
  212 t0 = _mm_slli_si128(m0,4); \
  213 t1 = _mm_blend_epi16(m1,t0,0xC0); \
  214 buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(1,2,0,3));
  215 
  /*
   * LOAD_MSG_9_3(buf): sets buf = { m3[3], m2[1], m0[3], m3[1] } (32-bit
   * lanes, lane 0 first).  Expands to four unbraced statements, trailing ';'
   * included, and overwrites the scratch variables t0, t1 and t2; those and
   * m0..m3 are not declared in this header.
   * NOTE(review): that is message words 15, 9, 3, 13 if mN holds words
   * 4N..4N+3 -- confirm against the including code.
   */
  216 #define LOAD_MSG_9_3(buf) \
  217 t0 = _mm_unpackhi_epi32(m0,m3); \
  218 t1 = _mm_unpacklo_epi32(m2,m3); \
  219 t2 = _mm_unpackhi_epi64(t0,t1); \
  220 buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(3,0,2,1));
  221 
  /*
   * LOAD_MSG_9_4(buf): sets buf = { m2[3], m3[2], m3[0], m0[0] } (32-bit
   * lanes, lane 0 first).  Expands to four unbraced statements, trailing ';'
   * included, and overwrites the scratch variables t0, t1 and t2; those and
   * m0..m3 are not declared in this header.
   * NOTE(review): that is message words 11, 14, 12, 0 if mN holds words
   * 4N..4N+3 -- confirm against the including code.
   */
  222 #define LOAD_MSG_9_4(buf) \
  223 t0 = _mm_blend_epi16(m3,m2,0xC0); \
  224 t1 = _mm_unpacklo_epi32(m0,m3); \
  225 t2 = _mm_blend_epi16(t0,t1,0x0F); \
  226 buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(0,1,2,3));
  227 
  228 #endif
  229 

Cache object: e8f1624ad17309b759258bb5e5806c9b


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.