The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/openzfs/lib/libspl/include/sys/simd.h

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * CDDL HEADER START
    3  *
    4  * The contents of this file are subject to the terms of the
    5  * Common Development and Distribution License, Version 1.0 only
    6  * (the "License").  You may not use this file except in compliance
    7  * with the License.
    8  *
    9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   10  * or https://opensource.org/licenses/CDDL-1.0.
   11  * See the License for the specific language governing permissions
   12  * and limitations under the License.
   13  *
   14  * When distributing Covered Code, include this CDDL HEADER in each
   15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
   16  * If applicable, add the following below this CDDL HEADER, with the
   17  * fields enclosed by brackets "[]" replaced with your own identifying
   18  * information: Portions Copyright [yyyy] [name of copyright owner]
   19  *
   20  * CDDL HEADER END
   21  */
   22 /*
   23  * Copyright (c) 2006 Sun Microsystems, Inc.  All rights reserved.
   24  * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
   25  */
   26 
   27 #ifndef _LIBSPL_SYS_SIMD_H
   28 #define _LIBSPL_SYS_SIMD_H
   29 
   30 #include <sys/isa_defs.h>
   31 #include <sys/types.h>
   32 
   33 #if defined(__x86)
   34 #include <cpuid.h>
   35 
/*
 * User-space stand-ins for the kfpu_* interface on x86.
 * kfpu_allowed() is always 1, kfpu_begin()/kfpu_end() expand to empty
 * statements, kfpu_init() evaluates to 0 and kfpu_fini() to a void
 * expression.
 */
#define kfpu_allowed()          1
#define kfpu_begin()            do {} while (0)
#define kfpu_end()              do {} while (0)
#define kfpu_init()             0
#define kfpu_fini()             ((void) 0)
   41 
   42 /*
   43  * CPUID feature tests for user-space.
   44  *
   45  * x86 registers used implicitly by CPUID
   46  */
   47 typedef enum cpuid_regs {
   48         EAX = 0,
   49         EBX,
   50         ECX,
   51         EDX,
   52         CPUID_REG_CNT = 4
   53 } cpuid_regs_t;
   54 
   55 /*
   56  * List of instruction sets identified by CPUID
   57  */
   58 typedef enum cpuid_inst_sets {
   59         SSE = 0,
   60         SSE2,
   61         SSE3,
   62         SSSE3,
   63         SSE4_1,
   64         SSE4_2,
   65         OSXSAVE,
   66         AVX,
   67         AVX2,
   68         BMI1,
   69         BMI2,
   70         AVX512F,
   71         AVX512CD,
   72         AVX512DQ,
   73         AVX512BW,
   74         AVX512IFMA,
   75         AVX512VBMI,
   76         AVX512PF,
   77         AVX512ER,
   78         AVX512VL,
   79         AES,
   80         PCLMULQDQ,
   81         MOVBE
   82 } cpuid_inst_sets_t;
   83 
/*
 * Instruction set descriptor.
 *
 * Describes where one feature is reported: which CPUID leaf/sub-leaf to
 * query, which result register to inspect, and which bits must be set.
 * cpuid_features[] initializes these members positionally, so their order
 * must not change.
 */
typedef struct cpuid_feature_desc {
        uint32_t leaf;          /* CPUID leaf */
        uint32_t subleaf;       /* CPUID sub-leaf */
        uint32_t flag;          /* bit mask of the feature */
        cpuid_regs_t reg;       /* which CPUID return register to test */
} cpuid_feature_desc_t;
   93 
/*
 * Feature bit masks used by cpuid_features[].
 *
 * Most AVX-512 masks also include _AVX512F_BIT, so a single masked compare
 * requires the foundation bit together with the extension bit.  That only
 * works when both bits live in the same result register.
 */
#define _AVX512F_BIT            (1U << 16)
#define _AVX512CD_BIT           (_AVX512F_BIT | (1U << 28))
#define _AVX512DQ_BIT           (_AVX512F_BIT | (1U << 17))
#define _AVX512BW_BIT           (_AVX512F_BIT | (1U << 30))
#define _AVX512IFMA_BIT         (_AVX512F_BIT | (1U << 21))
/* Tested in ECX while AVX512F is in EBX, so F cannot be folded in here. */
#define _AVX512VBMI_BIT         (1U << 1) /* AVX512F_BIT is in another register */
#define _AVX512PF_BIT           (_AVX512F_BIT | (1U << 26))
#define _AVX512ER_BIT           (_AVX512F_BIT | (1U << 27))
/* Does not include _AVX512F_BIT; users must require AVX512F themselves. */
#define _AVX512VL_BIT           (1U << 31) /* if used also check other levels */
#define _AES_BIT                (1U << 25)
#define _PCLMULQDQ_BIT          (1U << 1)
#define _MOVBE_BIT              (1U << 22)
  106 
  107 /*
  108  * Descriptions of supported instruction sets
  109  */
  110 static const cpuid_feature_desc_t cpuid_features[] = {
  111         [SSE]           = {1U, 0U,      1U << 25,       EDX     },
  112         [SSE2]          = {1U, 0U,      1U << 26,       EDX     },
  113         [SSE3]          = {1U, 0U,      1U << 0,        ECX     },
  114         [SSSE3]         = {1U, 0U,      1U << 9,        ECX     },
  115         [SSE4_1]        = {1U, 0U,      1U << 19,       ECX     },
  116         [SSE4_2]        = {1U, 0U,      1U << 20,       ECX     },
  117         [OSXSAVE]       = {1U, 0U,      1U << 27,       ECX     },
  118         [AVX]           = {1U, 0U,      1U << 28,       ECX     },
  119         [AVX2]          = {7U, 0U,      1U << 5,        EBX     },
  120         [BMI1]          = {7U, 0U,      1U << 3,        EBX     },
  121         [BMI2]          = {7U, 0U,      1U << 8,        EBX     },
  122         [AVX512F]       = {7U, 0U, _AVX512F_BIT,        EBX     },
  123         [AVX512CD]      = {7U, 0U, _AVX512CD_BIT,       EBX     },
  124         [AVX512DQ]      = {7U, 0U, _AVX512DQ_BIT,       EBX     },
  125         [AVX512BW]      = {7U, 0U, _AVX512BW_BIT,       EBX     },
  126         [AVX512IFMA]    = {7U, 0U, _AVX512IFMA_BIT,     EBX     },
  127         [AVX512VBMI]    = {7U, 0U, _AVX512VBMI_BIT,     ECX     },
  128         [AVX512PF]      = {7U, 0U, _AVX512PF_BIT,       EBX     },
  129         [AVX512ER]      = {7U, 0U, _AVX512ER_BIT,       EBX     },
  130         [AVX512VL]      = {7U, 0U, _AVX512ER_BIT,       EBX     },
  131         [AES]           = {1U, 0U, _AES_BIT,            ECX     },
  132         [PCLMULQDQ]     = {1U, 0U, _PCLMULQDQ_BIT,      ECX     },
  133         [MOVBE]         = {1U, 0U, _MOVBE_BIT,          ECX     },
  134 };
  135 
/*
 * Read an extended control register using the XGETBV instruction.
 *
 * index is loaded into ECX to select the register; index 0 is how
 * __simd_state_enabled() reads XCR0 to check whether the OS supports
 * AVX and AVX2 state.  The result is returned as a 64-bit value with
 * EDX in the upper 32 bits and EAX in the lower 32 bits.
 *
 * Only call this function if CPUID indicates that AVX feature is
 * supported by the CPU, otherwise it might be an illegal instruction.
 */
static inline uint64_t
xgetbv(uint32_t index)
{
        uint32_t eax, edx;
        /* xgetbv - instruction byte code */
        __asm__ __volatile__(".byte 0x0f; .byte 0x01; .byte 0xd0"
            : "=a" (eax), "=d" (edx)
            : "c" (index));

        /* Combine the two 32-bit halves: EDX is the high word. */
        return ((((uint64_t)edx)<<32) | (uint64_t)eax);
}
  152 
  153 /*
  154  * Check if CPU supports a feature
  155  */
  156 static inline boolean_t
  157 __cpuid_check_feature(const cpuid_feature_desc_t *desc)
  158 {
  159         uint32_t r[CPUID_REG_CNT];
  160 
  161         if (__get_cpuid_max(0, NULL) >= desc->leaf) {
  162                 /*
  163                  * __cpuid_count is needed to properly check
  164                  * for AVX2. It is a macro, so return parameters
  165                  * are passed by value.
  166                  */
  167                 __cpuid_count(desc->leaf, desc->subleaf,
  168                     r[EAX], r[EBX], r[ECX], r[EDX]);
  169                 return ((r[desc->reg] & desc->flag) == desc->flag);
  170         }
  171         return (B_FALSE);
  172 }
  173 
/*
 * Generate a static inline predicate __cpuid_has_<name>() that looks up
 * entry `id` in cpuid_features[] and passes it to __cpuid_check_feature().
 */
#define CPUID_FEATURE_CHECK(name, id)                           \
static inline boolean_t                                         \
__cpuid_has_ ## name(void)                                      \
{                                                               \
        return (__cpuid_check_feature(&cpuid_features[id]));    \
}

/*
 * Define functions for user-space CPUID features testing
 *
 * Each line below defines one __cpuid_has_*() function.  These report
 * what CPUID says only; OS support for the register state is checked
 * separately by __simd_state_enabled().
 */
CPUID_FEATURE_CHECK(sse, SSE);
CPUID_FEATURE_CHECK(sse2, SSE2);
CPUID_FEATURE_CHECK(sse3, SSE3);
CPUID_FEATURE_CHECK(ssse3, SSSE3);
CPUID_FEATURE_CHECK(sse4_1, SSE4_1);
CPUID_FEATURE_CHECK(sse4_2, SSE4_2);
CPUID_FEATURE_CHECK(avx, AVX);
CPUID_FEATURE_CHECK(avx2, AVX2);
CPUID_FEATURE_CHECK(osxsave, OSXSAVE);
CPUID_FEATURE_CHECK(bmi1, BMI1);
CPUID_FEATURE_CHECK(bmi2, BMI2);
CPUID_FEATURE_CHECK(avx512f, AVX512F);
CPUID_FEATURE_CHECK(avx512cd, AVX512CD);
CPUID_FEATURE_CHECK(avx512dq, AVX512DQ);
CPUID_FEATURE_CHECK(avx512bw, AVX512BW);
CPUID_FEATURE_CHECK(avx512ifma, AVX512IFMA);
CPUID_FEATURE_CHECK(avx512vbmi, AVX512VBMI);
CPUID_FEATURE_CHECK(avx512pf, AVX512PF);
CPUID_FEATURE_CHECK(avx512er, AVX512ER);
CPUID_FEATURE_CHECK(avx512vl, AVX512VL);
CPUID_FEATURE_CHECK(aes, AES);
CPUID_FEATURE_CHECK(pclmulqdq, PCLMULQDQ);
CPUID_FEATURE_CHECK(movbe, MOVBE);
  207 
  208 /*
  209  * Detect register set support
  210  */
  211 static inline boolean_t
  212 __simd_state_enabled(const uint64_t state)
  213 {
  214         boolean_t has_osxsave;
  215         uint64_t xcr0;
  216 
  217         has_osxsave = __cpuid_has_osxsave();
  218         if (!has_osxsave)
  219                 return (B_FALSE);
  220 
  221         xcr0 = xgetbv(0);
  222         return ((xcr0 & state) == state);
  223 }
  224 
/*
 * XCR0 masks passed to __simd_state_enabled().
 * _XSTATE_SSE_AVX is bits 1 and 2; _XSTATE_AVX512 adds bits 5-7 on top.
 * NOTE(review): going by the names these are the SSE/AVX and AVX-512
 * state components -- confirm against the XCR0 layout in the Intel SDM.
 */
#define _XSTATE_SSE_AVX         (0x2 | 0x4)
#define _XSTATE_AVX512          (0xE0 | _XSTATE_SSE_AVX)

/* True when the OS has enabled the corresponding register state. */
#define __ymm_enabled()         __simd_state_enabled(_XSTATE_SSE_AVX)
#define __zmm_enabled()         __simd_state_enabled(_XSTATE_AVX512)
  230 
/*
 * Check if SSE instruction set is available.
 * Returns the CPUID result from __cpuid_has_sse(); no XCR0 check is made.
 */
static inline boolean_t
zfs_sse_available(void)
{
        return (__cpuid_has_sse());
}
  239 
/*
 * Check if SSE2 instruction set is available.
 * Returns the CPUID result from __cpuid_has_sse2(); no XCR0 check is made.
 */
static inline boolean_t
zfs_sse2_available(void)
{
        return (__cpuid_has_sse2());
}
  248 
/*
 * Check if SSE3 instruction set is available.
 * Returns the CPUID result from __cpuid_has_sse3(); no XCR0 check is made.
 */
static inline boolean_t
zfs_sse3_available(void)
{
        return (__cpuid_has_sse3());
}
  257 
/*
 * Check if SSSE3 instruction set is available.
 * Returns the CPUID result from __cpuid_has_ssse3(); no XCR0 check is made.
 */
static inline boolean_t
zfs_ssse3_available(void)
{
        return (__cpuid_has_ssse3());
}
  266 
/*
 * Check if SSE4.1 instruction set is available.
 * Returns the CPUID result from __cpuid_has_sse4_1(); no XCR0 check is made.
 */
static inline boolean_t
zfs_sse4_1_available(void)
{
        return (__cpuid_has_sse4_1());
}
  275 
/*
 * Check if SSE4.2 instruction set is available.
 * Returns the CPUID result from __cpuid_has_sse4_2(); no XCR0 check is made.
 */
static inline boolean_t
zfs_sse4_2_available(void)
{
        return (__cpuid_has_sse4_2());
}
  284 
  285 /*
  286  * Check if AVX instruction set is available
  287  */
  288 static inline boolean_t
  289 zfs_avx_available(void)
  290 {
  291         return (__cpuid_has_avx() && __ymm_enabled());
  292 }
  293 
  294 /*
  295  * Check if AVX2 instruction set is available
  296  */
  297 static inline boolean_t
  298 zfs_avx2_available(void)
  299 {
  300         return (__cpuid_has_avx2() && __ymm_enabled());
  301 }
  302 
/*
 * Check if BMI1 instruction set is available.
 * Returns the CPUID result from __cpuid_has_bmi1(); no XCR0 check is made.
 */
static inline boolean_t
zfs_bmi1_available(void)
{
        return (__cpuid_has_bmi1());
}
  311 
/*
 * Check if BMI2 instruction set is available.
 * Returns the CPUID result from __cpuid_has_bmi2(); no XCR0 check is made.
 */
static inline boolean_t
zfs_bmi2_available(void)
{
        return (__cpuid_has_bmi2());
}
  320 
/*
 * Check if AES instruction set is available.
 * Returns the CPUID result from __cpuid_has_aes(); no XCR0 check is made.
 */
static inline boolean_t
zfs_aes_available(void)
{
        return (__cpuid_has_aes());
}
  329 
/*
 * Check if PCLMULQDQ instruction set is available.
 * Returns the CPUID result from __cpuid_has_pclmulqdq(); no XCR0 check
 * is made.
 */
static inline boolean_t
zfs_pclmulqdq_available(void)
{
        return (__cpuid_has_pclmulqdq());
}
  338 
/*
 * Check if MOVBE instruction is available.
 * Returns the CPUID result from __cpuid_has_movbe(); no XCR0 check is made.
 */
static inline boolean_t
zfs_movbe_available(void)
{
        return (__cpuid_has_movbe());
}
  347 
  348 /*
  349  * AVX-512 family of instruction sets:
  350  *
  351  * AVX512F      Foundation
  352  * AVX512CD     Conflict Detection Instructions
  353  * AVX512ER     Exponential and Reciprocal Instructions
  354  * AVX512PF     Prefetch Instructions
  355  *
  356  * AVX512BW     Byte and Word Instructions
  357  * AVX512DQ     Double-word and Quadword Instructions
  358  * AVX512VL     Vector Length Extensions
  359  *
  360  * AVX512IFMA   Integer Fused Multiply Add (Not supported by kernel 4.4)
  361  * AVX512VBMI   Vector Byte Manipulation Instructions
  362  */
  363 
  364 /*
  365  * Check if AVX512F instruction set is available
  366  */
  367 static inline boolean_t
  368 zfs_avx512f_available(void)
  369 {
  370         return (__cpuid_has_avx512f() && __zmm_enabled());
  371 }
  372 
  373 /*
  374  * Check if AVX512CD instruction set is available
  375  */
  376 static inline boolean_t
  377 zfs_avx512cd_available(void)
  378 {
  379         return (__cpuid_has_avx512cd() && __zmm_enabled());
  380 }
  381 
  382 /*
  383  * Check if AVX512ER instruction set is available
  384  */
  385 static inline boolean_t
  386 zfs_avx512er_available(void)
  387 {
  388         return (__cpuid_has_avx512er() && __zmm_enabled());
  389 }
  390 
  391 /*
  392  * Check if AVX512PF instruction set is available
  393  */
  394 static inline boolean_t
  395 zfs_avx512pf_available(void)
  396 {
  397         return (__cpuid_has_avx512pf() && __zmm_enabled());
  398 }
  399 
  400 /*
  401  * Check if AVX512BW instruction set is available
  402  */
  403 static inline boolean_t
  404 zfs_avx512bw_available(void)
  405 {
  406         return (__cpuid_has_avx512bw() && __zmm_enabled());
  407 }
  408 
  409 /*
  410  * Check if AVX512DQ instruction set is available
  411  */
  412 static inline boolean_t
  413 zfs_avx512dq_available(void)
  414 {
  415         return (__cpuid_has_avx512dq() && __zmm_enabled());
  416 }
  417 
  418 /*
  419  * Check if AVX512VL instruction set is available
  420  */
  421 static inline boolean_t
  422 zfs_avx512vl_available(void)
  423 {
  424         return (__cpuid_has_avx512vl() && __zmm_enabled());
  425 }
  426 
  427 /*
  428  * Check if AVX512IFMA instruction set is available
  429  */
  430 static inline boolean_t
  431 zfs_avx512ifma_available(void)
  432 {
  433         return (__cpuid_has_avx512ifma() && __zmm_enabled());
  434 }
  435 
  436 /*
  437  * Check if AVX512VBMI instruction set is available
  438  */
  439 static inline boolean_t
  440 zfs_avx512vbmi_available(void)
  441 {
  442         return (__cpuid_has_avx512f() && __cpuid_has_avx512vbmi() &&
  443             __zmm_enabled());
  444 }
  445 
  446 #elif defined(__aarch64__)
  447 
/*
 * User-space stand-ins for the kfpu_* interface on aarch64.
 * kfpu_allowed() is always 1; the other macros expand to empty statements.
 */
#define kfpu_allowed()          1
#define kfpu_initialize(tsk)    do {} while (0)
#define kfpu_begin()            do {} while (0)
#define kfpu_end()              do {} while (0)
  452 
  453 #elif defined(__powerpc__)
  454 
  455 /* including <sys/auxv.h> clashes with AT_UID and others */
  456 #if defined(__FreeBSD__)
  457 #define AT_HWCAP        25      /* CPU feature flags. */
  458 #define AT_HWCAP2       26      /* CPU feature flags 2. */
  459 extern int elf_aux_info(int aux, void *buf, int buflen);
  460 static inline unsigned long
  461 getauxval(unsigned long key)
  462 {
  463         unsigned long val = 0UL;
  464 
  465         if (elf_aux_info((int)key, &val, sizeof (val)) != 0)
  466                 return (0UL);
  467 
  468         return (val);
  469 }
  470 #elif defined(__linux__)
  471 #define AT_HWCAP        16      /* CPU feature flags. */
  472 #define AT_HWCAP2       26      /* CPU feature flags 2. */
  473 extern unsigned long getauxval(unsigned long type);
  474 #endif
  475 
/*
 * User-space stand-ins for the kfpu_* interface on powerpc.
 * kfpu_allowed() is always 1; the other macros expand to empty statements.
 */
#define kfpu_allowed()          1
#define kfpu_initialize(tsk)    do {} while (0)
#define kfpu_begin()            do {} while (0)
#define kfpu_end()              do {} while (0)
  480 
  481 #define PPC_FEATURE_HAS_ALTIVEC 0x10000000
  482 static inline boolean_t
  483 zfs_altivec_available(void)
  484 {
  485         unsigned long hwcap = getauxval(AT_HWCAP);
  486 
  487         return (hwcap & PPC_FEATURE_HAS_ALTIVEC);
  488 }
  489 
  490 #define PPC_FEATURE_HAS_VSX     0x00000080
  491 static inline boolean_t
  492 zfs_vsx_available(void)
  493 {
  494         unsigned long hwcap = getauxval(AT_HWCAP);
  495 
  496         return (hwcap & PPC_FEATURE_HAS_VSX);
  497 }
  498 
  499 #define PPC_FEATURE2_ARCH_2_07  0x80000000
  500 static inline boolean_t
  501 zfs_isa207_available(void)
  502 {
  503         unsigned long hwcap = getauxval(AT_HWCAP);
  504         unsigned long hwcap2 = getauxval(AT_HWCAP2);
  505 
  506         return ((hwcap & PPC_FEATURE_HAS_VSX) &&
  507             (hwcap2 & PPC_FEATURE2_ARCH_2_07));
  508 }
  509 
  510 #else
  511 
/*
 * Fallback for architectures not handled above.
 * kfpu_allowed() is 0 here; the other macros expand to empty statements.
 */
#define kfpu_allowed()          0
#define kfpu_initialize(tsk)    do {} while (0)
#define kfpu_begin()            do {} while (0)
#define kfpu_end()              do {} while (0)
  516 
  517 #endif
  518 
  519 #endif /* _LIBSPL_SYS_SIMD_H */

Cache object: 7a0d02699ab5f9225920a688d76e83e6


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.