The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/openzfs/lib/libzpool/kernel.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * CDDL HEADER START
    3  *
    4  * The contents of this file are subject to the terms of the
    5  * Common Development and Distribution License (the "License").
    6  * You may not use this file except in compliance with the License.
    7  *
    8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
    9  * or https://opensource.org/licenses/CDDL-1.0.
   10  * See the License for the specific language governing permissions
   11  * and limitations under the License.
   12  *
   13  * When distributing Covered Code, include this CDDL HEADER in each
   14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
   15  * If applicable, add the following below this CDDL HEADER, with the
   16  * fields enclosed by brackets "[]" replaced with your own identifying
   17  * information: Portions Copyright [yyyy] [name of copyright owner]
   18  *
   19  * CDDL HEADER END
   20  */
   21 /*
   22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
   23  * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
   24  * Copyright (c) 2016 Actifio, Inc. All rights reserved.
   25  */
   26 
   27 #include <assert.h>
   28 #include <fcntl.h>
   29 #include <libgen.h>
   30 #include <poll.h>
   31 #include <stdio.h>
   32 #include <stdlib.h>
   33 #include <string.h>
   34 #include <limits.h>
   35 #include <libzutil.h>
   36 #include <sys/crypto/icp.h>
   37 #include <sys/processor.h>
   38 #include <sys/rrwlock.h>
   39 #include <sys/spa.h>
   40 #include <sys/stat.h>
   41 #include <sys/systeminfo.h>
   42 #include <sys/time.h>
   43 #include <sys/utsname.h>
   44 #include <sys/zfs_context.h>
   45 #include <sys/zfs_onexit.h>
   46 #include <sys/zfs_vfsops.h>
   47 #include <sys/zstd/zstd.h>
   48 #include <sys/zvol.h>
   49 #include <zfs_fletcher.h>
   50 #include <zlib.h>
   51 
   52 /*
   53  * Emulation of kernel services in userland.
   54  */
   55 
      /* Physical page count; kernel_init() fills it from sysconf(_SC_PHYS_PAGES). */
   56 uint64_t physmem;
      /* Emulated host id; assigned in kernel_init(), returned by zone_get_hostid(). */
   57 uint32_t hostid;
      /* uname() result; populated in kernel_init() and exposed through utsname(). */
   58 struct utsname hw_utsname;
   59 
   60 /* If set, all blocks read will be copied to the specified directory. */
   61 char *vn_dumpdir = NULL;
   62 
   63 /* this only exists to have its address taken */
   64 struct proc p0;
   65 
   66 /*
   67  * =========================================================================
   68  * threads
   69  * =========================================================================
   70  *
   71  * TS_STACK_MIN is dictated by the minimum allowed pthread stack size.  While
   72  * TS_STACK_MAX is somewhat arbitrary, it was selected to be large enough for
   73  * the expected stack depth while small enough to avoid exhausting address
   74  * space with high thread counts.
   75  */
   76 #define TS_STACK_MIN    MAX(PTHREAD_STACK_MIN, 32768)
   77 #define TS_STACK_MAX    (256 * 1024)
   78 
   79 struct zk_thread_wrapper {
   80         void (*func)(void *);
   81         void *arg;
   82 };
   83 
   84 static void *
   85 zk_thread_wrapper(void *arg)
   86 {
   87         struct zk_thread_wrapper ztw;
   88         memcpy(&ztw, arg, sizeof (ztw));
   89         free(arg);
   90         ztw.func(ztw.arg);
   91         return (NULL);
   92 }
   93 
   94 kthread_t *
   95 zk_thread_create(void (*func)(void *), void *arg, size_t stksize, int state)
   96 {
   97         pthread_attr_t attr;
   98         pthread_t tid;
   99         char *stkstr;
  100         struct zk_thread_wrapper *ztw;
  101         int detachstate = PTHREAD_CREATE_DETACHED;
  102 
  103         VERIFY0(pthread_attr_init(&attr));
  104 
  105         if (state & TS_JOINABLE)
  106                 detachstate = PTHREAD_CREATE_JOINABLE;
  107 
  108         VERIFY0(pthread_attr_setdetachstate(&attr, detachstate));
  109 
  110         /*
  111          * We allow the default stack size in user space to be specified by
  112          * setting the ZFS_STACK_SIZE environment variable.  This allows us
  113          * the convenience of observing and debugging stack overruns in
  114          * user space.  Explicitly specified stack sizes will be honored.
  115          * The usage of ZFS_STACK_SIZE is discussed further in the
  116          * ENVIRONMENT VARIABLES sections of the ztest(1) man page.
  117          */
  118         if (stksize == 0) {
  119                 stkstr = getenv("ZFS_STACK_SIZE");
  120 
  121                 if (stkstr == NULL)
  122                         stksize = TS_STACK_MAX;
  123                 else
  124                         stksize = MAX(atoi(stkstr), TS_STACK_MIN);
  125         }
  126 
  127         VERIFY3S(stksize, >, 0);
  128         stksize = P2ROUNDUP(MAX(stksize, TS_STACK_MIN), PAGESIZE);
  129 
  130         /*
  131          * If this ever fails, it may be because the stack size is not a
  132          * multiple of system page size.
  133          */
  134         VERIFY0(pthread_attr_setstacksize(&attr, stksize));
  135         VERIFY0(pthread_attr_setguardsize(&attr, PAGESIZE));
  136 
  137         VERIFY(ztw = malloc(sizeof (*ztw)));
  138         ztw->func = func;
  139         ztw->arg = arg;
  140         VERIFY0(pthread_create(&tid, &attr, zk_thread_wrapper, ztw));
  141         VERIFY0(pthread_attr_destroy(&attr));
  142 
  143         return ((void *)(uintptr_t)tid);
  144 }
  145 
  146 /*
  147  * =========================================================================
  148  * kstats
  149  * =========================================================================
  150  */
  151 kstat_t *
  152 kstat_create(const char *module, int instance, const char *name,
  153     const char *class, uchar_t type, ulong_t ndata, uchar_t ks_flag)
  154 {
  155         (void) module, (void) instance, (void) name, (void) class, (void) type,
  156             (void) ndata, (void) ks_flag;
  157         return (NULL);
  158 }
  159 
  160 void
  161 kstat_install(kstat_t *ksp)
  162 {
  163         (void) ksp;
  164 }
  165 
  166 void
  167 kstat_delete(kstat_t *ksp)
  168 {
  169         (void) ksp;
  170 }
  171 
  172 void
  173 kstat_set_raw_ops(kstat_t *ksp,
  174     int (*headers)(char *buf, size_t size),
  175     int (*data)(char *buf, size_t size, void *data),
  176     void *(*addr)(kstat_t *ksp, loff_t index))
  177 {
  178         (void) ksp, (void) headers, (void) data, (void) addr;
  179 }
  180 
  181 /*
  182  * =========================================================================
  183  * mutexes
  184  * =========================================================================
  185  */
  186 
  187 void
  188 mutex_init(kmutex_t *mp, char *name, int type, void *cookie)
  189 {
  190         (void) name, (void) type, (void) cookie;
  191         VERIFY0(pthread_mutex_init(&mp->m_lock, NULL));
  192         memset(&mp->m_owner, 0, sizeof (pthread_t));
  193 }
  194 
  195 void
  196 mutex_destroy(kmutex_t *mp)
  197 {
  198         VERIFY0(pthread_mutex_destroy(&mp->m_lock));
  199 }
  200 
  201 void
  202 mutex_enter(kmutex_t *mp)
  203 {
  204         VERIFY0(pthread_mutex_lock(&mp->m_lock));
  205         mp->m_owner = pthread_self();
  206 }
  207 
  208 int
  209 mutex_tryenter(kmutex_t *mp)
  210 {
  211         int error = pthread_mutex_trylock(&mp->m_lock);
  212         if (error == 0) {
  213                 mp->m_owner = pthread_self();
  214                 return (1);
  215         } else {
  216                 VERIFY3S(error, ==, EBUSY);
  217                 return (0);
  218         }
  219 }
  220 
  221 void
  222 mutex_exit(kmutex_t *mp)
  223 {
  224         memset(&mp->m_owner, 0, sizeof (pthread_t));
  225         VERIFY0(pthread_mutex_unlock(&mp->m_lock));
  226 }
  227 
  228 /*
  229  * =========================================================================
  230  * rwlocks
  231  * =========================================================================
  232  */
  233 
  234 void
  235 rw_init(krwlock_t *rwlp, char *name, int type, void *arg)
  236 {
  237         (void) name, (void) type, (void) arg;
  238         VERIFY0(pthread_rwlock_init(&rwlp->rw_lock, NULL));
  239         rwlp->rw_readers = 0;
  240         rwlp->rw_owner = 0;
  241 }
  242 
  243 void
  244 rw_destroy(krwlock_t *rwlp)
  245 {
  246         VERIFY0(pthread_rwlock_destroy(&rwlp->rw_lock));
  247 }
  248 
  249 void
  250 rw_enter(krwlock_t *rwlp, krw_t rw)
  251 {
  252         if (rw == RW_READER) {
  253                 VERIFY0(pthread_rwlock_rdlock(&rwlp->rw_lock));
  254                 atomic_inc_uint(&rwlp->rw_readers);
  255         } else {
  256                 VERIFY0(pthread_rwlock_wrlock(&rwlp->rw_lock));
  257                 rwlp->rw_owner = pthread_self();
  258         }
  259 }
  260 
  261 void
  262 rw_exit(krwlock_t *rwlp)
  263 {
  264         if (RW_READ_HELD(rwlp))
  265                 atomic_dec_uint(&rwlp->rw_readers);
  266         else
  267                 rwlp->rw_owner = 0;
  268 
  269         VERIFY0(pthread_rwlock_unlock(&rwlp->rw_lock));
  270 }
  271 
  272 int
  273 rw_tryenter(krwlock_t *rwlp, krw_t rw)
  274 {
  275         int error;
  276 
  277         if (rw == RW_READER)
  278                 error = pthread_rwlock_tryrdlock(&rwlp->rw_lock);
  279         else
  280                 error = pthread_rwlock_trywrlock(&rwlp->rw_lock);
  281 
  282         if (error == 0) {
  283                 if (rw == RW_READER)
  284                         atomic_inc_uint(&rwlp->rw_readers);
  285                 else
  286                         rwlp->rw_owner = pthread_self();
  287 
  288                 return (1);
  289         }
  290 
  291         VERIFY3S(error, ==, EBUSY);
  292 
  293         return (0);
  294 }
  295 
  296 uint32_t
  297 zone_get_hostid(void *zonep)
  298 {
  299         /*
  300          * We're emulating the system's hostid in userland.
  301          */
  302         (void) zonep;
  303         return (hostid);
  304 }
  305 
  306 int
  307 rw_tryupgrade(krwlock_t *rwlp)
  308 {
  309         (void) rwlp;
  310         return (0);
  311 }
  312 
  313 /*
  314  * =========================================================================
  315  * condition variables
  316  * =========================================================================
  317  */
  318 
  319 void
  320 cv_init(kcondvar_t *cv, char *name, int type, void *arg)
  321 {
  322         (void) name, (void) type, (void) arg;
  323         VERIFY0(pthread_cond_init(cv, NULL));
  324 }
  325 
  326 void
  327 cv_destroy(kcondvar_t *cv)
  328 {
  329         VERIFY0(pthread_cond_destroy(cv));
  330 }
  331 
  332 void
  333 cv_wait(kcondvar_t *cv, kmutex_t *mp)
  334 {
  335         memset(&mp->m_owner, 0, sizeof (pthread_t));
  336         VERIFY0(pthread_cond_wait(cv, &mp->m_lock));
  337         mp->m_owner = pthread_self();
  338 }
  339 
  340 int
  341 cv_wait_sig(kcondvar_t *cv, kmutex_t *mp)
  342 {
  343         cv_wait(cv, mp);
  344         return (1);
  345 }
  346 
  347 int
  348 cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)
  349 {
  350         int error;
  351         struct timeval tv;
  352         struct timespec ts;
  353         clock_t delta;
  354 
  355         delta = abstime - ddi_get_lbolt();
  356         if (delta <= 0)
  357                 return (-1);
  358 
  359         VERIFY(gettimeofday(&tv, NULL) == 0);
  360 
  361         ts.tv_sec = tv.tv_sec + delta / hz;
  362         ts.tv_nsec = tv.tv_usec * NSEC_PER_USEC + (delta % hz) * (NANOSEC / hz);
  363         if (ts.tv_nsec >= NANOSEC) {
  364                 ts.tv_sec++;
  365                 ts.tv_nsec -= NANOSEC;
  366         }
  367 
  368         memset(&mp->m_owner, 0, sizeof (pthread_t));
  369         error = pthread_cond_timedwait(cv, &mp->m_lock, &ts);
  370         mp->m_owner = pthread_self();
  371 
  372         if (error == ETIMEDOUT)
  373                 return (-1);
  374 
  375         VERIFY0(error);
  376 
  377         return (1);
  378 }
  379 
  380 int
  381 cv_timedwait_hires(kcondvar_t *cv, kmutex_t *mp, hrtime_t tim, hrtime_t res,
  382     int flag)
  383 {
  384         (void) res;
  385         int error;
  386         struct timeval tv;
  387         struct timespec ts;
  388         hrtime_t delta;
  389 
  390         ASSERT(flag == 0 || flag == CALLOUT_FLAG_ABSOLUTE);
  391 
  392         delta = tim;
  393         if (flag & CALLOUT_FLAG_ABSOLUTE)
  394                 delta -= gethrtime();
  395 
  396         if (delta <= 0)
  397                 return (-1);
  398 
  399         VERIFY0(gettimeofday(&tv, NULL));
  400 
  401         ts.tv_sec = tv.tv_sec + delta / NANOSEC;
  402         ts.tv_nsec = tv.tv_usec * NSEC_PER_USEC + (delta % NANOSEC);
  403         if (ts.tv_nsec >= NANOSEC) {
  404                 ts.tv_sec++;
  405                 ts.tv_nsec -= NANOSEC;
  406         }
  407 
  408         memset(&mp->m_owner, 0, sizeof (pthread_t));
  409         error = pthread_cond_timedwait(cv, &mp->m_lock, &ts);
  410         mp->m_owner = pthread_self();
  411 
  412         if (error == ETIMEDOUT)
  413                 return (-1);
  414 
  415         VERIFY0(error);
  416 
  417         return (1);
  418 }
  419 
  420 void
  421 cv_signal(kcondvar_t *cv)
  422 {
  423         VERIFY0(pthread_cond_signal(cv));
  424 }
  425 
  426 void
  427 cv_broadcast(kcondvar_t *cv)
  428 {
  429         VERIFY0(pthread_cond_broadcast(cv));
  430 }
  431 
  432 /*
  433  * =========================================================================
  434  * procfs list
  435  * =========================================================================
  436  */
  437 
  438 void
  439 seq_printf(struct seq_file *m, const char *fmt, ...)
  440 {
  441         (void) m, (void) fmt;
  442 }
  443 
  444 void
  445 procfs_list_install(const char *module,
  446     const char *submodule,
  447     const char *name,
  448     mode_t mode,
  449     procfs_list_t *procfs_list,
  450     int (*show)(struct seq_file *f, void *p),
  451     int (*show_header)(struct seq_file *f),
  452     int (*clear)(procfs_list_t *procfs_list),
  453     size_t procfs_list_node_off)
  454 {
  455         (void) module, (void) submodule, (void) name, (void) mode, (void) show,
  456             (void) show_header, (void) clear;
  457         mutex_init(&procfs_list->pl_lock, NULL, MUTEX_DEFAULT, NULL);
  458         list_create(&procfs_list->pl_list,
  459             procfs_list_node_off + sizeof (procfs_list_node_t),
  460             procfs_list_node_off + offsetof(procfs_list_node_t, pln_link));
  461         procfs_list->pl_next_id = 1;
  462         procfs_list->pl_node_offset = procfs_list_node_off;
  463 }
  464 
  465 void
  466 procfs_list_uninstall(procfs_list_t *procfs_list)
  467 {
  468         (void) procfs_list;
  469 }
  470 
  471 void
  472 procfs_list_destroy(procfs_list_t *procfs_list)
  473 {
  474         ASSERT(list_is_empty(&procfs_list->pl_list));
  475         list_destroy(&procfs_list->pl_list);
  476         mutex_destroy(&procfs_list->pl_lock);
  477 }
  478 
  479 #define NODE_ID(procfs_list, obj) \
  480                 (((procfs_list_node_t *)(((char *)obj) + \
  481                 (procfs_list)->pl_node_offset))->pln_id)
  482 
  483 void
  484 procfs_list_add(procfs_list_t *procfs_list, void *p)
  485 {
  486         ASSERT(MUTEX_HELD(&procfs_list->pl_lock));
  487         NODE_ID(procfs_list, p) = procfs_list->pl_next_id++;
  488         list_insert_tail(&procfs_list->pl_list, p);
  489 }
  490 
  491 /*
  492  * =========================================================================
  493  * vnode operations
  494  * =========================================================================
  495  */
  496 
  497 /*
  498  * =========================================================================
  499  * Figure out which debugging statements to print
  500  * =========================================================================
  501  */
  502 
  503 static char *dprintf_string;
  504 static int dprintf_print_all;
  505 
  506 int
  507 dprintf_find_string(const char *string)
  508 {
  509         char *tmp_str = dprintf_string;
  510         int len = strlen(string);
  511 
  512         /*
  513          * Find out if this is a string we want to print.
  514          * String format: file1.c,function_name1,file2.c,file3.c
  515          */
  516 
  517         while (tmp_str != NULL) {
  518                 if (strncmp(tmp_str, string, len) == 0 &&
  519                     (tmp_str[len] == ',' || tmp_str[len] == '\0'))
  520                         return (1);
  521                 tmp_str = strchr(tmp_str, ',');
  522                 if (tmp_str != NULL)
  523                         tmp_str++; /* Get rid of , */
  524         }
  525         return (0);
  526 }
  527 
  528 void
  529 dprintf_setup(int *argc, char **argv)
  530 {
  531         int i, j;
  532 
  533         /*
  534          * Debugging can be specified two ways: by setting the
  535          * environment variable ZFS_DEBUG, or by including a
  536          * "debug=..."  argument on the command line.  The command
  537          * line setting overrides the environment variable.
  538          */
  539 
  540         for (i = 1; i < *argc; i++) {
  541                 int len = strlen("debug=");
  542                 /* First look for a command line argument */
  543                 if (strncmp("debug=", argv[i], len) == 0) {
  544                         dprintf_string = argv[i] + len;
  545                         /* Remove from args */
  546                         for (j = i; j < *argc; j++)
  547                                 argv[j] = argv[j+1];
  548                         argv[j] = NULL;
  549                         (*argc)--;
  550                 }
  551         }
  552 
  553         if (dprintf_string == NULL) {
  554                 /* Look for ZFS_DEBUG environment variable */
  555                 dprintf_string = getenv("ZFS_DEBUG");
  556         }
  557 
  558         /*
  559          * Are we just turning on all debugging?
  560          */
  561         if (dprintf_find_string("on"))
  562                 dprintf_print_all = 1;
  563 
  564         if (dprintf_string != NULL)
  565                 zfs_flags |= ZFS_DEBUG_DPRINTF;
  566 }
  567 
  568 /*
  569  * =========================================================================
  570  * debug printfs
  571  * =========================================================================
  572  */
  573 void
  574 __dprintf(boolean_t dprint, const char *file, const char *func,
  575     int line, const char *fmt, ...)
  576 {
  577         /* Get rid of annoying "../common/" prefix to filename. */
  578         const char *newfile = zfs_basename(file);
  579 
  580         va_list adx;
  581         if (dprint) {
  582                 /* dprintf messages are printed immediately */
  583 
  584                 if (!dprintf_print_all &&
  585                     !dprintf_find_string(newfile) &&
  586                     !dprintf_find_string(func))
  587                         return;
  588 
  589                 /* Print out just the function name if requested */
  590                 flockfile(stdout);
  591                 if (dprintf_find_string("pid"))
  592                         (void) printf("%d ", getpid());
  593                 if (dprintf_find_string("tid"))
  594                         (void) printf("%ju ",
  595                             (uintmax_t)(uintptr_t)pthread_self());
  596                 if (dprintf_find_string("cpu"))
  597                         (void) printf("%u ", getcpuid());
  598                 if (dprintf_find_string("time"))
  599                         (void) printf("%llu ", gethrtime());
  600                 if (dprintf_find_string("long"))
  601                         (void) printf("%s, line %d: ", newfile, line);
  602                 (void) printf("dprintf: %s: ", func);
  603                 va_start(adx, fmt);
  604                 (void) vprintf(fmt, adx);
  605                 va_end(adx);
  606                 funlockfile(stdout);
  607         } else {
  608                 /* zfs_dbgmsg is logged for dumping later */
  609                 size_t size;
  610                 char *buf;
  611                 int i;
  612 
  613                 size = 1024;
  614                 buf = umem_alloc(size, UMEM_NOFAIL);
  615                 i = snprintf(buf, size, "%s:%d:%s(): ", newfile, line, func);
  616 
  617                 if (i < size) {
  618                         va_start(adx, fmt);
  619                         (void) vsnprintf(buf + i, size - i, fmt, adx);
  620                         va_end(adx);
  621                 }
  622 
  623                 __zfs_dbgmsg(buf);
  624 
  625                 umem_free(buf, size);
  626         }
  627 }
  628 
  629 /*
  630  * =========================================================================
  631  * cmn_err() and panic()
  632  * =========================================================================
  633  */
      /*
       * Text written before and after a message by vcmn_err(), indexed by the
       * 'ce' level.  Each table has CE_IGNORE rows, so the valid indices are
       * 0 .. CE_IGNORE - 1.
       */
  634 static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" };
  635 static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" };
  636 
  637 __attribute__((noreturn)) void
  638 vpanic(const char *fmt, va_list adx)
  639 {
  640         (void) fprintf(stderr, "error: ");
  641         (void) vfprintf(stderr, fmt, adx);
  642         (void) fprintf(stderr, "\n");
  643 
  644         abort();        /* think of it as a "user-level crash dump" */
  645 }
  646 
  647 __attribute__((noreturn)) void
  648 panic(const char *fmt, ...)
  649 {
  650         va_list adx;
  651 
  652         va_start(adx, fmt);
  653         vpanic(fmt, adx);
  654         va_end(adx);
  655 }
  656 
  657 void
  658 vcmn_err(int ce, const char *fmt, va_list adx)
  659 {
  660         if (ce == CE_PANIC)
  661                 vpanic(fmt, adx);
  662         if (ce != CE_NOTE) {    /* suppress noise in userland stress testing */
  663                 (void) fprintf(stderr, "%s", ce_prefix[ce]);
  664                 (void) vfprintf(stderr, fmt, adx);
  665                 (void) fprintf(stderr, "%s", ce_suffix[ce]);
  666         }
  667 }
  668 
  669 void
  670 cmn_err(int ce, const char *fmt, ...)
  671 {
  672         va_list adx;
  673 
  674         va_start(adx, fmt);
  675         vcmn_err(ce, fmt, adx);
  676         va_end(adx);
  677 }
  678 
  679 /*
  680  * =========================================================================
  681  * misc routines
  682  * =========================================================================
  683  */
  684 
  685 void
  686 delay(clock_t ticks)
  687 {
  688         (void) poll(0, 0, ticks * (1000 / hz));
  689 }
  690 
  691 /*
  692  * Find highest one bit set.
  693  * Returns bit number + 1 of highest bit that is set, otherwise returns 0.
  694  * The __builtin_clzll() function is supported by both GCC and Clang.
  695  */
  696 int
  697 highbit64(uint64_t i)
  698 {
  699         if (i == 0)
  700         return (0);
  701 
  702         return (NBBY * sizeof (uint64_t) - __builtin_clzll(i));
  703 }
  704 
  705 /*
  706  * Find lowest one bit set.
  707  * Returns bit number + 1 of lowest bit that is set, otherwise returns 0.
  708  * The __builtin_ffsll() function is supported by both GCC and Clang.
  709  */
  710 int
  711 lowbit64(uint64_t i)
  712 {
  713         if (i == 0)
  714                 return (0);
  715 
  716         return (__builtin_ffsll(i));
  717 }
  718 
  719 const char *random_path = "/dev/random";
  720 const char *urandom_path = "/dev/urandom";
  721 static int random_fd = -1, urandom_fd = -1;
  722 
  723 void
  724 random_init(void)
  725 {
  726         VERIFY((random_fd = open(random_path, O_RDONLY | O_CLOEXEC)) != -1);
  727         VERIFY((urandom_fd = open(urandom_path, O_RDONLY | O_CLOEXEC)) != -1);
  728 }
  729 
  730 void
  731 random_fini(void)
  732 {
  733         close(random_fd);
  734         close(urandom_fd);
  735 
  736         random_fd = -1;
  737         urandom_fd = -1;
  738 }
  739 
  740 static int
  741 random_get_bytes_common(uint8_t *ptr, size_t len, int fd)
  742 {
  743         size_t resid = len;
  744         ssize_t bytes;
  745 
  746         ASSERT(fd != -1);
  747 
  748         while (resid != 0) {
  749                 bytes = read(fd, ptr, resid);
  750                 ASSERT3S(bytes, >=, 0);
  751                 ptr += bytes;
  752                 resid -= bytes;
  753         }
  754 
  755         return (0);
  756 }
  757 
  758 int
  759 random_get_bytes(uint8_t *ptr, size_t len)
  760 {
  761         return (random_get_bytes_common(ptr, len, random_fd));
  762 }
  763 
  764 int
  765 random_get_pseudo_bytes(uint8_t *ptr, size_t len)
  766 {
  767         return (random_get_bytes_common(ptr, len, urandom_fd));
  768 }
  769 
  770 int
  771 ddi_strtoull(const char *str, char **nptr, int base, u_longlong_t *result)
  772 {
  773         errno = 0;
  774         *result = strtoull(str, nptr, base);
  775         if (*result == 0)
  776                 return (errno);
  777         return (0);
  778 }
  779 
  780 utsname_t *
  781 utsname(void)
  782 {
  783         return (&hw_utsname);
  784 }
  785 
  786 /*
  787  * =========================================================================
  788  * kernel emulation setup & teardown
  789  * =========================================================================
  790  */
  791 static int
  792 umem_out_of_memory(void)
  793 {
  794         char errmsg[] = "out of memory -- generating core dump\n";
  795 
  796         (void) fprintf(stderr, "%s", errmsg);
  797         abort();
  798         return (0);
  799 }
  800 
  801 void
  802 kernel_init(int mode)
  803 {
  804         extern uint_t rrw_tsd_key;
  805 
  806         umem_nofail_callback(umem_out_of_memory);
  807 
  808         physmem = sysconf(_SC_PHYS_PAGES);
  809 
  810         dprintf("physmem = %llu pages (%.2f GB)\n", (u_longlong_t)physmem,
  811             (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));
  812 
  813         hostid = (mode & SPA_MODE_WRITE) ? get_system_hostid() : 0;
  814 
  815         random_init();
  816 
  817         VERIFY0(uname(&hw_utsname));
  818 
  819         system_taskq_init();
  820         icp_init();
  821 
  822         zstd_init();
  823 
  824         spa_init((spa_mode_t)mode);
  825 
  826         fletcher_4_init();
  827 
  828         tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
  829 }
  830 
  831 void
  832 kernel_fini(void)
  833 {
  834         fletcher_4_fini();
  835         spa_fini();
  836 
  837         zstd_fini();
  838 
  839         icp_fini();
  840         system_taskq_fini();
  841 
  842         random_fini();
  843 }
  844 
  845 uid_t
  846 crgetuid(cred_t *cr)
  847 {
  848         (void) cr;
  849         return (0);
  850 }
  851 
  852 uid_t
  853 crgetruid(cred_t *cr)
  854 {
  855         (void) cr;
  856         return (0);
  857 }
  858 
  859 gid_t
  860 crgetgid(cred_t *cr)
  861 {
  862         (void) cr;
  863         return (0);
  864 }
  865 
  866 int
  867 crgetngroups(cred_t *cr)
  868 {
  869         (void) cr;
  870         return (0);
  871 }
  872 
  873 gid_t *
  874 crgetgroups(cred_t *cr)
  875 {
  876         (void) cr;
  877         return (NULL);
  878 }
  879 
  880 int
  881 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
  882 {
  883         (void) name, (void) cr;
  884         return (0);
  885 }
  886 
  887 int
  888 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
  889 {
  890         (void) from, (void) to, (void) cr;
  891         return (0);
  892 }
  893 
  894 int
  895 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
  896 {
  897         (void) name, (void) cr;
  898         return (0);
  899 }
  900 
  901 int
  902 secpolicy_zfs(const cred_t *cr)
  903 {
  904         (void) cr;
  905         return (0);
  906 }
  907 
  908 int
  909 secpolicy_zfs_proc(const cred_t *cr, proc_t *proc)
  910 {
  911         (void) cr, (void) proc;
  912         return (0);
  913 }
  914 
  915 ksiddomain_t *
  916 ksid_lookupdomain(const char *dom)
  917 {
  918         ksiddomain_t *kd;
  919 
  920         kd = umem_zalloc(sizeof (ksiddomain_t), UMEM_NOFAIL);
  921         kd->kd_name = spa_strdup(dom);
  922         return (kd);
  923 }
  924 
  925 void
  926 ksiddomain_rele(ksiddomain_t *ksid)
  927 {
  928         spa_strfree(ksid->kd_name);
  929         umem_free(ksid, sizeof (ksiddomain_t));
  930 }
  931 
  932 char *
  933 kmem_vasprintf(const char *fmt, va_list adx)
  934 {
  935         char *buf = NULL;
  936         va_list adx_copy;
  937 
  938         va_copy(adx_copy, adx);
  939         VERIFY(vasprintf(&buf, fmt, adx_copy) != -1);
  940         va_end(adx_copy);
  941 
  942         return (buf);
  943 }
  944 
  945 char *
  946 kmem_asprintf(const char *fmt, ...)
  947 {
  948         char *buf = NULL;
  949         va_list adx;
  950 
  951         va_start(adx, fmt);
  952         VERIFY(vasprintf(&buf, fmt, adx) != -1);
  953         va_end(adx);
  954 
  955         return (buf);
  956 }
  957 
  958 /*
  959  * kmem_scnprintf() will return the number of characters that it would have
  960  * printed whenever it is limited by value of the size variable, rather than
  961  * the number of characters that it did print. This can cause misbehavior on
  962  * subsequent uses of the return value, so we define a safe version that will
  963  * return the number of characters actually printed, minus the NULL format
  964  * character.  Subsequent use of this by the safe string functions is safe
  965  * whether it is snprintf(), strlcat() or strlcpy().
  966  */
  967 int
  968 kmem_scnprintf(char *restrict str, size_t size, const char *restrict fmt, ...)
  969 {
  970         int n;
  971         va_list ap;
  972 
  973         /* Make the 0 case a no-op so that we do not return -1 */
  974         if (size == 0)
  975                 return (0);
  976 
  977         va_start(ap, fmt);
  978         n = vsnprintf(str, size, fmt, ap);
  979         va_end(ap);
  980 
  981         if (n >= size)
  982                 n = size - 1;
  983 
  984         return (n);
  985 }
  986 
  987 zfs_file_t *
  988 zfs_onexit_fd_hold(int fd, minor_t *minorp)
  989 {
  990         (void) fd;
  991         *minorp = 0;
  992         return (NULL);
  993 }
  994 
  995 void
  996 zfs_onexit_fd_rele(zfs_file_t *fp)
  997 {
  998         (void) fp;
  999 }
 1000 
 1001 int
 1002 zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
 1003     uintptr_t *action_handle)
 1004 {
 1005         (void) minor, (void) func, (void) data, (void) action_handle;
 1006         return (0);
 1007 }
 1008 
 1009 fstrans_cookie_t
 1010 spl_fstrans_mark(void)
 1011 {
 1012         return ((fstrans_cookie_t)0);
 1013 }
 1014 
 1015 void
 1016 spl_fstrans_unmark(fstrans_cookie_t cookie)
 1017 {
 1018         (void) cookie;
 1019 }
 1020 
 1021 int
 1022 __spl_pf_fstrans_check(void)
 1023 {
 1024         return (0);
 1025 }
 1026 
 1027 int
 1028 kmem_cache_reap_active(void)
 1029 {
 1030         return (0);
 1031 }
 1032 
 1033 void
 1034 zvol_create_minor(const char *name)
 1035 {
 1036         (void) name;
 1037 }
 1038 
 1039 void
 1040 zvol_create_minors_recursive(const char *name)
 1041 {
 1042         (void) name;
 1043 }
 1044 
 1045 void
 1046 zvol_remove_minors(spa_t *spa, const char *name, boolean_t async)
 1047 {
 1048         (void) spa, (void) name, (void) async;
 1049 }
 1050 
 1051 void
 1052 zvol_rename_minors(spa_t *spa, const char *oldname, const char *newname,
 1053     boolean_t async)
 1054 {
 1055         (void) spa, (void) oldname, (void) newname, (void) async;
 1056 }
 1057 
 1058 /*
 1059  * Open file
 1060  *
 1061  * path - fully qualified path to file
 1062  * flags - file attributes O_READ / O_WRITE / O_EXCL
 1063  * fpp - pointer to return file pointer
 1064  *
 1065  * Returns 0 on success underlying error on failure.
 1066  */
 1067 int
 1068 zfs_file_open(const char *path, int flags, int mode, zfs_file_t **fpp)
 1069 {
 1070         int fd = -1;
 1071         int dump_fd = -1;
 1072         int err;
 1073         int old_umask = 0;
 1074         zfs_file_t *fp;
 1075         struct stat64 st;
 1076 
 1077         if (!(flags & O_CREAT) && stat64(path, &st) == -1)
 1078                 return (errno);
 1079 
 1080         if (!(flags & O_CREAT) && S_ISBLK(st.st_mode))
 1081                 flags |= O_DIRECT;
 1082 
 1083         if (flags & O_CREAT)
 1084                 old_umask = umask(0);
 1085 
 1086         fd = open64(path, flags, mode);
 1087         if (fd == -1)
 1088                 return (errno);
 1089 
 1090         if (flags & O_CREAT)
 1091                 (void) umask(old_umask);
 1092 
 1093         if (vn_dumpdir != NULL) {
 1094                 char *dumppath = umem_zalloc(MAXPATHLEN, UMEM_NOFAIL);
 1095                 const char *inpath = zfs_basename(path);
 1096 
 1097                 (void) snprintf(dumppath, MAXPATHLEN,
 1098                     "%s/%s", vn_dumpdir, inpath);
 1099                 dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666);
 1100                 umem_free(dumppath, MAXPATHLEN);
 1101                 if (dump_fd == -1) {
 1102                         err = errno;
 1103                         close(fd);
 1104                         return (err);
 1105                 }
 1106         } else {
 1107                 dump_fd = -1;
 1108         }
 1109 
 1110         (void) fcntl(fd, F_SETFD, FD_CLOEXEC);
 1111 
 1112         fp = umem_zalloc(sizeof (zfs_file_t), UMEM_NOFAIL);
 1113         fp->f_fd = fd;
 1114         fp->f_dump_fd = dump_fd;
 1115         *fpp = fp;
 1116 
 1117         return (0);
 1118 }
 1119 
 1120 void
 1121 zfs_file_close(zfs_file_t *fp)
 1122 {
 1123         close(fp->f_fd);
 1124         if (fp->f_dump_fd != -1)
 1125                 close(fp->f_dump_fd);
 1126 
 1127         umem_free(fp, sizeof (zfs_file_t));
 1128 }
 1129 
 1130 /*
 1131  * Stateful write - use os internal file pointer to determine where to
 1132  * write and update on successful completion.
 1133  *
 1134  * fp -  pointer to file (pipe, socket, etc) to write to
 1135  * buf - buffer to write
 1136  * count - # of bytes to write
 1137  * resid -  pointer to count of unwritten bytes  (if short write)
 1138  *
 1139  * Returns 0 on success errno on failure.
 1140  */
 1141 int
 1142 zfs_file_write(zfs_file_t *fp, const void *buf, size_t count, ssize_t *resid)
 1143 {
 1144         ssize_t rc;
 1145 
 1146         rc = write(fp->f_fd, buf, count);
 1147         if (rc < 0)
 1148                 return (errno);
 1149 
 1150         if (resid) {
 1151                 *resid = count - rc;
 1152         } else if (rc != count) {
 1153                 return (EIO);
 1154         }
 1155 
 1156         return (0);
 1157 }
 1158 
 1159 /*
 1160  * Stateless write - os internal file pointer is not updated.
 1161  *
 1162  * fp -  pointer to file (pipe, socket, etc) to write to
 1163  * buf - buffer to write
 1164  * count - # of bytes to write
 1165  * off - file offset to write to (only valid for seekable types)
 1166  * resid -  pointer to count of unwritten bytes
 1167  *
 1168  * Returns 0 on success errno on failure.
 1169  */
 1170 int
 1171 zfs_file_pwrite(zfs_file_t *fp, const void *buf,
 1172     size_t count, loff_t pos, ssize_t *resid)
 1173 {
 1174         ssize_t rc, split, done;
 1175         int sectors;
 1176 
 1177         /*
 1178          * To simulate partial disk writes, we split writes into two
 1179          * system calls so that the process can be killed in between.
 1180          * This is used by ztest to simulate realistic failure modes.
 1181          */
 1182         sectors = count >> SPA_MINBLOCKSHIFT;
 1183         split = (sectors > 0 ? rand() % sectors : 0) << SPA_MINBLOCKSHIFT;
 1184         rc = pwrite64(fp->f_fd, buf, split, pos);
 1185         if (rc != -1) {
 1186                 done = rc;
 1187                 rc = pwrite64(fp->f_fd, (char *)buf + split,
 1188                     count - split, pos + split);
 1189         }
 1190 #ifdef __linux__
 1191         if (rc == -1 && errno == EINVAL) {
 1192                 /*
 1193                  * Under Linux, this most likely means an alignment issue
 1194                  * (memory or disk) due to O_DIRECT, so we abort() in order
 1195                  * to catch the offender.
 1196                  */
 1197                 abort();
 1198         }
 1199 #endif
 1200 
 1201         if (rc < 0)
 1202                 return (errno);
 1203 
 1204         done += rc;
 1205 
 1206         if (resid) {
 1207                 *resid = count - done;
 1208         } else if (done != count) {
 1209                 return (EIO);
 1210         }
 1211 
 1212         return (0);
 1213 }
 1214 
 1215 /*
 1216  * Stateful read - use os internal file pointer to determine where to
 1217  * read and update on successful completion.
 1218  *
 1219  * fp -  pointer to file (pipe, socket, etc) to read from
 1220  * buf - buffer to write
 1221  * count - # of bytes to read
 1222  * resid -  pointer to count of unread bytes (if short read)
 1223  *
 1224  * Returns 0 on success errno on failure.
 1225  */
 1226 int
 1227 zfs_file_read(zfs_file_t *fp, void *buf, size_t count, ssize_t *resid)
 1228 {
 1229         int rc;
 1230 
 1231         rc = read(fp->f_fd, buf, count);
 1232         if (rc < 0)
 1233                 return (errno);
 1234 
 1235         if (resid) {
 1236                 *resid = count - rc;
 1237         } else if (rc != count) {
 1238                 return (EIO);
 1239         }
 1240 
 1241         return (0);
 1242 }
 1243 
 1244 /*
 1245  * Stateless read - os internal file pointer is not updated.
 1246  *
 1247  * fp -  pointer to file (pipe, socket, etc) to read from
 1248  * buf - buffer to write
 1249  * count - # of bytes to write
 1250  * off - file offset to read from (only valid for seekable types)
 1251  * resid -  pointer to count of unwritten bytes (if short write)
 1252  *
 1253  * Returns 0 on success errno on failure.
 1254  */
 1255 int
 1256 zfs_file_pread(zfs_file_t *fp, void *buf, size_t count, loff_t off,
 1257     ssize_t *resid)
 1258 {
 1259         ssize_t rc;
 1260 
 1261         rc = pread64(fp->f_fd, buf, count, off);
 1262         if (rc < 0) {
 1263 #ifdef __linux__
 1264                 /*
 1265                  * Under Linux, this most likely means an alignment issue
 1266                  * (memory or disk) due to O_DIRECT, so we abort() in order to
 1267                  * catch the offender.
 1268                  */
 1269                 if (errno == EINVAL)
 1270                         abort();
 1271 #endif
 1272                 return (errno);
 1273         }
 1274 
 1275         if (fp->f_dump_fd != -1) {
 1276                 int status;
 1277 
 1278                 status = pwrite64(fp->f_dump_fd, buf, rc, off);
 1279                 ASSERT(status != -1);
 1280         }
 1281 
 1282         if (resid) {
 1283                 *resid = count - rc;
 1284         } else if (rc != count) {
 1285                 return (EIO);
 1286         }
 1287 
 1288         return (0);
 1289 }
 1290 
 1291 /*
 1292  * lseek - set / get file pointer
 1293  *
 1294  * fp -  pointer to file (pipe, socket, etc) to read from
 1295  * offp - value to seek to, returns current value plus passed offset
 1296  * whence - see man pages for standard lseek whence values
 1297  *
 1298  * Returns 0 on success errno on failure (ESPIPE for non seekable types)
 1299  */
 1300 int
 1301 zfs_file_seek(zfs_file_t *fp, loff_t *offp, int whence)
 1302 {
 1303         loff_t rc;
 1304 
 1305         rc = lseek(fp->f_fd, *offp, whence);
 1306         if (rc < 0)
 1307                 return (errno);
 1308 
 1309         *offp = rc;
 1310 
 1311         return (0);
 1312 }
 1313 
 1314 /*
 1315  * Get file attributes
 1316  *
 1317  * filp - file pointer
 1318  * zfattr - pointer to file attr structure
 1319  *
 1320  * Currently only used for fetching size and file mode
 1321  *
 1322  * Returns 0 on success or error code of underlying getattr call on failure.
 1323  */
 1324 int
 1325 zfs_file_getattr(zfs_file_t *fp, zfs_file_attr_t *zfattr)
 1326 {
 1327         struct stat64 st;
 1328 
 1329         if (fstat64_blk(fp->f_fd, &st) == -1)
 1330                 return (errno);
 1331 
 1332         zfattr->zfa_size = st.st_size;
 1333         zfattr->zfa_mode = st.st_mode;
 1334 
 1335         return (0);
 1336 }
 1337 
 1338 /*
 1339  * Sync file to disk
 1340  *
 1341  * filp - file pointer
 1342  * flags - O_SYNC and or O_DSYNC
 1343  *
 1344  * Returns 0 on success or error code of underlying sync call on failure.
 1345  */
 1346 int
 1347 zfs_file_fsync(zfs_file_t *fp, int flags)
 1348 {
 1349         (void) flags;
 1350 
 1351         if (fsync(fp->f_fd) < 0)
 1352                 return (errno);
 1353 
 1354         return (0);
 1355 }
 1356 
 1357 /*
 1358  * fallocate - allocate or free space on disk
 1359  *
 1360  * fp - file pointer
 1361  * mode (non-standard options for hole punching etc)
 1362  * offset - offset to start allocating or freeing from
 1363  * len - length to free / allocate
 1364  *
 1365  * OPTIONAL
 1366  */
 1367 int
 1368 zfs_file_fallocate(zfs_file_t *fp, int mode, loff_t offset, loff_t len)
 1369 {
 1370 #ifdef __linux__
 1371         return (fallocate(fp->f_fd, mode, offset, len));
 1372 #else
 1373         (void) fp, (void) mode, (void) offset, (void) len;
 1374         return (EOPNOTSUPP);
 1375 #endif
 1376 }
 1377 
 1378 /*
 1379  * Request current file pointer offset
 1380  *
 1381  * fp - pointer to file
 1382  *
 1383  * Returns current file offset.
 1384  */
 1385 loff_t
 1386 zfs_file_off(zfs_file_t *fp)
 1387 {
 1388         return (lseek(fp->f_fd, SEEK_CUR, 0));
 1389 }
 1390 
 1391 /*
 1392  * unlink file
 1393  *
 1394  * path - fully qualified file path
 1395  *
 1396  * Returns 0 on success.
 1397  *
 1398  * OPTIONAL
 1399  */
 1400 int
 1401 zfs_file_unlink(const char *path)
 1402 {
 1403         return (remove(path));
 1404 }
 1405 
 1406 /*
 1407  * Get reference to file pointer
 1408  *
 1409  * fd - input file descriptor
 1410  *
 1411  * Returns pointer to file struct or NULL.
 1412  * Unsupported in user space.
 1413  */
 1414 zfs_file_t *
 1415 zfs_file_get(int fd)
 1416 {
 1417         (void) fd;
 1418         abort();
 1419         return (NULL);
 1420 }
 1421 /*
 1422  * Drop reference to file pointer
 1423  *
 1424  * fp - pointer to file struct
 1425  *
 1426  * Unsupported in user space.
 1427  */
 1428 void
 1429 zfs_file_put(zfs_file_t *fp)
 1430 {
 1431         abort();
 1432         (void) fp;
 1433 }
 1434 
 1435 void
 1436 zfsvfs_update_fromname(const char *oldname, const char *newname)
 1437 {
 1438         (void) oldname, (void) newname;
 1439 }
 1440 
 1441 void
 1442 spa_import_os(spa_t *spa)
 1443 {
 1444         (void) spa;
 1445 }
 1446 
 1447 void
 1448 spa_export_os(spa_t *spa)
 1449 {
 1450         (void) spa;
 1451 }
 1452 
 1453 void
 1454 spa_activate_os(spa_t *spa)
 1455 {
 1456         (void) spa;
 1457 }
 1458 
 1459 void
 1460 spa_deactivate_os(spa_t *spa)
 1461 {
 1462         (void) spa;
 1463 }

Cache object: d2115f44a00a3b94ddc05bc1e0fae091


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.