The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/openzfs/module/os/linux/zfs/vdev_file.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * CDDL HEADER START
    3  *
    4  * The contents of this file are subject to the terms of the
    5  * Common Development and Distribution License (the "License").
    6  * You may not use this file except in compliance with the License.
    7  *
    8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
    9  * or https://opensource.org/licenses/CDDL-1.0.
   10  * See the License for the specific language governing permissions
   11  * and limitations under the License.
   12  *
   13  * When distributing Covered Code, include this CDDL HEADER in each
   14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
   15  * If applicable, add the following below this CDDL HEADER, with the
   16  * fields enclosed by brackets "[]" replaced with your own identifying
   17  * information: Portions Copyright [yyyy] [name of copyright owner]
   18  *
   19  * CDDL HEADER END
   20  */
   21 /*
   22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
   23  * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
   24  */
   25 
   26 #include <sys/zfs_context.h>
   27 #include <sys/spa.h>
   28 #include <sys/spa_impl.h>
   29 #include <sys/vdev_file.h>
   30 #include <sys/vdev_impl.h>
   31 #include <sys/vdev_trim.h>
   32 #include <sys/zio.h>
   33 #include <sys/fs/zfs.h>
   34 #include <sys/fm/fs/zfs.h>
   35 #include <sys/abd.h>
   36 #include <sys/fcntl.h>
   37 #include <sys/vnode.h>
   38 #include <sys/zfs_file.h>
   39 #ifdef _KERNEL
   40 #include <linux/falloc.h>
   41 #endif
   42 /*
   43  * Virtual device vector for files.
   44  */
   45 
   46 static taskq_t *vdev_file_taskq;
   47 
   48 /*
   49  * By default, the logical/physical ashift for file vdevs is set to
   50  * SPA_MINBLOCKSHIFT (9). This allows all file vdevs to use 512B (1 << 9)
   51  * blocksizes. Users may opt to change one or both of these for testing
   52  * or performance reasons. Care should be taken as these values will
   53  * impact the vdev_ashift setting which can only be set at vdev creation
   54  * time.
   55  */
   56 static uint_t vdev_file_logical_ashift = SPA_MINBLOCKSHIFT;
   57 static uint_t vdev_file_physical_ashift = SPA_MINBLOCKSHIFT;
   58 
   59 static void
   60 vdev_file_hold(vdev_t *vd)
   61 {
   62         ASSERT(vd->vdev_path != NULL);
   63 }
   64 
   65 static void
   66 vdev_file_rele(vdev_t *vd)
   67 {
   68         ASSERT(vd->vdev_path != NULL);
   69 }
   70 
   71 static mode_t
   72 vdev_file_open_mode(spa_mode_t spa_mode)
   73 {
   74         mode_t mode = 0;
   75 
   76         if ((spa_mode & SPA_MODE_READ) && (spa_mode & SPA_MODE_WRITE)) {
   77                 mode = O_RDWR;
   78         } else if (spa_mode & SPA_MODE_READ) {
   79                 mode = O_RDONLY;
   80         } else if (spa_mode & SPA_MODE_WRITE) {
   81                 mode = O_WRONLY;
   82         }
   83 
   84         return (mode | O_LARGEFILE);
   85 }
   86 
   87 static int
   88 vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
   89     uint64_t *logical_ashift, uint64_t *physical_ashift)
   90 {
   91         vdev_file_t *vf;
   92         zfs_file_t *fp;
   93         zfs_file_attr_t zfa;
   94         int error;
   95 
   96         /*
   97          * Rotational optimizations only make sense on block devices.
   98          */
   99         vd->vdev_nonrot = B_TRUE;
  100 
  101         /*
  102          * Allow TRIM on file based vdevs.  This may not always be supported,
  103          * since it depends on your kernel version and underlying filesystem
  104          * type but it is always safe to attempt.
  105          */
  106         vd->vdev_has_trim = B_TRUE;
  107 
  108         /*
  109          * Disable secure TRIM on file based vdevs.  There is no way to
  110          * request this behavior from the underlying filesystem.
  111          */
  112         vd->vdev_has_securetrim = B_FALSE;
  113 
  114         /*
  115          * We must have a pathname, and it must be absolute.
  116          */
  117         if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
  118                 vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
  119                 return (SET_ERROR(EINVAL));
  120         }
  121 
  122         /*
  123          * Reopen the device if it's not currently open.  Otherwise,
  124          * just update the physical size of the device.
  125          */
  126         if (vd->vdev_tsd != NULL) {
  127                 ASSERT(vd->vdev_reopening);
  128                 vf = vd->vdev_tsd;
  129                 goto skip_open;
  130         }
  131 
  132         vf = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_file_t), KM_SLEEP);
  133 
  134         /*
  135          * We always open the files from the root of the global zone, even if
  136          * we're in a local zone.  If the user has gotten to this point, the
  137          * administrator has already decided that the pool should be available
  138          * to local zone users, so the underlying devices should be as well.
  139          */
  140         ASSERT(vd->vdev_path != NULL && vd->vdev_path[0] == '/');
  141 
  142         error = zfs_file_open(vd->vdev_path,
  143             vdev_file_open_mode(spa_mode(vd->vdev_spa)), 0, &fp);
  144         if (error) {
  145                 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
  146                 return (error);
  147         }
  148 
  149         vf->vf_file = fp;
  150 
  151 #ifdef _KERNEL
  152         /*
  153          * Make sure it's a regular file.
  154          */
  155         if (zfs_file_getattr(fp, &zfa)) {
  156                 return (SET_ERROR(ENODEV));
  157         }
  158         if (!S_ISREG(zfa.zfa_mode)) {
  159                 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
  160                 return (SET_ERROR(ENODEV));
  161         }
  162 #endif
  163 
  164 skip_open:
  165 
  166         error =  zfs_file_getattr(vf->vf_file, &zfa);
  167         if (error) {
  168                 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
  169                 return (error);
  170         }
  171 
  172         *max_psize = *psize = zfa.zfa_size;
  173         *logical_ashift = vdev_file_logical_ashift;
  174         *physical_ashift = vdev_file_physical_ashift;
  175 
  176         return (0);
  177 }
  178 
  179 static void
  180 vdev_file_close(vdev_t *vd)
  181 {
  182         vdev_file_t *vf = vd->vdev_tsd;
  183 
  184         if (vd->vdev_reopening || vf == NULL)
  185                 return;
  186 
  187         if (vf->vf_file != NULL) {
  188                 (void) zfs_file_close(vf->vf_file);
  189         }
  190 
  191         vd->vdev_delayed_close = B_FALSE;
  192         kmem_free(vf, sizeof (vdev_file_t));
  193         vd->vdev_tsd = NULL;
  194 }
  195 
  196 static void
  197 vdev_file_io_strategy(void *arg)
  198 {
  199         zio_t *zio = (zio_t *)arg;
  200         vdev_t *vd = zio->io_vd;
  201         vdev_file_t *vf = vd->vdev_tsd;
  202         ssize_t resid;
  203         void *buf;
  204         loff_t off;
  205         ssize_t size;
  206         int err;
  207 
  208         off = zio->io_offset;
  209         size = zio->io_size;
  210         resid = 0;
  211 
  212         if (zio->io_type == ZIO_TYPE_READ) {
  213                 buf = abd_borrow_buf(zio->io_abd, zio->io_size);
  214                 err = zfs_file_pread(vf->vf_file, buf, size, off, &resid);
  215                 abd_return_buf_copy(zio->io_abd, buf, size);
  216         } else {
  217                 buf = abd_borrow_buf_copy(zio->io_abd, zio->io_size);
  218                 err = zfs_file_pwrite(vf->vf_file, buf, size, off, &resid);
  219                 abd_return_buf(zio->io_abd, buf, size);
  220         }
  221         zio->io_error = err;
  222         if (resid != 0 && zio->io_error == 0)
  223                 zio->io_error = SET_ERROR(ENOSPC);
  224 
  225         zio_delay_interrupt(zio);
  226 }
  227 
  228 static void
  229 vdev_file_io_fsync(void *arg)
  230 {
  231         zio_t *zio = (zio_t *)arg;
  232         vdev_file_t *vf = zio->io_vd->vdev_tsd;
  233 
  234         zio->io_error = zfs_file_fsync(vf->vf_file, O_SYNC | O_DSYNC);
  235 
  236         zio_interrupt(zio);
  237 }
  238 
  239 static void
  240 vdev_file_io_start(zio_t *zio)
  241 {
  242         vdev_t *vd = zio->io_vd;
  243         vdev_file_t *vf = vd->vdev_tsd;
  244 
  245         if (zio->io_type == ZIO_TYPE_IOCTL) {
  246                 /* XXPOLICY */
  247                 if (!vdev_readable(vd)) {
  248                         zio->io_error = SET_ERROR(ENXIO);
  249                         zio_interrupt(zio);
  250                         return;
  251                 }
  252 
  253                 switch (zio->io_cmd) {
  254                 case DKIOCFLUSHWRITECACHE:
  255 
  256                         if (zfs_nocacheflush)
  257                                 break;
  258 
  259                         /*
  260                          * We cannot safely call vfs_fsync() when PF_FSTRANS
  261                          * is set in the current context.  Filesystems like
  262                          * XFS include sanity checks to verify it is not
  263                          * already set, see xfs_vm_writepage().  Therefore
  264                          * the sync must be dispatched to a different context.
  265                          */
  266                         if (__spl_pf_fstrans_check()) {
  267                                 VERIFY3U(taskq_dispatch(vdev_file_taskq,
  268                                     vdev_file_io_fsync, zio, TQ_SLEEP), !=,
  269                                     TASKQID_INVALID);
  270                                 return;
  271                         }
  272 
  273                         zio->io_error = zfs_file_fsync(vf->vf_file,
  274                             O_SYNC | O_DSYNC);
  275                         break;
  276                 default:
  277                         zio->io_error = SET_ERROR(ENOTSUP);
  278                 }
  279 
  280                 zio_execute(zio);
  281                 return;
  282         } else if (zio->io_type == ZIO_TYPE_TRIM) {
  283                 int mode = 0;
  284 
  285                 ASSERT3U(zio->io_size, !=, 0);
  286 #ifdef __linux__
  287                 mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
  288 #endif
  289                 zio->io_error = zfs_file_fallocate(vf->vf_file,
  290                     mode, zio->io_offset, zio->io_size);
  291                 zio_execute(zio);
  292                 return;
  293         }
  294 
  295         zio->io_target_timestamp = zio_handle_io_delay(zio);
  296 
  297         VERIFY3U(taskq_dispatch(vdev_file_taskq, vdev_file_io_strategy, zio,
  298             TQ_SLEEP), !=, TASKQID_INVALID);
  299 }
  300 
  301 static void
  302 vdev_file_io_done(zio_t *zio)
  303 {
  304         (void) zio;
  305 }
  306 
  307 vdev_ops_t vdev_file_ops = {
  308         .vdev_op_init = NULL,
  309         .vdev_op_fini = NULL,
  310         .vdev_op_open = vdev_file_open,
  311         .vdev_op_close = vdev_file_close,
  312         .vdev_op_asize = vdev_default_asize,
  313         .vdev_op_min_asize = vdev_default_min_asize,
  314         .vdev_op_min_alloc = NULL,
  315         .vdev_op_io_start = vdev_file_io_start,
  316         .vdev_op_io_done = vdev_file_io_done,
  317         .vdev_op_state_change = NULL,
  318         .vdev_op_need_resilver = NULL,
  319         .vdev_op_hold = vdev_file_hold,
  320         .vdev_op_rele = vdev_file_rele,
  321         .vdev_op_remap = NULL,
  322         .vdev_op_xlate = vdev_default_xlate,
  323         .vdev_op_rebuild_asize = NULL,
  324         .vdev_op_metaslab_init = NULL,
  325         .vdev_op_config_generate = NULL,
  326         .vdev_op_nparity = NULL,
  327         .vdev_op_ndisks = NULL,
  328         .vdev_op_type = VDEV_TYPE_FILE,         /* name of this vdev type */
  329         .vdev_op_leaf = B_TRUE                  /* leaf vdev */
  330 };
  331 
  332 void
  333 vdev_file_init(void)
  334 {
  335         vdev_file_taskq = taskq_create("z_vdev_file", MAX(boot_ncpus, 16),
  336             minclsyspri, boot_ncpus, INT_MAX, TASKQ_DYNAMIC);
  337 
  338         VERIFY(vdev_file_taskq);
  339 }
  340 
  341 void
  342 vdev_file_fini(void)
  343 {
  344         taskq_destroy(vdev_file_taskq);
  345 }
  346 
  347 /*
  348  * From userland we access disks just like files.
  349  */
  350 #ifndef _KERNEL
  351 
  352 vdev_ops_t vdev_disk_ops = {
  353         .vdev_op_init = NULL,
  354         .vdev_op_fini = NULL,
  355         .vdev_op_open = vdev_file_open,
  356         .vdev_op_close = vdev_file_close,
  357         .vdev_op_asize = vdev_default_asize,
  358         .vdev_op_min_asize = vdev_default_min_asize,
  359         .vdev_op_min_alloc = NULL,
  360         .vdev_op_io_start = vdev_file_io_start,
  361         .vdev_op_io_done = vdev_file_io_done,
  362         .vdev_op_state_change = NULL,
  363         .vdev_op_need_resilver = NULL,
  364         .vdev_op_hold = vdev_file_hold,
  365         .vdev_op_rele = vdev_file_rele,
  366         .vdev_op_remap = NULL,
  367         .vdev_op_xlate = vdev_default_xlate,
  368         .vdev_op_rebuild_asize = NULL,
  369         .vdev_op_metaslab_init = NULL,
  370         .vdev_op_config_generate = NULL,
  371         .vdev_op_nparity = NULL,
  372         .vdev_op_ndisks = NULL,
  373         .vdev_op_type = VDEV_TYPE_DISK,         /* name of this vdev type */
  374         .vdev_op_leaf = B_TRUE                  /* leaf vdev */
  375 };
  376 
  377 #endif
  378 
  379 ZFS_MODULE_PARAM(zfs_vdev_file, vdev_file_, logical_ashift, UINT, ZMOD_RW,
  380         "Logical ashift for file-based devices");
  381 ZFS_MODULE_PARAM(zfs_vdev_file, vdev_file_, physical_ashift, UINT, ZMOD_RW,
  382         "Physical ashift for file-based devices");

Cache object: 3a36a87f70e0051e525d41e54d3e1a5b


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.