The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/openzfs/module/os/freebsd/zfs/vdev_file.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * CDDL HEADER START
    3  *
    4  * The contents of this file are subject to the terms of the
    5  * Common Development and Distribution License (the "License").
    6  * You may not use this file except in compliance with the License.
    7  *
    8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
    9  * or https://opensource.org/licenses/CDDL-1.0.
   10  * See the License for the specific language governing permissions
   11  * and limitations under the License.
   12  *
   13  * When distributing Covered Code, include this CDDL HEADER in each
   14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
   15  * If applicable, add the following below this CDDL HEADER, with the
   16  * fields enclosed by brackets "[]" replaced with your own identifying
   17  * information: Portions Copyright [yyyy] [name of copyright owner]
   18  *
   19  * CDDL HEADER END
   20  */
   21 /*
   22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
   23  * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
   24  */
   25 
   26 #include <sys/zfs_context.h>
   27 #include <sys/spa.h>
   28 #include <sys/file.h>
   29 #include <sys/vdev_file.h>
   30 #include <sys/vdev_impl.h>
   31 #include <sys/zio.h>
   32 #include <sys/fs/zfs.h>
   33 #include <sys/fm/fs/zfs.h>
   34 #include <sys/abd.h>
   35 #include <sys/stat.h>
   36 
   37 /*
   38  * Virtual device vector for files.
   39  */
   40 
   41 static taskq_t *vdev_file_taskq;
   42 
   43 static uint_t vdev_file_logical_ashift = SPA_MINBLOCKSHIFT;
   44 static uint_t vdev_file_physical_ashift = SPA_MINBLOCKSHIFT;
   45 
   46 void
   47 vdev_file_init(void)
   48 {
   49         vdev_file_taskq = taskq_create("z_vdev_file", MAX(max_ncpus, 16),
   50             minclsyspri, max_ncpus, INT_MAX, 0);
   51 }
   52 
   53 void
   54 vdev_file_fini(void)
   55 {
   56         taskq_destroy(vdev_file_taskq);
   57 }
   58 
   59 static void
   60 vdev_file_hold(vdev_t *vd)
   61 {
   62         ASSERT3P(vd->vdev_path, !=, NULL);
   63 }
   64 
   65 static void
   66 vdev_file_rele(vdev_t *vd)
   67 {
   68         ASSERT3P(vd->vdev_path, !=, NULL);
   69 }
   70 
   71 static mode_t
   72 vdev_file_open_mode(spa_mode_t spa_mode)
   73 {
   74         mode_t mode = 0;
   75 
   76         if ((spa_mode & SPA_MODE_READ) && (spa_mode & SPA_MODE_WRITE)) {
   77                 mode = O_RDWR;
   78         } else if (spa_mode & SPA_MODE_READ) {
   79                 mode = O_RDONLY;
   80         } else if (spa_mode & SPA_MODE_WRITE) {
   81                 mode = O_WRONLY;
   82         }
   83 
   84         return (mode | O_LARGEFILE);
   85 }
   86 
   87 static int
   88 vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
   89     uint64_t *logical_ashift, uint64_t *physical_ashift)
   90 {
   91         vdev_file_t *vf;
   92         zfs_file_t *fp;
   93         zfs_file_attr_t zfa;
   94         int error;
   95 
   96         /*
   97          * Rotational optimizations only make sense on block devices.
   98          */
   99         vd->vdev_nonrot = B_TRUE;
  100 
  101         /*
  102          * Allow TRIM on file based vdevs.  This may not always be supported,
  103          * since it depends on your kernel version and underlying filesystem
  104          * type but it is always safe to attempt.
  105          */
  106         vd->vdev_has_trim = B_TRUE;
  107 
  108         /*
  109          * Disable secure TRIM on file based vdevs.  There is no way to
  110          * request this behavior from the underlying filesystem.
  111          */
  112         vd->vdev_has_securetrim = B_FALSE;
  113 
  114         /*
  115          * We must have a pathname, and it must be absolute.
  116          */
  117         if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
  118                 vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
  119                 return (SET_ERROR(EINVAL));
  120         }
  121 
  122         /*
  123          * Reopen the device if it's not currently open.  Otherwise,
  124          * just update the physical size of the device.
  125          */
  126         if (vd->vdev_tsd != NULL) {
  127                 ASSERT(vd->vdev_reopening);
  128                 vf = vd->vdev_tsd;
  129                 goto skip_open;
  130         }
  131 
  132         vf = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_file_t), KM_SLEEP);
  133 
  134         /*
  135          * We always open the files from the root of the global zone, even if
  136          * we're in a local zone.  If the user has gotten to this point, the
  137          * administrator has already decided that the pool should be available
  138          * to local zone users, so the underlying devices should be as well.
  139          */
  140         ASSERT3P(vd->vdev_path, !=, NULL);
  141         ASSERT(vd->vdev_path[0] == '/');
  142 
  143         error = zfs_file_open(vd->vdev_path,
  144             vdev_file_open_mode(spa_mode(vd->vdev_spa)), 0, &fp);
  145         if (error) {
  146                 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
  147                 return (error);
  148         }
  149 
  150         vf->vf_file = fp;
  151 
  152 #ifdef _KERNEL
  153         /*
  154          * Make sure it's a regular file.
  155          */
  156         if (zfs_file_getattr(fp, &zfa)) {
  157                 return (SET_ERROR(ENODEV));
  158         }
  159         if (!S_ISREG(zfa.zfa_mode)) {
  160                 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
  161                 return (SET_ERROR(ENODEV));
  162         }
  163 #endif
  164 
  165 skip_open:
  166 
  167         error =  zfs_file_getattr(vf->vf_file, &zfa);
  168         if (error) {
  169                 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
  170                 return (error);
  171         }
  172 
  173         *max_psize = *psize = zfa.zfa_size;
  174         *logical_ashift = vdev_file_logical_ashift;
  175         *physical_ashift = vdev_file_physical_ashift;
  176 
  177         return (0);
  178 }
  179 
  180 static void
  181 vdev_file_close(vdev_t *vd)
  182 {
  183         vdev_file_t *vf = vd->vdev_tsd;
  184 
  185         if (vd->vdev_reopening || vf == NULL)
  186                 return;
  187 
  188         if (vf->vf_file != NULL) {
  189                 zfs_file_close(vf->vf_file);
  190         }
  191 
  192         vd->vdev_delayed_close = B_FALSE;
  193         kmem_free(vf, sizeof (vdev_file_t));
  194         vd->vdev_tsd = NULL;
  195 }
  196 
  197 /*
  198  * Implements the interrupt side for file vdev types. This routine will be
  199  * called when the I/O completes allowing us to transfer the I/O to the
  200  * interrupt taskqs. For consistency, the code structure mimics disk vdev
  201  * types.
  202  */
  203 static void
  204 vdev_file_io_intr(zio_t *zio)
  205 {
  206         zio_delay_interrupt(zio);
  207 }
  208 
  209 static void
  210 vdev_file_io_strategy(void *arg)
  211 {
  212         zio_t *zio = arg;
  213         vdev_t *vd = zio->io_vd;
  214         vdev_file_t *vf;
  215         void *buf;
  216         ssize_t resid;
  217         loff_t off;
  218         ssize_t size;
  219         int err;
  220 
  221         off = zio->io_offset;
  222         size = zio->io_size;
  223         resid = 0;
  224 
  225         vf = vd->vdev_tsd;
  226 
  227         ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE);
  228         if (zio->io_type == ZIO_TYPE_READ) {
  229                 buf = abd_borrow_buf(zio->io_abd, zio->io_size);
  230                 err = zfs_file_pread(vf->vf_file, buf, size, off, &resid);
  231                 abd_return_buf_copy(zio->io_abd, buf, size);
  232         } else {
  233                 buf = abd_borrow_buf_copy(zio->io_abd, zio->io_size);
  234                 err = zfs_file_pwrite(vf->vf_file, buf, size, off, &resid);
  235                 abd_return_buf(zio->io_abd, buf, size);
  236         }
  237         zio->io_error = err;
  238         if (resid != 0 && zio->io_error == 0)
  239                 zio->io_error = ENOSPC;
  240 
  241         vdev_file_io_intr(zio);
  242 }
  243 
  244 static void
  245 vdev_file_io_start(zio_t *zio)
  246 {
  247         vdev_t *vd = zio->io_vd;
  248         vdev_file_t *vf = vd->vdev_tsd;
  249 
  250         if (zio->io_type == ZIO_TYPE_IOCTL) {
  251                 /* XXPOLICY */
  252                 if (!vdev_readable(vd)) {
  253                         zio->io_error = SET_ERROR(ENXIO);
  254                         zio_interrupt(zio);
  255                         return;
  256                 }
  257 
  258                 switch (zio->io_cmd) {
  259                 case DKIOCFLUSHWRITECACHE:
  260                         zio->io_error = zfs_file_fsync(vf->vf_file,
  261                             O_SYNC|O_DSYNC);
  262                         break;
  263                 default:
  264                         zio->io_error = SET_ERROR(ENOTSUP);
  265                 }
  266 
  267                 zio_execute(zio);
  268                 return;
  269         } else if (zio->io_type == ZIO_TYPE_TRIM) {
  270 #ifdef notyet
  271                 int mode = 0;
  272 
  273                 ASSERT3U(zio->io_size, !=, 0);
  274 
  275                 /* XXX FreeBSD has no fallocate routine in file ops */
  276                 zio->io_error = zfs_file_fallocate(vf->vf_file,
  277                     mode, zio->io_offset, zio->io_size);
  278 #endif
  279                 zio->io_error = SET_ERROR(ENOTSUP);
  280                 zio_execute(zio);
  281                 return;
  282         }
  283         ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE);
  284         zio->io_target_timestamp = zio_handle_io_delay(zio);
  285 
  286         VERIFY3U(taskq_dispatch(vdev_file_taskq, vdev_file_io_strategy, zio,
  287             TQ_SLEEP), !=, 0);
  288 }
  289 
  290 static void
  291 vdev_file_io_done(zio_t *zio)
  292 {
  293         (void) zio;
  294 }
  295 
  296 vdev_ops_t vdev_file_ops = {
  297         .vdev_op_init = NULL,
  298         .vdev_op_fini = NULL,
  299         .vdev_op_open = vdev_file_open,
  300         .vdev_op_close = vdev_file_close,
  301         .vdev_op_asize = vdev_default_asize,
  302         .vdev_op_min_asize = vdev_default_min_asize,
  303         .vdev_op_min_alloc = NULL,
  304         .vdev_op_io_start = vdev_file_io_start,
  305         .vdev_op_io_done = vdev_file_io_done,
  306         .vdev_op_state_change = NULL,
  307         .vdev_op_need_resilver = NULL,
  308         .vdev_op_hold = vdev_file_hold,
  309         .vdev_op_rele = vdev_file_rele,
  310         .vdev_op_remap = NULL,
  311         .vdev_op_xlate = vdev_default_xlate,
  312         .vdev_op_rebuild_asize = NULL,
  313         .vdev_op_metaslab_init = NULL,
  314         .vdev_op_config_generate = NULL,
  315         .vdev_op_nparity = NULL,
  316         .vdev_op_ndisks = NULL,
  317         .vdev_op_type = VDEV_TYPE_FILE,         /* name of this vdev type */
  318         .vdev_op_leaf = B_TRUE                  /* leaf vdev */
  319 };
  320 
  321 /*
  322  * From userland we access disks just like files.
  323  */
  324 #ifndef _KERNEL
  325 
  326 vdev_ops_t vdev_disk_ops = {
  327         .vdev_op_init = NULL,
  328         .vdev_op_fini = NULL,
  329         .vdev_op_open = vdev_file_open,
  330         .vdev_op_close = vdev_file_close,
  331         .vdev_op_asize = vdev_default_asize,
  332         .vdev_op_min_asize = vdev_default_min_asize,
  333         .vdev_op_min_alloc = NULL,
  334         .vdev_op_io_start = vdev_file_io_start,
  335         .vdev_op_io_done = vdev_file_io_done,
  336         .vdev_op_state_change = NULL,
  337         .vdev_op_need_resilver = NULL,
  338         .vdev_op_hold = vdev_file_hold,
  339         .vdev_op_rele = vdev_file_rele,
  340         .vdev_op_remap = NULL,
  341         .vdev_op_xlate = vdev_default_xlate,
  342         .vdev_op_rebuild_asize = NULL,
  343         .vdev_op_metaslab_init = NULL,
  344         .vdev_op_config_generate = NULL,
  345         .vdev_op_nparity = NULL,
  346         .vdev_op_ndisks = NULL,
  347         .vdev_op_type = VDEV_TYPE_DISK,         /* name of this vdev type */
  348         .vdev_op_leaf = B_TRUE                  /* leaf vdev */
  349 };
  350 
  351 #endif
  352 
  353 ZFS_MODULE_PARAM(zfs_vdev_file, vdev_file_, logical_ashift, UINT, ZMOD_RW,
  354         "Logical ashift for file-based devices");
  355 ZFS_MODULE_PARAM(zfs_vdev_file, vdev_file_, physical_ashift, UINT, ZMOD_RW,
  356         "Physical ashift for file-based devices");

Cache object: 62613e3c8c6156a0917dd9330c7bab0b


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.