The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * CDDL HEADER START
    3  *
    4  * The contents of this file are subject to the terms of the
    5  * Common Development and Distribution License (the "License").
    6  * You may not use this file except in compliance with the License.
    7  *
    8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
    9  * or http://www.opensolaris.org/os/licensing.
   10  * See the License for the specific language governing permissions
   11  * and limitations under the License.
   12  *
   13  * When distributing Covered Code, include this CDDL HEADER in each
   14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
   15  * If applicable, add the following below this CDDL HEADER, with the
   16  * fields enclosed by brackets "[]" replaced with your own identifying
   17  * information: Portions Copyright [yyyy] [name of copyright owner]
   18  *
   19  * CDDL HEADER END
   20  */
   21 /*
   22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
   23  * Use is subject to license terms.
   24  */
   25 
   26 #pragma ident   "%Z%%M% %I%     %E% SMI"
   27 
   28 #include <sys/types.h>
   29 #include <sys/param.h>
   30 #include <sys/time.h>
   31 #include <sys/systm.h>
   32 #include <sys/sysmacros.h>
   33 #include <sys/resource.h>
   34 #include <sys/vfs.h>
   35 #include <sys/vnode.h>
   36 #include <sys/file.h>
   37 #include <sys/kmem.h>
   38 #include <sys/uio.h>
   39 #include <sys/cmn_err.h>
   40 #include <sys/errno.h>
   41 #include <sys/stat.h>
   42 #include <sys/unistd.h>
   43 #include <sys/random.h>
   44 #include <sys/policy.h>
   45 #include <sys/kcondvar.h>
   46 #include <sys/callb.h>
   47 #include <sys/smp.h>
   48 #include <sys/zfs_dir.h>
   49 #include <sys/zfs_acl.h>
   50 #include <sys/fs/zfs.h>
   51 #include <sys/zap.h>
   52 #include <sys/dmu.h>
   53 #include <sys/atomic.h>
   54 #include <sys/zfs_ctldir.h>
   55 #include <sys/dnlc.h>
   56 
   57 /*
   58  * Lock a directory entry.  A dirlock on <dzp, name> protects that name
   59  * in dzp's directory zap object.  As long as you hold a dirlock, you can
   60  * assume two things: (1) dzp cannot be reaped, and (2) no other thread
   61  * can change the zap entry for (i.e. link or unlink) this name.
   62  *
   63  * Input arguments:
   64  *      dzp     - znode for directory
   65  *      name    - name of entry to lock
   66  *      flag    - ZNEW: if the entry already exists, fail with EEXIST.
   67  *                ZEXISTS: if the entry does not exist, fail with ENOENT.
   68  *                ZSHARED: allow concurrent access with other ZSHARED callers.
   69  *                ZXATTR: we want dzp's xattr directory
   70  *
   71  * Output arguments:
   72  *      zpp     - pointer to the znode for the entry (NULL if there isn't one)
   73  *      dlpp    - pointer to the dirlock for this entry (NULL on error)
   74  *
   75  * Return value: 0 on success or errno on failure.
   76  *
   77  * NOTE: Always checks for, and rejects, '.' and '..'.
   78  */
   79 int
   80 zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
   81         int flag)
   82 {
   83         zfsvfs_t        *zfsvfs = dzp->z_zfsvfs;
   84         zfs_dirlock_t   *dl;
   85         uint64_t        zoid;
   86         int             error;
   87         vnode_t         *vp;
   88 
   89         *zpp = NULL;
   90         *dlpp = NULL;
   91 
   92         /*
   93          * Verify that we are not trying to lock '.', '..', or '.zfs'
   94          */
   95         if (name[0] == '.' &&
   96             (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')) ||
   97             zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0)
   98                 return (EEXIST);
   99 
  100         /*
  101          * Wait until there are no locks on this name.
  102          */
  103         rw_enter(&dzp->z_name_lock, RW_READER);
  104         mutex_enter(&dzp->z_lock);
  105         for (;;) {
  106                 if (dzp->z_unlinked) {
  107                         mutex_exit(&dzp->z_lock);
  108                         rw_exit(&dzp->z_name_lock);
  109                         return (ENOENT);
  110                 }
  111                 for (dl = dzp->z_dirlocks; dl != NULL; dl = dl->dl_next)
  112                         if (strcmp(name, dl->dl_name) == 0)
  113                                 break;
  114                 if (dl == NULL) {
  115                         /*
  116                          * Allocate a new dirlock and add it to the list.
  117                          */
  118                         dl = kmem_alloc(sizeof (zfs_dirlock_t), KM_SLEEP);
  119                         cv_init(&dl->dl_cv, NULL, CV_DEFAULT, NULL);
  120                         dl->dl_name = name;
  121                         dl->dl_sharecnt = 0;
  122                         dl->dl_namesize = 0;
  123                         dl->dl_dzp = dzp;
  124                         dl->dl_next = dzp->z_dirlocks;
  125                         dzp->z_dirlocks = dl;
  126                         break;
  127                 }
  128                 if ((flag & ZSHARED) && dl->dl_sharecnt != 0)
  129                         break;
  130                 cv_wait(&dl->dl_cv, &dzp->z_lock);
  131         }
  132 
  133         if ((flag & ZSHARED) && ++dl->dl_sharecnt > 1 && dl->dl_namesize == 0) {
  134                 /*
  135                  * We're the second shared reference to dl.  Make a copy of
  136                  * dl_name in case the first thread goes away before we do.
  137                  * Note that we initialize the new name before storing its
  138                  * pointer into dl_name, because the first thread may load
  139                  * dl->dl_name at any time.  He'll either see the old value,
  140                  * which is his, or the new shared copy; either is OK.
  141                  */
  142                 dl->dl_namesize = strlen(dl->dl_name) + 1;
  143                 name = kmem_alloc(dl->dl_namesize, KM_SLEEP);
  144                 bcopy(dl->dl_name, name, dl->dl_namesize);
  145                 dl->dl_name = name;
  146         }
  147 
  148         mutex_exit(&dzp->z_lock);
  149 
  150         /*
  151          * We have a dirlock on the name.  (Note that it is the dirlock,
  152          * not the dzp's z_lock, that protects the name in the zap object.)
  153          * See if there's an object by this name; if so, put a hold on it.
  154          */
  155         if (flag & ZXATTR) {
  156                 zoid = dzp->z_phys->zp_xattr;
  157                 error = (zoid == 0 ? ENOENT : 0);
  158         } else {
  159                 vp = dnlc_lookup(ZTOV(dzp), name);
  160                 if (vp == DNLC_NO_VNODE) {
  161                         VN_RELE(vp);
  162                         error = ENOENT;
  163                 } else if (vp) {
  164                         if (flag & ZNEW) {
  165                                 zfs_dirent_unlock(dl);
  166                                 VN_RELE(vp);
  167                                 return (EEXIST);
  168                         }
  169                         *dlpp = dl;
  170                         *zpp = VTOZ(vp);
  171                         return (0);
  172                 } else {
  173                         error = zap_lookup(zfsvfs->z_os, dzp->z_id, name,
  174                             8, 1, &zoid);
  175                         zoid = ZFS_DIRENT_OBJ(zoid);
  176                         if (error == ENOENT)
  177                                 dnlc_update(ZTOV(dzp), name, DNLC_NO_VNODE);
  178                 }
  179         }
  180         if (error) {
  181                 if (error != ENOENT || (flag & ZEXISTS)) {
  182                         zfs_dirent_unlock(dl);
  183                         return (error);
  184                 }
  185         } else {
  186                 if (flag & ZNEW) {
  187                         zfs_dirent_unlock(dl);
  188                         return (EEXIST);
  189                 }
  190                 error = zfs_zget(zfsvfs, zoid, zpp);
  191                 if (error) {
  192                         zfs_dirent_unlock(dl);
  193                         return (error);
  194                 }
  195                 if (!(flag & ZXATTR))
  196                         dnlc_update(ZTOV(dzp), name, ZTOV(*zpp));
  197         }
  198 
  199         *dlpp = dl;
  200 
  201         return (0);
  202 }
  203 
  204 /*
  205  * Unlock this directory entry and wake anyone who was waiting for it.
  206  */
  207 void
  208 zfs_dirent_unlock(zfs_dirlock_t *dl)
  209 {
  210         znode_t *dzp = dl->dl_dzp;
  211         zfs_dirlock_t **prev_dl, *cur_dl;
  212 
  213         mutex_enter(&dzp->z_lock);
  214         rw_exit(&dzp->z_name_lock);
  215         if (dl->dl_sharecnt > 1) {
  216                 dl->dl_sharecnt--;
  217                 mutex_exit(&dzp->z_lock);
  218                 return;
  219         }
  220         prev_dl = &dzp->z_dirlocks;
  221         while ((cur_dl = *prev_dl) != dl)
  222                 prev_dl = &cur_dl->dl_next;
  223         *prev_dl = dl->dl_next;
  224         cv_broadcast(&dl->dl_cv);
  225         mutex_exit(&dzp->z_lock);
  226 
  227         if (dl->dl_namesize != 0)
  228                 kmem_free(dl->dl_name, dl->dl_namesize);
  229         cv_destroy(&dl->dl_cv);
  230         kmem_free(dl, sizeof (*dl));
  231 }
  232 
  233 /*
  234  * Look up an entry in a directory.
  235  *
  236  * NOTE: '.' and '..' are handled as special cases because
  237  *      no directory entries are actually stored for them.  If this is
  238  *      the root of a filesystem, then '.zfs' is also treated as a
  239  *      special pseudo-directory.
  240  */
  241 int
  242 zfs_dirlook(znode_t *dzp, char *name, vnode_t **vpp)
  243 {
  244         zfs_dirlock_t *dl;
  245         znode_t *zp;
  246         int error = 0;
  247 
  248         if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
  249                 *vpp = ZTOV(dzp);
  250                 VN_HOLD(*vpp);
  251         } else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
  252                 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
  253                 /*
  254                  * If we are a snapshot mounted under .zfs, return
  255                  * the vp for the snapshot directory.
  256                  */
  257                 if (dzp->z_phys->zp_parent == dzp->z_id &&
  258                     zfsvfs->z_parent != zfsvfs) {
  259                         error = zfsctl_root_lookup(zfsvfs->z_parent->z_ctldir,
  260                             "snapshot", vpp, NULL, 0, NULL, kcred);
  261                         return (error);
  262                 }
  263                 rw_enter(&dzp->z_parent_lock, RW_READER);
  264                 error = zfs_zget(zfsvfs, dzp->z_phys->zp_parent, &zp);
  265                 if (error == 0)
  266                         *vpp = ZTOV(zp);
  267                 rw_exit(&dzp->z_parent_lock);
  268         } else if (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0) {
  269                 *vpp = zfsctl_root(dzp);
  270         } else {
  271                 error = zfs_dirent_lock(&dl, dzp, name, &zp, ZEXISTS | ZSHARED);
  272                 if (error == 0) {
  273                         *vpp = ZTOV(zp);
  274                         zfs_dirent_unlock(dl);
  275                         dzp->z_zn_prefetch = B_TRUE; /* enable prefetching */
  276                 }
  277         }
  278 
  279         return (error);
  280 }
  281 
  282 static char *
  283 zfs_unlinked_hexname(char namebuf[17], uint64_t x)
  284 {
  285         char *name = &namebuf[16];
  286         const char digits[16] = "0123456789abcdef";
  287 
  288         *name = '\0';
  289         do {
  290                 *--name = digits[x & 0xf];
  291                 x >>= 4;
  292         } while (x != 0);
  293 
  294         return (name);
  295 }
  296 
  297 /*
  298  * unlinked Set (formerly known as the "delete queue") Error Handling
  299  *
  300  * When dealing with the unlinked set, we dmu_tx_hold_zap(), but we
  301  * don't specify the name of the entry that we will be manipulating.  We
  302  * also fib and say that we won't be adding any new entries to the
  303  * unlinked set, even though we might (this is to lower the minimum file
  304  * size that can be deleted in a full filesystem).  So on the small
  305  * chance that the nlink list is using a fat zap (ie. has more than
  306  * 2000 entries), we *may* not pre-read a block that's needed.
  307  * Therefore it is remotely possible for some of the assertions
  308  * regarding the unlinked set below to fail due to i/o error.  On a
  309  * nondebug system, this will result in the space being leaked.
  310  */
  311 void
  312 zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx)
  313 {
  314         zfsvfs_t *zfsvfs = zp->z_zfsvfs;
  315         char obj_name[17];
  316         int error;
  317 
  318         ASSERT(zp->z_unlinked);
  319         ASSERT3U(zp->z_phys->zp_links, ==, 0);
  320 
  321         error = zap_add(zfsvfs->z_os, zfsvfs->z_unlinkedobj,
  322             zfs_unlinked_hexname(obj_name, zp->z_id), 8, 1, &zp->z_id, tx);
  323         ASSERT3U(error, ==, 0);
  324 }
  325 
  326 /*
  327  * Clean up any znodes that had no links when we either crashed or
  328  * (force) umounted the file system.
  329  */
  330 void
  331 zfs_unlinked_drain(zfsvfs_t *zfsvfs)
  332 {
  333         zap_cursor_t    zc;
  334         zap_attribute_t zap;
  335         dmu_object_info_t doi;
  336         znode_t         *zp;
  337         int             error;
  338 
  339         /*
  340          * Interate over the contents of the unlinked set.
  341          */
  342         for (zap_cursor_init(&zc, zfsvfs->z_os, zfsvfs->z_unlinkedobj);
  343             zap_cursor_retrieve(&zc, &zap) == 0;
  344             zap_cursor_advance(&zc)) {
  345 
  346                 /*
  347                  * See what kind of object we have in list
  348                  */
  349 
  350                 error = dmu_object_info(zfsvfs->z_os,
  351                     zap.za_first_integer, &doi);
  352                 if (error != 0)
  353                         continue;
  354 
  355                 ASSERT((doi.doi_type == DMU_OT_PLAIN_FILE_CONTENTS) ||
  356                     (doi.doi_type == DMU_OT_DIRECTORY_CONTENTS));
  357                 /*
  358                  * We need to re-mark these list entries for deletion,
  359                  * so we pull them back into core and set zp->z_unlinked.
  360                  */
  361                 error = zfs_zget(zfsvfs, zap.za_first_integer, &zp);
  362 
  363                 /*
  364                  * We may pick up znodes that are already marked for deletion.
  365                  * This could happen during the purge of an extended attribute
  366                  * directory.  All we need to do is skip over them, since they
  367                  * are already in the system marked z_unlinked.
  368                  */
  369                 if (error != 0)
  370                         continue;
  371 
  372                 zp->z_unlinked = B_TRUE;
  373                 VN_RELE(ZTOV(zp));
  374         }
  375         zap_cursor_fini(&zc);
  376 }
  377 
  378 /*
  379  * Delete the entire contents of a directory.  Return a count
  380  * of the number of entries that could not be deleted.
  381  *
  382  * NOTE: this function assumes that the directory is inactive,
  383  *      so there is no need to lock its entries before deletion.
  384  *      Also, it assumes the directory contents is *only* regular
  385  *      files.
  386  */
  387 static int
  388 zfs_purgedir(znode_t *dzp)
  389 {
  390         zap_cursor_t    zc;
  391         zap_attribute_t zap;
  392         znode_t         *xzp;
  393         dmu_tx_t        *tx;
  394         zfsvfs_t        *zfsvfs = dzp->z_zfsvfs;
  395         zfs_dirlock_t   dl;
  396         int skipped = 0;
  397         int error;
  398 
  399         for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
  400             (error = zap_cursor_retrieve(&zc, &zap)) == 0;
  401             zap_cursor_advance(&zc)) {
  402                 error = zfs_zget(zfsvfs,
  403                     ZFS_DIRENT_OBJ(zap.za_first_integer), &xzp);
  404                 ASSERT3U(error, ==, 0);
  405 
  406                 ASSERT((ZTOV(xzp)->v_type == VREG) ||
  407                     (ZTOV(xzp)->v_type == VLNK));
  408 
  409                 tx = dmu_tx_create(zfsvfs->z_os);
  410                 dmu_tx_hold_bonus(tx, dzp->z_id);
  411                 dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap.za_name);
  412                 dmu_tx_hold_bonus(tx, xzp->z_id);
  413                 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
  414                 error = dmu_tx_assign(tx, TXG_WAIT);
  415                 if (error) {
  416                         dmu_tx_abort(tx);
  417                         VN_RELE(ZTOV(xzp));
  418                         skipped += 1;
  419                         continue;
  420                 }
  421                 bzero(&dl, sizeof (dl));
  422                 dl.dl_dzp = dzp;
  423                 dl.dl_name = zap.za_name;
  424 
  425                 error = zfs_link_destroy(&dl, xzp, tx, 0, NULL);
  426                 ASSERT3U(error, ==, 0);
  427                 dmu_tx_commit(tx);
  428 
  429                 VN_RELE(ZTOV(xzp));
  430         }
  431         zap_cursor_fini(&zc);
  432         ASSERT(error == ENOENT);
  433         return (skipped);
  434 }
  435 
  436 void
  437 zfs_rmnode(znode_t *zp)
  438 {
  439         zfsvfs_t        *zfsvfs = zp->z_zfsvfs;
  440         objset_t        *os = zfsvfs->z_os;
  441         znode_t         *xzp = NULL;
  442         char            obj_name[17];
  443         dmu_tx_t        *tx;
  444         uint64_t        acl_obj;
  445         int             error;
  446         int             vfslocked;
  447 
  448         vfslocked = VFS_LOCK_GIANT(zfsvfs->z_vfs);
  449 
  450         ASSERT(zp->z_phys->zp_links == 0);
  451 
  452         /*
  453          * If this is an attribute directory, purge its contents.
  454          */
  455         if (ZTOV(zp) != NULL && ZTOV(zp)->v_type == VDIR &&
  456             (zp->z_phys->zp_flags & ZFS_XATTR)) {
  457                 if (zfs_purgedir(zp) != 0) {
  458                         /*
  459                          * Not enough space to delete some xattrs.
  460                          * Leave it on the unlinked set.
  461                          */
  462                         VFS_UNLOCK_GIANT(vfslocked);
  463                         return;
  464                 }
  465         }
  466 
  467         /*
  468          * If the file has extended attributes, we're going to unlink
  469          * the xattr dir.
  470          */
  471         if (zp->z_phys->zp_xattr) {
  472                 error = zfs_zget(zfsvfs, zp->z_phys->zp_xattr, &xzp);
  473                 ASSERT(error == 0);
  474         }
  475 
  476         acl_obj = zp->z_phys->zp_acl.z_acl_extern_obj;
  477 
  478         /*
  479          * Set up the transaction.
  480          */
  481         tx = dmu_tx_create(os);
  482         dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END);
  483         dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
  484         if (xzp) {
  485                 dmu_tx_hold_bonus(tx, xzp->z_id);
  486                 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL);
  487         }
  488         if (acl_obj)
  489                 dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);
  490         error = dmu_tx_assign(tx, TXG_WAIT);
  491         if (error) {
  492                 /*
  493                  * Not enough space to delete the file.  Leave it in the
  494                  * unlinked set, leaking it until the fs is remounted (at
  495                  * which point we'll call zfs_unlinked_drain() to process it).
  496                  */
  497                 dmu_tx_abort(tx);
  498                 VFS_UNLOCK_GIANT(vfslocked);
  499                 return;
  500         }
  501 
  502         if (xzp) {
  503                 dmu_buf_will_dirty(xzp->z_dbuf, tx);
  504                 mutex_enter(&xzp->z_lock);
  505                 xzp->z_unlinked = B_TRUE;       /* mark xzp for deletion */
  506                 xzp->z_phys->zp_links = 0;      /* no more links to it */
  507                 mutex_exit(&xzp->z_lock);
  508                 zfs_unlinked_add(xzp, tx);
  509         }
  510 
  511         /* Remove this znode from the unlinked set */
  512         error = zap_remove(os, zfsvfs->z_unlinkedobj,
  513             zfs_unlinked_hexname(obj_name, zp->z_id), tx);
  514         ASSERT3U(error, ==, 0);
  515 
  516         zfs_znode_delete(zp, tx);
  517 
  518         dmu_tx_commit(tx);
  519 
  520         if (xzp)
  521                 VN_RELE(ZTOV(xzp));
  522         VFS_UNLOCK_GIANT(vfslocked);
  523 }
  524 
  525 /*
  526  * Link zp into dl.  Can only fail if zp has been unlinked.
  527  */
  528 int
  529 zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
  530 {
  531         znode_t *dzp = dl->dl_dzp;
  532         vnode_t *vp = ZTOV(zp);
  533         uint64_t value;
  534         int zp_is_dir = (vp->v_type == VDIR);
  535         int error;
  536 
  537         dmu_buf_will_dirty(zp->z_dbuf, tx);
  538         mutex_enter(&zp->z_lock);
  539 
  540         if (!(flag & ZRENAMING)) {
  541                 if (zp->z_unlinked) {   /* no new links to unlinked zp */
  542                         ASSERT(!(flag & (ZNEW | ZEXISTS)));
  543                         mutex_exit(&zp->z_lock);
  544                         return (ENOENT);
  545                 }
  546                 zp->z_phys->zp_links++;
  547         }
  548         zp->z_phys->zp_parent = dzp->z_id;      /* dzp is now zp's parent */
  549 
  550         if (!(flag & ZNEW))
  551                 zfs_time_stamper_locked(zp, STATE_CHANGED, tx);
  552         mutex_exit(&zp->z_lock);
  553 
  554         dmu_buf_will_dirty(dzp->z_dbuf, tx);
  555         mutex_enter(&dzp->z_lock);
  556         dzp->z_phys->zp_size++;                 /* one dirent added */
  557         dzp->z_phys->zp_links += zp_is_dir;     /* ".." link from zp */
  558         zfs_time_stamper_locked(dzp, CONTENT_MODIFIED, tx);
  559         mutex_exit(&dzp->z_lock);
  560 
  561         /*
  562          * MacOS X will fill in the 4-bit object type here.
  563          */
  564         value = ZFS_DIRENT_MAKE(IFTODT(zp->z_phys->zp_mode), zp->z_id);
  565         error = zap_add(zp->z_zfsvfs->z_os, dzp->z_id, dl->dl_name,
  566             8, 1, &value, tx);
  567         ASSERT(error == 0);
  568 
  569         dnlc_update(ZTOV(dzp), dl->dl_name, vp);
  570 
  571         return (0);
  572 }
  573 
  574 /*
  575  * Unlink zp from dl, and mark zp for deletion if this was the last link.
  576  * Can fail if zp is a mount point (EBUSY) or a non-empty directory (EEXIST).
  577  * If 'unlinkedp' is NULL, we put unlinked znodes on the unlinked list.
  578  * If it's non-NULL, we use it to indicate whether the znode needs deletion,
  579  * and it's the caller's job to do it.
  580  */
  581 int
  582 zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag,
  583         boolean_t *unlinkedp)
  584 {
  585         znode_t *dzp = dl->dl_dzp;
  586         vnode_t *vp = ZTOV(zp);
  587         int zp_is_dir = (vp->v_type == VDIR);
  588         boolean_t unlinked = B_FALSE;
  589         int error;
  590 
  591         dnlc_remove(ZTOV(dzp), dl->dl_name);
  592 
  593         if (!(flag & ZRENAMING)) {
  594                 dmu_buf_will_dirty(zp->z_dbuf, tx);
  595 
  596                 if (vn_vfswlock(vp))            /* prevent new mounts on zp */
  597                         return (EBUSY);
  598 
  599                 if (vn_ismntpt(vp)) {           /* don't remove mount point */
  600                         vn_vfsunlock(vp);
  601                         return (EBUSY);
  602                 }
  603 
  604                 mutex_enter(&zp->z_lock);
  605                 if (zp_is_dir && !zfs_dirempty(zp)) {   /* dir not empty */
  606                         mutex_exit(&zp->z_lock);
  607                         vn_vfsunlock(vp);
  608                         return (ENOTEMPTY);
  609                 }
  610                 if (zp->z_phys->zp_links <= zp_is_dir) {
  611                         zfs_panic_recover("zfs: link count on vnode %p is %u, "
  612                             "should be at least %u", zp->z_vnode,
  613                             (int)zp->z_phys->zp_links,
  614                             zp_is_dir + 1);
  615                         zp->z_phys->zp_links = zp_is_dir + 1;
  616                 }
  617                 if (--zp->z_phys->zp_links == zp_is_dir) {
  618                         zp->z_unlinked = B_TRUE;
  619                         zp->z_phys->zp_links = 0;
  620                         unlinked = B_TRUE;
  621                 } else {
  622                         zfs_time_stamper_locked(zp, STATE_CHANGED, tx);
  623                 }
  624                 mutex_exit(&zp->z_lock);
  625                 vn_vfsunlock(vp);
  626         }
  627 
  628         dmu_buf_will_dirty(dzp->z_dbuf, tx);
  629         mutex_enter(&dzp->z_lock);
  630         dzp->z_phys->zp_size--;                 /* one dirent removed */
  631         dzp->z_phys->zp_links -= zp_is_dir;     /* ".." link from zp */
  632         zfs_time_stamper_locked(dzp, CONTENT_MODIFIED, tx);
  633         mutex_exit(&dzp->z_lock);
  634 
  635         error = zap_remove(zp->z_zfsvfs->z_os, dzp->z_id, dl->dl_name, tx);
  636         ASSERT(error == 0);
  637 
  638         if (unlinkedp != NULL)
  639                 *unlinkedp = unlinked;
  640         else if (unlinked)
  641                 zfs_unlinked_add(zp, tx);
  642 
  643         return (0);
  644 }
  645 
  646 /*
  647  * Indicate whether the directory is empty.  Works with or without z_lock
  648  * held, but can only be consider a hint in the latter case.  Returns true
  649  * if only "." and ".." remain and there's no work in progress.
  650  */
  651 boolean_t
  652 zfs_dirempty(znode_t *dzp)
  653 {
  654         return (dzp->z_phys->zp_size == 2 && dzp->z_dirlocks == 0);
  655 }
  656 
  657 int
  658 zfs_make_xattrdir(znode_t *zp, vattr_t *vap, vnode_t **xvpp, cred_t *cr)
  659 {
  660         zfsvfs_t *zfsvfs = zp->z_zfsvfs;
  661         znode_t *xzp;
  662         dmu_tx_t *tx;
  663         uint64_t xoid;
  664         int error;
  665 
  666         *xvpp = NULL;
  667 
  668         if (error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, cr))
  669                 return (error);
  670 
  671         tx = dmu_tx_create(zfsvfs->z_os);
  672         dmu_tx_hold_bonus(tx, zp->z_id);
  673         dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
  674         error = dmu_tx_assign(tx, zfsvfs->z_assign);
  675         if (error) {
  676                 if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT)
  677                         dmu_tx_wait(tx);
  678                 dmu_tx_abort(tx);
  679                 return (error);
  680         }
  681         zfs_mknode(zp, vap, &xoid, tx, cr, IS_XATTR, &xzp, 0);
  682         ASSERT(xzp->z_id == xoid);
  683         ASSERT(xzp->z_phys->zp_parent == zp->z_id);
  684         dmu_buf_will_dirty(zp->z_dbuf, tx);
  685         zp->z_phys->zp_xattr = xoid;
  686 
  687         (void) zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp, xzp, "");
  688         dmu_tx_commit(tx);
  689 
  690         *xvpp = ZTOV(xzp);
  691 
  692         return (0);
  693 }
  694 
  695 /*
  696  * Return a znode for the extended attribute directory for zp.
  697  * ** If the directory does not already exist, it is created **
  698  *
  699  *      IN:     zp      - znode to obtain attribute directory from
  700  *              cr      - credentials of caller
  701  *              flags   - flags from the VOP_LOOKUP call
  702  *
  703  *      OUT:    xzpp    - pointer to extended attribute znode
  704  *
  705  *      RETURN: 0 on success
  706  *              error number on failure
  707  */
  708 int
  709 zfs_get_xattrdir(znode_t *zp, vnode_t **xvpp, cred_t *cr, int flags)
  710 {
  711         zfsvfs_t        *zfsvfs = zp->z_zfsvfs;
  712         znode_t         *xzp;
  713         zfs_dirlock_t   *dl;
  714         vattr_t         va;
  715         int             error;
  716 top:
  717         error = zfs_dirent_lock(&dl, zp, "", &xzp, ZXATTR);
  718         if (error)
  719                 return (error);
  720 
  721         if (xzp != NULL) {
  722                 *xvpp = ZTOV(xzp);
  723                 zfs_dirent_unlock(dl);
  724                 return (0);
  725         }
  726 
  727         ASSERT(zp->z_phys->zp_xattr == 0);
  728 
  729 #ifdef TODO
  730         if (!(flags & CREATE_XATTR_DIR)) {
  731                 zfs_dirent_unlock(dl);
  732                 return (ENOENT);
  733         }
  734 #endif
  735 
  736         if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
  737                 zfs_dirent_unlock(dl);
  738                 return (EROFS);
  739         }
  740 
  741         /*
  742          * The ability to 'create' files in an attribute
  743          * directory comes from the write_xattr permission on the base file.
  744          *
  745          * The ability to 'search' an attribute directory requires
  746          * read_xattr permission on the base file.
  747          *
  748          * Once in a directory the ability to read/write attributes
  749          * is controlled by the permissions on the attribute file.
  750          */
  751         va.va_mask = AT_TYPE | AT_MODE | AT_UID | AT_GID;
  752         va.va_type = VDIR;
  753         va.va_mode = S_IFDIR | S_ISVTX | 0777;
  754         va.va_uid = (uid_t)zp->z_phys->zp_uid;
  755         va.va_gid = (gid_t)zp->z_phys->zp_gid;
  756 
  757         error = zfs_make_xattrdir(zp, &va, xvpp, cr);
  758         zfs_dirent_unlock(dl);
  759 
  760         if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
  761                 /* NB: we already did dmu_tx_wait() if necessary */
  762                 goto top;
  763         }
  764 
  765         return (error);
  766 }
  767 
  768 /*
  769  * Decide whether it is okay to remove within a sticky directory.
  770  *
  771  * In sticky directories, write access is not sufficient;
  772  * you can remove entries from a directory only if:
  773  *
  774  *      you own the directory,
  775  *      you own the entry,
  776  *      the entry is a plain file and you have write access,
  777  *      or you are privileged (checked in secpolicy...).
  778  *
  779  * The function returns 0 if remove access is granted.
  780  */
  781 int
  782 zfs_sticky_remove_access(znode_t *zdp, znode_t *zp, cred_t *cr)
  783 {
  784         uid_t           uid;
  785 
  786         if (zdp->z_zfsvfs->z_assign >= TXG_INITIAL)     /* ZIL replay */
  787                 return (0);
  788 
  789         if ((zdp->z_phys->zp_mode & S_ISVTX) == 0 ||
  790             (uid = crgetuid(cr)) == zdp->z_phys->zp_uid ||
  791             uid == zp->z_phys->zp_uid ||
  792             (ZTOV(zp)->v_type == VREG &&
  793             zfs_zaccess(zp, ACE_WRITE_DATA, cr) == 0))
  794                 return (0);
  795         else
  796                 return (secpolicy_vnode_remove(cr));
  797 }

Cache object: 67a2048e2582eb067e95198c09ce0963


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.