The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * CDDL HEADER START
    3  *
    4  * The contents of this file are subject to the terms of the
    5  * Common Development and Distribution License (the "License").
    6  * You may not use this file except in compliance with the License.
    7  *
    8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
    9  * or https://opensource.org/licenses/CDDL-1.0.
   10  * See the License for the specific language governing permissions
   11  * and limitations under the License.
   12  *
   13  * When distributing Covered Code, include this CDDL HEADER in each
   14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
   15  * If applicable, add the following below this CDDL HEADER, with the
   16  * fields enclosed by brackets "[]" replaced with your own identifying
   17  * information: Portions Copyright [yyyy] [name of copyright owner]
   18  *
   19  * CDDL HEADER END
   20  */
   21 /*
   22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
   23  * Copyright (c) 2013 by Delphix. All rights reserved.
   24  * Copyright 2017 Nexenta Systems, Inc.  All rights reserved.
   25  */
   26 
   27 #include <sys/types.h>
   28 #include <sys/param.h>
   29 #include <sys/time.h>
   30 #include <sys/systm.h>
   31 #include <sys/sysmacros.h>
   32 #include <sys/resource.h>
   33 #include <sys/vfs.h>
   34 #include <sys/vnode.h>
   35 #include <sys/file.h>
   36 #include <sys/stat.h>
   37 #include <sys/kmem.h>
   38 #include <sys/cmn_err.h>
   39 #include <sys/errno.h>
   40 #include <sys/unistd.h>
   41 #include <sys/sdt.h>
   42 #include <sys/fs/zfs.h>
   43 #include <sys/policy.h>
   44 #include <sys/zfs_znode.h>
   45 #include <sys/zfs_fuid.h>
   46 #include <sys/zfs_acl.h>
   47 #include <sys/zfs_dir.h>
   48 #include <sys/zfs_quota.h>
   49 #include <sys/zfs_vfsops.h>
   50 #include <sys/dmu.h>
   51 #include <sys/dnode.h>
   52 #include <sys/zap.h>
   53 #include <sys/sa.h>
   54 #include <acl/acl_common.h>
   55 
   56 
   57 #define ALLOW   ACE_ACCESS_ALLOWED_ACE_TYPE
   58 #define DENY    ACE_ACCESS_DENIED_ACE_TYPE
   59 #define MAX_ACE_TYPE    ACE_SYSTEM_ALARM_CALLBACK_OBJECT_ACE_TYPE
   60 #define MIN_ACE_TYPE    ALLOW
   61 
   62 #define OWNING_GROUP            (ACE_GROUP|ACE_IDENTIFIER_GROUP)
   63 #define EVERYONE_ALLOW_MASK (ACE_READ_ACL|ACE_READ_ATTRIBUTES | \
   64     ACE_READ_NAMED_ATTRS|ACE_SYNCHRONIZE)
   65 #define EVERYONE_DENY_MASK (ACE_WRITE_ACL|ACE_WRITE_OWNER | \
   66     ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS)
   67 #define OWNER_ALLOW_MASK (ACE_WRITE_ACL | ACE_WRITE_OWNER | \
   68     ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS)
   69 
   70 #define ZFS_CHECKED_MASKS (ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_READ_DATA| \
   71     ACE_READ_NAMED_ATTRS|ACE_WRITE_DATA|ACE_WRITE_ATTRIBUTES| \
   72     ACE_WRITE_NAMED_ATTRS|ACE_APPEND_DATA|ACE_EXECUTE|ACE_WRITE_OWNER| \
   73     ACE_WRITE_ACL|ACE_DELETE|ACE_DELETE_CHILD|ACE_SYNCHRONIZE)
   74 
   75 #define WRITE_MASK_DATA (ACE_WRITE_DATA|ACE_APPEND_DATA|ACE_WRITE_NAMED_ATTRS)
   76 #define WRITE_MASK_ATTRS (ACE_WRITE_ACL|ACE_WRITE_OWNER|ACE_WRITE_ATTRIBUTES| \
   77     ACE_DELETE|ACE_DELETE_CHILD)
   78 #define WRITE_MASK (WRITE_MASK_DATA|WRITE_MASK_ATTRS)
   79 
   80 #define OGE_CLEAR       (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \
   81     ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_EXECUTE)
   82 
   83 #define OKAY_MASK_BITS (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \
   84     ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_EXECUTE)
   85 
   86 #define ALL_INHERIT     (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE | \
   87     ACE_NO_PROPAGATE_INHERIT_ACE|ACE_INHERIT_ONLY_ACE|ACE_INHERITED_ACE)
   88 
   89 #define RESTRICTED_CLEAR        (ACE_WRITE_ACL|ACE_WRITE_OWNER)
   90 
   91 #define V4_ACL_WIDE_FLAGS (ZFS_ACL_AUTO_INHERIT|ZFS_ACL_DEFAULTED|\
   92     ZFS_ACL_PROTECTED)
   93 
   94 #define ZFS_ACL_WIDE_FLAGS (V4_ACL_WIDE_FLAGS|ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|\
   95     ZFS_ACL_OBJ_ACE)
   96 
   97 #define ALL_MODE_EXECS (S_IXUSR | S_IXGRP | S_IXOTH)
   98 
   99 static uint16_t
  100 zfs_ace_v0_get_type(void *acep)
  101 {
  102         return (((zfs_oldace_t *)acep)->z_type);
  103 }
  104 
  105 static uint16_t
  106 zfs_ace_v0_get_flags(void *acep)
  107 {
  108         return (((zfs_oldace_t *)acep)->z_flags);
  109 }
  110 
  111 static uint32_t
  112 zfs_ace_v0_get_mask(void *acep)
  113 {
  114         return (((zfs_oldace_t *)acep)->z_access_mask);
  115 }
  116 
  117 static uint64_t
  118 zfs_ace_v0_get_who(void *acep)
  119 {
  120         return (((zfs_oldace_t *)acep)->z_fuid);
  121 }
  122 
  123 static void
  124 zfs_ace_v0_set_type(void *acep, uint16_t type)
  125 {
  126         ((zfs_oldace_t *)acep)->z_type = type;
  127 }
  128 
  129 static void
  130 zfs_ace_v0_set_flags(void *acep, uint16_t flags)
  131 {
  132         ((zfs_oldace_t *)acep)->z_flags = flags;
  133 }
  134 
  135 static void
  136 zfs_ace_v0_set_mask(void *acep, uint32_t mask)
  137 {
  138         ((zfs_oldace_t *)acep)->z_access_mask = mask;
  139 }
  140 
  141 static void
  142 zfs_ace_v0_set_who(void *acep, uint64_t who)
  143 {
  144         ((zfs_oldace_t *)acep)->z_fuid = who;
  145 }
  146 
  147 static size_t
  148 zfs_ace_v0_size(void *acep)
  149 {
  150         (void) acep;
  151         return (sizeof (zfs_oldace_t));
  152 }
  153 
  154 static size_t
  155 zfs_ace_v0_abstract_size(void)
  156 {
  157         return (sizeof (zfs_oldace_t));
  158 }
  159 
  160 static int
  161 zfs_ace_v0_mask_off(void)
  162 {
  163         return (offsetof(zfs_oldace_t, z_access_mask));
  164 }
  165 
  166 static int
  167 zfs_ace_v0_data(void *acep, void **datap)
  168 {
  169         (void) acep;
  170         *datap = NULL;
  171         return (0);
  172 }
  173 
  174 static const acl_ops_t zfs_acl_v0_ops = {
  175         zfs_ace_v0_get_mask,
  176         zfs_ace_v0_set_mask,
  177         zfs_ace_v0_get_flags,
  178         zfs_ace_v0_set_flags,
  179         zfs_ace_v0_get_type,
  180         zfs_ace_v0_set_type,
  181         zfs_ace_v0_get_who,
  182         zfs_ace_v0_set_who,
  183         zfs_ace_v0_size,
  184         zfs_ace_v0_abstract_size,
  185         zfs_ace_v0_mask_off,
  186         zfs_ace_v0_data
  187 };
  188 
  189 static uint16_t
  190 zfs_ace_fuid_get_type(void *acep)
  191 {
  192         return (((zfs_ace_hdr_t *)acep)->z_type);
  193 }
  194 
  195 static uint16_t
  196 zfs_ace_fuid_get_flags(void *acep)
  197 {
  198         return (((zfs_ace_hdr_t *)acep)->z_flags);
  199 }
  200 
  201 static uint32_t
  202 zfs_ace_fuid_get_mask(void *acep)
  203 {
  204         return (((zfs_ace_hdr_t *)acep)->z_access_mask);
  205 }
  206 
  207 static uint64_t
  208 zfs_ace_fuid_get_who(void *args)
  209 {
  210         uint16_t entry_type;
  211         zfs_ace_t *acep = args;
  212 
  213         entry_type = acep->z_hdr.z_flags & ACE_TYPE_FLAGS;
  214 
  215         if (entry_type == ACE_OWNER || entry_type == OWNING_GROUP ||
  216             entry_type == ACE_EVERYONE)
  217                 return (-1);
  218         return (((zfs_ace_t *)acep)->z_fuid);
  219 }
  220 
  221 static void
  222 zfs_ace_fuid_set_type(void *acep, uint16_t type)
  223 {
  224         ((zfs_ace_hdr_t *)acep)->z_type = type;
  225 }
  226 
  227 static void
  228 zfs_ace_fuid_set_flags(void *acep, uint16_t flags)
  229 {
  230         ((zfs_ace_hdr_t *)acep)->z_flags = flags;
  231 }
  232 
  233 static void
  234 zfs_ace_fuid_set_mask(void *acep, uint32_t mask)
  235 {
  236         ((zfs_ace_hdr_t *)acep)->z_access_mask = mask;
  237 }
  238 
  239 static void
  240 zfs_ace_fuid_set_who(void *arg, uint64_t who)
  241 {
  242         zfs_ace_t *acep = arg;
  243 
  244         uint16_t entry_type = acep->z_hdr.z_flags & ACE_TYPE_FLAGS;
  245 
  246         if (entry_type == ACE_OWNER || entry_type == OWNING_GROUP ||
  247             entry_type == ACE_EVERYONE)
  248                 return;
  249         acep->z_fuid = who;
  250 }
  251 
  252 static size_t
  253 zfs_ace_fuid_size(void *acep)
  254 {
  255         zfs_ace_hdr_t *zacep = acep;
  256         uint16_t entry_type;
  257 
  258         switch (zacep->z_type) {
  259         case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
  260         case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
  261         case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
  262         case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
  263                 return (sizeof (zfs_object_ace_t));
  264         case ALLOW:
  265         case DENY:
  266                 entry_type =
  267                     (((zfs_ace_hdr_t *)acep)->z_flags & ACE_TYPE_FLAGS);
  268                 if (entry_type == ACE_OWNER ||
  269                     entry_type == OWNING_GROUP ||
  270                     entry_type == ACE_EVERYONE)
  271                         return (sizeof (zfs_ace_hdr_t));
  272                 zfs_fallthrough;
  273         default:
  274                 return (sizeof (zfs_ace_t));
  275         }
  276 }
  277 
  278 static size_t
  279 zfs_ace_fuid_abstract_size(void)
  280 {
  281         return (sizeof (zfs_ace_hdr_t));
  282 }
  283 
  284 static int
  285 zfs_ace_fuid_mask_off(void)
  286 {
  287         return (offsetof(zfs_ace_hdr_t, z_access_mask));
  288 }
  289 
  290 static int
  291 zfs_ace_fuid_data(void *acep, void **datap)
  292 {
  293         zfs_ace_t *zacep = acep;
  294         zfs_object_ace_t *zobjp;
  295 
  296         switch (zacep->z_hdr.z_type) {
  297         case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
  298         case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
  299         case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
  300         case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
  301                 zobjp = acep;
  302                 *datap = (caddr_t)zobjp + sizeof (zfs_ace_t);
  303                 return (sizeof (zfs_object_ace_t) - sizeof (zfs_ace_t));
  304         default:
  305                 *datap = NULL;
  306                 return (0);
  307         }
  308 }
  309 
  310 static const acl_ops_t zfs_acl_fuid_ops = {
  311         zfs_ace_fuid_get_mask,
  312         zfs_ace_fuid_set_mask,
  313         zfs_ace_fuid_get_flags,
  314         zfs_ace_fuid_set_flags,
  315         zfs_ace_fuid_get_type,
  316         zfs_ace_fuid_set_type,
  317         zfs_ace_fuid_get_who,
  318         zfs_ace_fuid_set_who,
  319         zfs_ace_fuid_size,
  320         zfs_ace_fuid_abstract_size,
  321         zfs_ace_fuid_mask_off,
  322         zfs_ace_fuid_data
  323 };
  324 
  325 /*
  326  * The following three functions are provided for compatibility with
  327  * older ZPL version in order to determine if the file use to have
  328  * an external ACL and what version of ACL previously existed on the
  329  * file.  Would really be nice to not need this, sigh.
  330  */
  331 uint64_t
  332 zfs_external_acl(znode_t *zp)
  333 {
  334         zfs_acl_phys_t acl_phys;
  335         int error;
  336 
  337         if (zp->z_is_sa)
  338                 return (0);
  339 
  340         /*
  341          * Need to deal with a potential
  342          * race where zfs_sa_upgrade could cause
  343          * z_isa_sa to change.
  344          *
  345          * If the lookup fails then the state of z_is_sa should have
  346          * changed.
  347          */
  348 
  349         if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zp->z_zfsvfs),
  350             &acl_phys, sizeof (acl_phys))) == 0)
  351                 return (acl_phys.z_acl_extern_obj);
  352         else {
  353                 /*
  354                  * after upgrade the SA_ZPL_ZNODE_ACL should have been
  355                  * removed
  356                  */
  357                 VERIFY(zp->z_is_sa);
  358                 VERIFY3S(error, ==, ENOENT);
  359                 return (0);
  360         }
  361 }
  362 
  363 /*
  364  * Determine size of ACL in bytes
  365  *
  366  * This is more complicated than it should be since we have to deal
  367  * with old external ACLs.
  368  */
  369 static int
  370 zfs_acl_znode_info(znode_t *zp, int *aclsize, int *aclcount,
  371     zfs_acl_phys_t *aclphys)
  372 {
  373         zfsvfs_t *zfsvfs = zp->z_zfsvfs;
  374         uint64_t acl_count;
  375         int size;
  376         int error;
  377 
  378         ASSERT(MUTEX_HELD(&zp->z_acl_lock));
  379         if (zp->z_is_sa) {
  380                 if ((error = sa_size(zp->z_sa_hdl, SA_ZPL_DACL_ACES(zfsvfs),
  381                     &size)) != 0)
  382                         return (error);
  383                 *aclsize = size;
  384                 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DACL_COUNT(zfsvfs),
  385                     &acl_count, sizeof (acl_count))) != 0)
  386                         return (error);
  387                 *aclcount = acl_count;
  388         } else {
  389                 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zfsvfs),
  390                     aclphys, sizeof (*aclphys))) != 0)
  391                         return (error);
  392 
  393                 if (aclphys->z_acl_version == ZFS_ACL_VERSION_INITIAL) {
  394                         *aclsize = ZFS_ACL_SIZE(aclphys->z_acl_size);
  395                         *aclcount = aclphys->z_acl_size;
  396                 } else {
  397                         *aclsize = aclphys->z_acl_size;
  398                         *aclcount = aclphys->z_acl_count;
  399                 }
  400         }
  401         return (0);
  402 }
  403 
  404 int
  405 zfs_znode_acl_version(znode_t *zp)
  406 {
  407         zfs_acl_phys_t acl_phys;
  408 
  409         if (zp->z_is_sa)
  410                 return (ZFS_ACL_VERSION_FUID);
  411         else {
  412                 int error;
  413 
  414                 /*
  415                  * Need to deal with a potential
  416                  * race where zfs_sa_upgrade could cause
  417                  * z_isa_sa to change.
  418                  *
  419                  * If the lookup fails then the state of z_is_sa should have
  420                  * changed.
  421                  */
  422                 if ((error = sa_lookup(zp->z_sa_hdl,
  423                     SA_ZPL_ZNODE_ACL(zp->z_zfsvfs),
  424                     &acl_phys, sizeof (acl_phys))) == 0)
  425                         return (acl_phys.z_acl_version);
  426                 else {
  427                         /*
  428                          * After upgrade SA_ZPL_ZNODE_ACL should have
  429                          * been removed.
  430                          */
  431                         VERIFY(zp->z_is_sa);
  432                         VERIFY3S(error, ==, ENOENT);
  433                         return (ZFS_ACL_VERSION_FUID);
  434                 }
  435         }
  436 }
  437 
  438 static int
  439 zfs_acl_version(int version)
  440 {
  441         if (version < ZPL_VERSION_FUID)
  442                 return (ZFS_ACL_VERSION_INITIAL);
  443         else
  444                 return (ZFS_ACL_VERSION_FUID);
  445 }
  446 
  447 static int
  448 zfs_acl_version_zp(znode_t *zp)
  449 {
  450         return (zfs_acl_version(zp->z_zfsvfs->z_version));
  451 }
  452 
  453 zfs_acl_t *
  454 zfs_acl_alloc(int vers)
  455 {
  456         zfs_acl_t *aclp;
  457 
  458         aclp = kmem_zalloc(sizeof (zfs_acl_t), KM_SLEEP);
  459         list_create(&aclp->z_acl, sizeof (zfs_acl_node_t),
  460             offsetof(zfs_acl_node_t, z_next));
  461         aclp->z_version = vers;
  462         if (vers == ZFS_ACL_VERSION_FUID)
  463                 aclp->z_ops = &zfs_acl_fuid_ops;
  464         else
  465                 aclp->z_ops = &zfs_acl_v0_ops;
  466         return (aclp);
  467 }
  468 
  469 zfs_acl_node_t *
  470 zfs_acl_node_alloc(size_t bytes)
  471 {
  472         zfs_acl_node_t *aclnode;
  473 
  474         aclnode = kmem_zalloc(sizeof (zfs_acl_node_t), KM_SLEEP);
  475         if (bytes) {
  476                 aclnode->z_acldata = kmem_alloc(bytes, KM_SLEEP);
  477                 aclnode->z_allocdata = aclnode->z_acldata;
  478                 aclnode->z_allocsize = bytes;
  479                 aclnode->z_size = bytes;
  480         }
  481 
  482         return (aclnode);
  483 }
  484 
  485 static void
  486 zfs_acl_node_free(zfs_acl_node_t *aclnode)
  487 {
  488         if (aclnode->z_allocsize)
  489                 kmem_free(aclnode->z_allocdata, aclnode->z_allocsize);
  490         kmem_free(aclnode, sizeof (zfs_acl_node_t));
  491 }
  492 
  493 static void
  494 zfs_acl_release_nodes(zfs_acl_t *aclp)
  495 {
  496         zfs_acl_node_t *aclnode;
  497 
  498         while ((aclnode = list_head(&aclp->z_acl))) {
  499                 list_remove(&aclp->z_acl, aclnode);
  500                 zfs_acl_node_free(aclnode);
  501         }
  502         aclp->z_acl_count = 0;
  503         aclp->z_acl_bytes = 0;
  504 }
  505 
  506 void
  507 zfs_acl_free(zfs_acl_t *aclp)
  508 {
  509         zfs_acl_release_nodes(aclp);
  510         list_destroy(&aclp->z_acl);
  511         kmem_free(aclp, sizeof (zfs_acl_t));
  512 }
  513 
  514 static boolean_t
  515 zfs_acl_valid_ace_type(uint_t type, uint_t flags)
  516 {
  517         uint16_t entry_type;
  518 
  519         switch (type) {
  520         case ALLOW:
  521         case DENY:
  522         case ACE_SYSTEM_AUDIT_ACE_TYPE:
  523         case ACE_SYSTEM_ALARM_ACE_TYPE:
  524                 entry_type = flags & ACE_TYPE_FLAGS;
  525                 return (entry_type == ACE_OWNER ||
  526                     entry_type == OWNING_GROUP ||
  527                     entry_type == ACE_EVERYONE || entry_type == 0 ||
  528                     entry_type == ACE_IDENTIFIER_GROUP);
  529         default:
  530                 if (type <= MAX_ACE_TYPE)
  531                         return (B_TRUE);
  532         }
  533         return (B_FALSE);
  534 }
  535 
  536 static boolean_t
  537 zfs_ace_valid(vtype_t obj_type, zfs_acl_t *aclp, uint16_t type, uint16_t iflags)
  538 {
  539         /*
  540          * first check type of entry
  541          */
  542 
  543         if (!zfs_acl_valid_ace_type(type, iflags))
  544                 return (B_FALSE);
  545 
  546         switch (type) {
  547         case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
  548         case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
  549         case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
  550         case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
  551                 if (aclp->z_version < ZFS_ACL_VERSION_FUID)
  552                         return (B_FALSE);
  553                 aclp->z_hints |= ZFS_ACL_OBJ_ACE;
  554         }
  555 
  556         /*
  557          * next check inheritance level flags
  558          */
  559 
  560         if (obj_type == VDIR &&
  561             (iflags & (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE)))
  562                 aclp->z_hints |= ZFS_INHERIT_ACE;
  563 
  564         if (iflags & (ACE_INHERIT_ONLY_ACE|ACE_NO_PROPAGATE_INHERIT_ACE)) {
  565                 if ((iflags & (ACE_FILE_INHERIT_ACE|
  566                     ACE_DIRECTORY_INHERIT_ACE)) == 0) {
  567                         return (B_FALSE);
  568                 }
  569         }
  570 
  571         return (B_TRUE);
  572 }
  573 
  574 static void *
  575 zfs_acl_next_ace(zfs_acl_t *aclp, void *start, uint64_t *who,
  576     uint32_t *access_mask, uint16_t *iflags, uint16_t *type)
  577 {
  578         zfs_acl_node_t *aclnode;
  579 
  580         ASSERT3P(aclp, !=, NULL);
  581 
  582         if (start == NULL) {
  583                 aclnode = list_head(&aclp->z_acl);
  584                 if (aclnode == NULL)
  585                         return (NULL);
  586 
  587                 aclp->z_next_ace = aclnode->z_acldata;
  588                 aclp->z_curr_node = aclnode;
  589                 aclnode->z_ace_idx = 0;
  590         }
  591 
  592         aclnode = aclp->z_curr_node;
  593 
  594         if (aclnode == NULL)
  595                 return (NULL);
  596 
  597         if (aclnode->z_ace_idx >= aclnode->z_ace_count) {
  598                 aclnode = list_next(&aclp->z_acl, aclnode);
  599                 if (aclnode == NULL)
  600                         return (NULL);
  601                 else {
  602                         aclp->z_curr_node = aclnode;
  603                         aclnode->z_ace_idx = 0;
  604                         aclp->z_next_ace = aclnode->z_acldata;
  605                 }
  606         }
  607 
  608         if (aclnode->z_ace_idx < aclnode->z_ace_count) {
  609                 void *acep = aclp->z_next_ace;
  610                 size_t ace_size;
  611 
  612                 /*
  613                  * Make sure we don't overstep our bounds
  614                  */
  615                 ace_size = aclp->z_ops->ace_size(acep);
  616 
  617                 if (((caddr_t)acep + ace_size) >
  618                     ((caddr_t)aclnode->z_acldata + aclnode->z_size)) {
  619                         return (NULL);
  620                 }
  621 
  622                 *iflags = aclp->z_ops->ace_flags_get(acep);
  623                 *type = aclp->z_ops->ace_type_get(acep);
  624                 *access_mask = aclp->z_ops->ace_mask_get(acep);
  625                 *who = aclp->z_ops->ace_who_get(acep);
  626                 aclp->z_next_ace = (caddr_t)aclp->z_next_ace + ace_size;
  627                 aclnode->z_ace_idx++;
  628 
  629                 return ((void *)acep);
  630         }
  631         return (NULL);
  632 }
  633 
  634 static uintptr_t
  635 zfs_ace_walk(void *datap, uintptr_t cookie, int aclcnt,
  636     uint16_t *flags, uint16_t *type, uint32_t *mask)
  637 {
  638         (void) aclcnt;
  639         zfs_acl_t *aclp = datap;
  640         zfs_ace_hdr_t *acep = (zfs_ace_hdr_t *)(uintptr_t)cookie;
  641         uint64_t who;
  642 
  643         acep = zfs_acl_next_ace(aclp, acep, &who, mask,
  644             flags, type);
  645         return ((uintptr_t)acep);
  646 }
  647 
  648 /*
  649  * Copy ACE to internal ZFS format.
  650  * While processing the ACL each ACE will be validated for correctness.
  651  * ACE FUIDs will be created later.
  652  */
  653 static int
  654 zfs_copy_ace_2_fuid(zfsvfs_t *zfsvfs, vtype_t obj_type, zfs_acl_t *aclp,
  655     void *datap, zfs_ace_t *z_acl, uint64_t aclcnt, size_t *size,
  656     zfs_fuid_info_t **fuidp, cred_t *cr)
  657 {
  658         int i;
  659         uint16_t entry_type;
  660         zfs_ace_t *aceptr = z_acl;
  661         ace_t *acep = datap;
  662         zfs_object_ace_t *zobjacep;
  663         ace_object_t *aceobjp;
  664 
  665         for (i = 0; i != aclcnt; i++) {
  666                 aceptr->z_hdr.z_access_mask = acep->a_access_mask;
  667                 aceptr->z_hdr.z_flags = acep->a_flags;
  668                 aceptr->z_hdr.z_type = acep->a_type;
  669                 entry_type = aceptr->z_hdr.z_flags & ACE_TYPE_FLAGS;
  670                 if (entry_type != ACE_OWNER && entry_type != OWNING_GROUP &&
  671                     entry_type != ACE_EVERYONE) {
  672                         aceptr->z_fuid = zfs_fuid_create(zfsvfs, acep->a_who,
  673                             cr, (entry_type == 0) ?
  674                             ZFS_ACE_USER : ZFS_ACE_GROUP, fuidp);
  675                 }
  676 
  677                 /*
  678                  * Make sure ACE is valid
  679                  */
  680                 if (zfs_ace_valid(obj_type, aclp, aceptr->z_hdr.z_type,
  681                     aceptr->z_hdr.z_flags) != B_TRUE)
  682                         return (SET_ERROR(EINVAL));
  683 
  684                 switch (acep->a_type) {
  685                 case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
  686                 case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
  687                 case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
  688                 case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
  689                         zobjacep = (zfs_object_ace_t *)aceptr;
  690                         aceobjp = (ace_object_t *)acep;
  691 
  692                         memcpy(zobjacep->z_object_type, aceobjp->a_obj_type,
  693                             sizeof (aceobjp->a_obj_type));
  694                         memcpy(zobjacep->z_inherit_type,
  695                             aceobjp->a_inherit_obj_type,
  696                             sizeof (aceobjp->a_inherit_obj_type));
  697                         acep = (ace_t *)((caddr_t)acep + sizeof (ace_object_t));
  698                         break;
  699                 default:
  700                         acep = (ace_t *)((caddr_t)acep + sizeof (ace_t));
  701                 }
  702 
  703                 aceptr = (zfs_ace_t *)((caddr_t)aceptr +
  704                     aclp->z_ops->ace_size(aceptr));
  705         }
  706 
  707         *size = (caddr_t)aceptr - (caddr_t)z_acl;
  708 
  709         return (0);
  710 }
  711 
  712 /*
  713  * Copy ZFS ACEs to fixed size ace_t layout
  714  */
  715 static void
  716 zfs_copy_fuid_2_ace(zfsvfs_t *zfsvfs, zfs_acl_t *aclp, cred_t *cr,
  717     void *datap, int filter)
  718 {
  719         uint64_t who;
  720         uint32_t access_mask;
  721         uint16_t iflags, type;
  722         zfs_ace_hdr_t *zacep = NULL;
  723         ace_t *acep = datap;
  724         ace_object_t *objacep;
  725         zfs_object_ace_t *zobjacep;
  726         size_t ace_size;
  727         uint16_t entry_type;
  728 
  729         while ((zacep = zfs_acl_next_ace(aclp, zacep,
  730             &who, &access_mask, &iflags, &type))) {
  731 
  732                 switch (type) {
  733                 case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
  734                 case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
  735                 case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
  736                 case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
  737                         if (filter) {
  738                                 continue;
  739                         }
  740                         zobjacep = (zfs_object_ace_t *)zacep;
  741                         objacep = (ace_object_t *)acep;
  742                         memcpy(objacep->a_obj_type,
  743                             zobjacep->z_object_type,
  744                             sizeof (zobjacep->z_object_type));
  745                         memcpy(objacep->a_inherit_obj_type,
  746                             zobjacep->z_inherit_type,
  747                             sizeof (zobjacep->z_inherit_type));
  748                         ace_size = sizeof (ace_object_t);
  749                         break;
  750                 default:
  751                         ace_size = sizeof (ace_t);
  752                         break;
  753                 }
  754 
  755                 entry_type = (iflags & ACE_TYPE_FLAGS);
  756                 if ((entry_type != ACE_OWNER &&
  757                     entry_type != OWNING_GROUP &&
  758                     entry_type != ACE_EVERYONE)) {
  759                         acep->a_who = zfs_fuid_map_id(zfsvfs, who,
  760                             cr, (entry_type & ACE_IDENTIFIER_GROUP) ?
  761                             ZFS_ACE_GROUP : ZFS_ACE_USER);
  762                 } else {
  763                         acep->a_who = (uid_t)(int64_t)who;
  764                 }
  765                 acep->a_access_mask = access_mask;
  766                 acep->a_flags = iflags;
  767                 acep->a_type = type;
  768                 acep = (ace_t *)((caddr_t)acep + ace_size);
  769         }
  770 }
  771 
  772 static int
  773 zfs_copy_ace_2_oldace(vtype_t obj_type, zfs_acl_t *aclp, ace_t *acep,
  774     zfs_oldace_t *z_acl, int aclcnt, size_t *size)
  775 {
  776         int i;
  777         zfs_oldace_t *aceptr = z_acl;
  778 
  779         for (i = 0; i != aclcnt; i++, aceptr++) {
  780                 aceptr->z_access_mask = acep[i].a_access_mask;
  781                 aceptr->z_type = acep[i].a_type;
  782                 aceptr->z_flags = acep[i].a_flags;
  783                 aceptr->z_fuid = acep[i].a_who;
  784                 /*
  785                  * Make sure ACE is valid
  786                  */
  787                 if (zfs_ace_valid(obj_type, aclp, aceptr->z_type,
  788                     aceptr->z_flags) != B_TRUE)
  789                         return (SET_ERROR(EINVAL));
  790         }
  791         *size = (caddr_t)aceptr - (caddr_t)z_acl;
  792         return (0);
  793 }
  794 
  795 /*
  796  * convert old ACL format to new
  797  */
  798 void
  799 zfs_acl_xform(znode_t *zp, zfs_acl_t *aclp, cred_t *cr)
  800 {
  801         zfs_oldace_t *oldaclp;
  802         int i;
  803         uint16_t type, iflags;
  804         uint32_t access_mask;
  805         uint64_t who;
  806         void *cookie = NULL;
  807         zfs_acl_node_t *newaclnode;
  808 
  809         ASSERT3U(aclp->z_version, ==, ZFS_ACL_VERSION_INITIAL);
  810         /*
  811          * First create the ACE in a contiguous piece of memory
  812          * for zfs_copy_ace_2_fuid().
  813          *
  814          * We only convert an ACL once, so this won't happen
  815          * everytime.
  816          */
  817         oldaclp = kmem_alloc(sizeof (zfs_oldace_t) * aclp->z_acl_count,
  818             KM_SLEEP);
  819         i = 0;
  820         while ((cookie = zfs_acl_next_ace(aclp, cookie, &who,
  821             &access_mask, &iflags, &type))) {
  822                 oldaclp[i].z_flags = iflags;
  823                 oldaclp[i].z_type = type;
  824                 oldaclp[i].z_fuid = who;
  825                 oldaclp[i++].z_access_mask = access_mask;
  826         }
  827 
  828         newaclnode = zfs_acl_node_alloc(aclp->z_acl_count *
  829             sizeof (zfs_object_ace_t));
  830         aclp->z_ops = &zfs_acl_fuid_ops;
  831         VERIFY0(zfs_copy_ace_2_fuid(zp->z_zfsvfs, ZTOV(zp)->v_type, aclp,
  832             oldaclp, newaclnode->z_acldata, aclp->z_acl_count,
  833             &newaclnode->z_size, NULL, cr));
  834         newaclnode->z_ace_count = aclp->z_acl_count;
  835         aclp->z_version = ZFS_ACL_VERSION;
  836         kmem_free(oldaclp, aclp->z_acl_count * sizeof (zfs_oldace_t));
  837 
  838         /*
  839          * Release all previous ACL nodes
  840          */
  841 
  842         zfs_acl_release_nodes(aclp);
  843 
  844         list_insert_head(&aclp->z_acl, newaclnode);
  845 
  846         aclp->z_acl_bytes = newaclnode->z_size;
  847         aclp->z_acl_count = newaclnode->z_ace_count;
  848 
  849 }
  850 
  851 /*
  852  * Convert unix access mask to v4 access mask
  853  */
  854 static uint32_t
  855 zfs_unix_to_v4(uint32_t access_mask)
  856 {
  857         uint32_t new_mask = 0;
  858 
  859         if (access_mask & S_IXOTH)
  860                 new_mask |= ACE_EXECUTE;
  861         if (access_mask & S_IWOTH)
  862                 new_mask |= ACE_WRITE_DATA;
  863         if (access_mask & S_IROTH)
  864                 new_mask |= ACE_READ_DATA;
  865         return (new_mask);
  866 }
  867 
  868 static void
  869 zfs_set_ace(zfs_acl_t *aclp, void *acep, uint32_t access_mask,
  870     uint16_t access_type, uint64_t fuid, uint16_t entry_type)
  871 {
  872         uint16_t type = entry_type & ACE_TYPE_FLAGS;
  873 
  874         aclp->z_ops->ace_mask_set(acep, access_mask);
  875         aclp->z_ops->ace_type_set(acep, access_type);
  876         aclp->z_ops->ace_flags_set(acep, entry_type);
  877         if ((type != ACE_OWNER && type != OWNING_GROUP &&
  878             type != ACE_EVERYONE))
  879                 aclp->z_ops->ace_who_set(acep, fuid);
  880 }
  881 
  882 /*
  883  * Determine mode of file based on ACL.
  884  */
  885 uint64_t
  886 zfs_mode_compute(uint64_t fmode, zfs_acl_t *aclp,
  887     uint64_t *pflags, uint64_t fuid, uint64_t fgid)
  888 {
  889         int             entry_type;
  890         mode_t          mode;
  891         mode_t          seen = 0;
  892         zfs_ace_hdr_t   *acep = NULL;
  893         uint64_t        who;
  894         uint16_t        iflags, type;
  895         uint32_t        access_mask;
  896         boolean_t       an_exec_denied = B_FALSE;
  897 
  898         mode = (fmode & (S_IFMT | S_ISUID | S_ISGID | S_ISVTX));
  899 
  900         while ((acep = zfs_acl_next_ace(aclp, acep, &who,
  901             &access_mask, &iflags, &type))) {
  902 
  903                 if (!zfs_acl_valid_ace_type(type, iflags))
  904                         continue;
  905 
  906                 entry_type = (iflags & ACE_TYPE_FLAGS);
  907 
  908                 /*
  909                  * Skip over any inherit_only ACEs
  910                  */
  911                 if (iflags & ACE_INHERIT_ONLY_ACE)
  912                         continue;
  913 
  914                 if (entry_type == ACE_OWNER || (entry_type == 0 &&
  915                     who == fuid)) {
  916                         if ((access_mask & ACE_READ_DATA) &&
  917                             (!(seen & S_IRUSR))) {
  918                                 seen |= S_IRUSR;
  919                                 if (type == ALLOW) {
  920                                         mode |= S_IRUSR;
  921                                 }
  922                         }
  923                         if ((access_mask & ACE_WRITE_DATA) &&
  924                             (!(seen & S_IWUSR))) {
  925                                 seen |= S_IWUSR;
  926                                 if (type == ALLOW) {
  927                                         mode |= S_IWUSR;
  928                                 }
  929                         }
  930                         if ((access_mask & ACE_EXECUTE) &&
  931                             (!(seen & S_IXUSR))) {
  932                                 seen |= S_IXUSR;
  933                                 if (type == ALLOW) {
  934                                         mode |= S_IXUSR;
  935                                 }
  936                         }
  937                 } else if (entry_type == OWNING_GROUP ||
  938                     (entry_type == ACE_IDENTIFIER_GROUP && who == fgid)) {
  939                         if ((access_mask & ACE_READ_DATA) &&
  940                             (!(seen & S_IRGRP))) {
  941                                 seen |= S_IRGRP;
  942                                 if (type == ALLOW) {
  943                                         mode |= S_IRGRP;
  944                                 }
  945                         }
  946                         if ((access_mask & ACE_WRITE_DATA) &&
  947                             (!(seen & S_IWGRP))) {
  948                                 seen |= S_IWGRP;
  949                                 if (type == ALLOW) {
  950                                         mode |= S_IWGRP;
  951                                 }
  952                         }
  953                         if ((access_mask & ACE_EXECUTE) &&
  954                             (!(seen & S_IXGRP))) {
  955                                 seen |= S_IXGRP;
  956                                 if (type == ALLOW) {
  957                                         mode |= S_IXGRP;
  958                                 }
  959                         }
  960                 } else if (entry_type == ACE_EVERYONE) {
  961                         if ((access_mask & ACE_READ_DATA)) {
  962                                 if (!(seen & S_IRUSR)) {
  963                                         seen |= S_IRUSR;
  964                                         if (type == ALLOW) {
  965                                                 mode |= S_IRUSR;
  966                                         }
  967                                 }
  968                                 if (!(seen & S_IRGRP)) {
  969                                         seen |= S_IRGRP;
  970                                         if (type == ALLOW) {
  971                                                 mode |= S_IRGRP;
  972                                         }
  973                                 }
  974                                 if (!(seen & S_IROTH)) {
  975                                         seen |= S_IROTH;
  976                                         if (type == ALLOW) {
  977                                                 mode |= S_IROTH;
  978                                         }
  979                                 }
  980                         }
  981                         if ((access_mask & ACE_WRITE_DATA)) {
  982                                 if (!(seen & S_IWUSR)) {
  983                                         seen |= S_IWUSR;
  984                                         if (type == ALLOW) {
  985                                                 mode |= S_IWUSR;
  986                                         }
  987                                 }
  988                                 if (!(seen & S_IWGRP)) {
  989                                         seen |= S_IWGRP;
  990                                         if (type == ALLOW) {
  991                                                 mode |= S_IWGRP;
  992                                         }
  993                                 }
  994                                 if (!(seen & S_IWOTH)) {
  995                                         seen |= S_IWOTH;
  996                                         if (type == ALLOW) {
  997                                                 mode |= S_IWOTH;
  998                                         }
  999                                 }
 1000                         }
 1001                         if ((access_mask & ACE_EXECUTE)) {
 1002                                 if (!(seen & S_IXUSR)) {
 1003                                         seen |= S_IXUSR;
 1004                                         if (type == ALLOW) {
 1005                                                 mode |= S_IXUSR;
 1006                                         }
 1007                                 }
 1008                                 if (!(seen & S_IXGRP)) {
 1009                                         seen |= S_IXGRP;
 1010                                         if (type == ALLOW) {
 1011                                                 mode |= S_IXGRP;
 1012                                         }
 1013                                 }
 1014                                 if (!(seen & S_IXOTH)) {
 1015                                         seen |= S_IXOTH;
 1016                                         if (type == ALLOW) {
 1017                                                 mode |= S_IXOTH;
 1018                                         }
 1019                                 }
 1020                         }
 1021                 } else {
 1022                         /*
 1023                          * Only care if this IDENTIFIER_GROUP or
 1024                          * USER ACE denies execute access to someone,
 1025                          * mode is not affected
 1026                          */
 1027                         if ((access_mask & ACE_EXECUTE) && type == DENY)
 1028                                 an_exec_denied = B_TRUE;
 1029                 }
 1030         }
 1031 
 1032         /*
 1033          * Failure to allow is effectively a deny, so execute permission
 1034          * is denied if it was never mentioned or if we explicitly
 1035          * weren't allowed it.
 1036          */
 1037         if (!an_exec_denied &&
 1038             ((seen & ALL_MODE_EXECS) != ALL_MODE_EXECS ||
 1039             (mode & ALL_MODE_EXECS) != ALL_MODE_EXECS))
 1040                 an_exec_denied = B_TRUE;
 1041 
 1042         if (an_exec_denied)
 1043                 *pflags &= ~ZFS_NO_EXECS_DENIED;
 1044         else
 1045                 *pflags |= ZFS_NO_EXECS_DENIED;
 1046 
 1047         return (mode);
 1048 }
 1049 
 1050 /*
 1051  * Read an external acl object.  If the intent is to modify, always
 1052  * create a new acl and leave any cached acl in place.
 1053  */
 1054 int
 1055 zfs_acl_node_read(znode_t *zp, boolean_t have_lock, zfs_acl_t **aclpp,
 1056     boolean_t will_modify)
 1057 {
 1058         zfs_acl_t       *aclp;
 1059         int             aclsize;
 1060         int             acl_count;
 1061         zfs_acl_node_t  *aclnode;
 1062         zfs_acl_phys_t  znode_acl;
 1063         int             version;
 1064         int             error;
 1065 
 1066         ASSERT(MUTEX_HELD(&zp->z_acl_lock));
 1067         if (zp->z_zfsvfs->z_replay == B_FALSE)
 1068                 ASSERT_VOP_LOCKED(ZTOV(zp), __func__);
 1069 
 1070         if (zp->z_acl_cached && !will_modify) {
 1071                 *aclpp = zp->z_acl_cached;
 1072                 return (0);
 1073         }
 1074 
 1075         version = zfs_znode_acl_version(zp);
 1076 
 1077         if ((error = zfs_acl_znode_info(zp, &aclsize,
 1078             &acl_count, &znode_acl)) != 0) {
 1079                 goto done;
 1080         }
 1081 
 1082         aclp = zfs_acl_alloc(version);
 1083 
 1084         aclp->z_acl_count = acl_count;
 1085         aclp->z_acl_bytes = aclsize;
 1086 
 1087         aclnode = zfs_acl_node_alloc(aclsize);
 1088         aclnode->z_ace_count = aclp->z_acl_count;
 1089         aclnode->z_size = aclsize;
 1090 
 1091         if (!zp->z_is_sa) {
 1092                 if (znode_acl.z_acl_extern_obj) {
 1093                         error = dmu_read(zp->z_zfsvfs->z_os,
 1094                             znode_acl.z_acl_extern_obj, 0, aclnode->z_size,
 1095                             aclnode->z_acldata, DMU_READ_PREFETCH);
 1096                 } else {
 1097                         memcpy(aclnode->z_acldata, znode_acl.z_ace_data,
 1098                             aclnode->z_size);
 1099                 }
 1100         } else {
 1101                 error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DACL_ACES(zp->z_zfsvfs),
 1102                     aclnode->z_acldata, aclnode->z_size);
 1103         }
 1104 
 1105         if (error != 0) {
 1106                 zfs_acl_free(aclp);
 1107                 zfs_acl_node_free(aclnode);
 1108                 /* convert checksum errors into IO errors */
 1109                 if (error == ECKSUM)
 1110                         error = SET_ERROR(EIO);
 1111                 goto done;
 1112         }
 1113 
 1114         list_insert_head(&aclp->z_acl, aclnode);
 1115 
 1116         *aclpp = aclp;
 1117         if (!will_modify)
 1118                 zp->z_acl_cached = aclp;
 1119 done:
 1120         return (error);
 1121 }
 1122 
 1123 void
 1124 zfs_acl_data_locator(void **dataptr, uint32_t *length, uint32_t buflen,
 1125     boolean_t start, void *userdata)
 1126 {
 1127         (void) buflen;
 1128         zfs_acl_locator_cb_t *cb = (zfs_acl_locator_cb_t *)userdata;
 1129 
 1130         if (start) {
 1131                 cb->cb_acl_node = list_head(&cb->cb_aclp->z_acl);
 1132         } else {
 1133                 cb->cb_acl_node = list_next(&cb->cb_aclp->z_acl,
 1134                     cb->cb_acl_node);
 1135         }
 1136         ASSERT3P(cb->cb_acl_node, !=, NULL);
 1137         *dataptr = cb->cb_acl_node->z_acldata;
 1138         *length = cb->cb_acl_node->z_size;
 1139 }
 1140 
 1141 int
 1142 zfs_acl_chown_setattr(znode_t *zp)
 1143 {
 1144         int error;
 1145         zfs_acl_t *aclp;
 1146 
 1147         if (zp->z_zfsvfs->z_replay == B_FALSE) {
 1148                 ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
 1149                 ASSERT_VOP_IN_SEQC(ZTOV(zp));
 1150         }
 1151         ASSERT(MUTEX_HELD(&zp->z_acl_lock));
 1152 
 1153         if ((error = zfs_acl_node_read(zp, B_TRUE, &aclp, B_FALSE)) == 0)
 1154                 zp->z_mode = zfs_mode_compute(zp->z_mode, aclp,
 1155                     &zp->z_pflags, zp->z_uid, zp->z_gid);
 1156         return (error);
 1157 }
 1158 
 1159 /*
 1160  * common code for setting ACLs.
 1161  *
 1162  * This function is called from zfs_mode_update, zfs_perm_init, and zfs_setacl.
 1163  * zfs_setacl passes a non-NULL inherit pointer (ihp) to indicate that it's
 1164  * already checked the acl and knows whether to inherit.
 1165  */
 1166 int
 1167 zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx)
 1168 {
 1169         int                     error;
 1170         zfsvfs_t                *zfsvfs = zp->z_zfsvfs;
 1171         dmu_object_type_t       otype;
 1172         zfs_acl_locator_cb_t    locate = { 0 };
 1173         uint64_t                mode;
 1174         sa_bulk_attr_t          bulk[5];
 1175         uint64_t                ctime[2];
 1176         int                     count = 0;
 1177         zfs_acl_phys_t          acl_phys;
 1178 
 1179         if (zp->z_zfsvfs->z_replay == B_FALSE) {
 1180                 ASSERT_VOP_IN_SEQC(ZTOV(zp));
 1181         }
 1182 
 1183         mode = zp->z_mode;
 1184 
 1185         mode = zfs_mode_compute(mode, aclp, &zp->z_pflags,
 1186             zp->z_uid, zp->z_gid);
 1187 
 1188         zp->z_mode = mode;
 1189         SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
 1190             &mode, sizeof (mode));
 1191         SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
 1192             &zp->z_pflags, sizeof (zp->z_pflags));
 1193         SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
 1194             &ctime, sizeof (ctime));
 1195 
 1196         if (zp->z_acl_cached) {
 1197                 zfs_acl_free(zp->z_acl_cached);
 1198                 zp->z_acl_cached = NULL;
 1199         }
 1200 
 1201         /*
 1202          * Upgrade needed?
 1203          */
 1204         if (!zfsvfs->z_use_fuids) {
 1205                 otype = DMU_OT_OLDACL;
 1206         } else {
 1207                 if ((aclp->z_version == ZFS_ACL_VERSION_INITIAL) &&
 1208                     (zfsvfs->z_version >= ZPL_VERSION_FUID))
 1209                         zfs_acl_xform(zp, aclp, cr);
 1210                 ASSERT3U(aclp->z_version, >=, ZFS_ACL_VERSION_FUID);
 1211                 otype = DMU_OT_ACL;
 1212         }
 1213 
 1214         /*
 1215          * Arrgh, we have to handle old on disk format
 1216          * as well as newer (preferred) SA format.
 1217          */
 1218 
 1219         if (zp->z_is_sa) { /* the easy case, just update the ACL attribute */
 1220                 locate.cb_aclp = aclp;
 1221                 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_ACES(zfsvfs),
 1222                     zfs_acl_data_locator, &locate, aclp->z_acl_bytes);
 1223                 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_COUNT(zfsvfs),
 1224                     NULL, &aclp->z_acl_count, sizeof (uint64_t));
 1225         } else { /* Painful legacy way */
 1226                 zfs_acl_node_t *aclnode;
 1227                 uint64_t off = 0;
 1228                 uint64_t aoid;
 1229 
 1230                 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zfsvfs),
 1231                     &acl_phys, sizeof (acl_phys))) != 0)
 1232                         return (error);
 1233 
 1234                 aoid = acl_phys.z_acl_extern_obj;
 1235 
 1236                 if (aclp->z_acl_bytes > ZFS_ACE_SPACE) {
 1237                         /*
 1238                          * If ACL was previously external and we are now
 1239                          * converting to new ACL format then release old
 1240                          * ACL object and create a new one.
 1241                          */
 1242                         if (aoid &&
 1243                             aclp->z_version != acl_phys.z_acl_version) {
 1244                                 error = dmu_object_free(zfsvfs->z_os, aoid, tx);
 1245                                 if (error)
 1246                                         return (error);
 1247                                 aoid = 0;
 1248                         }
 1249                         if (aoid == 0) {
 1250                                 aoid = dmu_object_alloc(zfsvfs->z_os,
 1251                                     otype, aclp->z_acl_bytes,
 1252                                     otype == DMU_OT_ACL ?
 1253                                     DMU_OT_SYSACL : DMU_OT_NONE,
 1254                                     otype == DMU_OT_ACL ?
 1255                                     DN_OLD_MAX_BONUSLEN : 0, tx);
 1256                         } else {
 1257                                 (void) dmu_object_set_blocksize(zfsvfs->z_os,
 1258                                     aoid, aclp->z_acl_bytes, 0, tx);
 1259                         }
 1260                         acl_phys.z_acl_extern_obj = aoid;
 1261                         for (aclnode = list_head(&aclp->z_acl); aclnode;
 1262                             aclnode = list_next(&aclp->z_acl, aclnode)) {
 1263                                 if (aclnode->z_ace_count == 0)
 1264                                         continue;
 1265                                 dmu_write(zfsvfs->z_os, aoid, off,
 1266                                     aclnode->z_size, aclnode->z_acldata, tx);
 1267                                 off += aclnode->z_size;
 1268                         }
 1269                 } else {
 1270                         void *start = acl_phys.z_ace_data;
 1271                         /*
 1272                          * Migrating back embedded?
 1273                          */
 1274                         if (acl_phys.z_acl_extern_obj) {
 1275                                 error = dmu_object_free(zfsvfs->z_os,
 1276                                     acl_phys.z_acl_extern_obj, tx);
 1277                                 if (error)
 1278                                         return (error);
 1279                                 acl_phys.z_acl_extern_obj = 0;
 1280                         }
 1281 
 1282                         for (aclnode = list_head(&aclp->z_acl); aclnode;
 1283                             aclnode = list_next(&aclp->z_acl, aclnode)) {
 1284                                 if (aclnode->z_ace_count == 0)
 1285                                         continue;
 1286                                 memcpy(start, aclnode->z_acldata,
 1287                                     aclnode->z_size);
 1288                                 start = (caddr_t)start + aclnode->z_size;
 1289                         }
 1290                 }
 1291                 /*
 1292                  * If Old version then swap count/bytes to match old
 1293                  * layout of znode_acl_phys_t.
 1294                  */
 1295                 if (aclp->z_version == ZFS_ACL_VERSION_INITIAL) {
 1296                         acl_phys.z_acl_size = aclp->z_acl_count;
 1297                         acl_phys.z_acl_count = aclp->z_acl_bytes;
 1298                 } else {
 1299                         acl_phys.z_acl_size = aclp->z_acl_bytes;
 1300                         acl_phys.z_acl_count = aclp->z_acl_count;
 1301                 }
 1302                 acl_phys.z_acl_version = aclp->z_version;
 1303 
 1304                 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
 1305                     &acl_phys, sizeof (acl_phys));
 1306         }
 1307 
 1308         /*
 1309          * Replace ACL wide bits, but first clear them.
 1310          */
 1311         zp->z_pflags &= ~ZFS_ACL_WIDE_FLAGS;
 1312 
 1313         zp->z_pflags |= aclp->z_hints;
 1314 
 1315         if (ace_trivial_common(aclp, 0, zfs_ace_walk) == 0)
 1316                 zp->z_pflags |= ZFS_ACL_TRIVIAL;
 1317 
 1318         zfs_tstamp_update_setup(zp, STATE_CHANGED, NULL, ctime);
 1319         return (sa_bulk_update(zp->z_sa_hdl, bulk, count, tx));
 1320 }
 1321 
 1322 static void
 1323 zfs_acl_chmod(vtype_t vtype, uint64_t mode, boolean_t split, boolean_t trim,
 1324     zfs_acl_t *aclp)
 1325 {
 1326         void            *acep = NULL;
 1327         uint64_t        who;
 1328         int             new_count, new_bytes;
 1329         int             ace_size;
 1330         int             entry_type;
 1331         uint16_t        iflags, type;
 1332         uint32_t        access_mask;
 1333         zfs_acl_node_t  *newnode;
 1334         size_t          abstract_size = aclp->z_ops->ace_abstract_size();
 1335         void            *zacep;
 1336         boolean_t       isdir;
 1337         trivial_acl_t   masks;
 1338 
 1339         new_count = new_bytes = 0;
 1340 
 1341         isdir = (vtype == VDIR);
 1342 
 1343         acl_trivial_access_masks((mode_t)mode, isdir, &masks);
 1344 
 1345         newnode = zfs_acl_node_alloc((abstract_size * 6) + aclp->z_acl_bytes);
 1346 
 1347         zacep = newnode->z_acldata;
 1348         if (masks.allow0) {
 1349                 zfs_set_ace(aclp, zacep, masks.allow0, ALLOW, -1, ACE_OWNER);
 1350                 zacep = (void *)((uintptr_t)zacep + abstract_size);
 1351                 new_count++;
 1352                 new_bytes += abstract_size;
 1353         }
 1354         if (masks.deny1) {
 1355                 zfs_set_ace(aclp, zacep, masks.deny1, DENY, -1, ACE_OWNER);
 1356                 zacep = (void *)((uintptr_t)zacep + abstract_size);
 1357                 new_count++;
 1358                 new_bytes += abstract_size;
 1359         }
 1360         if (masks.deny2) {
 1361                 zfs_set_ace(aclp, zacep, masks.deny2, DENY, -1, OWNING_GROUP);
 1362                 zacep = (void *)((uintptr_t)zacep + abstract_size);
 1363                 new_count++;
 1364                 new_bytes += abstract_size;
 1365         }
 1366 
 1367         while ((acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask,
 1368             &iflags, &type))) {
 1369                 entry_type = (iflags & ACE_TYPE_FLAGS);
 1370                 /*
 1371                  * ACEs used to represent the file mode may be divided
 1372                  * into an equivalent pair of inherit-only and regular
 1373                  * ACEs, if they are inheritable.
 1374                  * Skip regular ACEs, which are replaced by the new mode.
 1375                  */
 1376                 if (split && (entry_type == ACE_OWNER ||
 1377                     entry_type == OWNING_GROUP ||
 1378                     entry_type == ACE_EVERYONE)) {
 1379                         if (!isdir || !(iflags &
 1380                             (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE)))
 1381                                 continue;
 1382                         /*
 1383                          * We preserve owner@, group@, or @everyone
 1384                          * permissions, if they are inheritable, by
 1385                          * copying them to inherit_only ACEs. This
 1386                          * prevents inheritable permissions from being
 1387                          * altered along with the file mode.
 1388                          */
 1389                         iflags |= ACE_INHERIT_ONLY_ACE;
 1390                 }
 1391 
 1392                 /*
 1393                  * If this ACL has any inheritable ACEs, mark that in
 1394                  * the hints (which are later masked into the pflags)
 1395                  * so create knows to do inheritance.
 1396                  */
 1397                 if (isdir && (iflags &
 1398                     (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE)))
 1399                         aclp->z_hints |= ZFS_INHERIT_ACE;
 1400 
 1401                 if ((type != ALLOW && type != DENY) ||
 1402                     (iflags & ACE_INHERIT_ONLY_ACE)) {
 1403                         switch (type) {
 1404                         case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
 1405                         case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
 1406                         case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
 1407                         case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
 1408                                 aclp->z_hints |= ZFS_ACL_OBJ_ACE;
 1409                                 break;
 1410                         }
 1411                 } else {
 1412                         /*
 1413                          * Limit permissions granted by ACEs to be no greater
 1414                          * than permissions of the requested group mode.
 1415                          * Applies when the "aclmode" property is set to
 1416                          * "groupmask".
 1417                          */
 1418                         if ((type == ALLOW) && trim)
 1419                                 access_mask &= masks.group;
 1420                 }
 1421                 zfs_set_ace(aclp, zacep, access_mask, type, who, iflags);
 1422                 ace_size = aclp->z_ops->ace_size(acep);
 1423                 zacep = (void *)((uintptr_t)zacep + ace_size);
 1424                 new_count++;
 1425                 new_bytes += ace_size;
 1426         }
 1427         zfs_set_ace(aclp, zacep, masks.owner, ALLOW, -1, ACE_OWNER);
 1428         zacep = (void *)((uintptr_t)zacep + abstract_size);
 1429         zfs_set_ace(aclp, zacep, masks.group, ALLOW, -1, OWNING_GROUP);
 1430         zacep = (void *)((uintptr_t)zacep + abstract_size);
 1431         zfs_set_ace(aclp, zacep, masks.everyone, ALLOW, -1, ACE_EVERYONE);
 1432 
 1433         new_count += 3;
 1434         new_bytes += abstract_size * 3;
 1435         zfs_acl_release_nodes(aclp);
 1436         aclp->z_acl_count = new_count;
 1437         aclp->z_acl_bytes = new_bytes;
 1438         newnode->z_ace_count = new_count;
 1439         newnode->z_size = new_bytes;
 1440         list_insert_tail(&aclp->z_acl, newnode);
 1441 }
 1442 
 1443 int
 1444 zfs_acl_chmod_setattr(znode_t *zp, zfs_acl_t **aclp, uint64_t mode)
 1445 {
 1446         int error = 0;
 1447 
 1448         mutex_enter(&zp->z_acl_lock);
 1449         if (zp->z_zfsvfs->z_replay == B_FALSE)
 1450                 ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
 1451         if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_DISCARD)
 1452                 *aclp = zfs_acl_alloc(zfs_acl_version_zp(zp));
 1453         else
 1454                 error = zfs_acl_node_read(zp, B_TRUE, aclp, B_TRUE);
 1455 
 1456         if (error == 0) {
 1457                 (*aclp)->z_hints = zp->z_pflags & V4_ACL_WIDE_FLAGS;
 1458                 zfs_acl_chmod(ZTOV(zp)->v_type, mode, B_TRUE,
 1459                     (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_GROUPMASK), *aclp);
 1460         }
 1461         mutex_exit(&zp->z_acl_lock);
 1462 
 1463         return (error);
 1464 }
 1465 
 1466 /*
 1467  * Should ACE be inherited?
 1468  */
 1469 static int
 1470 zfs_ace_can_use(vtype_t vtype, uint16_t acep_flags)
 1471 {
 1472         int     iflags = (acep_flags & 0xf);
 1473 
 1474         if ((vtype == VDIR) && (iflags & ACE_DIRECTORY_INHERIT_ACE))
 1475                 return (1);
 1476         else if (iflags & ACE_FILE_INHERIT_ACE)
 1477                 return (!((vtype == VDIR) &&
 1478                     (iflags & ACE_NO_PROPAGATE_INHERIT_ACE)));
 1479         return (0);
 1480 }
 1481 
 1482 /*
 1483  * inherit inheritable ACEs from parent
 1484  */
 1485 static zfs_acl_t *
 1486 zfs_acl_inherit(zfsvfs_t *zfsvfs, vtype_t vtype, zfs_acl_t *paclp,
 1487     uint64_t mode, boolean_t *need_chmod)
 1488 {
 1489         void            *pacep = NULL;
 1490         void            *acep;
 1491         zfs_acl_node_t  *aclnode;
 1492         zfs_acl_t       *aclp = NULL;
 1493         uint64_t        who;
 1494         uint32_t        access_mask;
 1495         uint16_t        iflags, newflags, type;
 1496         size_t          ace_size;
 1497         void            *data1, *data2;
 1498         size_t          data1sz, data2sz;
 1499         uint_t          aclinherit;
 1500         boolean_t       isdir = (vtype == VDIR);
 1501         boolean_t       isreg = (vtype == VREG);
 1502 
 1503         *need_chmod = B_TRUE;
 1504 
 1505         aclp = zfs_acl_alloc(paclp->z_version);
 1506         aclinherit = zfsvfs->z_acl_inherit;
 1507         if (aclinherit == ZFS_ACL_DISCARD || vtype == VLNK)
 1508                 return (aclp);
 1509 
 1510         while ((pacep = zfs_acl_next_ace(paclp, pacep, &who,
 1511             &access_mask, &iflags, &type))) {
 1512 
 1513                 /*
 1514                  * don't inherit bogus ACEs
 1515                  */
 1516                 if (!zfs_acl_valid_ace_type(type, iflags))
 1517                         continue;
 1518 
 1519                 /*
 1520                  * Check if ACE is inheritable by this vnode
 1521                  */
 1522                 if ((aclinherit == ZFS_ACL_NOALLOW && type == ALLOW) ||
 1523                     !zfs_ace_can_use(vtype, iflags))
 1524                         continue;
 1525 
 1526                 /*
 1527                  * If owner@, group@, or everyone@ inheritable
 1528                  * then zfs_acl_chmod() isn't needed.
 1529                  */
 1530                 if ((aclinherit == ZFS_ACL_PASSTHROUGH ||
 1531                     aclinherit == ZFS_ACL_PASSTHROUGH_X) &&
 1532                     ((iflags & (ACE_OWNER|ACE_EVERYONE)) ||
 1533                     ((iflags & OWNING_GROUP) == OWNING_GROUP)) &&
 1534                     (isreg || (isdir && (iflags & ACE_DIRECTORY_INHERIT_ACE))))
 1535                         *need_chmod = B_FALSE;
 1536 
 1537                 /*
 1538                  * Strip inherited execute permission from file if
 1539                  * not in mode
 1540                  */
 1541                 if (aclinherit == ZFS_ACL_PASSTHROUGH_X && type == ALLOW &&
 1542                     !isdir && ((mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)) {
 1543                         access_mask &= ~ACE_EXECUTE;
 1544                 }
 1545 
 1546                 /*
 1547                  * Strip write_acl and write_owner from permissions
 1548                  * when inheriting an ACE
 1549                  */
 1550                 if (aclinherit == ZFS_ACL_RESTRICTED && type == ALLOW) {
 1551                         access_mask &= ~RESTRICTED_CLEAR;
 1552                 }
 1553 
 1554                 ace_size = aclp->z_ops->ace_size(pacep);
 1555                 aclnode = zfs_acl_node_alloc(ace_size);
 1556                 list_insert_tail(&aclp->z_acl, aclnode);
 1557                 acep = aclnode->z_acldata;
 1558 
 1559                 zfs_set_ace(aclp, acep, access_mask, type,
 1560                     who, iflags|ACE_INHERITED_ACE);
 1561 
 1562                 /*
 1563                  * Copy special opaque data if any
 1564                  */
 1565                 if ((data1sz = paclp->z_ops->ace_data(pacep, &data1)) != 0) {
 1566                         data2sz = aclp->z_ops->ace_data(acep, &data2);
 1567                         VERIFY3U(data2sz, ==, data1sz);
 1568                         memcpy(data2, data1, data2sz);
 1569                 }
 1570 
 1571                 aclp->z_acl_count++;
 1572                 aclnode->z_ace_count++;
 1573                 aclp->z_acl_bytes += aclnode->z_size;
 1574                 newflags = aclp->z_ops->ace_flags_get(acep);
 1575 
 1576                 /*
 1577                  * If ACE is not to be inherited further, or if the vnode is
 1578                  * not a directory, remove all inheritance flags
 1579                  */
 1580                 if (!isdir || (iflags & ACE_NO_PROPAGATE_INHERIT_ACE)) {
 1581                         newflags &= ~ALL_INHERIT;
 1582                         aclp->z_ops->ace_flags_set(acep,
 1583                             newflags|ACE_INHERITED_ACE);
 1584                         continue;
 1585                 }
 1586 
 1587                 /*
 1588                  * This directory has an inheritable ACE
 1589                  */
 1590                 aclp->z_hints |= ZFS_INHERIT_ACE;
 1591 
 1592                 /*
 1593                  * If only FILE_INHERIT is set then turn on
 1594                  * inherit_only
 1595                  */
 1596                 if ((iflags & (ACE_FILE_INHERIT_ACE |
 1597                     ACE_DIRECTORY_INHERIT_ACE)) == ACE_FILE_INHERIT_ACE) {
 1598                         newflags |= ACE_INHERIT_ONLY_ACE;
 1599                         aclp->z_ops->ace_flags_set(acep,
 1600                             newflags|ACE_INHERITED_ACE);
 1601                 } else {
 1602                         newflags &= ~ACE_INHERIT_ONLY_ACE;
 1603                         aclp->z_ops->ace_flags_set(acep,
 1604                             newflags|ACE_INHERITED_ACE);
 1605                 }
 1606         }
 1607         if (zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED &&
 1608             aclp->z_acl_count != 0) {
 1609                 *need_chmod = B_FALSE;
 1610         }
 1611 
 1612         return (aclp);
 1613 }
 1614 
 1615 /*
 1616  * Create file system object initial permissions
 1617  * including inheritable ACEs.
 1618  * Also, create FUIDs for owner and group.
 1619  */
 1620 int
 1621 zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr,
 1622     vsecattr_t *vsecp, zfs_acl_ids_t *acl_ids, zuserns_t *mnt_ns)
 1623 {
 1624         int             error;
 1625         zfsvfs_t        *zfsvfs = dzp->z_zfsvfs;
 1626         zfs_acl_t       *paclp;
 1627         gid_t           gid;
 1628         boolean_t       need_chmod = B_TRUE;
 1629         boolean_t       trim = B_FALSE;
 1630         boolean_t       inherited = B_FALSE;
 1631 
 1632         if ((flag & IS_ROOT_NODE) == 0) {
 1633                 if (zfsvfs->z_replay == B_FALSE)
 1634                         ASSERT_VOP_ELOCKED(ZTOV(dzp), __func__);
 1635         } else
 1636                 ASSERT3P(dzp->z_vnode, ==, NULL);
 1637         memset(acl_ids, 0, sizeof (zfs_acl_ids_t));
 1638         acl_ids->z_mode = MAKEIMODE(vap->va_type, vap->va_mode);
 1639 
 1640         if (vsecp)
 1641                 if ((error = zfs_vsec_2_aclp(zfsvfs, vap->va_type, vsecp, cr,
 1642                     &acl_ids->z_fuidp, &acl_ids->z_aclp)) != 0)
 1643                         return (error);
 1644         /*
 1645          * Determine uid and gid.
 1646          */
 1647         if ((flag & IS_ROOT_NODE) || zfsvfs->z_replay ||
 1648             ((flag & IS_XATTR) && (vap->va_type == VDIR))) {
 1649                 acl_ids->z_fuid = zfs_fuid_create(zfsvfs,
 1650                     (uint64_t)vap->va_uid, cr,
 1651                     ZFS_OWNER, &acl_ids->z_fuidp);
 1652                 acl_ids->z_fgid = zfs_fuid_create(zfsvfs,
 1653                     (uint64_t)vap->va_gid, cr,
 1654                     ZFS_GROUP, &acl_ids->z_fuidp);
 1655                 gid = vap->va_gid;
 1656         } else {
 1657                 uid_t id = crgetuid(cr);
 1658                 if (IS_EPHEMERAL(id))
 1659                         id = UID_NOBODY;
 1660                 acl_ids->z_fuid = (uint64_t)id;
 1661                 acl_ids->z_fgid = 0;
 1662                 if (vap->va_mask & AT_GID)  {
 1663                         acl_ids->z_fgid = zfs_fuid_create(zfsvfs,
 1664                             (uint64_t)vap->va_gid,
 1665                             cr, ZFS_GROUP, &acl_ids->z_fuidp);
 1666                         gid = vap->va_gid;
 1667                         if (acl_ids->z_fgid != dzp->z_gid &&
 1668                             !groupmember(vap->va_gid, cr) &&
 1669                             secpolicy_vnode_create_gid(cr) != 0)
 1670                                 acl_ids->z_fgid = 0;
 1671                 }
 1672                 if (acl_ids->z_fgid == 0) {
 1673                         const char      *domain;
 1674                         uint32_t        rid;
 1675 
 1676                         acl_ids->z_fgid = dzp->z_gid;
 1677                         gid = zfs_fuid_map_id(zfsvfs, acl_ids->z_fgid,
 1678                             cr, ZFS_GROUP);
 1679 
 1680                         if (zfsvfs->z_use_fuids &&
 1681                             IS_EPHEMERAL(acl_ids->z_fgid)) {
 1682                                 domain =
 1683                                     zfs_fuid_idx_domain(&zfsvfs->z_fuid_idx,
 1684                                     FUID_INDEX(acl_ids->z_fgid));
 1685                                 rid = FUID_RID(acl_ids->z_fgid);
 1686                                 zfs_fuid_node_add(&acl_ids->z_fuidp,
 1687                                     domain, rid, FUID_INDEX(acl_ids->z_fgid),
 1688                                     acl_ids->z_fgid, ZFS_GROUP);
 1689                         }
 1690                 }
 1691         }
 1692 
 1693         /*
 1694          * If we're creating a directory, and the parent directory has the
 1695          * set-GID bit set, set in on the new directory.
 1696          * Otherwise, if the user is neither privileged nor a member of the
 1697          * file's new group, clear the file's set-GID bit.
 1698          */
 1699 
 1700         if (!(flag & IS_ROOT_NODE) && (dzp->z_mode & S_ISGID) &&
 1701             (vap->va_type == VDIR)) {
 1702                 acl_ids->z_mode |= S_ISGID;
 1703         } else {
 1704                 if ((acl_ids->z_mode & S_ISGID) &&
 1705                     secpolicy_vnode_setids_setgids(ZTOV(dzp), cr, gid) != 0)
 1706                         acl_ids->z_mode &= ~S_ISGID;
 1707         }
 1708 
 1709         if (acl_ids->z_aclp == NULL) {
 1710                 mutex_enter(&dzp->z_acl_lock);
 1711                 if (!(flag & IS_ROOT_NODE) &&
 1712                     (dzp->z_pflags & ZFS_INHERIT_ACE) &&
 1713                     !(dzp->z_pflags & ZFS_XATTR)) {
 1714                         VERIFY0(zfs_acl_node_read(dzp, B_TRUE,
 1715                             &paclp, B_FALSE));
 1716                         acl_ids->z_aclp = zfs_acl_inherit(zfsvfs,
 1717                             vap->va_type, paclp, acl_ids->z_mode, &need_chmod);
 1718                         inherited = B_TRUE;
 1719                 } else {
 1720                         acl_ids->z_aclp =
 1721                             zfs_acl_alloc(zfs_acl_version_zp(dzp));
 1722                         acl_ids->z_aclp->z_hints |= ZFS_ACL_TRIVIAL;
 1723                 }
 1724                 mutex_exit(&dzp->z_acl_lock);
 1725 
 1726                 if (need_chmod) {
 1727                         if (vap->va_type == VDIR)
 1728                                 acl_ids->z_aclp->z_hints |=
 1729                                     ZFS_ACL_AUTO_INHERIT;
 1730 
 1731                         if (zfsvfs->z_acl_mode == ZFS_ACL_GROUPMASK &&
 1732                             zfsvfs->z_acl_inherit != ZFS_ACL_PASSTHROUGH &&
 1733                             zfsvfs->z_acl_inherit != ZFS_ACL_PASSTHROUGH_X)
 1734                                 trim = B_TRUE;
 1735                         zfs_acl_chmod(vap->va_type, acl_ids->z_mode, B_FALSE,
 1736                             trim, acl_ids->z_aclp);
 1737                 }
 1738         }
 1739 
 1740         if (inherited || vsecp) {
 1741                 acl_ids->z_mode = zfs_mode_compute(acl_ids->z_mode,
 1742                     acl_ids->z_aclp, &acl_ids->z_aclp->z_hints,
 1743                     acl_ids->z_fuid, acl_ids->z_fgid);
 1744                 if (ace_trivial_common(acl_ids->z_aclp, 0, zfs_ace_walk) == 0)
 1745                         acl_ids->z_aclp->z_hints |= ZFS_ACL_TRIVIAL;
 1746         }
 1747 
 1748         return (0);
 1749 }
 1750 
 1751 /*
 1752  * Free ACL and fuid_infop, but not the acl_ids structure
 1753  */
 1754 void
 1755 zfs_acl_ids_free(zfs_acl_ids_t *acl_ids)
 1756 {
 1757         if (acl_ids->z_aclp)
 1758                 zfs_acl_free(acl_ids->z_aclp);
 1759         if (acl_ids->z_fuidp)
 1760                 zfs_fuid_info_free(acl_ids->z_fuidp);
 1761         acl_ids->z_aclp = NULL;
 1762         acl_ids->z_fuidp = NULL;
 1763 }
 1764 
 1765 boolean_t
 1766 zfs_acl_ids_overquota(zfsvfs_t *zv, zfs_acl_ids_t *acl_ids, uint64_t projid)
 1767 {
 1768         return (zfs_id_overquota(zv, DMU_USERUSED_OBJECT, acl_ids->z_fuid) ||
 1769             zfs_id_overquota(zv, DMU_GROUPUSED_OBJECT, acl_ids->z_fgid) ||
 1770             (projid != ZFS_DEFAULT_PROJID && projid != ZFS_INVALID_PROJID &&
 1771             zfs_id_overquota(zv, DMU_PROJECTUSED_OBJECT, projid)));
 1772 }
 1773 
 1774 /*
 1775  * Retrieve a file's ACL
 1776  */
 1777 int
 1778 zfs_getacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
 1779 {
 1780         zfs_acl_t       *aclp;
 1781         ulong_t         mask;
 1782         int             error;
 1783         int             count = 0;
 1784         int             largeace = 0;
 1785 
 1786         mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT |
 1787             VSA_ACE_ACLFLAGS | VSA_ACE_ALLTYPES);
 1788 
 1789         if (mask == 0)
 1790                 return (SET_ERROR(ENOSYS));
 1791 
 1792         if ((error = zfs_zaccess(zp, ACE_READ_ACL, 0, skipaclchk, cr, NULL)))
 1793                 return (error);
 1794 
 1795         mutex_enter(&zp->z_acl_lock);
 1796 
 1797         if (zp->z_zfsvfs->z_replay == B_FALSE)
 1798                 ASSERT_VOP_LOCKED(ZTOV(zp), __func__);
 1799         error = zfs_acl_node_read(zp, B_TRUE, &aclp, B_FALSE);
 1800         if (error != 0) {
 1801                 mutex_exit(&zp->z_acl_lock);
 1802                 return (error);
 1803         }
 1804 
 1805         /*
 1806          * Scan ACL to determine number of ACEs
 1807          */
 1808         if ((zp->z_pflags & ZFS_ACL_OBJ_ACE) && !(mask & VSA_ACE_ALLTYPES)) {
 1809                 void *zacep = NULL;
 1810                 uint64_t who;
 1811                 uint32_t access_mask;
 1812                 uint16_t type, iflags;
 1813 
 1814                 while ((zacep = zfs_acl_next_ace(aclp, zacep,
 1815                     &who, &access_mask, &iflags, &type))) {
 1816                         switch (type) {
 1817                         case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
 1818                         case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
 1819                         case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
 1820                         case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
 1821                                 largeace++;
 1822                                 continue;
 1823                         default:
 1824                                 count++;
 1825                         }
 1826                 }
 1827                 vsecp->vsa_aclcnt = count;
 1828         } else
 1829                 count = (int)aclp->z_acl_count;
 1830 
 1831         if (mask & VSA_ACECNT) {
 1832                 vsecp->vsa_aclcnt = count;
 1833         }
 1834 
 1835         if (mask & VSA_ACE) {
 1836                 size_t aclsz;
 1837 
 1838                 aclsz = count * sizeof (ace_t) +
 1839                     sizeof (ace_object_t) * largeace;
 1840 
 1841                 vsecp->vsa_aclentp = kmem_alloc(aclsz, KM_SLEEP);
 1842                 vsecp->vsa_aclentsz = aclsz;
 1843 
 1844                 if (aclp->z_version == ZFS_ACL_VERSION_FUID)
 1845                         zfs_copy_fuid_2_ace(zp->z_zfsvfs, aclp, cr,
 1846                             vsecp->vsa_aclentp, !(mask & VSA_ACE_ALLTYPES));
 1847                 else {
 1848                         zfs_acl_node_t *aclnode;
 1849                         void *start = vsecp->vsa_aclentp;
 1850 
 1851                         for (aclnode = list_head(&aclp->z_acl); aclnode;
 1852                             aclnode = list_next(&aclp->z_acl, aclnode)) {
 1853                                 memcpy(start, aclnode->z_acldata,
 1854                                     aclnode->z_size);
 1855                                 start = (caddr_t)start + aclnode->z_size;
 1856                         }
 1857                         ASSERT3U((caddr_t)start - (caddr_t)vsecp->vsa_aclentp,
 1858                             ==, aclp->z_acl_bytes);
 1859                 }
 1860         }
 1861         if (mask & VSA_ACE_ACLFLAGS) {
 1862                 vsecp->vsa_aclflags = 0;
 1863                 if (zp->z_pflags & ZFS_ACL_DEFAULTED)
 1864                         vsecp->vsa_aclflags |= ACL_DEFAULTED;
 1865                 if (zp->z_pflags & ZFS_ACL_PROTECTED)
 1866                         vsecp->vsa_aclflags |= ACL_PROTECTED;
 1867                 if (zp->z_pflags & ZFS_ACL_AUTO_INHERIT)
 1868                         vsecp->vsa_aclflags |= ACL_AUTO_INHERIT;
 1869         }
 1870 
 1871         mutex_exit(&zp->z_acl_lock);
 1872 
 1873         return (0);
 1874 }
 1875 
 1876 int
 1877 zfs_vsec_2_aclp(zfsvfs_t *zfsvfs, umode_t obj_type,
 1878     vsecattr_t *vsecp, cred_t *cr, zfs_fuid_info_t **fuidp, zfs_acl_t **zaclp)
 1879 {
 1880         zfs_acl_t *aclp;
 1881         zfs_acl_node_t *aclnode;
 1882         int aclcnt = vsecp->vsa_aclcnt;
 1883         int error;
 1884 
 1885         if (vsecp->vsa_aclcnt > MAX_ACL_ENTRIES || vsecp->vsa_aclcnt <= 0)
 1886                 return (SET_ERROR(EINVAL));
 1887 
 1888         aclp = zfs_acl_alloc(zfs_acl_version(zfsvfs->z_version));
 1889 
 1890         aclp->z_hints = 0;
 1891         aclnode = zfs_acl_node_alloc(aclcnt * sizeof (zfs_object_ace_t));
 1892         if (aclp->z_version == ZFS_ACL_VERSION_INITIAL) {
 1893                 if ((error = zfs_copy_ace_2_oldace(obj_type, aclp,
 1894                     (ace_t *)vsecp->vsa_aclentp, aclnode->z_acldata,
 1895                     aclcnt, &aclnode->z_size)) != 0) {
 1896                         zfs_acl_free(aclp);
 1897                         zfs_acl_node_free(aclnode);
 1898                         return (error);
 1899                 }
 1900         } else {
 1901                 if ((error = zfs_copy_ace_2_fuid(zfsvfs, obj_type, aclp,
 1902                     vsecp->vsa_aclentp, aclnode->z_acldata, aclcnt,
 1903                     &aclnode->z_size, fuidp, cr)) != 0) {
 1904                         zfs_acl_free(aclp);
 1905                         zfs_acl_node_free(aclnode);
 1906                         return (error);
 1907                 }
 1908         }
 1909         aclp->z_acl_bytes = aclnode->z_size;
 1910         aclnode->z_ace_count = aclcnt;
 1911         aclp->z_acl_count = aclcnt;
 1912         list_insert_head(&aclp->z_acl, aclnode);
 1913 
 1914         /*
 1915          * If flags are being set then add them to z_hints
 1916          */
 1917         if (vsecp->vsa_mask & VSA_ACE_ACLFLAGS) {
 1918                 if (vsecp->vsa_aclflags & ACL_PROTECTED)
 1919                         aclp->z_hints |= ZFS_ACL_PROTECTED;
 1920                 if (vsecp->vsa_aclflags & ACL_DEFAULTED)
 1921                         aclp->z_hints |= ZFS_ACL_DEFAULTED;
 1922                 if (vsecp->vsa_aclflags & ACL_AUTO_INHERIT)
 1923                         aclp->z_hints |= ZFS_ACL_AUTO_INHERIT;
 1924         }
 1925 
 1926         *zaclp = aclp;
 1927 
 1928         return (0);
 1929 }
 1930 
 1931 /*
 1932  * Set a file's ACL
 1933  */
 1934 int
 1935 zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
 1936 {
 1937         zfsvfs_t        *zfsvfs = zp->z_zfsvfs;
 1938         zilog_t         *zilog = zfsvfs->z_log;
 1939         ulong_t         mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT);
 1940         dmu_tx_t        *tx;
 1941         int             error;
 1942         zfs_acl_t       *aclp;
 1943         zfs_fuid_info_t *fuidp = NULL;
 1944         boolean_t       fuid_dirtied;
 1945         uint64_t        acl_obj;
 1946 
 1947         if (zp->z_zfsvfs->z_replay == B_FALSE)
 1948                 ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
 1949         if (mask == 0)
 1950                 return (SET_ERROR(ENOSYS));
 1951 
 1952         if (zp->z_pflags & ZFS_IMMUTABLE)
 1953                 return (SET_ERROR(EPERM));
 1954 
 1955         if ((error = zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr, NULL)))
 1956                 return (error);
 1957 
 1958         error = zfs_vsec_2_aclp(zfsvfs, ZTOV(zp)->v_type, vsecp, cr, &fuidp,
 1959             &aclp);
 1960         if (error)
 1961                 return (error);
 1962 
 1963         /*
 1964          * If ACL wide flags aren't being set then preserve any
 1965          * existing flags.
 1966          */
 1967         if (!(vsecp->vsa_mask & VSA_ACE_ACLFLAGS)) {
 1968                 aclp->z_hints |=
 1969                     (zp->z_pflags & V4_ACL_WIDE_FLAGS);
 1970         }
 1971 top:
 1972         mutex_enter(&zp->z_acl_lock);
 1973 
 1974         tx = dmu_tx_create(zfsvfs->z_os);
 1975 
 1976         dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
 1977 
 1978         fuid_dirtied = zfsvfs->z_fuid_dirty;
 1979         if (fuid_dirtied)
 1980                 zfs_fuid_txhold(zfsvfs, tx);
 1981 
 1982         /*
 1983          * If old version and ACL won't fit in bonus and we aren't
 1984          * upgrading then take out necessary DMU holds
 1985          */
 1986 
 1987         if ((acl_obj = zfs_external_acl(zp)) != 0) {
 1988                 if (zfsvfs->z_version >= ZPL_VERSION_FUID &&
 1989                     zfs_znode_acl_version(zp) <= ZFS_ACL_VERSION_INITIAL) {
 1990                         dmu_tx_hold_free(tx, acl_obj, 0,
 1991                             DMU_OBJECT_END);
 1992                         dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
 1993                             aclp->z_acl_bytes);
 1994                 } else {
 1995                         dmu_tx_hold_write(tx, acl_obj, 0, aclp->z_acl_bytes);
 1996                 }
 1997         } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) {
 1998                 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, aclp->z_acl_bytes);
 1999         }
 2000 
 2001         zfs_sa_upgrade_txholds(tx, zp);
 2002         error = dmu_tx_assign(tx, TXG_NOWAIT);
 2003         if (error) {
 2004                 mutex_exit(&zp->z_acl_lock);
 2005 
 2006                 if (error == ERESTART) {
 2007                         dmu_tx_wait(tx);
 2008                         dmu_tx_abort(tx);
 2009                         goto top;
 2010                 }
 2011                 dmu_tx_abort(tx);
 2012                 zfs_acl_free(aclp);
 2013                 return (error);
 2014         }
 2015 
 2016         error = zfs_aclset_common(zp, aclp, cr, tx);
 2017         ASSERT0(error);
 2018         ASSERT3P(zp->z_acl_cached, ==, NULL);
 2019         zp->z_acl_cached = aclp;
 2020 
 2021         if (fuid_dirtied)
 2022                 zfs_fuid_sync(zfsvfs, tx);
 2023 
 2024         zfs_log_acl(zilog, tx, zp, vsecp, fuidp);
 2025 
 2026         if (fuidp)
 2027                 zfs_fuid_info_free(fuidp);
 2028         dmu_tx_commit(tx);
 2029         mutex_exit(&zp->z_acl_lock);
 2030 
 2031         return (error);
 2032 }
 2033 
 2034 /*
 2035  * Check accesses of interest (AoI) against attributes of the dataset
 2036  * such as read-only.  Returns zero if no AoI conflict with dataset
 2037  * attributes, otherwise an appropriate errno is returned.
 2038  */
 2039 static int
 2040 zfs_zaccess_dataset_check(znode_t *zp, uint32_t v4_mode)
 2041 {
 2042         if ((v4_mode & WRITE_MASK) &&
 2043             (zp->z_zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) &&
 2044             (!IS_DEVVP(ZTOV(zp)) || (v4_mode & WRITE_MASK_ATTRS))) {
 2045                 return (SET_ERROR(EROFS));
 2046         }
 2047 
 2048         /*
 2049          * Intentionally allow ZFS_READONLY through here.
 2050          * See zfs_zaccess_common().
 2051          */
 2052         if ((v4_mode & WRITE_MASK_DATA) &&
 2053             (zp->z_pflags & ZFS_IMMUTABLE)) {
 2054                 return (SET_ERROR(EPERM));
 2055         }
 2056 
 2057         /*
 2058          * In FreeBSD we allow to modify directory's content is ZFS_NOUNLINK
 2059          * (sunlnk) is set. We just don't allow directory removal, which is
 2060          * handled in zfs_zaccess_delete().
 2061          */
 2062         if ((v4_mode & ACE_DELETE) &&
 2063             (zp->z_pflags & ZFS_NOUNLINK)) {
 2064                 return (EPERM);
 2065         }
 2066 
 2067         if (((v4_mode & (ACE_READ_DATA|ACE_EXECUTE)) &&
 2068             (zp->z_pflags & ZFS_AV_QUARANTINED))) {
 2069                 return (SET_ERROR(EACCES));
 2070         }
 2071 
 2072         return (0);
 2073 }
 2074 
 2075 /*
 2076  * The primary usage of this function is to loop through all of the
 2077  * ACEs in the znode, determining what accesses of interest (AoI) to
 2078  * the caller are allowed or denied.  The AoI are expressed as bits in
 2079  * the working_mode parameter.  As each ACE is processed, bits covered
 2080  * by that ACE are removed from the working_mode.  This removal
 2081  * facilitates two things.  The first is that when the working mode is
 2082  * empty (= 0), we know we've looked at all the AoI. The second is
 2083  * that the ACE interpretation rules don't allow a later ACE to undo
 2084  * something granted or denied by an earlier ACE.  Removing the
 2085  * discovered access or denial enforces this rule.  At the end of
 2086  * processing the ACEs, all AoI that were found to be denied are
 2087  * placed into the working_mode, giving the caller a mask of denied
 2088  * accesses.  Returns:
 2089  *      0               if all AoI granted
 2090  *      EACCESS         if the denied mask is non-zero
 2091  *      other error     if abnormal failure (e.g., IO error)
 2092  *
 2093  * A secondary usage of the function is to determine if any of the
 2094  * AoI are granted.  If an ACE grants any access in
 2095  * the working_mode, we immediately short circuit out of the function.
 2096  * This mode is chosen by setting anyaccess to B_TRUE.  The
 2097  * working_mode is not a denied access mask upon exit if the function
 2098  * is used in this manner.
 2099  */
 2100 static int
 2101 zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode,
 2102     boolean_t anyaccess, cred_t *cr)
 2103 {
 2104         zfsvfs_t        *zfsvfs = zp->z_zfsvfs;
 2105         zfs_acl_t       *aclp;
 2106         int             error;
 2107         uid_t           uid = crgetuid(cr);
 2108         uint64_t        who;
 2109         uint16_t        type, iflags;
 2110         uint16_t        entry_type;
 2111         uint32_t        access_mask;
 2112         uint32_t        deny_mask = 0;
 2113         zfs_ace_hdr_t   *acep = NULL;
 2114         boolean_t       checkit;
 2115         uid_t           gowner;
 2116         uid_t           fowner;
 2117 
 2118         zfs_fuid_map_ids(zp, cr, &fowner, &gowner);
 2119 
 2120         mutex_enter(&zp->z_acl_lock);
 2121 
 2122         if (zp->z_zfsvfs->z_replay == B_FALSE)
 2123                 ASSERT_VOP_LOCKED(ZTOV(zp), __func__);
 2124         error = zfs_acl_node_read(zp, B_TRUE, &aclp, B_FALSE);
 2125         if (error != 0) {
 2126                 mutex_exit(&zp->z_acl_lock);
 2127                 return (error);
 2128         }
 2129 
 2130         ASSERT3P(zp->z_acl_cached, !=, NULL);
 2131 
 2132         while ((acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask,
 2133             &iflags, &type))) {
 2134                 uint32_t mask_matched;
 2135 
 2136                 if (!zfs_acl_valid_ace_type(type, iflags))
 2137                         continue;
 2138 
 2139                 if (ZTOV(zp)->v_type == VDIR && (iflags & ACE_INHERIT_ONLY_ACE))
 2140                         continue;
 2141 
 2142                 /* Skip ACE if it does not affect any AoI */
 2143                 mask_matched = (access_mask & *working_mode);
 2144                 if (!mask_matched)
 2145                         continue;
 2146 
 2147                 entry_type = (iflags & ACE_TYPE_FLAGS);
 2148 
 2149                 checkit = B_FALSE;
 2150 
 2151                 switch (entry_type) {
 2152                 case ACE_OWNER:
 2153                         if (uid == fowner)
 2154                                 checkit = B_TRUE;
 2155                         break;
 2156                 case OWNING_GROUP:
 2157                         who = gowner;
 2158                         zfs_fallthrough;
 2159                 case ACE_IDENTIFIER_GROUP:
 2160                         checkit = zfs_groupmember(zfsvfs, who, cr);
 2161                         break;
 2162                 case ACE_EVERYONE:
 2163                         checkit = B_TRUE;
 2164                         break;
 2165 
 2166                 /* USER Entry */
 2167                 default:
 2168                         if (entry_type == 0) {
 2169                                 uid_t newid;
 2170 
 2171                                 newid = zfs_fuid_map_id(zfsvfs, who, cr,
 2172                                     ZFS_ACE_USER);
 2173                                 if (newid !=  UID_NOBODY &&
 2174                                     uid == newid)
 2175                                         checkit = B_TRUE;
 2176                                 break;
 2177                         } else {
 2178                                 mutex_exit(&zp->z_acl_lock);
 2179                                 return (SET_ERROR(EIO));
 2180                         }
 2181                 }
 2182 
 2183                 if (checkit) {
 2184                         if (type == DENY) {
 2185                                 DTRACE_PROBE3(zfs__ace__denies,
 2186                                     znode_t *, zp,
 2187                                     zfs_ace_hdr_t *, acep,
 2188                                     uint32_t, mask_matched);
 2189                                 deny_mask |= mask_matched;
 2190                         } else {
 2191                                 DTRACE_PROBE3(zfs__ace__allows,
 2192                                     znode_t *, zp,
 2193                                     zfs_ace_hdr_t *, acep,
 2194                                     uint32_t, mask_matched);
 2195                                 if (anyaccess) {
 2196                                         mutex_exit(&zp->z_acl_lock);
 2197                                         return (0);
 2198                                 }
 2199                         }
 2200                         *working_mode &= ~mask_matched;
 2201                 }
 2202 
 2203                 /* Are we done? */
 2204                 if (*working_mode == 0)
 2205                         break;
 2206         }
 2207 
 2208         mutex_exit(&zp->z_acl_lock);
 2209 
 2210         /* Put the found 'denies' back on the working mode */
 2211         if (deny_mask) {
 2212                 *working_mode |= deny_mask;
 2213                 return (SET_ERROR(EACCES));
 2214         } else if (*working_mode) {
 2215                 return (-1);
 2216         }
 2217 
 2218         return (0);
 2219 }
 2220 
 2221 /*
 2222  * Return true if any access whatsoever granted, we don't actually
 2223  * care what access is granted.
 2224  */
 2225 boolean_t
 2226 zfs_has_access(znode_t *zp, cred_t *cr)
 2227 {
 2228         uint32_t have = ACE_ALL_PERMS;
 2229 
 2230         if (zfs_zaccess_aces_check(zp, &have, B_TRUE, cr) != 0) {
 2231                 uid_t owner;
 2232 
 2233                 owner = zfs_fuid_map_id(zp->z_zfsvfs, zp->z_uid, cr, ZFS_OWNER);
 2234                 return (secpolicy_vnode_any_access(cr, ZTOV(zp), owner) == 0);
 2235         }
 2236         return (B_TRUE);
 2237 }
 2238 
 2239 static int
 2240 zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode,
 2241     boolean_t *check_privs, boolean_t skipaclchk, cred_t *cr)
 2242 {
 2243         zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 2244         int err;
 2245 
 2246         *working_mode = v4_mode;
 2247         *check_privs = B_TRUE;
 2248 
 2249         /*
 2250          * Short circuit empty requests
 2251          */
 2252         if (v4_mode == 0 || zfsvfs->z_replay) {
 2253                 *working_mode = 0;
 2254                 return (0);
 2255         }
 2256 
 2257         if ((err = zfs_zaccess_dataset_check(zp, v4_mode)) != 0) {
 2258                 *check_privs = B_FALSE;
 2259                 return (err);
 2260         }
 2261 
 2262         /*
 2263          * The caller requested that the ACL check be skipped.  This
 2264          * would only happen if the caller checked VOP_ACCESS() with a
 2265          * 32 bit ACE mask and already had the appropriate permissions.
 2266          */
 2267         if (skipaclchk) {
 2268                 *working_mode = 0;
 2269                 return (0);
 2270         }
 2271 
 2272         /*
 2273          * Note: ZFS_READONLY represents the "DOS R/O" attribute.
 2274          * When that flag is set, we should behave as if write access
 2275          * were not granted by anything in the ACL.  In particular:
 2276          * We _must_ allow writes after opening the file r/w, then
 2277          * setting the DOS R/O attribute, and writing some more.
 2278          * (Similar to how you can write after fchmod(fd, 0444).)
 2279          *
 2280          * Therefore ZFS_READONLY is ignored in the dataset check
 2281          * above, and checked here as if part of the ACL check.
 2282          * Also note: DOS R/O is ignored for directories.
 2283          */
 2284         if ((v4_mode & WRITE_MASK_DATA) &&
 2285             (ZTOV(zp)->v_type != VDIR) &&
 2286             (zp->z_pflags & ZFS_READONLY)) {
 2287                 return (SET_ERROR(EPERM));
 2288         }
 2289 
 2290         return (zfs_zaccess_aces_check(zp, working_mode, B_FALSE, cr));
 2291 }
 2292 
 2293 static int
 2294 zfs_zaccess_append(znode_t *zp, uint32_t *working_mode, boolean_t *check_privs,
 2295     cred_t *cr)
 2296 {
 2297         if (*working_mode != ACE_WRITE_DATA)
 2298                 return (SET_ERROR(EACCES));
 2299 
 2300         return (zfs_zaccess_common(zp, ACE_APPEND_DATA, working_mode,
 2301             check_privs, B_FALSE, cr));
 2302 }
 2303 
 2304 /*
 2305  * Check if VEXEC is allowed.
 2306  *
 2307  * This routine is based on zfs_fastaccesschk_execute which has slowpath
 2308  * calling zfs_zaccess. This would be incorrect on FreeBSD (see
 2309  * zfs_freebsd_access for the difference). Thus this variant let's the
 2310  * caller handle the slowpath (if necessary).
 2311  *
 2312  * On top of that we perform a lockless check for ZFS_NO_EXECS_DENIED.
 2313  *
 2314  * Safe access to znode_t is provided by the vnode lock.
 2315  */
 2316 int
 2317 zfs_fastaccesschk_execute(znode_t *zdp, cred_t *cr)
 2318 {
 2319         boolean_t is_attr;
 2320 
 2321         if (zdp->z_pflags & ZFS_AV_QUARANTINED)
 2322                 return (1);
 2323 
 2324         is_attr = ((zdp->z_pflags & ZFS_XATTR) &&
 2325             (ZTOV(zdp)->v_type == VDIR));
 2326         if (is_attr)
 2327                 return (1);
 2328 
 2329         if (zdp->z_pflags & ZFS_NO_EXECS_DENIED)
 2330                 return (0);
 2331 
 2332         return (1);
 2333 }
 2334 
 2335 
 2336 /*
 2337  * Determine whether Access should be granted/denied.
 2338  *
 2339  * The least priv subsystem is always consulted as a basic privilege
 2340  * can define any form of access.
 2341  */
 2342 int
 2343 zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr,
 2344     zuserns_t *mnt_ns)
 2345 {
 2346         uint32_t        working_mode;
 2347         int             error;
 2348         int             is_attr;
 2349         boolean_t       check_privs;
 2350         znode_t         *xzp = NULL;
 2351         znode_t         *check_zp = zp;
 2352         mode_t          needed_bits;
 2353         uid_t           owner;
 2354 
 2355         is_attr = ((zp->z_pflags & ZFS_XATTR) && (ZTOV(zp)->v_type == VDIR));
 2356 
 2357         /*
 2358          * In FreeBSD, we don't care about permissions of individual ADS.
 2359          * Note that not checking them is not just an optimization - without
 2360          * this shortcut, EA operations may bogusly fail with EACCES.
 2361          */
 2362         if (zp->z_pflags & ZFS_XATTR)
 2363                 return (0);
 2364 
 2365         owner = zfs_fuid_map_id(zp->z_zfsvfs, zp->z_uid, cr, ZFS_OWNER);
 2366 
 2367         /*
 2368          * Map the bits required to the standard vnode flags VREAD|VWRITE|VEXEC
 2369          * in needed_bits.  Map the bits mapped by working_mode (currently
 2370          * missing) in missing_bits.
 2371          * Call secpolicy_vnode_access2() with (needed_bits & ~checkmode),
 2372          * needed_bits.
 2373          */
 2374         needed_bits = 0;
 2375 
 2376         working_mode = mode;
 2377         if ((working_mode & (ACE_READ_ACL|ACE_READ_ATTRIBUTES)) &&
 2378             owner == crgetuid(cr))
 2379                 working_mode &= ~(ACE_READ_ACL|ACE_READ_ATTRIBUTES);
 2380 
 2381         if (working_mode & (ACE_READ_DATA|ACE_READ_NAMED_ATTRS|
 2382             ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_SYNCHRONIZE))
 2383                 needed_bits |= VREAD;
 2384         if (working_mode & (ACE_WRITE_DATA|ACE_WRITE_NAMED_ATTRS|
 2385             ACE_APPEND_DATA|ACE_WRITE_ATTRIBUTES|ACE_SYNCHRONIZE))
 2386                 needed_bits |= VWRITE;
 2387         if (working_mode & ACE_EXECUTE)
 2388                 needed_bits |= VEXEC;
 2389 
 2390         if ((error = zfs_zaccess_common(check_zp, mode, &working_mode,
 2391             &check_privs, skipaclchk, cr)) == 0) {
 2392                 if (is_attr)
 2393                         VN_RELE(ZTOV(xzp));
 2394                 return (secpolicy_vnode_access2(cr, ZTOV(zp), owner,
 2395                     needed_bits, needed_bits));
 2396         }
 2397 
 2398         if (error && !check_privs) {
 2399                 if (is_attr)
 2400                         VN_RELE(ZTOV(xzp));
 2401                 return (error);
 2402         }
 2403 
 2404         if (error && (flags & V_APPEND)) {
 2405                 error = zfs_zaccess_append(zp, &working_mode, &check_privs, cr);
 2406         }
 2407 
 2408         if (error && check_privs) {
 2409                 mode_t          checkmode = 0;
 2410                 vnode_t *check_vp = ZTOV(check_zp);
 2411 
 2412                 /*
 2413                  * First check for implicit owner permission on
 2414                  * read_acl/read_attributes
 2415                  */
 2416 
 2417                 ASSERT3U(working_mode, !=, 0);
 2418 
 2419                 if ((working_mode & (ACE_READ_ACL|ACE_READ_ATTRIBUTES) &&
 2420                     owner == crgetuid(cr)))
 2421                         working_mode &= ~(ACE_READ_ACL|ACE_READ_ATTRIBUTES);
 2422 
 2423                 if (working_mode & (ACE_READ_DATA|ACE_READ_NAMED_ATTRS|
 2424                     ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_SYNCHRONIZE))
 2425                         checkmode |= VREAD;
 2426                 if (working_mode & (ACE_WRITE_DATA|ACE_WRITE_NAMED_ATTRS|
 2427                     ACE_APPEND_DATA|ACE_WRITE_ATTRIBUTES|ACE_SYNCHRONIZE))
 2428                         checkmode |= VWRITE;
 2429                 if (working_mode & ACE_EXECUTE)
 2430                         checkmode |= VEXEC;
 2431 
 2432                 error = secpolicy_vnode_access2(cr, check_vp, owner,
 2433                     needed_bits & ~checkmode, needed_bits);
 2434 
 2435                 if (error == 0 && (working_mode & ACE_WRITE_OWNER))
 2436                         error = secpolicy_vnode_chown(check_vp, cr, owner);
 2437                 if (error == 0 && (working_mode & ACE_WRITE_ACL))
 2438                         error = secpolicy_vnode_setdac(check_vp, cr, owner);
 2439 
 2440                 if (error == 0 && (working_mode &
 2441                     (ACE_DELETE|ACE_DELETE_CHILD)))
 2442                         error = secpolicy_vnode_remove(check_vp, cr);
 2443 
 2444                 if (error == 0 && (working_mode & ACE_SYNCHRONIZE)) {
 2445                         error = secpolicy_vnode_chown(check_vp, cr, owner);
 2446                 }
 2447                 if (error == 0) {
 2448                         /*
 2449                          * See if any bits other than those already checked
 2450                          * for are still present.  If so then return EACCES
 2451                          */
 2452                         if (working_mode & ~(ZFS_CHECKED_MASKS)) {
 2453                                 error = SET_ERROR(EACCES);
 2454                         }
 2455                 }
 2456         } else if (error == 0) {
 2457                 error = secpolicy_vnode_access2(cr, ZTOV(zp), owner,
 2458                     needed_bits, needed_bits);
 2459         }
 2460 
 2461 
 2462         if (is_attr)
 2463                 VN_RELE(ZTOV(xzp));
 2464 
 2465         return (error);
 2466 }
 2467 
 2468 /*
 2469  * Translate traditional unix VREAD/VWRITE/VEXEC mode into
 2470  * NFSv4-style ZFS ACL format and call zfs_zaccess()
 2471  */
 2472 int
 2473 zfs_zaccess_rwx(znode_t *zp, mode_t mode, int flags, cred_t *cr,
 2474     zuserns_t *mnt_ns)
 2475 {
 2476         return (zfs_zaccess(zp, zfs_unix_to_v4(mode >> 6), flags, B_FALSE, cr,
 2477             mnt_ns));
 2478 }
 2479 
 2480 /*
 2481  * Access function for secpolicy_vnode_setattr
 2482  */
 2483 int
 2484 zfs_zaccess_unix(void *zp, int mode, cred_t *cr)
 2485 {
 2486         int v4_mode = zfs_unix_to_v4(mode >> 6);
 2487 
 2488         return (zfs_zaccess(zp, v4_mode, 0, B_FALSE, cr, NULL));
 2489 }
 2490 
 2491 static int
 2492 zfs_delete_final_check(znode_t *zp, znode_t *dzp,
 2493     mode_t available_perms, cred_t *cr)
 2494 {
 2495         int error;
 2496         uid_t downer;
 2497 
 2498         downer = zfs_fuid_map_id(dzp->z_zfsvfs, dzp->z_uid, cr, ZFS_OWNER);
 2499 
 2500         error = secpolicy_vnode_access2(cr, ZTOV(dzp),
 2501             downer, available_perms, VWRITE|VEXEC);
 2502 
 2503         if (error == 0)
 2504                 error = zfs_sticky_remove_access(dzp, zp, cr);
 2505 
 2506         return (error);
 2507 }
 2508 
 2509 /*
 2510  * Determine whether Access should be granted/deny, without
 2511  * consulting least priv subsystem.
 2512  *
 2513  * The following chart is the recommended NFSv4 enforcement for
 2514  * ability to delete an object.
 2515  *
 2516  *      -------------------------------------------------------
 2517  *      |   Parent Dir  |           Target Object Permissions |
 2518  *      |  permissions  |                                     |
 2519  *      -------------------------------------------------------
 2520  *      |               | ACL Allows | ACL Denies| Delete     |
 2521  *      |               |  Delete    |  Delete   | unspecified|
 2522  *      -------------------------------------------------------
 2523  *      |  ACL Allows   | Permit     | Permit    | Permit     |
 2524  *      |  DELETE_CHILD |                                     |
 2525  *      -------------------------------------------------------
 2526  *      |  ACL Denies   | Permit     | Deny      | Deny       |
 2527  *      |  DELETE_CHILD |            |           |            |
 2528  *      -------------------------------------------------------
 2529  *      | ACL specifies |            |           |            |
 2530  *      | only allow    | Permit     | Permit    | Permit     |
 2531  *      | write and     |            |           |            |
 2532  *      | execute       |            |           |            |
 2533  *      -------------------------------------------------------
 2534  *      | ACL denies    |            |           |            |
 2535  *      | write and     | Permit     | Deny      | Deny       |
 2536  *      | execute       |            |           |            |
 2537  *      -------------------------------------------------------
 2538  *         ^
 2539  *         |
 2540  *         No search privilege, can't even look up file?
 2541  *
 2542  */
 2543 int
 2544 zfs_zaccess_delete(znode_t *dzp, znode_t *zp, cred_t *cr, zuserns_t *mnt_ns)
 2545 {
 2546         uint32_t dzp_working_mode = 0;
 2547         uint32_t zp_working_mode = 0;
 2548         int dzp_error, zp_error;
 2549         mode_t available_perms;
 2550         boolean_t dzpcheck_privs = B_TRUE;
 2551         boolean_t zpcheck_privs = B_TRUE;
 2552 
 2553         /*
 2554          * We want specific DELETE permissions to
 2555          * take precedence over WRITE/EXECUTE.  We don't
 2556          * want an ACL such as this to mess us up.
 2557          * user:joe:write_data:deny,user:joe:delete:allow
 2558          *
 2559          * However, deny permissions may ultimately be overridden
 2560          * by secpolicy_vnode_access().
 2561          *
 2562          * We will ask for all of the necessary permissions and then
 2563          * look at the working modes from the directory and target object
 2564          * to determine what was found.
 2565          */
 2566 
 2567         if (zp->z_pflags & (ZFS_IMMUTABLE | ZFS_NOUNLINK))
 2568                 return (SET_ERROR(EPERM));
 2569 
 2570         /*
 2571          * First row
 2572          * If the directory permissions allow the delete, we are done.
 2573          */
 2574         if ((dzp_error = zfs_zaccess_common(dzp, ACE_DELETE_CHILD,
 2575             &dzp_working_mode, &dzpcheck_privs, B_FALSE, cr)) == 0)
 2576                 return (0);
 2577 
 2578         /*
 2579          * If target object has delete permission then we are done
 2580          */
 2581         if ((zp_error = zfs_zaccess_common(zp, ACE_DELETE, &zp_working_mode,
 2582             &zpcheck_privs, B_FALSE, cr)) == 0)
 2583                 return (0);
 2584 
 2585         ASSERT(dzp_error);
 2586         ASSERT(zp_error);
 2587 
 2588         if (!dzpcheck_privs)
 2589                 return (dzp_error);
 2590         if (!zpcheck_privs)
 2591                 return (zp_error);
 2592 
 2593         /*
 2594          * Second row
 2595          *
 2596          * If directory returns EACCES then delete_child was denied
 2597          * due to deny delete_child.  In this case send the request through
 2598          * secpolicy_vnode_remove().  We don't use zfs_delete_final_check()
 2599          * since that *could* allow the delete based on write/execute permission
 2600          * and we want delete permissions to override write/execute.
 2601          */
 2602 
 2603         if (dzp_error == EACCES) {
 2604                 /* XXXPJD: s/dzp/zp/ ? */
 2605                 return (secpolicy_vnode_remove(ZTOV(dzp), cr));
 2606         }
 2607         /*
 2608          * Third Row
 2609          * only need to see if we have write/execute on directory.
 2610          */
 2611 
 2612         dzp_error = zfs_zaccess_common(dzp, ACE_EXECUTE|ACE_WRITE_DATA,
 2613             &dzp_working_mode, &dzpcheck_privs, B_FALSE, cr);
 2614 
 2615         if (dzp_error != 0 && !dzpcheck_privs)
 2616                 return (dzp_error);
 2617 
 2618         /*
 2619          * Fourth row
 2620          */
 2621 
 2622         available_perms = (dzp_working_mode & ACE_WRITE_DATA) ? 0 : VWRITE;
 2623         available_perms |= (dzp_working_mode & ACE_EXECUTE) ? 0 : VEXEC;
 2624 
 2625         return (zfs_delete_final_check(zp, dzp, available_perms, cr));
 2626 
 2627 }
 2628 
 2629 int
 2630 zfs_zaccess_rename(znode_t *sdzp, znode_t *szp, znode_t *tdzp,
 2631     znode_t *tzp, cred_t *cr, zuserns_t *mnt_ns)
 2632 {
 2633         int add_perm;
 2634         int error;
 2635 
 2636         if (szp->z_pflags & ZFS_AV_QUARANTINED)
 2637                 return (SET_ERROR(EACCES));
 2638 
 2639         add_perm = (ZTOV(szp)->v_type == VDIR) ?
 2640             ACE_ADD_SUBDIRECTORY : ACE_ADD_FILE;
 2641 
 2642         /*
 2643          * Rename permissions are combination of delete permission +
 2644          * add file/subdir permission.
 2645          *
 2646          * BSD operating systems also require write permission
 2647          * on the directory being moved from one parent directory
 2648          * to another.
 2649          */
 2650         if (ZTOV(szp)->v_type == VDIR && ZTOV(sdzp) != ZTOV(tdzp)) {
 2651                 if ((error = zfs_zaccess(szp, ACE_WRITE_DATA, 0, B_FALSE, cr,
 2652                     mnt_ns)))
 2653                         return (error);
 2654         }
 2655 
 2656         /*
 2657          * first make sure we do the delete portion.
 2658          *
 2659          * If that succeeds then check for add_file/add_subdir permissions
 2660          */
 2661 
 2662         if ((error = zfs_zaccess_delete(sdzp, szp, cr, mnt_ns)))
 2663                 return (error);
 2664 
 2665         /*
 2666          * If we have a tzp, see if we can delete it?
 2667          */
 2668         if (tzp && (error = zfs_zaccess_delete(tdzp, tzp, cr, mnt_ns)))
 2669                 return (error);
 2670 
 2671         /*
 2672          * Now check for add permissions
 2673          */
 2674         error = zfs_zaccess(tdzp, add_perm, 0, B_FALSE, cr, mnt_ns);
 2675 
 2676         return (error);
 2677 }

Cache object: ae4cb07e30642f17d0196bd6ec5d88e1


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.