The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/openzfs/module/os/linux/spl/spl-zone.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 2021 Klara Systems, Inc.
    3  * All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  *
   14  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
   15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
   18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   24  * SUCH DAMAGE.
   25  */
   26 
   27 #include <sys/types.h>
   28 #include <sys/sysmacros.h>
   29 #include <sys/kmem.h>
   30 #include <linux/file.h>
   31 #include <linux/magic.h>
   32 #include <sys/zone.h>
   33 
   34 #if defined(CONFIG_USER_NS)
   35 #include <linux/statfs.h>
   36 #include <linux/proc_ns.h>
   37 #endif
   38 
   39 #include <sys/mutex.h>
   40 
/*
 * zone_datasets_lock is held by zone_dataset_attach(), zone_dataset_detach()
 * and zone_dataset_visible() while they walk or modify the lists below.
 */
static kmutex_t zone_datasets_lock;
/* Head of the list of zone_datasets_t entries (linked via zds_list). */
static struct list_head zone_datasets;
   43 
/*
 * Per-user-namespace record: one entry on the global zone_datasets list,
 * carrying the list of datasets attached to that namespace.
 */
typedef struct zone_datasets {
        struct list_head zds_list;      /* zone_datasets linkage */
        struct user_namespace *zds_userns; /* namespace reference */
        struct list_head zds_datasets;  /* datasets for the namespace */
} zone_datasets_t;
   49 
/*
 * A single dataset attached to a namespace. The name is stored inline in the
 * trailing flexible array; allocations are sized as
 * sizeof (zone_dataset_t) + zd_dsnamelen + 1 to leave room for the NUL.
 */
typedef struct zone_dataset {
        struct list_head zd_list;       /* zone_dataset linkage */
        size_t zd_dsnamelen;            /* length of name */
        char zd_dsname[];               /* name of the member dataset */
} zone_dataset_t;
   55 
   56 #if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
   57 /*
   58  * Returns:
   59  * - 0 on success
   60  * - EBADF if it cannot open the provided file descriptor
   61  * - ENOTTY if the file itself is a not a user namespace file. We want to
   62  *   intercept this error in the ZFS layer. We cannot just return one of the
   63  *   ZFS_ERR_* errors here as we want to preserve the seperation of the ZFS
   64  *   and the SPL layers.
   65  */
   66 static int
   67 user_ns_get(int fd, struct user_namespace **userns)
   68 {
   69         struct kstatfs st;
   70         struct file *nsfile;
   71         struct ns_common *ns;
   72         int error;
   73 
   74         if ((nsfile = fget(fd)) == NULL)
   75                 return (EBADF);
   76         if (vfs_statfs(&nsfile->f_path, &st) != 0) {
   77                 error = ENOTTY;
   78                 goto done;
   79         }
   80         if (st.f_type != NSFS_MAGIC) {
   81                 error = ENOTTY;
   82                 goto done;
   83         }
   84         ns = get_proc_ns(file_inode(nsfile));
   85         if (ns->ops->type != CLONE_NEWUSER) {
   86                 error = ENOTTY;
   87                 goto done;
   88         }
   89         *userns = container_of(ns, struct user_namespace, ns);
   90 
   91         error = 0;
   92 done:
   93         fput(nsfile);
   94 
   95         return (error);
   96 }
   97 #endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
   98 
   99 static unsigned int
  100 user_ns_zoneid(struct user_namespace *user_ns)
  101 {
  102         unsigned int r;
  103 
  104 #if defined(HAVE_USER_NS_COMMON_INUM)
  105         r = user_ns->ns.inum;
  106 #else
  107         r = user_ns->proc_inum;
  108 #endif
  109 
  110         return (r);
  111 }
  112 
  113 static struct zone_datasets *
  114 zone_datasets_lookup(unsigned int nsinum)
  115 {
  116         zone_datasets_t *zds;
  117 
  118         list_for_each_entry(zds, &zone_datasets, zds_list) {
  119                 if (user_ns_zoneid(zds->zds_userns) == nsinum)
  120                         return (zds);
  121         }
  122         return (NULL);
  123 }
  124 
  125 #if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
  126 static struct zone_dataset *
  127 zone_dataset_lookup(zone_datasets_t *zds, const char *dataset, size_t dsnamelen)
  128 {
  129         zone_dataset_t *zd;
  130 
  131         list_for_each_entry(zd, &zds->zds_datasets, zd_list) {
  132                 if (zd->zd_dsnamelen != dsnamelen)
  133                         continue;
  134                 if (strncmp(zd->zd_dsname, dataset, dsnamelen) == 0)
  135                         return (zd);
  136         }
  137 
  138         return (NULL);
  139 }
  140 
  141 static int
  142 zone_dataset_cred_check(cred_t *cred)
  143 {
  144 
  145         if (!uid_eq(cred->uid, GLOBAL_ROOT_UID))
  146                 return (EPERM);
  147 
  148         return (0);
  149 }
  150 #endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
  151 
  152 static int
  153 zone_dataset_name_check(const char *dataset, size_t *dsnamelen)
  154 {
  155 
  156         if (dataset[0] == '\0' || dataset[0] == '/')
  157                 return (ENOENT);
  158 
  159         *dsnamelen = strlen(dataset);
  160         /* Ignore trailing slash, if supplied. */
  161         if (dataset[*dsnamelen - 1] == '/')
  162                 (*dsnamelen)--;
  163 
  164         return (0);
  165 }
  166 
  167 int
  168 zone_dataset_attach(cred_t *cred, const char *dataset, int userns_fd)
  169 {
  170 #if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
  171         struct user_namespace *userns;
  172         zone_datasets_t *zds;
  173         zone_dataset_t *zd;
  174         int error;
  175         size_t dsnamelen;
  176 
  177         if ((error = zone_dataset_cred_check(cred)) != 0)
  178                 return (error);
  179         if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0)
  180                 return (error);
  181         if ((error = user_ns_get(userns_fd, &userns)) != 0)
  182                 return (error);
  183 
  184         mutex_enter(&zone_datasets_lock);
  185         zds = zone_datasets_lookup(user_ns_zoneid(userns));
  186         if (zds == NULL) {
  187                 zds = kmem_alloc(sizeof (zone_datasets_t), KM_SLEEP);
  188                 INIT_LIST_HEAD(&zds->zds_list);
  189                 INIT_LIST_HEAD(&zds->zds_datasets);
  190                 zds->zds_userns = userns;
  191                 /*
  192                  * Lock the namespace by incresing its refcount to prevent
  193                  * the namespace ID from being reused.
  194                  */
  195                 get_user_ns(userns);
  196                 list_add_tail(&zds->zds_list, &zone_datasets);
  197         } else {
  198                 zd = zone_dataset_lookup(zds, dataset, dsnamelen);
  199                 if (zd != NULL) {
  200                         mutex_exit(&zone_datasets_lock);
  201                         return (EEXIST);
  202                 }
  203         }
  204 
  205         zd = kmem_alloc(sizeof (zone_dataset_t) + dsnamelen + 1, KM_SLEEP);
  206         zd->zd_dsnamelen = dsnamelen;
  207         strlcpy(zd->zd_dsname, dataset, dsnamelen + 1);
  208         INIT_LIST_HEAD(&zd->zd_list);
  209         list_add_tail(&zd->zd_list, &zds->zds_datasets);
  210 
  211         mutex_exit(&zone_datasets_lock);
  212         return (0);
  213 #else
  214         return (ENXIO);
  215 #endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
  216 }
  217 EXPORT_SYMBOL(zone_dataset_attach);
  218 
  219 int
  220 zone_dataset_detach(cred_t *cred, const char *dataset, int userns_fd)
  221 {
  222 #if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
  223         struct user_namespace *userns;
  224         zone_datasets_t *zds;
  225         zone_dataset_t *zd;
  226         int error;
  227         size_t dsnamelen;
  228 
  229         if ((error = zone_dataset_cred_check(cred)) != 0)
  230                 return (error);
  231         if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0)
  232                 return (error);
  233         if ((error = user_ns_get(userns_fd, &userns)) != 0)
  234                 return (error);
  235 
  236         mutex_enter(&zone_datasets_lock);
  237         zds = zone_datasets_lookup(user_ns_zoneid(userns));
  238         if (zds != NULL)
  239                 zd = zone_dataset_lookup(zds, dataset, dsnamelen);
  240         if (zds == NULL || zd == NULL) {
  241                 mutex_exit(&zone_datasets_lock);
  242                 return (ENOENT);
  243         }
  244 
  245         list_del(&zd->zd_list);
  246         kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
  247 
  248         /* Prune the namespace entry if it has no more delegations. */
  249         if (list_empty(&zds->zds_datasets)) {
  250                 /*
  251                  * Decrease the refcount now that the namespace is no longer
  252                  * used. It is no longer necessary to prevent the namespace ID
  253                  * from being reused.
  254                  */
  255                 put_user_ns(userns);
  256                 list_del(&zds->zds_list);
  257                 kmem_free(zds, sizeof (*zds));
  258         }
  259 
  260         mutex_exit(&zone_datasets_lock);
  261         return (0);
  262 #else
  263         return (ENXIO);
  264 #endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
  265 }
  266 EXPORT_SYMBOL(zone_dataset_detach);
  267 
  268 /*
  269  * A dataset is visible if:
  270  * - It is a parent of a namespace entry.
  271  * - It is one of the namespace entries.
  272  * - It is a child of a namespace entry.
  273  *
  274  * A dataset is writable if:
  275  * - It is one of the namespace entries.
  276  * - It is a child of a namespace entry.
  277  *
  278  * The parent datasets of namespace entries are visible and
  279  * read-only to provide a path back to the root of the pool.
  280  */
  281 int
  282 zone_dataset_visible(const char *dataset, int *write)
  283 {
  284         zone_datasets_t *zds;
  285         zone_dataset_t *zd;
  286         size_t dsnamelen, zd_len;
  287         int visible;
  288 
  289         /* Default to read-only, in case visible is returned. */
  290         if (write != NULL)
  291                 *write = 0;
  292         if (zone_dataset_name_check(dataset, &dsnamelen) != 0)
  293                 return (0);
  294         if (INGLOBALZONE(curproc)) {
  295                 if (write != NULL)
  296                         *write = 1;
  297                 return (1);
  298         }
  299 
  300         mutex_enter(&zone_datasets_lock);
  301         zds = zone_datasets_lookup(crgetzoneid(curproc->cred));
  302         if (zds == NULL) {
  303                 mutex_exit(&zone_datasets_lock);
  304                 return (0);
  305         }
  306 
  307         visible = 0;
  308         list_for_each_entry(zd, &zds->zds_datasets, zd_list) {
  309                 zd_len = strlen(zd->zd_dsname);
  310                 if (zd_len > dsnamelen) {
  311                         /*
  312                          * The name of the namespace entry is longer than that
  313                          * of the dataset, so it could be that the dataset is a
  314                          * parent of the namespace entry.
  315                          */
  316                         visible = memcmp(zd->zd_dsname, dataset,
  317                             dsnamelen) == 0 &&
  318                             zd->zd_dsname[dsnamelen] == '/';
  319                         if (visible)
  320                                 break;
  321                 } else if (zd_len == dsnamelen) {
  322                         /*
  323                          * The name of the namespace entry is as long as that
  324                          * of the dataset, so perhaps the dataset itself is the
  325                          * namespace entry.
  326                          */
  327                         visible = memcmp(zd->zd_dsname, dataset, zd_len) == 0;
  328                         if (visible) {
  329                                 if (write != NULL)
  330                                         *write = 1;
  331                                 break;
  332                         }
  333                 } else {
  334                         /*
  335                          * The name of the namespace entry is shorter than that
  336                          * of the dataset, so perhaps the dataset is a child of
  337                          * the namespace entry.
  338                          */
  339                         visible = memcmp(zd->zd_dsname, dataset,
  340                             zd_len) == 0 && dataset[zd_len] == '/';
  341                         if (visible) {
  342                                 if (write != NULL)
  343                                         *write = 1;
  344                                 break;
  345                         }
  346                 }
  347         }
  348 
  349         mutex_exit(&zone_datasets_lock);
  350         return (visible);
  351 }
  352 EXPORT_SYMBOL(zone_dataset_visible);
  353 
  354 unsigned int
  355 global_zoneid(void)
  356 {
  357         unsigned int z = 0;
  358 
  359 #if defined(CONFIG_USER_NS)
  360         z = user_ns_zoneid(&init_user_ns);
  361 #endif
  362 
  363         return (z);
  364 }
  365 EXPORT_SYMBOL(global_zoneid);
  366 
  367 unsigned int
  368 crgetzoneid(const cred_t *cr)
  369 {
  370         unsigned int r = 0;
  371 
  372 #if defined(CONFIG_USER_NS)
  373         r = user_ns_zoneid(cr->user_ns);
  374 #endif
  375 
  376         return (r);
  377 }
  378 EXPORT_SYMBOL(crgetzoneid);
  379 
  380 boolean_t
  381 inglobalzone(proc_t *proc)
  382 {
  383 #if defined(CONFIG_USER_NS)
  384         return (proc->cred->user_ns == &init_user_ns);
  385 #else
  386         return (B_TRUE);
  387 #endif
  388 }
  389 EXPORT_SYMBOL(inglobalzone);
  390 
  391 int
  392 spl_zone_init(void)
  393 {
  394         mutex_init(&zone_datasets_lock, NULL, MUTEX_DEFAULT, NULL);
  395         INIT_LIST_HEAD(&zone_datasets);
  396         return (0);
  397 }
  398 
  399 void
  400 spl_zone_fini(void)
  401 {
  402         zone_datasets_t *zds;
  403         zone_dataset_t *zd;
  404 
  405         /*
  406          * It would be better to assert an empty zone_datasets, but since
  407          * there's no automatic mechanism for cleaning them up if the user
  408          * namespace is destroyed, just do it here, since spl is about to go
  409          * out of context.
  410          */
  411         while (!list_empty(&zone_datasets)) {
  412                 zds = list_entry(zone_datasets.next, zone_datasets_t, zds_list);
  413                 while (!list_empty(&zds->zds_datasets)) {
  414                         zd = list_entry(zds->zds_datasets.next,
  415                             zone_dataset_t, zd_list);
  416                         list_del(&zd->zd_list);
  417                         kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
  418                 }
  419                 put_user_ns(zds->zds_userns);
  420                 list_del(&zds->zds_list);
  421                 kmem_free(zds, sizeof (*zds));
  422         }
  423         mutex_destroy(&zone_datasets_lock);
  424 }

Cache object: 8de7da83f9e2451bb4c780953e30f284


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.