The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_jail.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 1999 Poul-Henning Kamp.
    5  * Copyright (c) 2008 Bjoern A. Zeeb.
    6  * Copyright (c) 2009 James Gritton.
    7  * All rights reserved.
    8  *
    9  * Redistribution and use in source and binary forms, with or without
   10  * modification, are permitted provided that the following conditions
   11  * are met:
   12  * 1. Redistributions of source code must retain the above copyright
   13  *    notice, this list of conditions and the following disclaimer.
   14  * 2. Redistributions in binary form must reproduce the above copyright
   15  *    notice, this list of conditions and the following disclaimer in the
   16  *    documentation and/or other materials provided with the distribution.
   17  *
   18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   28  * SUCH DAMAGE.
   29  */
   30 
   31 #include <sys/cdefs.h>
   32 __FBSDID("$FreeBSD$");
   33 
   34 #include "opt_ddb.h"
   35 #include "opt_inet.h"
   36 #include "opt_inet6.h"
   37 
   38 #include <sys/param.h>
   39 #include <sys/types.h>
   40 #include <sys/kernel.h>
   41 #include <sys/systm.h>
   42 #include <sys/errno.h>
   43 #include <sys/sysproto.h>
   44 #include <sys/malloc.h>
   45 #include <sys/osd.h>
   46 #include <sys/priv.h>
   47 #include <sys/proc.h>
   48 #include <sys/taskqueue.h>
   49 #include <sys/fcntl.h>
   50 #include <sys/jail.h>
   51 #include <sys/linker.h>
   52 #include <sys/lock.h>
   53 #include <sys/mman.h>
   54 #include <sys/mutex.h>
   55 #include <sys/racct.h>
   56 #include <sys/rctl.h>
   57 #include <sys/refcount.h>
   58 #include <sys/sx.h>
   59 #include <sys/sysent.h>
   60 #include <sys/namei.h>
   61 #include <sys/mount.h>
   62 #include <sys/queue.h>
   63 #include <sys/socket.h>
   64 #include <sys/syscallsubr.h>
   65 #include <sys/sysctl.h>
   66 #include <sys/uuid.h>
   67 #include <sys/vnode.h>
   68 
   69 #include <net/if.h>
   70 #include <net/vnet.h>
   71 
   72 #include <netinet/in.h>
   73 
   74 #ifdef DDB
   75 #include <ddb/ddb.h>
   76 #endif /* DDB */
   77 
   78 #include <security/mac/mac_framework.h>
   79 
   80 #define PRISON0_HOSTUUID_MODULE "hostuuid"
   81 
   82 MALLOC_DEFINE(M_PRISON, "prison", "Prison structures");
   83 static MALLOC_DEFINE(M_PRISON_RACCT, "prison_racct", "Prison racct structures");
   84 
   85 /* Keep struct prison prison0 and some code in kern_jail_set() readable. */
      /*
       * _PR_IP_SADDRSEL is the OR of the PR_IP*_SADDRSEL flags for whichever of
       * INET and INET6 are configured, or 0 when neither is.
       * NOTE(review): the two-flag expansion is not parenthesized, so it only
       * behaves as a single value where '|' binds as intended (for example a
       * chain of '|' terms); confirm before using it next to '&' or '~'.
       */
   86 #ifdef INET
   87 #ifdef INET6
   88 #define _PR_IP_SADDRSEL PR_IP4_SADDRSEL|PR_IP6_SADDRSEL
   89 #else
   90 #define _PR_IP_SADDRSEL PR_IP4_SADDRSEL
   91 #endif
   92 #else /* !INET */
   93 #ifdef INET6
   94 #define _PR_IP_SADDRSEL PR_IP6_SADDRSEL
   95 #else
   96 #define _PR_IP_SADDRSEL 0
   97 #endif
   98 #endif
   99 
  100 /* prison0 describes what is "real" about the system. */
      /*
       * Only constant fields are set here; prison0_init() later fills in
       * pr_cpuset, pr_osreldate and pr_osrelease, and may replace pr_hostuuid
       * with a preloaded value.  pr_flags always carries PR_HOST and
       * _PR_IP_SADDRSEL, plus PR_VNET on VIMAGE kernels.
       */
  101 struct prison prison0 = {
  102         .pr_id          = 0,
  103         .pr_name        = "",
  104         .pr_ref         = 1,
  105         .pr_uref        = 1,
  106         .pr_path        = "/",
  107         .pr_securelevel = -1,
  108         .pr_devfs_rsnum = 0,
  109         .pr_state       = PRISON_STATE_ALIVE,
  110         .pr_childmax    = JAIL_MAX,
  111         .pr_hostuuid    = DEFAULT_HOSTUUID,
  112         .pr_children    = LIST_HEAD_INITIALIZER(prison0.pr_children),
  113 #ifdef VIMAGE
  114         .pr_flags       = PR_HOST|PR_VNET|_PR_IP_SADDRSEL,
  115 #else
  116         .pr_flags       = PR_HOST|_PR_IP_SADDRSEL,
  117 #endif
  118         .pr_allow       = PR_ALLOW_ALL_STATIC,
  119 };
      /* prison0.pr_mtx is set up via MTX_SYSINIT as an MTX_DEF mutex. */
  120 MTX_SYSINIT(prison0, &prison0.pr_mtx, "jail mutex", MTX_DEF);
  121 
  122 struct bool_flags {
              /*
               * One boolean jail parameter: "name" is the parameter that turns
               * the bit on and "noname" the one that turns it off; "flag" is
               * the bit itself.  kern_jail() and kern_jail_set() read the flag
               * of pr_flag_allow entries with atomic_load_int(), hence the
               * volatile qualifier.
               */
  123         const char      *name;
  124         const char      *noname;
  125         volatile u_int   flag;
  126 };
  127 struct jailsys_flags {
              /*
               * One "jailsys" parameter: "name" is the parameter name;
               * kern_jail_set() ORs "disable" into the prison flags for
               * JAIL_SYS_DISABLE and "new" for JAIL_SYS_NEW.  A zero "disable"
               * makes JAIL_SYS_DISABLE fail with EINVAL for that parameter.
               */
  128         const char      *name;
  129         unsigned         disable;
  130         unsigned         new;
  131 };
  132 
  133 /* allprison, allprison_racct and lastprid are protected by allprison_lock. */
      /*
       * allprison_lock is an sx lock initialized through SX_SYSINIT; allprison
       * is a statically initialized tail queue head, allprison_racct a list
       * head of prison_racct entries, and lastprid starts out as 0.
       */
  134 struct  sx allprison_lock;
  135 SX_SYSINIT(allprison_lock, &allprison_lock, "allprison");
  136 struct  prisonlist allprison = TAILQ_HEAD_INITIALIZER(allprison);
  137 LIST_HEAD(, prison_racct) allprison_racct;
  138 int     lastprid = 0;
  139 
  140 static int get_next_prid(struct prison **insprp);
  141 static int do_jail_attach(struct thread *td, struct prison *pr, int drflags);
  142 static void prison_complete(void *context, int pending);
  143 static void prison_deref(struct prison *pr, int flags);
  144 static void prison_deref_kill(struct prison *pr, struct prisonlist *freeprison);
  145 static int prison_lock_xlock(struct prison *pr, int flags);
  146 static void prison_cleanup(struct prison *pr);
  147 static void prison_free_not_last(struct prison *pr);
  148 static void prison_proc_free_not_last(struct prison *pr);
  149 static void prison_set_allow_locked(struct prison *pr, unsigned flag,
  150     int enable);
  151 static char *prison_path(struct prison *pr1, struct prison *pr2);
  152 #ifdef RACCT
  153 static void prison_racct_attach(struct prison *pr);
  154 static void prison_racct_modify(struct prison *pr);
  155 static void prison_racct_detach(struct prison *pr);
  156 #endif
  157 
  158 /* Flags for prison_deref */
  159 #define PD_DEREF        0x01    /* Decrement pr_ref */
  160 #define PD_DEUREF       0x02    /* Decrement pr_uref */
  161 #define PD_KILL         0x04    /* Remove jail, kill processes, etc */
  162 #define PD_LOCKED       0x10    /* pr_mtx is held */
  163 #define PD_LIST_SLOCKED 0x20    /* allprison_lock is held shared */
  164 #define PD_LIST_XLOCKED 0x40    /* allprison_lock is held exclusive */
      /*
       * The two masks below are the ORs of the groups above: PD_OP_FLAGS is
       * 0x01|0x02|0x04 and PD_LOCK_FLAGS is 0x10|0x20|0x40.
       */
  165 #define PD_OP_FLAGS     0x07    /* Operation flags */
  166 #define PD_LOCK_FLAGS   0x70    /* Lock status flags */
  167 
  168 /*
  169  * Parameter names corresponding to PR_* flag values.  Size values are for kvm
  170  * as we cannot figure out the size of a sparse array, or an array without a
  171  * terminating entry.
       *
       * Each entry is {name, noname, flag}.  The ip4/ip6 entries exist only when
       * INET/INET6 are configured.  pr_flag_bool_size is the size of the whole
       * array in bytes, not its element count.
  172  */
  173 static struct bool_flags pr_flag_bool[] = {
  174         {"persist", "nopersist", PR_PERSIST},
  175 #ifdef INET
  176         {"ip4.saddrsel", "ip4.nosaddrsel", PR_IP4_SADDRSEL},
  177 #endif
  178 #ifdef INET6
  179         {"ip6.saddrsel", "ip6.nosaddrsel", PR_IP6_SADDRSEL},
  180 #endif
  181 };
  182 const size_t pr_flag_bool_size = sizeof(pr_flag_bool);
  183 
  184 static struct jailsys_flags pr_flag_jailsys[] = {
              /*
               * Entries are {name, disable, new}.  "host" and "vnet" have a
               * zero disable value, so kern_jail_set() rejects JAIL_SYS_DISABLE
               * for them with EINVAL; "ip4" and "ip6" use the same PR_IP*_USER
               * bit for both disable and new.
               */
  185         {"host", 0, PR_HOST},
  186 #ifdef VIMAGE
  187         {"vnet", 0, PR_VNET},
  188 #endif
  189 #ifdef INET
  190         {"ip4", PR_IP4_USER, PR_IP4_USER},
  191 #endif
  192 #ifdef INET6
  193         {"ip6", PR_IP6_USER, PR_IP6_USER},
  194 #endif
  195 };
      /* Size of the whole array in bytes, not its element count. */
  196 const size_t pr_flag_jailsys_size = sizeof(pr_flag_jailsys);
  197 
  198 /*
  199  * Make this array full-size so dynamic parameters can be added.
  200  * It is protected by prison0.mtx, but lockless reading is allowed
  201  * with an atomic check of the flag values.
       *
       * The array has NBBY * NBPW slots, of which only the leading ones are
       * statically populated; the rest start out zeroed.  The loops over this
       * table in kern_jail() and kern_jail_set() stop at the first entry whose
       * flag reads as 0, so a zero flag acts as the end-of-table marker.
  202  */
  203 static struct bool_flags pr_flag_allow[NBBY * NBPW] = {
  204         {"allow.set_hostname", "allow.noset_hostname", PR_ALLOW_SET_HOSTNAME},
  205         {"allow.sysvipc", "allow.nosysvipc", PR_ALLOW_SYSVIPC},
  206         {"allow.raw_sockets", "allow.noraw_sockets", PR_ALLOW_RAW_SOCKETS},
  207         {"allow.chflags", "allow.nochflags", PR_ALLOW_CHFLAGS},
  208         {"allow.mount", "allow.nomount", PR_ALLOW_MOUNT},
  209         {"allow.quotas", "allow.noquotas", PR_ALLOW_QUOTAS},
  210         {"allow.socket_af", "allow.nosocket_af", PR_ALLOW_SOCKET_AF},
  211         {"allow.mlock", "allow.nomlock", PR_ALLOW_MLOCK},
  212         {"allow.reserved_ports", "allow.noreserved_ports",
  213          PR_ALLOW_RESERVED_PORTS},
  214         {"allow.read_msgbuf", "allow.noread_msgbuf", PR_ALLOW_READ_MSGBUF},
  215         {"allow.unprivileged_proc_debug", "allow.nounprivileged_proc_debug",
  216          PR_ALLOW_UNPRIV_DEBUG},
  217         {"allow.suser", "allow.nosuser", PR_ALLOW_SUSER},
  218 };
      /* Starts as the static set; presumably grows with dynamic entries. */
  219 static unsigned pr_allow_all = PR_ALLOW_ALL_STATIC;
      /* Size of the whole (full-size) array in bytes, not its element count. */
  220 const size_t pr_flag_allow_size = sizeof(pr_flag_allow);
  221 
  222 #define JAIL_DEFAULT_ALLOW              (PR_ALLOW_SET_HOSTNAME | \
  223                                          PR_ALLOW_RESERVED_PORTS | \
  224                                          PR_ALLOW_UNPRIV_DEBUG | \
  225                                          PR_ALLOW_SUSER)
  226 #define JAIL_DEFAULT_ENFORCE_STATFS     2
  227 #define JAIL_DEFAULT_DEVFS_RSNUM        0
      /*
       * Run-time copies of the defaults above.  kern_jail() consults
       * jail_default_allow and jail_default_enforce_statfs when a non-jailed
       * caller creates a jail, and uses jail_max_af_ips as the upper bound on
       * the number of IPv4/IPv6 addresses it accepts.
       * NOTE(review): these are presumably exported as sysctls elsewhere in the
       * file; confirm against the SYSCTL definitions.
       */
  228 static unsigned jail_default_allow = JAIL_DEFAULT_ALLOW;
  229 static int jail_default_enforce_statfs = JAIL_DEFAULT_ENFORCE_STATFS;
  230 static int jail_default_devfs_rsnum = JAIL_DEFAULT_DEVFS_RSNUM;
  231 #if defined(INET) || defined(INET6)
  232 static unsigned jail_max_af_ips = 255;
  233 #endif
  234 
  235 /*
  236  * Initialize the parts of prison0 that can't be static-initialized with
  237  * constants.  This is called from proc0_init() after creating thread0 cpuset.
       *
       * Sets pr_cpuset (a new reference on thread0's cpuset), pr_osreldate and
       * pr_osrelease, then looks for preloaded data of type
       * PRISON0_HOSTUUID_MODULE.  If it is found and its contents, with trailing
       * bytes <= 0x20 trimmed, pass validate_uuid(), they replace the statically
       * initialized pr_hostuuid; otherwise pr_hostuuid is left untouched.
       * Diagnostics are printed only when bootverbose is set.
  238  */
  239 void
  240 prison0_init(void)
  241 {
  242         uint8_t *file, *data;
  243         size_t size;
  244         char buf[sizeof(prison0.pr_hostuuid)];
  245         bool valid;
  246 
  247         prison0.pr_cpuset = cpuset_ref(thread0.td_cpuset);
  248         prison0.pr_osreldate = osreldate;
  249         strlcpy(prison0.pr_osrelease, osrelease, sizeof(prison0.pr_osrelease));
  250 
  251         /* If we have a preloaded hostuuid, use it. */
  252         file = preload_search_by_type(PRISON0_HOSTUUID_MODULE);
  253         if (file != NULL) {
  254                 data = preload_fetch_addr(file);
  255                 size = preload_fetch_size(file);
  256                 if (data != NULL) {
  257                         /*
  258                          * The preloaded data may include trailing whitespace, almost
  259                          * certainly a newline; skip over any whitespace or
  260                          * non-printable characters to be safe.
  261                          */
  262                         while (size > 0 && data[size - 1] <= 0x20) {
  263                                 size--;
  264                         }
  265 
  266                         valid = false;
  267 
  268                         /*
  269                          * Not NUL-terminated when passed from loader, but
  270                          * validate_uuid requires that due to using sscanf (as
  271                          * does the subsequent strlcpy, since it still reads
  272                          * past the given size to return the true length);
  273                          * bounce to a temporary buffer to fix.
  274                          */
                              /* No room left in buf for the terminator: malformed. */
  275                         if (size >= sizeof(buf))
  276                                 goto done;
  277 
  278                         memcpy(buf, data, size);
  279                         buf[size] = '\0';
  280 
  281                         if (validate_uuid(buf, size, NULL, 0) != 0)
  282                                 goto done;
  283 
  284                         valid = true;
  285                         (void)strlcpy(prison0.pr_hostuuid, buf,
  286                             sizeof(prison0.pr_hostuuid));
  287 
                              /*
                               * Reached on both success and failure; "valid"
                               * tells them apart.  The message prints the raw
                               * preload bytes, bounded by the trimmed size since
                               * they are not NUL-terminated.
                               */
  288 done:
  289                         if (bootverbose && !valid) {
  290                                 printf("hostuuid: preload data malformed: '%.*s'\n",
  291                                     (int)size, data);
  292                         }
  293                 }
  294         }
              /* Report whichever hostuuid ended up in effect. */
  295         if (bootverbose)
  296                 printf("hostuuid: using %s\n", prison0.pr_hostuuid);
  297 }
  298 
  299 /*
  300  * struct jail_args {
  301  *      struct jail *jail;
  302  * };
       *
       * Legacy jail system call entry point.  Reads the leading 32-bit version
       * word of the user structure, converts the versioned layout into a
       * struct jail and hands it to kern_jail().  Versions 0 and 2 are accepted;
       * version 1 and anything else return EINVAL.  copyin() failures are
       * returned as-is.
       *
       * NOTE(review): the version word is fetched once for the switch and again
       * as part of the full structure copy, so j.version is not guaranteed to
       * equal the value switched on if user memory changes in between.
  303  */
  304 int
  305 sys_jail(struct thread *td, struct jail_args *uap)
  306 {
  307         uint32_t version;
  308         int error;
  309         struct jail j;
  310 
  311         error = copyin(uap->jail, &version, sizeof(uint32_t));
  312         if (error)
  313                 return (error);
  314 
  315         switch (version) {
  316         case 0:
  317         {
  318                 struct jail_v0 j0;
  319 
  320                 /* FreeBSD single IPv4 jails. */
                      /*
                       * Zero j first so every field the v0 layout lacks
                       * (jailname, ip4, ip6, ip6s, ...) reads as NULL/0.
                       */
  321                 bzero(&j, sizeof(struct jail));
  322                 error = copyin(uap->jail, &j0, sizeof(struct jail_v0));
  323                 if (error)
  324                         return (error);
  325                 j.version = j0.version;
  326                 j.path = j0.path;
  327                 j.hostname = j0.hostname;
                      /*
                       * For version 0 the ip4s field carries the address itself
                       * rather than a count; kern_jail() stores it as s_addr.
                       */
  328                 j.ip4s = htonl(j0.ip_number);   /* jail_v0 is host order */
  329                 break;
  330         }
  331 
  332         case 1:
  333                 /*
  334                  * Version 1 was used by multi-IPv4 jail implementations
  335                  * that never made it into the official kernel.
  336                  */
  337                 return (EINVAL);
  338 
  339         case 2: /* JAIL_API_VERSION */
  340                 /* FreeBSD multi-IPv4/IPv6,noIP jails. */
  341                 error = copyin(uap->jail, &j, sizeof(struct jail));
  342                 if (error)
  343                         return (error);
  344                 break;
  345 
  346         default:
  347                 /* Sci-Fi jails are not supported, sorry. */
  348                 return (EINVAL);
  349         }
  350         return (kern_jail(td, &j));
  351 }
  352 
      /*
       * Translate a legacy struct jail into the name/value iovec form taken by
       * kern_jail_set() and create-and-attach the jail (JAIL_CREATE |
       * JAIL_ATTACH).
       *
       * j is a kernel copy of the user structure, but its path, hostname,
       * jailname, ip4 and ip6 members are still user pointers and are copied in
       * here.  Returns EINVAL when an address count exceeds jail_max_af_ips or
       * names an address family that is not configured, any copyin()/copyinstr()
       * error, or otherwise the result of kern_jail_set().
       */
  353 int
  354 kern_jail(struct thread *td, struct jail *j)
  355 {
              /*
               * Two iovecs (name, value) per parameter: four fixed parameters
               * (enforce_statfs, path, host.hostname, name), one per possible
               * allow flag, plus ip4.addr and ip6.addr when configured.
               */
  356         struct iovec optiov[2 * (4 + nitems(pr_flag_allow)
  357 #ifdef INET
  358                             + 1
  359 #endif
  360 #ifdef INET6
  361                             + 1
  362 #endif
  363                             )];
  364         struct uio opt;
  365         char *u_path, *u_hostname, *u_name;
  366         struct bool_flags *bf;
  367 #ifdef INET
  368         uint32_t ip4s;
  369         struct in_addr *u_ip4;
  370 #endif
  371 #ifdef INET6
  372         struct in6_addr *u_ip6;
  373 #endif
  374         size_t tmplen;
  375         int error, enforce_statfs;
  376 
              /*
               * Zero every iovec up front; the allow-flag loop below relies on
               * this to leave its value slots empty.  The uio describes kernel
               * memory (UIO_SYSSPACE).
               */
  377         bzero(&optiov, sizeof(optiov));
  378         opt.uio_iov = optiov;
  379         opt.uio_iovcnt = 0;
  380         opt.uio_offset = -1;
  381         opt.uio_resid = -1;
  382         opt.uio_segflg = UIO_SYSSPACE;
  383         opt.uio_rw = UIO_READ;
  384         opt.uio_td = td;
  385 
  386         /* Set permissions for top-level jails from sysctls. */
  387         if (!jailed(td->td_ucred)) {
                      /*
                       * Walk the populated allow entries (a zero flag ends the
                       * table) and emit either the "allow.x" or the "allow.nox"
                       * name depending on jail_default_allow.  Only the name
                       * iovec is filled in; the paired value iovec stays zeroed.
                       */
  388                 for (bf = pr_flag_allow;
  389                      bf < pr_flag_allow + nitems(pr_flag_allow) &&
  390                         atomic_load_int(&bf->flag) != 0;
  391                      bf++) {
  392                         optiov[opt.uio_iovcnt].iov_base = __DECONST(char *,
  393                             (jail_default_allow & bf->flag)
  394                             ? bf->name : bf->noname);
  395                         optiov[opt.uio_iovcnt].iov_len =
  396                             strlen(optiov[opt.uio_iovcnt].iov_base) + 1;
  397                         opt.uio_iovcnt += 2;
  398                 }
  399                 optiov[opt.uio_iovcnt].iov_base = "enforce_statfs";
  400                 optiov[opt.uio_iovcnt].iov_len = sizeof("enforce_statfs");
  401                 opt.uio_iovcnt++;
  402                 enforce_statfs = jail_default_enforce_statfs;
  403                 optiov[opt.uio_iovcnt].iov_base = &enforce_statfs;
  404                 optiov[opt.uio_iovcnt].iov_len = sizeof(enforce_statfs);
  405                 opt.uio_iovcnt++;
  406         }
  407 
              /*
               * Size one scratch allocation for path, hostname, jail name and
               * the address arrays.  The count checks happen before malloc(),
               * so these early returns have nothing to free.
               */
  408         tmplen = MAXPATHLEN + MAXHOSTNAMELEN + MAXHOSTNAMELEN;
  409 #ifdef INET
                      /* Version 0 always describes exactly one IPv4 address. */
  410         ip4s = (j->version == 0) ? 1 : j->ip4s;
  411         if (ip4s > jail_max_af_ips)
  412                 return (EINVAL);
  413         tmplen += ip4s * sizeof(struct in_addr);
  414 #else
  415         if (j->ip4s > 0)
  416                 return (EINVAL);
  417 #endif
  418 #ifdef INET6
  419         if (j->ip6s > jail_max_af_ips)
  420                 return (EINVAL);
  421         tmplen += j->ip6s * sizeof(struct in6_addr);
  422 #else
  423         if (j->ip6s > 0)
  424                 return (EINVAL);
  425 #endif
              /*
               * Carve the buffer: [path][hostname][name][ip4 array][ip6 array].
               * Everything hangs off u_path, so freeing u_path releases it all.
               */
  426         u_path = malloc(tmplen, M_TEMP, M_WAITOK);
  427         u_hostname = u_path + MAXPATHLEN;
  428         u_name = u_hostname + MAXHOSTNAMELEN;
  429 #ifdef INET
  430         u_ip4 = (struct in_addr *)(u_name + MAXHOSTNAMELEN);
  431 #endif
  432 #ifdef INET6
  433 #ifdef INET
  434         u_ip6 = (struct in6_addr *)(u_ip4 + ip4s);
  435 #else
  436         u_ip6 = (struct in6_addr *)(u_name + MAXHOSTNAMELEN);
  437 #endif
  438 #endif
              /*
               * For each string parameter, copyinstr() writes the copied length
               * (including the NUL) straight into the value iovec's iov_len.
               */
  439         optiov[opt.uio_iovcnt].iov_base = "path";
  440         optiov[opt.uio_iovcnt].iov_len = sizeof("path");
  441         opt.uio_iovcnt++;
  442         optiov[opt.uio_iovcnt].iov_base = u_path;
  443         error = copyinstr(j->path, u_path, MAXPATHLEN,
  444             &optiov[opt.uio_iovcnt].iov_len);
  445         if (error) {
  446                 free(u_path, M_TEMP);
  447                 return (error);
  448         }
  449         opt.uio_iovcnt++;
  450         optiov[opt.uio_iovcnt].iov_base = "host.hostname";
  451         optiov[opt.uio_iovcnt].iov_len = sizeof("host.hostname");
  452         opt.uio_iovcnt++;
  453         optiov[opt.uio_iovcnt].iov_base = u_hostname;
  454         error = copyinstr(j->hostname, u_hostname, MAXHOSTNAMELEN,
  455             &optiov[opt.uio_iovcnt].iov_len);
  456         if (error) {
  457                 free(u_path, M_TEMP);
  458                 return (error);
  459         }
  460         opt.uio_iovcnt++;
              /* The jail name is optional; omit the parameter when unset. */
  461         if (j->jailname != NULL) {
  462                 optiov[opt.uio_iovcnt].iov_base = "name";
  463                 optiov[opt.uio_iovcnt].iov_len = sizeof("name");
  464                 opt.uio_iovcnt++;
  465                 optiov[opt.uio_iovcnt].iov_base = u_name;
  466                 error = copyinstr(j->jailname, u_name, MAXHOSTNAMELEN,
  467                     &optiov[opt.uio_iovcnt].iov_len);
  468                 if (error) {
  469                         free(u_path, M_TEMP);
  470                         return (error);
  471                 }
  472                 opt.uio_iovcnt++;
  473         }
  474 #ifdef INET
  475         optiov[opt.uio_iovcnt].iov_base = "ip4.addr";
  476         optiov[opt.uio_iovcnt].iov_len = sizeof("ip4.addr");
  477         opt.uio_iovcnt++;
  478         optiov[opt.uio_iovcnt].iov_base = u_ip4;
  479         optiov[opt.uio_iovcnt].iov_len = ip4s * sizeof(struct in_addr);
              /*
               * Version 0 passes the single address in j->ip4s itself; later
               * versions pass a user pointer to an array of ip4s addresses.
               */
  480         if (j->version == 0)
  481                 u_ip4->s_addr = j->ip4s;
  482         else {
  483                 error = copyin(j->ip4, u_ip4, optiov[opt.uio_iovcnt].iov_len);
  484                 if (error) {
  485                         free(u_path, M_TEMP);
  486                         return (error);
  487                 }
  488         }
  489         opt.uio_iovcnt++;
  490 #endif
  491 #ifdef INET6
  492         optiov[opt.uio_iovcnt].iov_base = "ip6.addr";
  493         optiov[opt.uio_iovcnt].iov_len = sizeof("ip6.addr");
  494         opt.uio_iovcnt++;
  495         optiov[opt.uio_iovcnt].iov_base = u_ip6;
  496         optiov[opt.uio_iovcnt].iov_len = j->ip6s * sizeof(struct in6_addr);
  497         error = copyin(j->ip6, u_ip6, optiov[opt.uio_iovcnt].iov_len);
  498         if (error) {
  499                 free(u_path, M_TEMP);
  500                 return (error);
  501         }
  502         opt.uio_iovcnt++;
  503 #endif
              /* Sanity check that the optiov sizing above still holds. */
  504         KASSERT(opt.uio_iovcnt <= nitems(optiov),
  505                 ("kern_jail: too many iovecs (%d)", opt.uio_iovcnt));
  506         error = kern_jail_set(td, &opt, JAIL_CREATE | JAIL_ATTACH);
  507         free(u_path, M_TEMP);
  508         return (error);
  509 }
  510 
  511 /*
  512  * struct jail_set_args {
  513  *      struct iovec *iovp;
  514  *      unsigned int iovcnt;
  515  *      int flags;
  516  * };
       *
       * jail_set system call entry point.  Copies the user iovec array into a
       * kernel uio with copyinuio(), passes it with the caller's flags to
       * kern_jail_set(), and frees the uio afterwards.  Returns EINVAL for an
       * odd iovcnt, a copyinuio() error, or the result of kern_jail_set().
  517  */
  518 int
  519 sys_jail_set(struct thread *td, struct jail_set_args *uap)
  520 {
  521         struct uio *auio;
  522         int error;
  523 
  524         /* Check that we have an even number of iovecs. */
  525         if (uap->iovcnt & 1)
  526                 return (EINVAL);
  527 
  528         error = copyinuio(uap->iovp, uap->iovcnt, &auio);
  529         if (error)
  530                 return (error);
  531         error = kern_jail_set(td, auio, uap->flags);
              /* auio came from copyinuio(); release it on every path. */
  532         free(auio, M_IOV);
  533         return (error);
  534 }
  535 
  536 int
  537 kern_jail_set(struct thread *td, struct uio *optuio, int flags)
  538 {
  539         struct nameidata nd;
  540 #ifdef INET
  541         struct in_addr *ip4;
  542 #endif
  543 #ifdef INET6
  544         struct in6_addr *ip6;
  545 #endif
  546         struct vfsopt *opt;
  547         struct vfsoptlist *opts;
  548         struct prison *pr, *deadpr, *inspr, *mypr, *ppr, *tpr;
  549         struct vnode *root;
  550         char *domain, *errmsg, *host, *name, *namelc, *p, *path, *uuid;
  551         char *g_path, *osrelstr;
  552         struct bool_flags *bf;
  553         struct jailsys_flags *jsf;
  554 #if defined(INET) || defined(INET6)
  555         struct prison *tppr;
  556         void *op;
  557 #endif
  558         unsigned long hid;
  559         size_t namelen, onamelen, pnamelen;
  560         int born, created, cuflags, descend, drflags, enforce;
  561         int error, errmsg_len, errmsg_pos;
  562         int gotchildmax, gotenforce, gothid, gotrsnum, gotslevel;
  563         int jid, jsys, len, level;
  564         int childmax, osreldt, rsnum, slevel;
  565 #if defined(INET) || defined(INET6)
  566         int ii, ij;
  567 #endif
  568 #ifdef INET
  569         int ip4s, redo_ip4;
  570 #endif
  571 #ifdef INET6
  572         int ip6s, redo_ip6;
  573 #endif
  574         uint64_t pr_allow, ch_allow, pr_flags, ch_flags;
  575         uint64_t pr_allow_diff;
  576         unsigned tallow;
  577         char numbuf[12];
  578 
  579         error = priv_check(td, PRIV_JAIL_SET);
  580         if (!error && (flags & JAIL_ATTACH))
  581                 error = priv_check(td, PRIV_JAIL_ATTACH);
  582         if (error)
  583                 return (error);
  584         mypr = td->td_ucred->cr_prison;
  585         if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0)
  586                 return (EPERM);
  587         if (flags & ~JAIL_SET_MASK)
  588                 return (EINVAL);
  589 
  590         /*
  591          * Check all the parameters before committing to anything.  Not all
  592          * errors can be caught early, but we may as well try.  Also, this
  593          * takes care of some expensive stuff (path lookup) before getting
  594          * the allprison lock.
  595          *
  596          * XXX Jails are not filesystems, and jail parameters are not mount
  597          *     options.  But it makes more sense to re-use the vfsopt code
  598          *     than duplicate it under a different name.
  599          */
  600         error = vfs_buildopts(optuio, &opts);
  601         if (error)
  602                 return (error);
  603 #ifdef INET
  604         ip4 = NULL;
  605 #endif
  606 #ifdef INET6
  607         ip6 = NULL;
  608 #endif
  609         g_path = NULL;
  610 
  611         cuflags = flags & (JAIL_CREATE | JAIL_UPDATE);
  612         if (!cuflags) {
  613                 error = EINVAL;
  614                 vfs_opterror(opts, "no valid operation (create or update)");
  615                 goto done_errmsg;
  616         }
  617 
  618         error = vfs_copyopt(opts, "jid", &jid, sizeof(jid));
  619         if (error == ENOENT)
  620                 jid = 0;
  621         else if (error != 0)
  622                 goto done_free;
  623 
  624         error = vfs_copyopt(opts, "securelevel", &slevel, sizeof(slevel));
  625         if (error == ENOENT)
  626                 gotslevel = 0;
  627         else if (error != 0)
  628                 goto done_free;
  629         else
  630                 gotslevel = 1;
  631 
  632         error =
  633             vfs_copyopt(opts, "children.max", &childmax, sizeof(childmax));
  634         if (error == ENOENT)
  635                 gotchildmax = 0;
  636         else if (error != 0)
  637                 goto done_free;
  638         else
  639                 gotchildmax = 1;
  640 
  641         error = vfs_copyopt(opts, "enforce_statfs", &enforce, sizeof(enforce));
  642         if (error == ENOENT)
  643                 gotenforce = 0;
  644         else if (error != 0)
  645                 goto done_free;
  646         else if (enforce < 0 || enforce > 2) {
  647                 error = EINVAL;
  648                 goto done_free;
  649         } else
  650                 gotenforce = 1;
  651 
  652         error = vfs_copyopt(opts, "devfs_ruleset", &rsnum, sizeof(rsnum));
  653         if (error == ENOENT)
  654                 gotrsnum = 0;
  655         else if (error != 0)
  656                 goto done_free;
  657         else
  658                 gotrsnum = 1;
  659 
  660         pr_flags = ch_flags = 0;
  661         for (bf = pr_flag_bool;
  662              bf < pr_flag_bool + nitems(pr_flag_bool);
  663              bf++) {
  664                 vfs_flagopt(opts, bf->name, &pr_flags, bf->flag);
  665                 vfs_flagopt(opts, bf->noname, &ch_flags, bf->flag);
  666         }
  667         ch_flags |= pr_flags;
  668         for (jsf = pr_flag_jailsys;
  669              jsf < pr_flag_jailsys + nitems(pr_flag_jailsys);
  670              jsf++) {
  671                 error = vfs_copyopt(opts, jsf->name, &jsys, sizeof(jsys));
  672                 if (error == ENOENT)
  673                         continue;
  674                 if (error != 0)
  675                         goto done_free;
  676                 switch (jsys) {
  677                 case JAIL_SYS_DISABLE:
  678                         if (!jsf->disable) {
  679                                 error = EINVAL;
  680                                 goto done_free;
  681                         }
  682                         pr_flags |= jsf->disable;
  683                         break;
  684                 case JAIL_SYS_NEW:
  685                         pr_flags |= jsf->new;
  686                         break;
  687                 case JAIL_SYS_INHERIT:
  688                         break;
  689                 default:
  690                         error = EINVAL;
  691                         goto done_free;
  692                 }
  693                 ch_flags |= jsf->new | jsf->disable;
  694         }
  695         if ((flags & (JAIL_CREATE | JAIL_ATTACH)) == JAIL_CREATE
  696             && !(pr_flags & PR_PERSIST)) {
  697                 error = EINVAL;
  698                 vfs_opterror(opts, "new jail must persist or attach");
  699                 goto done_errmsg;
  700         }
  701 #ifdef VIMAGE
  702         if ((flags & JAIL_UPDATE) && (ch_flags & PR_VNET)) {
  703                 error = EINVAL;
  704                 vfs_opterror(opts, "vnet cannot be changed after creation");
  705                 goto done_errmsg;
  706         }
  707 #endif
  708 #ifdef INET
  709         if ((flags & JAIL_UPDATE) && (ch_flags & PR_IP4_USER)) {
  710                 error = EINVAL;
  711                 vfs_opterror(opts, "ip4 cannot be changed after creation");
  712                 goto done_errmsg;
  713         }
  714 #endif
  715 #ifdef INET6
  716         if ((flags & JAIL_UPDATE) && (ch_flags & PR_IP6_USER)) {
  717                 error = EINVAL;
  718                 vfs_opterror(opts, "ip6 cannot be changed after creation");
  719                 goto done_errmsg;
  720         }
  721 #endif
  722 
  723         pr_allow = ch_allow = 0;
  724         for (bf = pr_flag_allow;
  725              bf < pr_flag_allow + nitems(pr_flag_allow) &&
  726                 atomic_load_int(&bf->flag) != 0;
  727              bf++) {
  728                 vfs_flagopt(opts, bf->name, &pr_allow, bf->flag);
  729                 vfs_flagopt(opts, bf->noname, &ch_allow, bf->flag);
  730         }
  731         ch_allow |= pr_allow;
  732 
  733         error = vfs_getopt(opts, "name", (void **)&name, &len);
  734         if (error == ENOENT)
  735                 name = NULL;
  736         else if (error != 0)
  737                 goto done_free;
  738         else {
  739                 if (len == 0 || name[len - 1] != '\0') {
  740                         error = EINVAL;
  741                         goto done_free;
  742                 }
  743                 if (len > MAXHOSTNAMELEN) {
  744                         error = ENAMETOOLONG;
  745                         goto done_free;
  746                 }
  747         }
  748 
  749         error = vfs_getopt(opts, "host.hostname", (void **)&host, &len);
  750         if (error == ENOENT)
  751                 host = NULL;
  752         else if (error != 0)
  753                 goto done_free;
  754         else {
  755                 ch_flags |= PR_HOST;
  756                 pr_flags |= PR_HOST;
  757                 if (len == 0 || host[len - 1] != '\0') {
  758                         error = EINVAL;
  759                         goto done_free;
  760                 }
  761                 if (len > MAXHOSTNAMELEN) {
  762                         error = ENAMETOOLONG;
  763                         goto done_free;
  764                 }
  765         }
  766 
  767         error = vfs_getopt(opts, "host.domainname", (void **)&domain, &len);
  768         if (error == ENOENT)
  769                 domain = NULL;
  770         else if (error != 0)
  771                 goto done_free;
  772         else {
  773                 ch_flags |= PR_HOST;
  774                 pr_flags |= PR_HOST;
  775                 if (len == 0 || domain[len - 1] != '\0') {
  776                         error = EINVAL;
  777                         goto done_free;
  778                 }
  779                 if (len > MAXHOSTNAMELEN) {
  780                         error = ENAMETOOLONG;
  781                         goto done_free;
  782                 }
  783         }
  784 
  785         error = vfs_getopt(opts, "host.hostuuid", (void **)&uuid, &len);
  786         if (error == ENOENT)
  787                 uuid = NULL;
  788         else if (error != 0)
  789                 goto done_free;
  790         else {
  791                 ch_flags |= PR_HOST;
  792                 pr_flags |= PR_HOST;
  793                 if (len == 0 || uuid[len - 1] != '\0') {
  794                         error = EINVAL;
  795                         goto done_free;
  796                 }
  797                 if (len > HOSTUUIDLEN) {
  798                         error = ENAMETOOLONG;
  799                         goto done_free;
  800                 }
  801         }
  802 
  803 #ifdef COMPAT_FREEBSD32
  804         if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
  805                 uint32_t hid32;
  806 
  807                 error = vfs_copyopt(opts, "host.hostid", &hid32, sizeof(hid32));
  808                 hid = hid32;
  809         } else
  810 #endif
  811                 error = vfs_copyopt(opts, "host.hostid", &hid, sizeof(hid));
  812         if (error == ENOENT)
  813                 gothid = 0;
  814         else if (error != 0)
  815                 goto done_free;
  816         else {
  817                 gothid = 1;
  818                 ch_flags |= PR_HOST;
  819                 pr_flags |= PR_HOST;
  820         }
  821 
  822 #ifdef INET
  823         error = vfs_getopt(opts, "ip4.addr", &op, &ip4s);
  824         if (error == ENOENT)
  825                 ip4s = 0;
  826         else if (error != 0)
  827                 goto done_free;
  828         else if (ip4s & (sizeof(*ip4) - 1)) {
  829                 error = EINVAL;
  830                 goto done_free;
  831         } else {
  832                 ch_flags |= PR_IP4_USER;
  833                 pr_flags |= PR_IP4_USER;
  834                 if (ip4s > 0) {
  835                         ip4s /= sizeof(*ip4);
  836                         if (ip4s > jail_max_af_ips) {
  837                                 error = EINVAL;
  838                                 vfs_opterror(opts, "too many IPv4 addresses");
  839                                 goto done_errmsg;
  840                         }
  841                         ip4 = malloc(ip4s * sizeof(*ip4), M_PRISON, M_WAITOK);
  842                         bcopy(op, ip4, ip4s * sizeof(*ip4));
  843                         /*
  844                          * IP addresses are all sorted but ip[0] to preserve
  845                          * the primary IP address as given from userland.
  846                          * This special IP is used for unbound outgoing
  847                          * connections as well as for "loopback" traffic in
  848                          * case source address selection cannot find any more
  849                          * fitting address to connect from.
  850                          */
  851                         if (ip4s > 1)
  852                                 qsort(ip4 + 1, ip4s - 1, sizeof(*ip4),
  853                                     prison_qcmp_v4);
  854                         /*
  855                          * Check for duplicate addresses and do some simple
  856                          * zero and broadcast checks. If users give other bogus
  857                          * addresses it is their problem.
  858                          *
  859                          * We do not have to care about byte order for these
  860                          * checks so we will do them in NBO.
  861                          */
  862                         for (ii = 0; ii < ip4s; ii++) {
  863                                 if (ip4[ii].s_addr == INADDR_ANY ||
  864                                     ip4[ii].s_addr == INADDR_BROADCAST) {
  865                                         error = EINVAL;
  866                                         goto done_free;
  867                                 }
  868                                 if ((ii+1) < ip4s &&
  869                                     (ip4[0].s_addr == ip4[ii+1].s_addr ||
  870                                      ip4[ii].s_addr == ip4[ii+1].s_addr)) {
  871                                         error = EINVAL;
  872                                         goto done_free;
  873                                 }
  874                         }
  875                 }
  876         }
  877 #endif
  878 
  879 #ifdef INET6
  880         error = vfs_getopt(opts, "ip6.addr", &op, &ip6s);
  881         if (error == ENOENT)
  882                 ip6s = 0;
  883         else if (error != 0)
  884                 goto done_free;
  885         else if (ip6s & (sizeof(*ip6) - 1)) {
  886                 error = EINVAL;
  887                 goto done_free;
  888         } else {
  889                 ch_flags |= PR_IP6_USER;
  890                 pr_flags |= PR_IP6_USER;
  891                 if (ip6s > 0) {
  892                         ip6s /= sizeof(*ip6);
  893                         if (ip6s > jail_max_af_ips) {
  894                                 error = EINVAL;
  895                                 vfs_opterror(opts, "too many IPv6 addresses");
  896                                 goto done_errmsg;
  897                         }
  898                         ip6 = malloc(ip6s * sizeof(*ip6), M_PRISON, M_WAITOK);
  899                         bcopy(op, ip6, ip6s * sizeof(*ip6));
  900                         if (ip6s > 1)
  901                                 qsort(ip6 + 1, ip6s - 1, sizeof(*ip6),
  902                                     prison_qcmp_v6);
  903                         for (ii = 0; ii < ip6s; ii++) {
  904                                 if (IN6_IS_ADDR_UNSPECIFIED(&ip6[ii])) {
  905                                         error = EINVAL;
  906                                         goto done_free;
  907                                 }
  908                                 if ((ii+1) < ip6s &&
  909                                     (IN6_ARE_ADDR_EQUAL(&ip6[0], &ip6[ii+1]) ||
  910                                      IN6_ARE_ADDR_EQUAL(&ip6[ii], &ip6[ii+1])))
  911                                 {
  912                                         error = EINVAL;
  913                                         goto done_free;
  914                                 }
  915                         }
  916                 }
  917         }
  918 #endif
  919 
  920 #if defined(VIMAGE) && (defined(INET) || defined(INET6))
  921         if ((ch_flags & PR_VNET) && (ch_flags & (PR_IP4_USER | PR_IP6_USER))) {
  922                 error = EINVAL;
  923                 vfs_opterror(opts,
  924                     "vnet jails cannot have IP address restrictions");
  925                 goto done_errmsg;
  926         }
  927 #endif
  928 
  929         error = vfs_getopt(opts, "osrelease", (void **)&osrelstr, &len);
  930         if (error == ENOENT)
  931                 osrelstr = NULL;
  932         else if (error != 0)
  933                 goto done_free;
  934         else {
  935                 if (flags & JAIL_UPDATE) {
  936                         error = EINVAL;
  937                         vfs_opterror(opts,
  938                             "osrelease cannot be changed after creation");
  939                         goto done_errmsg;
  940                 }
  941                 if (len == 0 || osrelstr[len - 1] != '\0') {
  942                         error = EINVAL;
  943                         goto done_free;
  944                 }
  945                 if (len >= OSRELEASELEN) {
  946                         error = ENAMETOOLONG;
  947                         vfs_opterror(opts,
  948                             "osrelease string must be 1-%d bytes long",
  949                             OSRELEASELEN - 1);
  950                         goto done_errmsg;
  951                 }
  952         }
  953 
  954         error = vfs_copyopt(opts, "osreldate", &osreldt, sizeof(osreldt));
  955         if (error == ENOENT)
  956                 osreldt = 0;
  957         else if (error != 0)
  958                 goto done_free;
  959         else {
  960                 if (flags & JAIL_UPDATE) {
  961                         error = EINVAL;
  962                         vfs_opterror(opts,
  963                             "osreldate cannot be changed after creation");
  964                         goto done_errmsg;
  965                 }
  966                 if (osreldt == 0) {
  967                         error = EINVAL;
  968                         vfs_opterror(opts, "osreldate cannot be 0");
  969                         goto done_errmsg;
  970                 }
  971         }
  972 
  973         root = NULL;
  974         error = vfs_getopt(opts, "path", (void **)&path, &len);
  975         if (error == ENOENT)
  976                 path = NULL;
  977         else if (error != 0)
  978                 goto done_free;
  979         else {
  980                 if (flags & JAIL_UPDATE) {
  981                         error = EINVAL;
  982                         vfs_opterror(opts,
  983                             "path cannot be changed after creation");
  984                         goto done_errmsg;
  985                 }
  986                 if (len == 0 || path[len - 1] != '\0') {
  987                         error = EINVAL;
  988                         goto done_free;
  989                 }
  990                 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
  991                     path, td);
  992                 error = namei(&nd);
  993                 if (error)
  994                         goto done_free;
  995                 root = nd.ni_vp;
  996                 NDFREE(&nd, NDF_ONLY_PNBUF);
  997                 g_path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
  998                 strlcpy(g_path, path, MAXPATHLEN);
  999                 error = vn_path_to_global_path(td, root, g_path, MAXPATHLEN);
 1000                 if (error == 0) {
 1001                         path = g_path;
 1002                 } else {
 1003                         /* exit on other errors */
 1004                         goto done_free;
 1005                 }
 1006                 if (root->v_type != VDIR) {
 1007                         error = ENOTDIR;
 1008                         vput(root);
 1009                         goto done_free;
 1010                 }
 1011                 VOP_UNLOCK(root);
 1012         }
 1013 
 1014         /*
 1015          * Find the specified jail, or at least its parent.
 1016          * This abuses the file error codes ENOENT and EEXIST.
 1017          */
 1018         pr = NULL;
 1019         inspr = NULL;
 1020         if (cuflags == JAIL_CREATE && jid == 0 && name != NULL) {
 1021                 namelc = strrchr(name, '.');
 1022                 jid = strtoul(namelc != NULL ? namelc + 1 : name, &p, 10);
 1023                 if (*p != '\0')
 1024                         jid = 0;
 1025         }
 1026         sx_xlock(&allprison_lock);
 1027         drflags = PD_LIST_XLOCKED;
 1028         ppr = mypr;
 1029         if (!prison_isalive(ppr)) {
 1030                 /* This jail is dying.  This process will surely follow. */
 1031                 error = EAGAIN;
 1032                 goto done_deref;
 1033         }
 1034         if (jid != 0) {
 1035                 if (jid < 0) {
 1036                         error = EINVAL;
 1037                         vfs_opterror(opts, "negative jid");
 1038                         goto done_deref;
 1039                 }
 1040                 /*
 1041                  * See if a requested jid already exists.  Keep track of
 1042                  * where it can be inserted later.
 1043                  */
 1044                 TAILQ_FOREACH(inspr, &allprison, pr_list) {
 1045                         if (inspr->pr_id < jid)
 1046                                 continue;
 1047                         if (inspr->pr_id > jid)
 1048                                 break;
 1049                         pr = inspr;
 1050                         mtx_lock(&pr->pr_mtx);
 1051                         drflags |= PD_LOCKED;
 1052                         inspr = NULL;
 1053                         break;
 1054                 }
 1055                 if (pr != NULL) {
 1056                         /* Create: jid must not exist. */
 1057                         if (cuflags == JAIL_CREATE) {
 1058                                 /*
 1059                                  * Even creators that cannot see the jail will
 1060                                  * get EEXIST.
 1061                                  */
 1062                                 error = EEXIST;
 1063                                 vfs_opterror(opts, "jail %d already exists",
 1064                                     jid);
 1065                                 goto done_deref;
 1066                         }
 1067                         if (!prison_ischild(mypr, pr)) {
 1068                                 /*
 1069                                  * Updaters get ENOENT if they cannot see the
 1070                                  * jail.  This is true even for CREATE | UPDATE,
 1071                                  * which normally cannot give this error.
 1072                                  */
 1073                                 error = ENOENT;
 1074                                 vfs_opterror(opts, "jail %d not found", jid);
 1075                                 goto done_deref;
 1076                         }
 1077                         ppr = pr->pr_parent;
 1078                         if (!prison_isalive(ppr)) {
 1079                                 error = ENOENT;
 1080                                 vfs_opterror(opts, "jail %d is dying",
 1081                                     ppr->pr_id);
 1082                                 goto done_deref;
 1083                         }
 1084                         if (!prison_isalive(pr)) {
 1085                                 if (!(flags & JAIL_DYING)) {
 1086                                         error = ENOENT;
 1087                                         vfs_opterror(opts, "jail %d is dying",
 1088                                             jid);
 1089                                         goto done_deref;
 1090                                 }
 1091                                 if ((flags & JAIL_ATTACH) ||
 1092                                     (pr_flags & PR_PERSIST)) {
 1093                                         /*
 1094                                          * A dying jail might be resurrected
 1095                                          * (via attach or persist), but first
 1096                                          * it must determine if another jail
 1097                                          * has claimed its name.  Accomplish
 1098                                          * this by implicitly re-setting the
 1099                                          * name.
 1100                                          */
 1101                                         if (name == NULL)
 1102                                                 name = prison_name(mypr, pr);
 1103                                 }
 1104                         }
 1105                 } else {
 1106                         /* Update: jid must exist. */
 1107                         if (cuflags == JAIL_UPDATE) {
 1108                                 error = ENOENT;
 1109                                 vfs_opterror(opts, "jail %d not found", jid);
 1110                                 goto done_deref;
 1111                         }
 1112                 }
 1113         }
 1114         /*
 1115          * If the caller provided a name, look for a jail by that name.
 1116          * This has different semantics for creates and updates keyed by jid
 1117          * (where the name must not already exist in a different jail),
 1118          * and updates keyed by the name itself (where the name must exist
 1119          * because that is the jail being updated).
 1120          */
 1121         namelc = NULL;
 1122         if (name != NULL) {
 1123                 namelc = strrchr(name, '.');
 1124                 if (namelc == NULL)
 1125                         namelc = name;
 1126                 else {
 1127                         /*
 1128                          * This is a hierarchical name.  Split it into the
 1129                          * parent and child names, and make sure the parent
 1130                          * exists or matches an already found jail.
 1131                          */
 1132                         if (pr != NULL) {
 1133                                 if (strncmp(name, ppr->pr_name, namelc - name)
 1134                                     || ppr->pr_name[namelc - name] != '\0') {
 1135                                         error = EINVAL;
 1136                                         vfs_opterror(opts,
 1137                                             "cannot change jail's parent");
 1138                                         goto done_deref;
 1139                                 }
 1140                         } else {
 1141                                 *namelc = '\0';
 1142                                 ppr = prison_find_name(mypr, name);
 1143                                 if (ppr == NULL) {
 1144                                         error = ENOENT;
 1145                                         vfs_opterror(opts,
 1146                                             "jail \"%s\" not found", name);
 1147                                         goto done_deref;
 1148                                 }
 1149                                 mtx_unlock(&ppr->pr_mtx);
 1150                                 if (!prison_isalive(ppr)) {
 1151                                         error = ENOENT;
 1152                                         vfs_opterror(opts,
 1153                                             "jail \"%s\" is dying", name);
 1154                                         goto done_deref;
 1155                                 }
 1156                                 *namelc = '.';
 1157                         }
 1158                         namelc++;
 1159                 }
 1160                 if (namelc[0] != '\0') {
 1161                         pnamelen =
 1162                             (ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1;
 1163                         deadpr = NULL;
 1164                         FOREACH_PRISON_CHILD(ppr, tpr) {
 1165                                 if (tpr != pr &&
 1166                                     !strcmp(tpr->pr_name + pnamelen, namelc)) {
 1167                                         if (prison_isalive(tpr)) {
 1168                                                 if (pr == NULL &&
 1169                                                     cuflags != JAIL_CREATE) {
 1170                                                         /*
 1171                                                          * Use this jail
 1172                                                          * for updates.
 1173                                                          */
 1174                                                         pr = tpr;
 1175                                                         mtx_lock(&pr->pr_mtx);
 1176                                                         drflags |= PD_LOCKED;
 1177                                                         break;
 1178                                                 }
 1179                                                 /*
 1180                                                  * Create, or update(jid):
 1181                                                  * name must not exist in an
 1182                                                  * active sibling jail.
 1183                                                  */
 1184                                                 error = EEXIST;
 1185                                                 vfs_opterror(opts,
 1186                                                    "jail \"%s\" already exists",
 1187                                                    name);
 1188                                                 goto done_deref;
 1189                                         }
 1190                                         if (pr == NULL &&
 1191                                             cuflags != JAIL_CREATE) {
 1192                                                 deadpr = tpr;
 1193                                         }
 1194                                 }
 1195                         }
 1196                         /* If no active jail is found, use a dying one. */
 1197                         if (deadpr != NULL && pr == NULL) {
 1198                                 if (flags & JAIL_DYING) {
 1199                                         pr = deadpr;
 1200                                         mtx_lock(&pr->pr_mtx);
 1201                                         drflags |= PD_LOCKED;
 1202                                 } else if (cuflags == JAIL_UPDATE) {
 1203                                         error = ENOENT;
 1204                                         vfs_opterror(opts,
 1205                                             "jail \"%s\" is dying", name);
 1206                                         goto done_deref;
 1207                                 }
 1208                         }
 1209                         /* Update: name must exist if no jid. */
 1210                         else if (cuflags == JAIL_UPDATE && pr == NULL) {
 1211                                 error = ENOENT;
 1212                                 vfs_opterror(opts, "jail \"%s\" not found",
 1213                                     name);
 1214                                 goto done_deref;
 1215                         }
 1216                 }
 1217         }
 1218         /* Update: must provide a jid or name. */
 1219         else if (cuflags == JAIL_UPDATE && pr == NULL) {
 1220                 error = ENOENT;
 1221                 vfs_opterror(opts, "update specified no jail");
 1222                 goto done_deref;
 1223         }
 1224 
 1225         /* If there's no prison to update, create a new one and link it in. */
 1226         created = pr == NULL;
 1227         if (created) {
 1228                 for (tpr = mypr; tpr != NULL; tpr = tpr->pr_parent)
 1229                         if (tpr->pr_childcount >= tpr->pr_childmax) {
 1230                                 error = EPERM;
 1231                                 vfs_opterror(opts, "prison limit exceeded");
 1232                                 goto done_deref;
 1233                         }
 1234                 if (jid == 0 && (jid = get_next_prid(&inspr)) == 0) {
 1235                         error = EAGAIN;
 1236                         vfs_opterror(opts, "no available jail IDs");
 1237                         goto done_deref;
 1238                 }
 1239 
 1240                 pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
 1241                 pr->pr_state = PRISON_STATE_INVALID;
 1242                 refcount_init(&pr->pr_ref, 1);
 1243                 refcount_init(&pr->pr_uref, 0);
 1244                 drflags |= PD_DEREF;
 1245                 LIST_INIT(&pr->pr_children);
 1246                 mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK);
 1247                 TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
 1248 
 1249                 pr->pr_id = jid;
 1250                 if (inspr != NULL)
 1251                         TAILQ_INSERT_BEFORE(inspr, pr, pr_list);
 1252                 else
 1253                         TAILQ_INSERT_TAIL(&allprison, pr, pr_list);
 1254 
 1255                 pr->pr_parent = ppr;
 1256                 prison_hold(ppr);
 1257                 prison_proc_hold(ppr);
 1258                 LIST_INSERT_HEAD(&ppr->pr_children, pr, pr_sibling);
 1259                 for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent)
 1260                         tpr->pr_childcount++;
 1261 
 1262                 /* Set some default values, and inherit some from the parent. */
 1263                 if (namelc == NULL)
 1264                         namelc = "";
 1265                 if (path == NULL) {
 1266                         path = "/";
 1267                         root = mypr->pr_root;
 1268                         vref(root);
 1269                 }
 1270                 strlcpy(pr->pr_hostuuid, DEFAULT_HOSTUUID, HOSTUUIDLEN);
 1271                 pr->pr_flags |= PR_HOST;
 1272 #if defined(INET) || defined(INET6)
 1273 #ifdef VIMAGE
 1274                 if (!(pr_flags & PR_VNET))
 1275 #endif
 1276                 {
 1277 #ifdef INET
 1278                         if (!(ch_flags & PR_IP4_USER))
 1279                                 pr->pr_flags |= PR_IP4 | PR_IP4_USER;
 1280                         else if (!(pr_flags & PR_IP4_USER)) {
 1281                                 pr->pr_flags |= ppr->pr_flags & PR_IP4;
 1282                                 if (ppr->pr_ip4 != NULL) {
 1283                                         pr->pr_ip4s = ppr->pr_ip4s;
 1284                                         pr->pr_ip4 = malloc(pr->pr_ip4s *
 1285                                             sizeof(struct in_addr), M_PRISON,
 1286                                             M_WAITOK);
 1287                                         bcopy(ppr->pr_ip4, pr->pr_ip4,
 1288                                             pr->pr_ip4s * sizeof(*pr->pr_ip4));
 1289                                 }
 1290                         }
 1291 #endif
 1292 #ifdef INET6
 1293                         if (!(ch_flags & PR_IP6_USER))
 1294                                 pr->pr_flags |= PR_IP6 | PR_IP6_USER;
 1295                         else if (!(pr_flags & PR_IP6_USER)) {
 1296                                 pr->pr_flags |= ppr->pr_flags & PR_IP6;
 1297                                 if (ppr->pr_ip6 != NULL) {
 1298                                         pr->pr_ip6s = ppr->pr_ip6s;
 1299                                         pr->pr_ip6 = malloc(pr->pr_ip6s *
 1300                                             sizeof(struct in6_addr), M_PRISON,
 1301                                             M_WAITOK);
 1302                                         bcopy(ppr->pr_ip6, pr->pr_ip6,
 1303                                             pr->pr_ip6s * sizeof(*pr->pr_ip6));
 1304                                 }
 1305                         }
 1306 #endif
 1307                 }
 1308 #endif
 1309                 /* Source address selection is always on by default. */
 1310                 pr->pr_flags |= _PR_IP_SADDRSEL;
 1311 
 1312                 pr->pr_securelevel = ppr->pr_securelevel;
 1313                 pr->pr_allow = JAIL_DEFAULT_ALLOW & ppr->pr_allow;
 1314                 pr->pr_enforce_statfs = jail_default_enforce_statfs;
 1315                 pr->pr_devfs_rsnum = ppr->pr_devfs_rsnum;
 1316 
 1317                 pr->pr_osreldate = osreldt ? osreldt : ppr->pr_osreldate;
 1318                 if (osrelstr == NULL)
 1319                         strlcpy(pr->pr_osrelease, ppr->pr_osrelease,
 1320                             sizeof(pr->pr_osrelease));
 1321                 else
 1322                         strlcpy(pr->pr_osrelease, osrelstr,
 1323                             sizeof(pr->pr_osrelease));
 1324 
 1325 #ifdef VIMAGE
 1326                 /* Allocate a new vnet if specified. */
 1327                 pr->pr_vnet = (pr_flags & PR_VNET)
 1328                     ? vnet_alloc() : ppr->pr_vnet;
 1329 #endif
 1330                 /*
 1331                  * Allocate a dedicated cpuset for each jail.
 1332                  * Unlike other initial settings, this may return an error.
 1333                  */
 1334                 error = cpuset_create_root(ppr, &pr->pr_cpuset);
 1335                 if (error)
 1336                         goto done_deref;
 1337 
 1338                 mtx_lock(&pr->pr_mtx);
 1339                 drflags |= PD_LOCKED;
 1340         } else {
 1341                 /*
 1342                  * Grab a reference for existing prisons, to ensure they
 1343                  * continue to exist for the duration of the call.
 1344                  */
 1345                 prison_hold(pr);
 1346                 drflags |= PD_DEREF;
 1347 #if defined(VIMAGE) && (defined(INET) || defined(INET6))
 1348                 if ((pr->pr_flags & PR_VNET) &&
 1349                     (ch_flags & (PR_IP4_USER | PR_IP6_USER))) {
 1350                         error = EINVAL;
 1351                         vfs_opterror(opts,
 1352                             "vnet jails cannot have IP address restrictions");
 1353                         goto done_deref;
 1354                 }
 1355 #endif
 1356 #ifdef INET
 1357                 if (PR_IP4_USER & ch_flags & (pr_flags ^ pr->pr_flags)) {
 1358                         error = EINVAL;
 1359                         vfs_opterror(opts,
 1360                             "ip4 cannot be changed after creation");
 1361                         goto done_deref;
 1362                 }
 1363 #endif
 1364 #ifdef INET6
 1365                 if (PR_IP6_USER & ch_flags & (pr_flags ^ pr->pr_flags)) {
 1366                         error = EINVAL;
 1367                         vfs_opterror(opts,
 1368                             "ip6 cannot be changed after creation");
 1369                         goto done_deref;
 1370                 }
 1371 #endif
 1372         }
 1373 
 1374         /* Do final error checking before setting anything. */
 1375         if (gotslevel) {
 1376                 if (slevel < ppr->pr_securelevel) {
 1377                         error = EPERM;
 1378                         goto done_deref;
 1379                 }
 1380         }
 1381         if (gotchildmax) {
 1382                 if (childmax >= ppr->pr_childmax) {
 1383                         error = EPERM;
 1384                         goto done_deref;
 1385                 }
 1386         }
 1387         if (gotenforce) {
 1388                 if (enforce < ppr->pr_enforce_statfs) {
 1389                         error = EPERM;
 1390                         goto done_deref;
 1391                 }
 1392         }
 1393         if (gotrsnum) {
 1394                 /*
 1395                  * devfs_rsnum is a uint16_t
 1396                  */
 1397                 if (rsnum < 0 || rsnum > 65535) {
 1398                         error = EINVAL;
 1399                         goto done_deref;
 1400                 }
 1401                 /*
 1402                  * Nested jails always inherit parent's devfs ruleset
 1403                  */
 1404                 if (jailed(td->td_ucred)) {
 1405                         if (rsnum > 0 && rsnum != ppr->pr_devfs_rsnum) {
 1406                                 error = EPERM;
 1407                                 goto done_deref;
 1408                         } else
 1409                                 rsnum = ppr->pr_devfs_rsnum;
 1410                 }
 1411         }
 1412 #ifdef INET
 1413         if (ip4s > 0) {
 1414                 if (ppr->pr_flags & PR_IP4) {
 1415                         /*
 1416                          * Make sure the new set of IP addresses is a
 1417                          * subset of the parent's list.  Don't worry
 1418                          * about the parent being unlocked, as any
 1419                          * setting is done with allprison_lock held.
 1420                          */
 1421                         for (ij = 0; ij < ppr->pr_ip4s; ij++)
 1422                                 if (ip4[0].s_addr == ppr->pr_ip4[ij].s_addr)
 1423                                         break;
 1424                         if (ij == ppr->pr_ip4s) {
 1425                                 error = EPERM;
 1426                                 goto done_deref;
 1427                         }
 1428                         if (ip4s > 1) {
 1429                                 for (ii = ij = 1; ii < ip4s; ii++) {
 1430                                         if (ip4[ii].s_addr ==
 1431                                             ppr->pr_ip4[0].s_addr)
 1432                                                 continue;
 1433                                         for (; ij < ppr->pr_ip4s; ij++)
 1434                                                 if (ip4[ii].s_addr ==
 1435                                                     ppr->pr_ip4[ij].s_addr)
 1436                                                         break;
 1437                                         if (ij == ppr->pr_ip4s)
 1438                                                 break;
 1439                                 }
 1440                                 if (ij == ppr->pr_ip4s) {
 1441                                         error = EPERM;
 1442                                         goto done_deref;
 1443                                 }
 1444                         }
 1445                 }
 1446                 /*
 1447                  * Check for conflicting IP addresses.  We permit them
 1448                  * if there is no more than one IP on each jail.  If
 1449                  * there is a duplicate on a jail with more than one
 1450                  * IP stop checking and return error.
 1451                  */
 1452 #ifdef VIMAGE
 1453                 for (tppr = ppr; tppr != &prison0; tppr = tppr->pr_parent)
 1454                         if (tppr->pr_flags & PR_VNET)
 1455                                 break;
 1456 #else
 1457                 tppr = &prison0;
 1458 #endif
 1459                 FOREACH_PRISON_DESCENDANT(tppr, tpr, descend) {
 1460                         if (tpr == pr ||
 1461 #ifdef VIMAGE
 1462                             (tpr != tppr && (tpr->pr_flags & PR_VNET)) ||
 1463 #endif
 1464                             !prison_isalive(tpr)) {
 1465                                 descend = 0;
 1466                                 continue;
 1467                         }
 1468                         if (!(tpr->pr_flags & PR_IP4_USER))
 1469                                 continue;
 1470                         descend = 0;
 1471                         if (tpr->pr_ip4 == NULL ||
 1472                             (ip4s == 1 && tpr->pr_ip4s == 1))
 1473                                 continue;
 1474                         for (ii = 0; ii < ip4s; ii++) {
 1475                                 if (prison_check_ip4_locked(tpr, &ip4[ii]) ==
 1476                                     0) {
 1477                                         error = EADDRINUSE;
 1478                                         vfs_opterror(opts,
 1479                                             "IPv4 addresses clash");
 1480                                         goto done_deref;
 1481                                 }
 1482                         }
 1483                 }
 1484         }
 1485 #endif
 1486 #ifdef INET6
 1487         if (ip6s > 0) {
 1488                 if (ppr->pr_flags & PR_IP6) {
 1489                         /*
 1490                          * Make sure the new set of IP addresses is a
 1491                          * subset of the parent's list.
 1492                          */
 1493                         for (ij = 0; ij < ppr->pr_ip6s; ij++)
 1494                                 if (IN6_ARE_ADDR_EQUAL(&ip6[0],
 1495                                     &ppr->pr_ip6[ij]))
 1496                                         break;
 1497                         if (ij == ppr->pr_ip6s) {
 1498                                 error = EPERM;
 1499                                 goto done_deref;
 1500                         }
 1501                         if (ip6s > 1) {
 1502                                 for (ii = ij = 1; ii < ip6s; ii++) {
 1503                                         if (IN6_ARE_ADDR_EQUAL(&ip6[ii],
 1504                                              &ppr->pr_ip6[0]))
 1505                                                 continue;
 1506                                         for (; ij < ppr->pr_ip6s; ij++)
 1507                                                 if (IN6_ARE_ADDR_EQUAL(
 1508                                                     &ip6[ii], &ppr->pr_ip6[ij]))
 1509                                                         break;
 1510                                         if (ij == ppr->pr_ip6s)
 1511                                                 break;
 1512                                 }
 1513                                 if (ij == ppr->pr_ip6s) {
 1514                                         error = EPERM;
 1515                                         goto done_deref;
 1516                                 }
 1517                         }
 1518                 }
 1519                 /* Check for conflicting IP addresses. */
 1520 #ifdef VIMAGE
 1521                 for (tppr = ppr; tppr != &prison0; tppr = tppr->pr_parent)
 1522                         if (tppr->pr_flags & PR_VNET)
 1523                                 break;
 1524 #else
 1525                 tppr = &prison0;
 1526 #endif
 1527                 FOREACH_PRISON_DESCENDANT(tppr, tpr, descend) {
 1528                         if (tpr == pr ||
 1529 #ifdef VIMAGE
 1530                             (tpr != tppr && (tpr->pr_flags & PR_VNET)) ||
 1531 #endif
 1532                             !prison_isalive(tpr)) {
 1533                                 descend = 0;
 1534                                 continue;
 1535                         }
 1536                         if (!(tpr->pr_flags & PR_IP6_USER))
 1537                                 continue;
 1538                         descend = 0;
 1539                         if (tpr->pr_ip6 == NULL ||
 1540                             (ip6s == 1 && tpr->pr_ip6s == 1))
 1541                                 continue;
 1542                         for (ii = 0; ii < ip6s; ii++) {
 1543                                 if (prison_check_ip6_locked(tpr, &ip6[ii]) ==
 1544                                     0) {
 1545                                         error = EADDRINUSE;
 1546                                         vfs_opterror(opts,
 1547                                             "IPv6 addresses clash");
 1548                                         goto done_deref;
 1549                                 }
 1550                         }
 1551                 }
 1552         }
 1553 #endif
 1554         onamelen = namelen = 0;
 1555         if (namelc != NULL) {
 1556                 /* Give a default name of the jid.  Also allow the name to be
 1557                  * explicitly the jid - but not any other number, and only in
 1558                  * normal form (no leading zero/etc).
 1559                  */
 1560                 if (namelc[0] == '\0')
 1561                         snprintf(namelc = numbuf, sizeof(numbuf), "%d", jid);
 1562                 else if ((strtoul(namelc, &p, 10) != jid ||
 1563                           namelc[0] < '1' || namelc[0] > '9') && *p == '\0') {
 1564                         error = EINVAL;
 1565                         vfs_opterror(opts,
 1566                             "name cannot be numeric (unless it is the jid)");
 1567                         goto done_deref;
 1568                 }
 1569                 /*
 1570                  * Make sure the name isn't too long for the prison or its
 1571                  * children.
 1572                  */
 1573                 pnamelen = (ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1;
 1574                 onamelen = strlen(pr->pr_name + pnamelen);
 1575                 namelen = strlen(namelc);
 1576                 if (pnamelen + namelen + 1 > sizeof(pr->pr_name)) {
 1577                         error = ENAMETOOLONG;
 1578                         goto done_deref;
 1579                 }
 1580                 FOREACH_PRISON_DESCENDANT(pr, tpr, descend) {
 1581                         if (strlen(tpr->pr_name) + (namelen - onamelen) >=
 1582                             sizeof(pr->pr_name)) {
 1583                                 error = ENAMETOOLONG;
 1584                                 goto done_deref;
 1585                         }
 1586                 }
 1587         }
 1588         pr_allow_diff = pr_allow & ~ppr->pr_allow;
 1589         if (pr_allow_diff & ~PR_ALLOW_DIFFERENCES) {
 1590                 error = EPERM;
 1591                 goto done_deref;
 1592         }
 1593 
 1594         /*
 1595          * Let modules check their parameters.  This requires unlocking and
 1596          * then re-locking the prison, but this is still a valid state as long
 1597          * as allprison_lock remains xlocked.
 1598          */
 1599         mtx_unlock(&pr->pr_mtx);
 1600         drflags &= ~PD_LOCKED;
 1601         error = osd_jail_call(pr, PR_METHOD_CHECK, opts);
 1602         if (error != 0)
 1603                 goto done_deref;
 1604         mtx_lock(&pr->pr_mtx);
 1605         drflags |= PD_LOCKED;
 1606 
 1607         /* At this point, all valid parameters should have been noted. */
 1608         TAILQ_FOREACH(opt, opts, link) {
 1609                 if (!opt->seen && strcmp(opt->name, "errmsg")) {
 1610                         error = EINVAL;
 1611                         vfs_opterror(opts, "unknown parameter: %s", opt->name);
 1612                         goto done_deref;
 1613                 }
 1614         }
 1615 
 1616         /* Set the parameters of the prison. */
 1617 #ifdef INET
 1618         redo_ip4 = 0;
 1619         if (pr_flags & PR_IP4_USER) {
 1620                 pr->pr_flags |= PR_IP4;
 1621                 free(pr->pr_ip4, M_PRISON);
 1622                 pr->pr_ip4s = ip4s;
 1623                 pr->pr_ip4 = ip4;
 1624                 ip4 = NULL;
 1625                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
 1626 #ifdef VIMAGE
 1627                         if (tpr->pr_flags & PR_VNET) {
 1628                                 descend = 0;
 1629                                 continue;
 1630                         }
 1631 #endif
 1632                         if (prison_restrict_ip4(tpr, NULL)) {
 1633                                 redo_ip4 = 1;
 1634                                 descend = 0;
 1635                         }
 1636                 }
 1637         }
 1638 #endif
 1639 #ifdef INET6
 1640         redo_ip6 = 0;
 1641         if (pr_flags & PR_IP6_USER) {
 1642                 pr->pr_flags |= PR_IP6;
 1643                 free(pr->pr_ip6, M_PRISON);
 1644                 pr->pr_ip6s = ip6s;
 1645                 pr->pr_ip6 = ip6;
 1646                 ip6 = NULL;
 1647                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
 1648 #ifdef VIMAGE
 1649                         if (tpr->pr_flags & PR_VNET) {
 1650                                 descend = 0;
 1651                                 continue;
 1652                         }
 1653 #endif
 1654                         if (prison_restrict_ip6(tpr, NULL)) {
 1655                                 redo_ip6 = 1;
 1656                                 descend = 0;
 1657                         }
 1658                 }
 1659         }
 1660 #endif
 1661         if (gotslevel) {
 1662                 pr->pr_securelevel = slevel;
 1663                 /* Set all child jails to be at least this level. */
 1664                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend)
 1665                         if (tpr->pr_securelevel < slevel)
 1666                                 tpr->pr_securelevel = slevel;
 1667         }
 1668         if (gotchildmax) {
 1669                 pr->pr_childmax = childmax;
 1670                 /* Set all child jails to under this limit. */
 1671                 FOREACH_PRISON_DESCENDANT_LOCKED_LEVEL(pr, tpr, descend, level)
 1672                         if (tpr->pr_childmax > childmax - level)
 1673                                 tpr->pr_childmax = childmax > level
 1674                                     ? childmax - level : 0;
 1675         }
 1676         if (gotenforce) {
 1677                 pr->pr_enforce_statfs = enforce;
 1678                 /* Pass this restriction on to the children. */
 1679                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend)
 1680                         if (tpr->pr_enforce_statfs < enforce)
 1681                                 tpr->pr_enforce_statfs = enforce;
 1682         }
 1683         if (gotrsnum) {
 1684                 pr->pr_devfs_rsnum = rsnum;
 1685                 /* Pass this restriction on to the children. */
 1686                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend)
 1687                         tpr->pr_devfs_rsnum = rsnum;
 1688         }
 1689         if (namelc != NULL) {
 1690                 if (ppr == &prison0)
 1691                         strlcpy(pr->pr_name, namelc, sizeof(pr->pr_name));
 1692                 else
 1693                         snprintf(pr->pr_name, sizeof(pr->pr_name), "%s.%s",
 1694                             ppr->pr_name, namelc);
 1695                 /* Change this component of child names. */
 1696                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
 1697                         bcopy(tpr->pr_name + onamelen, tpr->pr_name + namelen,
 1698                             strlen(tpr->pr_name + onamelen) + 1);
 1699                         bcopy(pr->pr_name, tpr->pr_name, namelen);
 1700                 }
 1701         }
 1702         if (path != NULL) {
 1703                 /* Try to keep a real-rooted full pathname. */
 1704                 strlcpy(pr->pr_path, path, sizeof(pr->pr_path));
 1705                 pr->pr_root = root;
 1706                 root = NULL;
 1707         }
 1708         if (PR_HOST & ch_flags & ~pr_flags) {
 1709                 if (pr->pr_flags & PR_HOST) {
 1710                         /*
 1711                          * Copy the parent's host info.  As with pr_ip4 above,
 1712                          * the lack of a lock on the parent is not a problem;
 1713                          * it is always set with allprison_lock at least
 1714                          * shared, and is held exclusively here.
 1715                          */
 1716                         strlcpy(pr->pr_hostname, pr->pr_parent->pr_hostname,
 1717                             sizeof(pr->pr_hostname));
 1718                         strlcpy(pr->pr_domainname, pr->pr_parent->pr_domainname,
 1719                             sizeof(pr->pr_domainname));
 1720                         strlcpy(pr->pr_hostuuid, pr->pr_parent->pr_hostuuid,
 1721                             sizeof(pr->pr_hostuuid));
 1722                         pr->pr_hostid = pr->pr_parent->pr_hostid;
 1723                 }
 1724         } else if (host != NULL || domain != NULL || uuid != NULL || gothid) {
 1725                 /* Set this prison, and any descendants without PR_HOST. */
 1726                 if (host != NULL)
 1727                         strlcpy(pr->pr_hostname, host, sizeof(pr->pr_hostname));
 1728                 if (domain != NULL)
 1729                         strlcpy(pr->pr_domainname, domain, 
 1730                             sizeof(pr->pr_domainname));
 1731                 if (uuid != NULL)
 1732                         strlcpy(pr->pr_hostuuid, uuid, sizeof(pr->pr_hostuuid));
 1733                 if (gothid)
 1734                         pr->pr_hostid = hid;
 1735                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
 1736                         if (tpr->pr_flags & PR_HOST)
 1737                                 descend = 0;
 1738                         else {
 1739                                 if (host != NULL)
 1740                                         strlcpy(tpr->pr_hostname,
 1741                                             pr->pr_hostname,
 1742                                             sizeof(tpr->pr_hostname));
 1743                                 if (domain != NULL)
 1744                                         strlcpy(tpr->pr_domainname, 
 1745                                             pr->pr_domainname,
 1746                                             sizeof(tpr->pr_domainname));
 1747                                 if (uuid != NULL)
 1748                                         strlcpy(tpr->pr_hostuuid,
 1749                                             pr->pr_hostuuid,
 1750                                             sizeof(tpr->pr_hostuuid));
 1751                                 if (gothid)
 1752                                         tpr->pr_hostid = hid;
 1753                         }
 1754                 }
 1755         }
 1756         pr->pr_allow = (pr->pr_allow & ~ch_allow) | pr_allow;
 1757         if ((tallow = ch_allow & ~pr_allow))
 1758                 prison_set_allow_locked(pr, tallow, 0);
 1759         /*
 1760          * Persistent prisons get an extra reference, and prisons losing their
 1761          * persist flag lose that reference.
 1762          */
 1763         born = !prison_isalive(pr);
 1764         if (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags)) {
 1765                 if (pr_flags & PR_PERSIST) {
 1766                         prison_hold(pr);
 1767                         /*
 1768                          * This may make a dead prison alive again, but wait
 1769                          * to label it as such until after OSD calls have had
 1770                          * a chance to run (and perhaps to fail).
 1771                          */
 1772                         refcount_acquire(&pr->pr_uref);
 1773                 } else {
 1774                         drflags |= PD_DEUREF;
 1775                         prison_free_not_last(pr);
 1776                 }
 1777         }
 1778         pr->pr_flags = (pr->pr_flags & ~ch_flags) | pr_flags;
 1779         mtx_unlock(&pr->pr_mtx);
 1780         drflags &= ~PD_LOCKED;
 1781         /*
 1782          * Any errors past this point will need to de-persist newly created
 1783          * prisons, as well as call remove methods.
 1784          */
 1785         if (born)
 1786                 drflags |= PD_KILL;
 1787 
 1788 #ifdef RACCT
 1789         if (racct_enable && created)
 1790                 prison_racct_attach(pr);
 1791 #endif
 1792 
 1793         /* Locks may have prevented a complete restriction of child IP
 1794          * addresses.  If so, allocate some more memory and try again.
 1795          */
 1796 #ifdef INET
 1797         while (redo_ip4) {
 1798                 ip4s = pr->pr_ip4s;
 1799                 ip4 = malloc(ip4s * sizeof(*ip4), M_PRISON, M_WAITOK);
 1800                 mtx_lock(&pr->pr_mtx);
 1801                 redo_ip4 = 0;
 1802                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
 1803 #ifdef VIMAGE
 1804                         if (tpr->pr_flags & PR_VNET) {
 1805                                 descend = 0;
 1806                                 continue;
 1807                         }
 1808 #endif
 1809                         if (prison_restrict_ip4(tpr, ip4)) {
 1810                                 if (ip4 != NULL)
 1811                                         ip4 = NULL;
 1812                                 else
 1813                                         redo_ip4 = 1;
 1814                         }
 1815                 }
 1816                 mtx_unlock(&pr->pr_mtx);
 1817         }
 1818 #endif
 1819 #ifdef INET6
 1820         while (redo_ip6) {
 1821                 ip6s = pr->pr_ip6s;
 1822                 ip6 = malloc(ip6s * sizeof(*ip6), M_PRISON, M_WAITOK);
 1823                 mtx_lock(&pr->pr_mtx);
 1824                 redo_ip6 = 0;
 1825                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
 1826 #ifdef VIMAGE
 1827                         if (tpr->pr_flags & PR_VNET) {
 1828                                 descend = 0;
 1829                                 continue;
 1830                         }
 1831 #endif
 1832                         if (prison_restrict_ip6(tpr, ip6)) {
 1833                                 if (ip6 != NULL)
 1834                                         ip6 = NULL;
 1835                                 else
 1836                                         redo_ip6 = 1;
 1837                         }
 1838                 }
 1839                 mtx_unlock(&pr->pr_mtx);
 1840         }
 1841 #endif
 1842 
 1843         /* Let the modules do their work. */
 1844         if (born) {
 1845                 error = osd_jail_call(pr, PR_METHOD_CREATE, opts);
 1846                 if (error)
 1847                         goto done_deref;
 1848         }
 1849         error = osd_jail_call(pr, PR_METHOD_SET, opts);
 1850         if (error)
 1851                 goto done_deref;
 1852 
 1853         /*
 1854          * A new prison is now ready to be seen; either it has gained a user
 1855          * reference via persistence, or is about to gain one via attachment.
 1856          */
 1857         if (born) {
 1858                 drflags = prison_lock_xlock(pr, drflags);
 1859                 pr->pr_state = PRISON_STATE_ALIVE;
 1860         }
 1861 
 1862         /* Attach this process to the prison if requested. */
 1863         if (flags & JAIL_ATTACH) {
 1864                 error = do_jail_attach(td, pr,
 1865                     prison_lock_xlock(pr, drflags & PD_LOCK_FLAGS));
 1866                 drflags &= ~(PD_LOCKED | PD_LIST_XLOCKED);
 1867                 if (error) {
 1868                         vfs_opterror(opts, "attach failed");
 1869                         goto done_deref;
 1870                 }
 1871         }
 1872 
 1873 #ifdef RACCT
 1874         if (racct_enable && !created) {
 1875                 if (drflags & PD_LOCKED) {
 1876                         mtx_unlock(&pr->pr_mtx);
 1877                         drflags &= ~PD_LOCKED;
 1878                 }
 1879                 if (drflags & PD_LIST_XLOCKED) {
 1880                         sx_xunlock(&allprison_lock);
 1881                         drflags &= ~PD_LIST_XLOCKED;
 1882                 }
 1883                 prison_racct_modify(pr);
 1884         }
 1885 #endif
 1886 
 1887         drflags &= ~PD_KILL;
 1888         td->td_retval[0] = pr->pr_id;
 1889 
 1890  done_deref:
 1891         /* Release any temporary prison holds and/or locks. */
 1892         if (pr != NULL)
 1893                 prison_deref(pr, drflags);
 1894         else if (drflags & PD_LIST_SLOCKED)
 1895                 sx_sunlock(&allprison_lock);
 1896         else if (drflags & PD_LIST_XLOCKED)
 1897                 sx_xunlock(&allprison_lock);
 1898         if (root != NULL)
 1899                 vrele(root);
 1900  done_errmsg:
 1901         if (error) {
 1902                 /* Write the error message back to userspace. */
 1903                 if (vfs_getopt(opts, "errmsg", (void **)&errmsg,
 1904                     &errmsg_len) == 0 && errmsg_len > 0) {
 1905                         errmsg_pos = 2 * vfs_getopt_pos(opts, "errmsg") + 1;
 1906                         if (optuio->uio_segflg == UIO_SYSSPACE)
 1907                                 bcopy(errmsg,
 1908                                     optuio->uio_iov[errmsg_pos].iov_base,
 1909                                     errmsg_len);
 1910                         else
 1911                                 copyout(errmsg,
 1912                                     optuio->uio_iov[errmsg_pos].iov_base,
 1913                                     errmsg_len);
 1914                 }
 1915         }
 1916  done_free:
 1917 #ifdef INET
 1918         free(ip4, M_PRISON);
 1919 #endif
 1920 #ifdef INET6
 1921         free(ip6, M_PRISON);
 1922 #endif
 1923         if (g_path != NULL)
 1924                 free(g_path, M_TEMP);
 1925         vfs_freeopts(opts);
 1926         return (error);
 1927 }
 1928 
 1929 /*
 1930  * Find the next available prison ID.  Return the ID on success, or zero
 1931  * on failure.  Also set a pointer to the allprison list entry the prison
 1932  * should be inserted before.
 1933  */
 1934 static int
 1935 get_next_prid(struct prison **insprp)
 1936 {
 1937         struct prison *inspr;
 1938         int jid, maxid;
 1939 
 1940         jid = lastprid % JAIL_MAX + 1;
 1941         if (TAILQ_EMPTY(&allprison) ||
 1942             TAILQ_LAST(&allprison, prisonlist)->pr_id < jid) {
 1943                 /*
 1944                  * A common case is for all jails to be implicitly numbered,
 1945                  * which means they'll go on the end of the list, at least
 1946                  * for the first JAIL_MAX times.
 1947                  */
 1948                 inspr = NULL;
 1949         } else {
 1950                 /*
 1951                  * Take two passes through the allprison list: first starting
 1952                  * with the proposed jid, then ending with it.
 1953                  */
 1954                 for (maxid = JAIL_MAX; maxid != 0; ) {
 1955                         TAILQ_FOREACH(inspr, &allprison, pr_list) {
 1956                                 if (inspr->pr_id < jid)
 1957                                         continue;
 1958                                 if (inspr->pr_id > jid) {
 1959                                         /* Found an opening. */
 1960                                         maxid = 0;
 1961                                         break;
 1962                                 }
 1963                                 if (++jid > maxid) {
 1964                                         if (lastprid == maxid || lastprid == 0)
 1965                                         {
 1966                                                 /*
 1967                                                  * The entire legal range
 1968                                                  * has been traversed
 1969                                                  */
 1970                                                 return 0;
 1971                                         }
 1972                                         /* Try again from the start. */
 1973                                         jid = 1;
 1974                                         maxid = lastprid;
 1975                                         break;
 1976                                 }
 1977                         }
 1978                         if (inspr == NULL) {
 1979                                 /* Found room at the end of the list. */
 1980                                 break;
 1981                         }
 1982                 }
 1983         }
 1984         *insprp = inspr;
 1985         lastprid = jid;
 1986         return (jid);
 1987 }
 1988 
 1989 /*
 1990  * struct jail_get_args {
 1991  *      struct iovec *iovp;
 1992  *      unsigned int iovcnt;
 1993  *      int flags;
 1994  * };
 1995  */
 1996 int
 1997 sys_jail_get(struct thread *td, struct jail_get_args *uap)
 1998 {
 1999         struct uio *auio;
 2000         int error;
 2001 
 2002         /* Check that we have an even number of iovecs. */
 2003         if (uap->iovcnt & 1)
 2004                 return (EINVAL);
 2005 
 2006         error = copyinuio(uap->iovp, uap->iovcnt, &auio);
 2007         if (error)
 2008                 return (error);
 2009         error = kern_jail_get(td, auio, uap->flags);
 2010         if (error == 0)
 2011                 error = copyout(auio->uio_iov, uap->iovp,
 2012                     uap->iovcnt * sizeof (struct iovec));
 2013         free(auio, M_IOV);
 2014         return (error);
 2015 }
 2016 
 2017 int
 2018 kern_jail_get(struct thread *td, struct uio *optuio, int flags)
 2019 {
 2020         struct bool_flags *bf;
 2021         struct jailsys_flags *jsf;
 2022         struct prison *pr, *mypr;
 2023         struct vfsopt *opt;
 2024         struct vfsoptlist *opts;
 2025         char *errmsg, *name;
 2026         int drflags, error, errmsg_len, errmsg_pos, i, jid, len, pos;
 2027         unsigned f;
 2028 
 2029         if (flags & ~JAIL_GET_MASK)
 2030                 return (EINVAL);
 2031 
 2032         /* Get the parameter list. */
 2033         error = vfs_buildopts(optuio, &opts);
 2034         if (error)
 2035                 return (error);
 2036         errmsg_pos = vfs_getopt_pos(opts, "errmsg");
 2037         mypr = td->td_ucred->cr_prison;
 2038         pr = NULL;
 2039 
 2040         /*
 2041          * Find the prison specified by one of: lastjid, jid, name.
 2042          */
 2043         sx_slock(&allprison_lock);
 2044         drflags = PD_LIST_SLOCKED;
 2045         error = vfs_copyopt(opts, "lastjid", &jid, sizeof(jid));
 2046         if (error == 0) {
 2047                 TAILQ_FOREACH(pr, &allprison, pr_list) {
 2048                         if (pr->pr_id > jid &&
 2049                             ((flags & JAIL_DYING) || prison_isalive(pr)) &&
 2050                             prison_ischild(mypr, pr)) {
 2051                                 mtx_lock(&pr->pr_mtx);
 2052                                 drflags |= PD_LOCKED;
 2053                                 goto found_prison;
 2054                         }
 2055                 }
 2056                 error = ENOENT;
 2057                 vfs_opterror(opts, "no jail after %d", jid);
 2058                 goto done;
 2059         } else if (error != ENOENT)
 2060                 goto done;
 2061 
 2062         error = vfs_copyopt(opts, "jid", &jid, sizeof(jid));
 2063         if (error == 0) {
 2064                 if (jid != 0) {
 2065                         pr = prison_find_child(mypr, jid);
 2066                         if (pr != NULL) {
 2067                                 drflags |= PD_LOCKED;
 2068                                 if (!(prison_isalive(pr) ||
 2069                                     (flags & JAIL_DYING))) {
 2070                                         error = ENOENT;
 2071                                         vfs_opterror(opts, "jail %d is dying",
 2072                                             jid);
 2073                                         goto done;
 2074                                 }
 2075                                 goto found_prison;
 2076                         }
 2077                         error = ENOENT;
 2078                         vfs_opterror(opts, "jail %d not found", jid);
 2079                         goto done;
 2080                 }
 2081         } else if (error != ENOENT)
 2082                 goto done;
 2083 
 2084         error = vfs_getopt(opts, "name", (void **)&name, &len);
 2085         if (error == 0) {
 2086                 if (len == 0 || name[len - 1] != '\0') {
 2087                         error = EINVAL;
 2088                         goto done;
 2089                 }
 2090                 pr = prison_find_name(mypr, name);
 2091                 if (pr != NULL) {
 2092                         drflags |= PD_LOCKED;
 2093                         if (!(prison_isalive(pr) || (flags & JAIL_DYING))) {
 2094                                 error = ENOENT;
 2095                                 vfs_opterror(opts, "jail \"%s\" is dying",
 2096                                     name);
 2097                                 goto done;
 2098                         }
 2099                         goto found_prison;
 2100                 }
 2101                 error = ENOENT;
 2102                 vfs_opterror(opts, "jail \"%s\" not found", name);
 2103                 goto done;
 2104         } else if (error != ENOENT)
 2105                 goto done;
 2106 
 2107         vfs_opterror(opts, "no jail specified");
 2108         error = ENOENT;
 2109         goto done;
 2110 
 2111  found_prison:
 2112         /* Get the parameters of the prison. */
 2113         prison_hold(pr);
 2114         drflags |= PD_DEREF;
 2115         td->td_retval[0] = pr->pr_id;
 2116         error = vfs_setopt(opts, "jid", &pr->pr_id, sizeof(pr->pr_id));
 2117         if (error != 0 && error != ENOENT)
 2118                 goto done;
 2119         i = (pr->pr_parent == mypr) ? 0 : pr->pr_parent->pr_id;
 2120         error = vfs_setopt(opts, "parent", &i, sizeof(i));
 2121         if (error != 0 && error != ENOENT)
 2122                 goto done;
 2123         error = vfs_setopts(opts, "name", prison_name(mypr, pr));
 2124         if (error != 0 && error != ENOENT)
 2125                 goto done;
 2126         error = vfs_setopt(opts, "cpuset.id", &pr->pr_cpuset->cs_id,
 2127             sizeof(pr->pr_cpuset->cs_id));
 2128         if (error != 0 && error != ENOENT)
 2129                 goto done;
 2130         error = vfs_setopts(opts, "path", prison_path(mypr, pr));
 2131         if (error != 0 && error != ENOENT)
 2132                 goto done;
 2133 #ifdef INET
 2134         error = vfs_setopt_part(opts, "ip4.addr", pr->pr_ip4,
 2135             pr->pr_ip4s * sizeof(*pr->pr_ip4));
 2136         if (error != 0 && error != ENOENT)
 2137                 goto done;
 2138 #endif
 2139 #ifdef INET6
 2140         error = vfs_setopt_part(opts, "ip6.addr", pr->pr_ip6,
 2141             pr->pr_ip6s * sizeof(*pr->pr_ip6));
 2142         if (error != 0 && error != ENOENT)
 2143                 goto done;
 2144 #endif
 2145         error = vfs_setopt(opts, "securelevel", &pr->pr_securelevel,
 2146             sizeof(pr->pr_securelevel));
 2147         if (error != 0 && error != ENOENT)
 2148                 goto done;
 2149         error = vfs_setopt(opts, "children.cur", &pr->pr_childcount,
 2150             sizeof(pr->pr_childcount));
 2151         if (error != 0 && error != ENOENT)
 2152                 goto done;
 2153         error = vfs_setopt(opts, "children.max", &pr->pr_childmax,
 2154             sizeof(pr->pr_childmax));
 2155         if (error != 0 && error != ENOENT)
 2156                 goto done;
 2157         error = vfs_setopts(opts, "host.hostname", pr->pr_hostname);
 2158         if (error != 0 && error != ENOENT)
 2159                 goto done;
 2160         error = vfs_setopts(opts, "host.domainname", pr->pr_domainname);
 2161         if (error != 0 && error != ENOENT)
 2162                 goto done;
 2163         error = vfs_setopts(opts, "host.hostuuid", pr->pr_hostuuid);
 2164         if (error != 0 && error != ENOENT)
 2165                 goto done;
 2166 #ifdef COMPAT_FREEBSD32
 2167         if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
 2168                 uint32_t hid32 = pr->pr_hostid;
 2169 
 2170                 error = vfs_setopt(opts, "host.hostid", &hid32, sizeof(hid32));
 2171         } else
 2172 #endif
 2173         error = vfs_setopt(opts, "host.hostid", &pr->pr_hostid,
 2174             sizeof(pr->pr_hostid));
 2175         if (error != 0 && error != ENOENT)
 2176                 goto done;
 2177         error = vfs_setopt(opts, "enforce_statfs", &pr->pr_enforce_statfs,
 2178             sizeof(pr->pr_enforce_statfs));
 2179         if (error != 0 && error != ENOENT)
 2180                 goto done;
 2181         error = vfs_setopt(opts, "devfs_ruleset", &pr->pr_devfs_rsnum,
 2182             sizeof(pr->pr_devfs_rsnum));
 2183         if (error != 0 && error != ENOENT)
 2184                 goto done;
 2185         for (bf = pr_flag_bool;
 2186              bf < pr_flag_bool + nitems(pr_flag_bool);
 2187              bf++) {
 2188                 i = (pr->pr_flags & bf->flag) ? 1 : 0;
 2189                 error = vfs_setopt(opts, bf->name, &i, sizeof(i));
 2190                 if (error != 0 && error != ENOENT)
 2191                         goto done;
 2192                 i = !i;
 2193                 error = vfs_setopt(opts, bf->noname, &i, sizeof(i));
 2194                 if (error != 0 && error != ENOENT)
 2195                         goto done;
 2196         }
 2197         for (jsf = pr_flag_jailsys;
 2198              jsf < pr_flag_jailsys + nitems(pr_flag_jailsys);
 2199              jsf++) {
 2200                 f = pr->pr_flags & (jsf->disable | jsf->new);
 2201                 i = (f != 0 && f == jsf->disable) ? JAIL_SYS_DISABLE
 2202                     : (f == jsf->new) ? JAIL_SYS_NEW
 2203                     : JAIL_SYS_INHERIT;
 2204                 error = vfs_setopt(opts, jsf->name, &i, sizeof(i));
 2205                 if (error != 0 && error != ENOENT)
 2206                         goto done;
 2207         }
 2208         for (bf = pr_flag_allow;
 2209              bf < pr_flag_allow + nitems(pr_flag_allow) &&
 2210                 atomic_load_int(&bf->flag) != 0;
 2211              bf++) {
 2212                 i = (pr->pr_allow & bf->flag) ? 1 : 0;
 2213                 error = vfs_setopt(opts, bf->name, &i, sizeof(i));
 2214                 if (error != 0 && error != ENOENT)
 2215                         goto done;
 2216                 i = !i;
 2217                 error = vfs_setopt(opts, bf->noname, &i, sizeof(i));
 2218                 if (error != 0 && error != ENOENT)
 2219                         goto done;
 2220         }
 2221         i = !prison_isalive(pr);
 2222         error = vfs_setopt(opts, "dying", &i, sizeof(i));
 2223         if (error != 0 && error != ENOENT)
 2224                 goto done;
 2225         i = !i;
 2226         error = vfs_setopt(opts, "nodying", &i, sizeof(i));
 2227         if (error != 0 && error != ENOENT)
 2228                 goto done;
 2229         error = vfs_setopt(opts, "osreldate", &pr->pr_osreldate,
 2230             sizeof(pr->pr_osreldate));
 2231         if (error != 0 && error != ENOENT)
 2232                 goto done;
 2233         error = vfs_setopts(opts, "osrelease", pr->pr_osrelease);
 2234         if (error != 0 && error != ENOENT)
 2235                 goto done;
 2236 
 2237         /* Get the module parameters. */
 2238         mtx_unlock(&pr->pr_mtx);
 2239         drflags &= ~PD_LOCKED;
 2240         error = osd_jail_call(pr, PR_METHOD_GET, opts);
 2241         if (error)
 2242                 goto done;
 2243         prison_deref(pr, drflags);
 2244         pr = NULL;
 2245         drflags = 0;
 2246 
 2247         /* By now, all parameters should have been noted. */
 2248         TAILQ_FOREACH(opt, opts, link) {
 2249                 if (!opt->seen && strcmp(opt->name, "errmsg")) {
 2250                         error = EINVAL;
 2251                         vfs_opterror(opts, "unknown parameter: %s", opt->name);
 2252                         goto done;
 2253                 }
 2254         }
 2255 
 2256         /* Write the fetched parameters back to userspace. */
 2257         error = 0;
 2258         TAILQ_FOREACH(opt, opts, link) {
 2259                 if (opt->pos >= 0 && opt->pos != errmsg_pos) {
 2260                         pos = 2 * opt->pos + 1;
 2261                         optuio->uio_iov[pos].iov_len = opt->len;
 2262                         if (opt->value != NULL) {
 2263                                 if (optuio->uio_segflg == UIO_SYSSPACE) {
 2264                                         bcopy(opt->value,
 2265                                             optuio->uio_iov[pos].iov_base,
 2266                                             opt->len);
 2267                                 } else {
 2268                                         error = copyout(opt->value,
 2269                                             optuio->uio_iov[pos].iov_base,
 2270                                             opt->len);
 2271                                         if (error)
 2272                                                 break;
 2273                                 }
 2274                         }
 2275                 }
 2276         }
 2277 
 2278  done:
 2279         /* Release any temporary prison holds and/or locks. */
 2280         if (pr != NULL)
 2281                 prison_deref(pr, drflags);
 2282         else if (drflags & PD_LIST_SLOCKED)
 2283                 sx_sunlock(&allprison_lock);
 2284         if (error && errmsg_pos >= 0) {
 2285                 /* Write the error message back to userspace. */
 2286                 vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len);
 2287                 errmsg_pos = 2 * errmsg_pos + 1;
 2288                 if (errmsg_len > 0) {
 2289                         if (optuio->uio_segflg == UIO_SYSSPACE)
 2290                                 bcopy(errmsg,
 2291                                     optuio->uio_iov[errmsg_pos].iov_base,
 2292                                     errmsg_len);
 2293                         else
 2294                                 copyout(errmsg,
 2295                                     optuio->uio_iov[errmsg_pos].iov_base,
 2296                                     errmsg_len);
 2297                 }
 2298         }
 2299         vfs_freeopts(opts);
 2300         return (error);
 2301 }
 2302 
 2303 /*
 2304  * struct jail_remove_args {
 2305  *      int jid;
 2306  * };
 2307  */
 2308 int
 2309 sys_jail_remove(struct thread *td, struct jail_remove_args *uap)
 2310 {
 2311         struct prison *pr;
 2312         int error;
 2313 
 2314         error = priv_check(td, PRIV_JAIL_REMOVE);
 2315         if (error)
 2316                 return (error);
 2317 
 2318         sx_xlock(&allprison_lock);
 2319         pr = prison_find_child(td->td_ucred->cr_prison, uap->jid);
 2320         if (pr == NULL) {
 2321                 sx_xunlock(&allprison_lock);
 2322                 return (EINVAL);
 2323         }
 2324         if (!prison_isalive(pr)) {
 2325                 /* Silently ignore already-dying prisons. */
 2326                 mtx_unlock(&pr->pr_mtx);
 2327                 sx_xunlock(&allprison_lock);
 2328                 return (0);
 2329         }
 2330         prison_deref(pr, PD_KILL | PD_LOCKED | PD_LIST_XLOCKED);
 2331         return (0);
 2332 }
 2333 
 2334 /*
 2335  * struct jail_attach_args {
 2336  *      int jid;
 2337  * };
 2338  */
 2339 int
 2340 sys_jail_attach(struct thread *td, struct jail_attach_args *uap)
 2341 {
 2342         struct prison *pr;
 2343         int error;
 2344 
 2345         error = priv_check(td, PRIV_JAIL_ATTACH);
 2346         if (error)
 2347                 return (error);
 2348 
 2349         sx_slock(&allprison_lock);
 2350         pr = prison_find_child(td->td_ucred->cr_prison, uap->jid);
 2351         if (pr == NULL) {
 2352                 sx_sunlock(&allprison_lock);
 2353                 return (EINVAL);
 2354         }
 2355 
 2356         /* Do not allow a process to attach to a prison that is not alive. */
 2357         if (!prison_isalive(pr)) {
 2358                 mtx_unlock(&pr->pr_mtx);
 2359                 sx_sunlock(&allprison_lock);
 2360                 return (EINVAL);
 2361         }
 2362 
 2363         return (do_jail_attach(td, pr, PD_LOCKED | PD_LIST_SLOCKED));
 2364 }
 2365 
 2366 static int
 2367 do_jail_attach(struct thread *td, struct prison *pr, int drflags)
 2368 {
 2369         struct proc *p;
 2370         struct ucred *newcred, *oldcred;
 2371         int error;
 2372 
 2373         mtx_assert(&pr->pr_mtx, MA_OWNED);
 2374         sx_assert(&allprison_lock, SX_LOCKED);
 2375         drflags &= PD_LOCK_FLAGS;
 2376         /*
 2377          * XXX: Note that there is a slight race here if two threads
 2378          * in the same privileged process attempt to attach to two
 2379          * different jails at the same time.  It is important for
 2380          * user processes not to do this, or they might end up with
 2381          * a process root from one prison, but attached to the jail
 2382          * of another.
 2383          */
 2384         prison_hold(pr);
 2385         refcount_acquire(&pr->pr_uref);
 2386         drflags |= PD_DEREF | PD_DEUREF;
 2387         mtx_unlock(&pr->pr_mtx);
 2388         drflags &= ~PD_LOCKED;
 2389 
 2390         /* Let modules do whatever they need to prepare for attaching. */
 2391         error = osd_jail_call(pr, PR_METHOD_ATTACH, td);
 2392         if (error) {
 2393                 prison_deref(pr, drflags);
 2394                 return (error);
 2395         }
 2396         sx_unlock(&allprison_lock);
 2397         drflags &= ~(PD_LIST_SLOCKED | PD_LIST_XLOCKED);
 2398 
 2399         /*
 2400          * Reparent the newly attached process to this jail.
 2401          */
 2402         p = td->td_proc;
 2403         error = cpuset_setproc_update_set(p, pr->pr_cpuset);
 2404         if (error)
 2405                 goto e_revert_osd;
 2406 
 2407         vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY);
 2408         if ((error = change_dir(pr->pr_root, td)) != 0)
 2409                 goto e_unlock;
 2410 #ifdef MAC
 2411         if ((error = mac_vnode_check_chroot(td->td_ucred, pr->pr_root)))
 2412                 goto e_unlock;
 2413 #endif
 2414         VOP_UNLOCK(pr->pr_root);
 2415         if ((error = pwd_chroot_chdir(td, pr->pr_root)))
 2416                 goto e_revert_osd;
 2417 
 2418         newcred = crget();
 2419         PROC_LOCK(p);
 2420         oldcred = crcopysafe(p, newcred);
 2421         newcred->cr_prison = pr;
 2422         proc_set_cred(p, newcred);
 2423         setsugid(p);
 2424 #ifdef RACCT
 2425         racct_proc_ucred_changed(p, oldcred, newcred);
 2426         crhold(newcred);
 2427 #endif
 2428         PROC_UNLOCK(p);
 2429 #ifdef RCTL
 2430         rctl_proc_ucred_changed(p, newcred);
 2431         crfree(newcred);
 2432 #endif
 2433         prison_deref(oldcred->cr_prison, drflags);
 2434         crfree(oldcred);
 2435 
 2436         /*
 2437          * If the prison was killed while changing credentials, die along
 2438          * with it.
 2439          */
 2440         if (!prison_isalive(pr)) {
 2441                 PROC_LOCK(p);
 2442                 kern_psignal(p, SIGKILL);
 2443                 PROC_UNLOCK(p);
 2444         }
 2445 
 2446         return (0);
 2447 
 2448  e_unlock:
 2449         VOP_UNLOCK(pr->pr_root);
 2450  e_revert_osd:
 2451         /* Tell modules this thread is still in its old jail after all. */
 2452         sx_slock(&allprison_lock);
 2453         drflags |= PD_LIST_SLOCKED;
 2454         (void)osd_jail_call(td->td_ucred->cr_prison, PR_METHOD_ATTACH, td);
 2455         prison_deref(pr, drflags);
 2456         return (error);
 2457 }
 2458 
 2459 /*
 2460  * Returns a locked prison instance, or NULL on failure.
 2461  */
 2462 struct prison *
 2463 prison_find(int prid)
 2464 {
 2465         struct prison *pr;
 2466 
 2467         sx_assert(&allprison_lock, SX_LOCKED);
 2468         TAILQ_FOREACH(pr, &allprison, pr_list) {
 2469                 if (pr->pr_id < prid)
 2470                         continue;
 2471                 if (pr->pr_id > prid)
 2472                         break;
 2473                 KASSERT(prison_isvalid(pr), ("Found invalid prison %p", pr));
 2474                 mtx_lock(&pr->pr_mtx);
 2475                 return (pr);
 2476         }
 2477         return (NULL);
 2478 }
 2479 
 2480 /*
 2481  * Find a prison that is a descendant of mypr.  Returns a locked prison or NULL.
 2482  */
 2483 struct prison *
 2484 prison_find_child(struct prison *mypr, int prid)
 2485 {
 2486         struct prison *pr;
 2487         int descend;
 2488 
 2489         sx_assert(&allprison_lock, SX_LOCKED);
 2490         FOREACH_PRISON_DESCENDANT(mypr, pr, descend) {
 2491                 if (pr->pr_id == prid) {
 2492                         KASSERT(prison_isvalid(pr),
 2493                             ("Found invalid prison %p", pr));
 2494                         mtx_lock(&pr->pr_mtx);
 2495                         return (pr);
 2496                 }
 2497         }
 2498         return (NULL);
 2499 }
 2500 
 2501 /*
 2502  * Look for the name relative to mypr.  Returns a locked prison or NULL.
 2503  */
 2504 struct prison *
 2505 prison_find_name(struct prison *mypr, const char *name)
 2506 {
 2507         struct prison *pr, *deadpr;
 2508         size_t mylen;
 2509         int descend;
 2510 
 2511         sx_assert(&allprison_lock, SX_LOCKED);
 2512         mylen = (mypr == &prison0) ? 0 : strlen(mypr->pr_name) + 1;
 2513         deadpr = NULL;
 2514         FOREACH_PRISON_DESCENDANT(mypr, pr, descend) {
 2515                 if (!strcmp(pr->pr_name + mylen, name)) {
 2516                         KASSERT(prison_isvalid(pr),
 2517                             ("Found invalid prison %p", pr));
 2518                         if (prison_isalive(pr)) {
 2519                                 mtx_lock(&pr->pr_mtx);
 2520                                 return (pr);
 2521                         }
 2522                         deadpr = pr;
 2523                 }
 2524         }
 2525         /* There was no valid prison - perhaps there was a dying one. */
 2526         if (deadpr != NULL)
 2527                 mtx_lock(&deadpr->pr_mtx);
 2528         return (deadpr);
 2529 }
 2530 
 2531 /*
 2532  * See if a prison has the specific flag set.  The prison should be locked,
 2533  * unless checking for flags that are only set at jail creation (such as
 2534  * PR_IP4 and PR_IP6), or only the single bit is examined, without regard
 2535  * to any other prison data.
 2536  */
 2537 int
 2538 prison_flag(struct ucred *cred, unsigned flag)
 2539 {
 2540 
 2541         return (cred->cr_prison->pr_flags & flag);
 2542 }
 2543 
 2544 int
 2545 prison_allow(struct ucred *cred, unsigned flag)
 2546 {
 2547 
 2548         return ((cred->cr_prison->pr_allow & flag) != 0);
 2549 }
 2550 
/*
 * Hold a prison reference, by incrementing pr_ref.  It is generally
 * an error to hold a prison that does not already have a reference.
 * A prison record will remain valid as long as it has at least one
 * reference, and will not be removed as long as either the prison
 * mutex or the allprison lock is held (allprison_lock may be shared).
 *
 * prison_hold_locked() is a plain wrapper around prison_hold(): it
 * neither checks nor changes the state of the prison mutex.
 */
void
prison_hold_locked(struct prison *pr)
{

        /* Locking is no longer required. */
        prison_hold(pr);
}
 2565 
 2566 void
 2567 prison_hold(struct prison *pr)
 2568 {
 2569 #ifdef INVARIANTS
 2570         int was_valid = refcount_acquire_if_not_zero(&pr->pr_ref);
 2571 
 2572         KASSERT(was_valid,
 2573             ("Trying to hold dead prison %p (jid=%d).", pr, pr->pr_id));
 2574 #else
 2575         refcount_acquire(&pr->pr_ref);
 2576 #endif
 2577 }
 2578 
/*
 * Remove a prison reference.  If that was the last reference, the
 * prison will be removed (at a later time).
 *
 * prison_free_locked() must be called with the prison mutex owned (this
 * is asserted); the mutex is released before the reference is dropped
 * through prison_free().
 */
void
prison_free_locked(struct prison *pr)
{

        mtx_assert(&pr->pr_mtx, MA_OWNED);
        /*
         * Locking is no longer required, but unlock because the caller
         * expects it.
         */
        mtx_unlock(&pr->pr_mtx);
        prison_free(pr);
}
 2595 
 2596 void
 2597 prison_free(struct prison *pr)
 2598 {
 2599 
 2600         KASSERT(refcount_load(&pr->pr_ref) > 0,
 2601             ("Trying to free dead prison %p (jid=%d).",
 2602              pr, pr->pr_id));
 2603         if (!refcount_release_if_not_last(&pr->pr_ref)) {
 2604                 /*
 2605                  * Don't remove the last reference in this context,
 2606                  * in case there are locks held.
 2607                  */
 2608                 taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
 2609         }
 2610 }
 2611 
 2612 static void
 2613 prison_free_not_last(struct prison *pr)
 2614 {
 2615 #ifdef INVARIANTS
 2616         int lastref;
 2617 
 2618         KASSERT(refcount_load(&pr->pr_ref) > 0,
 2619             ("Trying to free dead prison %p (jid=%d).",
 2620              pr, pr->pr_id));
 2621         lastref = refcount_release(&pr->pr_ref);
 2622         KASSERT(!lastref,
 2623             ("prison_free_not_last freed last ref on prison %p (jid=%d).",
 2624              pr, pr->pr_id));
 2625 #else
 2626         refcount_release(&pr->pr_ref);
 2627 #endif
 2628 }
 2629 
 2630 /*
 2631  * Hold a prison for user visibility, by incrementing pr_uref.
 2632  * It is generally an error to hold a prison that isn't already
 2633  * user-visible, except through the the jail system calls.  It is also
 2634  * an error to hold an invalid prison.  A prison record will remain
 2635  * alive as long as it has at least one user reference, and will not
 2636  * be set to the dying state until the prison mutex and allprison_lock
 2637  * are both freed.
 2638  */
 2639 void
 2640 prison_proc_hold(struct prison *pr)
 2641 {
 2642 #ifdef INVARIANTS
 2643         int was_alive = refcount_acquire_if_not_zero(&pr->pr_uref);
 2644 
 2645         KASSERT(was_alive,
 2646             ("Cannot add a process to a non-alive prison (jid=%d)", pr->pr_id));
 2647 #else
 2648         refcount_acquire(&pr->pr_uref);
 2649 #endif
 2650 }
 2651 
 2652 /*
 2653  * Remove a prison user reference.  If it was the last reference, the
 2654  * prison will be considered "dying", and may be removed once all of
 2655  * its references are dropped.
 2656  */
 2657 void
 2658 prison_proc_free(struct prison *pr)
 2659 {
 2660 
 2661         /*
 2662          * Locking is only required when releasing the last reference.
 2663          * This allows assurance that a locked prison will remain alive
 2664          * until it is unlocked.
 2665          */
 2666         KASSERT(refcount_load(&pr->pr_uref) > 0,
 2667             ("Trying to kill a process in a dead prison (jid=%d)", pr->pr_id));
 2668         if (!refcount_release_if_not_last(&pr->pr_uref)) {
 2669                 /*
 2670                  * Don't remove the last user reference in this context,
 2671                  * which is expected to be a process that is not only locked,
 2672                  * but also half dead.  Add a reference so any calls to
 2673                  * prison_free() won't re-submit the task.
 2674                  */
 2675                 prison_hold(pr);
 2676                 mtx_lock(&pr->pr_mtx);
 2677                 KASSERT(!(pr->pr_flags & PR_COMPLETE_PROC),
 2678                     ("Redundant last reference in prison_proc_free (jid=%d)",
 2679                      pr->pr_id));
 2680                 pr->pr_flags |= PR_COMPLETE_PROC;
 2681                 mtx_unlock(&pr->pr_mtx);
 2682                 taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
 2683         }
 2684 }
 2685 
 2686 static void
 2687 prison_proc_free_not_last(struct prison *pr)
 2688 {
 2689 #ifdef INVARIANTS
 2690         int lastref;
 2691 
 2692         KASSERT(refcount_load(&pr->pr_uref) > 0,
 2693             ("Trying to free dead prison %p (jid=%d).",
 2694              pr, pr->pr_id));
 2695         lastref = refcount_release(&pr->pr_uref);
 2696         KASSERT(!lastref,
 2697             ("prison_proc_free_not_last freed last uref on prison %p (jid=%d).",
 2698              pr, pr->pr_id));
 2699 #else
 2700         refcount_release(&pr->pr_uref);
 2701 #endif
 2702 }
 2703 
 2704 /*
 2705  * Complete a call to either prison_free or prison_proc_free.
 2706  */
 2707 static void
 2708 prison_complete(void *context, int pending)
 2709 {
 2710         struct prison *pr = context;
 2711         int drflags;
 2712 
 2713         /*
 2714          * This could be called to release the last reference, or the last
 2715          * user reference (plus the reference held in prison_proc_free).
 2716          */
 2717         drflags = prison_lock_xlock(pr, PD_DEREF);
 2718         if (pr->pr_flags & PR_COMPLETE_PROC) {
 2719                 pr->pr_flags &= ~PR_COMPLETE_PROC;
 2720                 drflags |= PD_DEUREF;
 2721         }
 2722         prison_deref(pr, drflags);
 2723 }
 2724 
/*
 * Remove a prison reference and/or user reference (usually).
 * This assumes context that allows sleeping (for allprison_lock),
 * with no non-sleeping locks held, except perhaps the prison itself.
 * If there are no more references, release and delist the prison.
 * On completion, the prison lock and the allprison lock are both
 * unlocked.
 *
 * "flags" is a combination of PD_* bits describing both the work to do
 * and the locks currently held:
 *   PD_DEREF         drop a reference (pr_ref).
 *   PD_DEUREF        drop a user reference (pr_uref).
 *   PD_KILL          kill the prison and its descendants first.
 *   PD_LOCKED        the prison mutex is held.
 *   PD_LIST_SLOCKED  allprison_lock is held shared.
 *   PD_LIST_XLOCKED  allprison_lock is held exclusive.
 * The lock bits are kept up to date as locks are taken and released so
 * that the unlock code after the loop releases exactly what is held.
 */
static void
prison_deref(struct prison *pr, int flags)
{
        struct prisonlist freeprison;
        struct prison *killpr, *rpr, *ppr, *tpr;
        struct proc *p;

        /* killpr: prison whose processes get SIGKILL after unlocking. */
        killpr = NULL;
        /* freeprison: unlinked prisons to be destroyed after unlocking. */
        TAILQ_INIT(&freeprison);
        /*
         * Release this prison as requested, which may cause its parent
         * to be released, and then maybe its grandparent, etc.
         */
        for (;;) {
                if (flags & PD_KILL) {
                        /* Kill the prison and its descendants. */
                        KASSERT(pr != &prison0,
                            ("prison_deref trying to kill prison0"));
                        /*
                         * Take a reference of our own if the caller did
                         * not pass one in; it is dropped further down.
                         */
                        if (!(flags & PD_DEREF)) {
                                prison_hold(pr);
                                flags |= PD_DEREF;
                        }
                        /*
                         * prison_lock_xlock() returns the updated lock
                         * flags (presumably with the prison mutex and
                         * the exclusive allprison lock held -- confirm
                         * against its definition).
                         */
                        flags = prison_lock_xlock(pr, flags);
                        prison_deref_kill(pr, &freeprison);
                }
                if (flags & PD_DEUREF) {
                        /* Drop a user reference. */
                        KASSERT(refcount_load(&pr->pr_uref) > 0,
                            ("prison_deref PD_DEUREF on a dead prison (jid=%d)",
                             pr->pr_id));
                        /*
                         * Fast path: any user reference but the last is
                         * released without further locking.
                         */
                        if (!refcount_release_if_not_last(&pr->pr_uref)) {
                                /*
                                 * Make sure a reference is held across
                                 * the release of the last user reference.
                                 */
                                if (!(flags & PD_DEREF)) {
                                        prison_hold(pr);
                                        flags |= PD_DEREF;
                                }
                                flags = prison_lock_xlock(pr, flags);
                                if (refcount_release(&pr->pr_uref) &&
                                    pr->pr_state == PRISON_STATE_ALIVE) {
                                        /*
                                         * When the last user reference goes,
                                         * this becomes a dying prison.
                                         */
                                        KASSERT(
                                            refcount_load(&prison0.pr_uref) > 0,
                                            ("prison0 pr_uref=0"));
                                        pr->pr_state = PRISON_STATE_DYING;
                                        /*
                                         * prison_cleanup() is called with
                                         * the prison mutex released.
                                         */
                                        mtx_unlock(&pr->pr_mtx);
                                        flags &= ~PD_LOCKED;
                                        prison_cleanup(pr);
                                }
                        }
                }
                if (flags & PD_KILL) {
                        /*
                         * Any remaining user references are probably processes
                         * that need to be killed, either in this prison or its
                         * descendants.
                         */
                        if (refcount_load(&pr->pr_uref) > 0)
                                killpr = pr;
                        /* Make sure the parent prison doesn't get killed. */
                        flags &= ~PD_KILL;
                }
                if (flags & PD_DEREF) {
                        /* Drop a reference. */
                        KASSERT(refcount_load(&pr->pr_ref) > 0,
                            ("prison_deref PD_DEREF on a dead prison (jid=%d)",
                             pr->pr_id));
                        /*
                         * Fast path: any reference but the last is
                         * released without further locking.
                         */
                        if (!refcount_release_if_not_last(&pr->pr_ref)) {
                                flags = prison_lock_xlock(pr, flags);
                                if (refcount_release(&pr->pr_ref)) {
                                        /*
                                         * When the last reference goes,
                                         * unlink the prison and set it aside.
                                         */
                                        KASSERT(
                                            refcount_load(&pr->pr_uref) == 0,
                                            ("prison_deref: last ref, "
                                             "but still has %d urefs (jid=%d)",
                                             pr->pr_uref, pr->pr_id));
                                        KASSERT(
                                            refcount_load(&prison0.pr_ref) != 0,
                                            ("prison0 pr_ref=0"));
                                        pr->pr_state = PRISON_STATE_INVALID;
                                        TAILQ_REMOVE(&allprison, pr, pr_list);
                                        LIST_REMOVE(pr, pr_sibling);
                                        TAILQ_INSERT_TAIL(&freeprison, pr,
                                            pr_list);
                                        /* Every ancestor loses one child. */
                                        for (ppr = pr->pr_parent;
                                             ppr != NULL;
                                             ppr = ppr->pr_parent)
                                                ppr->pr_childcount--;
                                        /*
                                         * Removing a prison frees references
                                         * from its parent.
                                         */
                                        mtx_unlock(&pr->pr_mtx);
                                        flags &= ~PD_LOCKED;
                                        /*
                                         * Go around again for the parent,
                                         * dropping one reference and one
                                         * user reference from it.
                                         */
                                        pr = pr->pr_parent;
                                        flags |= PD_DEREF | PD_DEUREF;
                                        continue;
                                }
                        }
                }
                break;
        }

        /* Release all the prison locks. */
        if (flags & PD_LOCKED)
                mtx_unlock(&pr->pr_mtx);
        if (flags & PD_LIST_SLOCKED)
                sx_sunlock(&allprison_lock);
        else if (flags & PD_LIST_XLOCKED)
                sx_xunlock(&allprison_lock);

        /* Kill any processes attached to a killed prison. */
        if (killpr != NULL) {
                sx_slock(&allproc_lock);
                FOREACH_PROC_IN_SYSTEM(p) {
                        PROC_LOCK(p);
                        if (p->p_state != PRS_NEW && p->p_ucred != NULL) {
                                /*
                                 * Walk up from the process's prison; the
                                 * process is a target if killpr is that
                                 * prison or one of its ancestors.
                                 */
                                for (ppr = p->p_ucred->cr_prison;
                                     ppr != &prison0;
                                     ppr = ppr->pr_parent)
                                        if (ppr == killpr) {
                                                kern_psignal(p, SIGKILL);
                                                break;
                                        }
                        }
                        PROC_UNLOCK(p);
                }
                sx_sunlock(&allproc_lock);
        }

        /*
         * Finish removing any unreferenced prisons, which couldn't happen
         * while allprison_lock was held (to avoid a LOR on vrele).
         */
        TAILQ_FOREACH_SAFE(rpr, &freeprison, pr_list, tpr) {
#ifdef VIMAGE
                /* Only destroy a vnet that differs from the parent's. */
                if (rpr->pr_vnet != rpr->pr_parent->pr_vnet)
                        vnet_destroy(rpr->pr_vnet);
#endif
                if (rpr->pr_root != NULL)
                        vrele(rpr->pr_root);
                mtx_destroy(&rpr->pr_mtx);
#ifdef INET
                free(rpr->pr_ip4, M_PRISON);
#endif
#ifdef INET6
                free(rpr->pr_ip6, M_PRISON);
#endif
                if (rpr->pr_cpuset != NULL)
                        cpuset_rel(rpr->pr_cpuset);
                osd_jail_exit(rpr);
#ifdef RACCT
                if (racct_enable)
                        prison_racct_detach(rpr);
#endif
                TAILQ_REMOVE(&freeprison, rpr, pr_list);
                free(rpr, M_PRISON);
        }
}
 2896 
/*
 * Kill the prison and its descendants.  Mark them as dying, clear the
 * persist flag, and call module remove methods.
 *
 * pr is the prison being killed.  freeprison is the list onto which any
 * descendant that loses its last reference here is queued.
 *
 * Locking, as visible in this function: pr->pr_mtx is dropped near the top
 * and taken again just before returning, so the function is entered and
 * left with that mutex held.  prison_cleanup() asserts that allprison_lock
 * is exclusively held, so that lock must be held for the whole call.
 */
static void
prison_deref_kill(struct prison *pr, struct prisonlist *freeprison)
{
	struct prison *cpr, *ppr, *rpr;
	bool descend;

	/*
	 * Unlike the descendants, the target prison can be killed
	 * even if it is currently dying.  This is useful for failed
	 * creation in jail_set(2).
	 */
	KASSERT(refcount_load(&pr->pr_ref) > 0,
	    ("Trying to kill dead prison %p (jid=%d).",
	     pr, pr->pr_id));
	/*
	 * Take a user reference on the target for the duration of this
	 * function; it is released at the very end.
	 */
	refcount_acquire(&pr->pr_uref);
	pr->pr_state = PRISON_STATE_DYING;
	mtx_unlock(&pr->pr_mtx);

	/*
	 * rpr tracks the most recently freed descendant whose removal from
	 * its sibling list is still pending.
	 */
	rpr = NULL;
	/*
	 * NOTE(review): descend is presumably true on the pre-order visit of
	 * a prison and false on its post-order visit - confirm against the
	 * macro definition.
	 */
	FOREACH_PRISON_DESCENDANT_PRE_POST(pr, cpr, descend) {
		if (descend) {
			if (!prison_isalive(cpr)) {
				/*
				 * Not alive: leave it untagged.  Clearing
				 * descend presumably keeps the iterator out
				 * of this prison's children - confirm.
				 */
				descend = false;
				continue;
			}
			/*
			 * Take holds on the descendant (presumably on pr_ref
			 * and pr_uref, matching the releases in the
			 * post-order pass - confirm), then mark it dying and
			 * tag it with PR_REMOVE for the post-order pass.
			 */
			prison_hold(cpr);
			prison_proc_hold(cpr);
			mtx_lock(&cpr->pr_mtx);
			cpr->pr_state = PRISON_STATE_DYING;
			cpr->pr_flags |= PR_REMOVE;
			mtx_unlock(&cpr->pr_mtx);
			continue;
		}
		/* Post-order: only handle prisons tagged on the way down. */
		if (!(cpr->pr_flags & PR_REMOVE))
			continue;
		/* prison_cleanup() asserts the prison mutex is not owned. */
		prison_cleanup(cpr);
		mtx_lock(&cpr->pr_mtx);
		cpr->pr_flags &= ~PR_REMOVE;
		/* Drop the references that go with the persist flag. */
		if (cpr->pr_flags & PR_PERSIST) {
			cpr->pr_flags &= ~PR_PERSIST;
			prison_proc_free_not_last(cpr);
			prison_free_not_last(cpr);
		}
		(void)refcount_release(&cpr->pr_uref);
		if (refcount_release(&cpr->pr_ref)) {
			/*
			 * When the last reference goes, unlink the prison
			 * and set it aside for prison_deref() to handle.
			 * Delay unlinking the sibling list to keep the loop
			 * safe.
			 */
			if (rpr != NULL)
				LIST_REMOVE(rpr, pr_sibling);
			rpr = cpr;
			rpr->pr_state = PRISON_STATE_INVALID;
			TAILQ_REMOVE(&allprison, rpr, pr_list);
			TAILQ_INSERT_TAIL(freeprison, rpr, pr_list);
			/*
			 * Removing a prison frees references from its parent.
			 */
			ppr = rpr->pr_parent;
			prison_proc_free_not_last(ppr);
			prison_free_not_last(ppr);
			/* Every ancestor has one fewer descendant now. */
			for (; ppr != NULL; ppr = ppr->pr_parent)
				ppr->pr_childcount--;
		}
		mtx_unlock(&cpr->pr_mtx);
	}
	/* Finish the sibling-list unlink deferred from the last iteration. */
	if (rpr != NULL)
		LIST_REMOVE(rpr, pr_sibling);

	/* Finally give the target itself the same treatment. */
	prison_cleanup(pr);
	mtx_lock(&pr->pr_mtx);
	if (pr->pr_flags & PR_PERSIST) {
		pr->pr_flags &= ~PR_PERSIST;
		prison_proc_free_not_last(pr);
		prison_free_not_last(pr);
	}
	/* Release the user reference taken at the top; pr stays locked. */
	(void)refcount_release(&pr->pr_uref);
}
 2981 
 2982 /*
 2983  * Given the current locking state in the flags, make sure allprison_lock
 2984  * is held exclusive, and the prison is locked.  Return flags indicating
 2985  * the new state.
 2986  */
 2987 static int
 2988 prison_lock_xlock(struct prison *pr, int flags)
 2989 {
 2990 
 2991         if (!(flags & PD_LIST_XLOCKED)) {
 2992                 /*
 2993                  * Get allprison_lock, which may be an upgrade,
 2994                  * and may require unlocking the prison.
 2995                  */
 2996                 if (flags & PD_LOCKED) {
 2997                         mtx_unlock(&pr->pr_mtx);
 2998                         flags &= ~PD_LOCKED;
 2999                 }
 3000                 if (flags & PD_LIST_SLOCKED) {
 3001                         if (!sx_try_upgrade(&allprison_lock)) {
 3002                                 sx_sunlock(&allprison_lock);
 3003                                 sx_xlock(&allprison_lock);
 3004                         }
 3005                         flags &= ~PD_LIST_SLOCKED;
 3006                 } else
 3007                         sx_xlock(&allprison_lock);
 3008                 flags |= PD_LIST_XLOCKED;
 3009         }
 3010         if (!(flags & PD_LOCKED)) {
 3011                 /* Lock the prison mutex. */
 3012                 mtx_lock(&pr->pr_mtx);
 3013                 flags |= PD_LOCKED;
 3014         }
 3015         return flags;
 3016 }
 3017 
/*
 * Release a prison's resources when it starts dying (when the last user
 * reference is dropped, or when it is killed).
 *
 * The caller must hold allprison_lock exclusively and must not own the
 * prison's mutex; both conditions are asserted below.
 */
static void
prison_cleanup(struct prison *pr)
{
	sx_assert(&allprison_lock, SA_XLOCKED);
	mtx_assert(&pr->pr_mtx, MA_NOTOWNED);
	shm_remove_prison(pr);
	/* Run the PR_METHOD_REMOVE jail method; its result is ignored. */
	(void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
}
 3030 
 3031 /*
 3032  * Set or clear a permission bit in the pr_allow field, passing restrictions
 3033  * (cleared permission) down to child jails.
 3034  */
 3035 void
 3036 prison_set_allow(struct ucred *cred, unsigned flag, int enable)
 3037 {
 3038         struct prison *pr;
 3039 
 3040         pr = cred->cr_prison;
 3041         sx_slock(&allprison_lock);
 3042         mtx_lock(&pr->pr_mtx);
 3043         prison_set_allow_locked(pr, flag, enable);
 3044         mtx_unlock(&pr->pr_mtx);
 3045         sx_sunlock(&allprison_lock);
 3046 }
 3047 
 3048 static void
 3049 prison_set_allow_locked(struct prison *pr, unsigned flag, int enable)
 3050 {
 3051         struct prison *cpr;
 3052         int descend;
 3053 
 3054         if (enable != 0)
 3055                 pr->pr_allow |= flag;
 3056         else {
 3057                 pr->pr_allow &= ~flag;
 3058                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, cpr, descend)
 3059                         cpr->pr_allow &= ~flag;
 3060         }
 3061 }
 3062 
 3063 /*
 3064  * Check if a jail supports the given address family.
 3065  *
 3066  * Returns 0 if not jailed or the address family is supported, EAFNOSUPPORT
 3067  * if not.
 3068  */
 3069 int
 3070 prison_check_af(struct ucred *cred, int af)
 3071 {
 3072         struct prison *pr;
 3073         int error;
 3074 
 3075         KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
 3076 
 3077         pr = cred->cr_prison;
 3078 #ifdef VIMAGE
 3079         /* Prisons with their own network stack are not limited. */
 3080         if (prison_owns_vnet(cred))
 3081                 return (0);
 3082 #endif
 3083 
 3084         error = 0;
 3085         switch (af)
 3086         {
 3087 #ifdef INET
 3088         case AF_INET:
 3089                 if (pr->pr_flags & PR_IP4)
 3090                 {
 3091                         mtx_lock(&pr->pr_mtx);
 3092                         if ((pr->pr_flags & PR_IP4) && pr->pr_ip4 == NULL)
 3093                                 error = EAFNOSUPPORT;
 3094                         mtx_unlock(&pr->pr_mtx);
 3095                 }
 3096                 break;
 3097 #endif
 3098 #ifdef INET6
 3099         case AF_INET6:
 3100                 if (pr->pr_flags & PR_IP6)
 3101                 {
 3102                         mtx_lock(&pr->pr_mtx);
 3103                         if ((pr->pr_flags & PR_IP6) && pr->pr_ip6 == NULL)
 3104                                 error = EAFNOSUPPORT;
 3105                         mtx_unlock(&pr->pr_mtx);
 3106                 }
 3107                 break;
 3108 #endif
 3109         case AF_LOCAL:
 3110         case AF_ROUTE:
 3111                 break;
 3112         default:
 3113                 if (!(pr->pr_allow & PR_ALLOW_SOCKET_AF))
 3114                         error = EAFNOSUPPORT;
 3115         }
 3116         return (error);
 3117 }
 3118 
 3119 /*
 3120  * Check if given address belongs to the jail referenced by cred (wrapper to
 3121  * prison_check_ip[46]).
 3122  *
 3123  * Returns 0 if jail doesn't restrict the address family or if address belongs
 3124  * to jail, EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if
 3125  * the jail doesn't allow the address family.  IPv4 Address passed in in NBO.
 3126  */
 3127 int
 3128 prison_if(struct ucred *cred, const struct sockaddr *sa)
 3129 {
 3130 #ifdef INET
 3131         const struct sockaddr_in *sai;
 3132 #endif
 3133 #ifdef INET6
 3134         const struct sockaddr_in6 *sai6;
 3135 #endif
 3136         int error;
 3137 
 3138         KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
 3139         KASSERT(sa != NULL, ("%s: sa is NULL", __func__));
 3140 
 3141 #ifdef VIMAGE
 3142         if (prison_owns_vnet(cred))
 3143                 return (0);
 3144 #endif
 3145 
 3146         error = 0;
 3147         switch (sa->sa_family)
 3148         {
 3149 #ifdef INET
 3150         case AF_INET:
 3151                 sai = (const struct sockaddr_in *)sa;
 3152                 error = prison_check_ip4(cred, &sai->sin_addr);
 3153                 break;
 3154 #endif
 3155 #ifdef INET6
 3156         case AF_INET6:
 3157                 sai6 = (const struct sockaddr_in6 *)sa;
 3158                 error = prison_check_ip6(cred, &sai6->sin6_addr);
 3159                 break;
 3160 #endif
 3161         default:
 3162                 if (!(cred->cr_prison->pr_allow & PR_ALLOW_SOCKET_AF))
 3163                         error = EAFNOSUPPORT;
 3164         }
 3165         return (error);
 3166 }
 3167 
 3168 /*
 3169  * Return 0 if jails permit p1 to frob p2, otherwise ESRCH.
 3170  */
 3171 int
 3172 prison_check(struct ucred *cred1, struct ucred *cred2)
 3173 {
 3174 
 3175         return ((cred1->cr_prison == cred2->cr_prison ||
 3176             prison_ischild(cred1->cr_prison, cred2->cr_prison)) ? 0 : ESRCH);
 3177 }
 3178 
 3179 /*
 3180  * Return 1 if p2 is a child of p1, otherwise 0.
 3181  */
 3182 int
 3183 prison_ischild(struct prison *pr1, struct prison *pr2)
 3184 {
 3185 
 3186         for (pr2 = pr2->pr_parent; pr2 != NULL; pr2 = pr2->pr_parent)
 3187                 if (pr1 == pr2)
 3188                         return (1);
 3189         return (0);
 3190 }
 3191 
 3192 /*
 3193  * Return true if the prison is currently alive.  A prison is alive if it
 3194  * holds user references and it isn't being removed.
 3195  */
 3196 bool
 3197 prison_isalive(struct prison *pr)
 3198 {
 3199 
 3200         if (__predict_false(pr->pr_state != PRISON_STATE_ALIVE))
 3201                 return (false);
 3202         return (true);
 3203 }
 3204 
 3205 /*
 3206  * Return true if the prison is currently valid.  A prison is valid if it has
 3207  * been fully created, and is not being destroyed.  Note that dying prisons
 3208  * are still considered valid.  Invalid prisons won't be found under normal
 3209  * circumstances, as they're only put in that state by functions that have
 3210  * an exclusive hold on allprison_lock.
 3211  */
 3212 bool
 3213 prison_isvalid(struct prison *pr)
 3214 {
 3215 
 3216         if (__predict_false(pr->pr_state == PRISON_STATE_INVALID))
 3217                 return (false);
 3218         if (__predict_false(refcount_load(&pr->pr_ref) == 0))
 3219                 return (false);
 3220         return (true);
 3221 }
 3222 
 3223 /*
 3224  * Return 1 if the passed credential is in a jail and that jail does not
 3225  * have its own virtual network stack, otherwise 0.
 3226  */
 3227 int
 3228 jailed_without_vnet(struct ucred *cred)
 3229 {
 3230 
 3231         if (!jailed(cred))
 3232                 return (0);
 3233 #ifdef VIMAGE
 3234         if (prison_owns_vnet(cred))
 3235                 return (0);
 3236 #endif
 3237 
 3238         return (1);
 3239 }
 3240 
 3241 /*
 3242  * Return the correct hostname (domainname, et al) for the passed credential.
 3243  */
 3244 void
 3245 getcredhostname(struct ucred *cred, char *buf, size_t size)
 3246 {
 3247         struct prison *pr;
 3248 
 3249         /*
 3250          * A NULL credential can be used to shortcut to the physical
 3251          * system's hostname.
 3252          */
 3253         pr = (cred != NULL) ? cred->cr_prison : &prison0;
 3254         mtx_lock(&pr->pr_mtx);
 3255         strlcpy(buf, pr->pr_hostname, size);
 3256         mtx_unlock(&pr->pr_mtx);
 3257 }
 3258 
 3259 void
 3260 getcreddomainname(struct ucred *cred, char *buf, size_t size)
 3261 {
 3262 
 3263         mtx_lock(&cred->cr_prison->pr_mtx);
 3264         strlcpy(buf, cred->cr_prison->pr_domainname, size);
 3265         mtx_unlock(&cred->cr_prison->pr_mtx);
 3266 }
 3267 
 3268 void
 3269 getcredhostuuid(struct ucred *cred, char *buf, size_t size)
 3270 {
 3271 
 3272         mtx_lock(&cred->cr_prison->pr_mtx);
 3273         strlcpy(buf, cred->cr_prison->pr_hostuuid, size);
 3274         mtx_unlock(&cred->cr_prison->pr_mtx);
 3275 }
 3276 
 3277 void
 3278 getcredhostid(struct ucred *cred, unsigned long *hostid)
 3279 {
 3280 
 3281         mtx_lock(&cred->cr_prison->pr_mtx);
 3282         *hostid = cred->cr_prison->pr_hostid;
 3283         mtx_unlock(&cred->cr_prison->pr_mtx);
 3284 }
 3285 
 3286 void
 3287 getjailname(struct ucred *cred, char *name, size_t len)
 3288 {
 3289 
 3290         mtx_lock(&cred->cr_prison->pr_mtx);
 3291         strlcpy(name, cred->cr_prison->pr_name, len);
 3292         mtx_unlock(&cred->cr_prison->pr_mtx);
 3293 }
 3294 
 3295 #ifdef VIMAGE
 3296 /*
 3297  * Determine whether the prison represented by cred owns
 3298  * its vnet rather than having it inherited.
 3299  *
 3300  * Returns 1 in case the prison owns the vnet, 0 otherwise.
 3301  */
 3302 int
 3303 prison_owns_vnet(struct ucred *cred)
 3304 {
 3305 
 3306         /*
 3307          * vnets cannot be added/removed after jail creation,
 3308          * so no need to lock here.
 3309          */
 3310         return (cred->cr_prison->pr_flags & PR_VNET ? 1 : 0);
 3311 }
 3312 #endif
 3313 
 3314 /*
 3315  * Determine whether the subject represented by cred can "see"
 3316  * status of a mount point.
 3317  * Returns: 0 for permitted, ENOENT otherwise.
 3318  * XXX: This function should be called cr_canseemount() and should be
 3319  *      placed in kern_prot.c.
 3320  */
 3321 int
 3322 prison_canseemount(struct ucred *cred, struct mount *mp)
 3323 {
 3324         struct prison *pr;
 3325         struct statfs *sp;
 3326         size_t len;
 3327 
 3328         pr = cred->cr_prison;
 3329         if (pr->pr_enforce_statfs == 0)
 3330                 return (0);
 3331         if (pr->pr_root->v_mount == mp)
 3332                 return (0);
 3333         if (pr->pr_enforce_statfs == 2)
 3334                 return (ENOENT);
 3335         /*
 3336          * If jail's chroot directory is set to "/" we should be able to see
 3337          * all mount-points from inside a jail.
 3338          * This is ugly check, but this is the only situation when jail's
 3339          * directory ends with '/'.
 3340          */
 3341         if (strcmp(pr->pr_path, "/") == 0)
 3342                 return (0);
 3343         len = strlen(pr->pr_path);
 3344         sp = &mp->mnt_stat;
 3345         if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0)
 3346                 return (ENOENT);
 3347         /*
 3348          * Be sure that we don't have situation where jail's root directory
 3349          * is "/some/path" and mount point is "/some/pathpath".
 3350          */
 3351         if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/')
 3352                 return (ENOENT);
 3353         return (0);
 3354 }
 3355 
 3356 void
 3357 prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp)
 3358 {
 3359         char jpath[MAXPATHLEN];
 3360         struct prison *pr;
 3361         size_t len;
 3362 
 3363         pr = cred->cr_prison;
 3364         if (pr->pr_enforce_statfs == 0)
 3365                 return;
 3366         if (prison_canseemount(cred, mp) != 0) {
 3367                 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
 3368                 strlcpy(sp->f_mntonname, "[restricted]",
 3369                     sizeof(sp->f_mntonname));
 3370                 return;
 3371         }
 3372         if (pr->pr_root->v_mount == mp) {
 3373                 /*
 3374                  * Clear current buffer data, so we are sure nothing from
 3375                  * the valid path left there.
 3376                  */
 3377                 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
 3378                 *sp->f_mntonname = '/';
 3379                 return;
 3380         }
 3381         /*
 3382          * If jail's chroot directory is set to "/" we should be able to see
 3383          * all mount-points from inside a jail.
 3384          */
 3385         if (strcmp(pr->pr_path, "/") == 0)
 3386                 return;
 3387         len = strlen(pr->pr_path);
 3388         strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath));
 3389         /*
 3390          * Clear current buffer data, so we are sure nothing from
 3391          * the valid path left there.
 3392          */
 3393         bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
 3394         if (*jpath == '\0') {
 3395                 /* Should never happen. */
 3396                 *sp->f_mntonname = '/';
 3397         } else {
 3398                 strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname));
 3399         }
 3400 }
 3401 
/*
 * Check whether permission for a specific privilege is granted within jail.
 * We have a specific list of accepted privileges; the rest are denied.
 *
 * cred is the credential being checked and priv is the PRIV_* value being
 * requested.  Returns 0 when the credential is not jailed or the privilege
 * is granted, and EPERM when it is denied.
 */
int
prison_priv_check(struct ucred *cred, int priv)
{
	struct prison *pr;
	int error;

	/*
	 * Some policies have custom handlers. This routine should not be
	 * called for them. See priv_check_cred().
	 */
	switch (priv) {
	case PRIV_VFS_LOOKUP:
	case PRIV_VFS_GENERATION:
		KASSERT(0, ("prison_priv_check instead of a custom handler "
		    "called for %d\n", priv));
	}

	/* Credentials outside any jail are not restricted here. */
	if (!jailed(cred))
		return (0);

#ifdef VIMAGE
	/*
	 * Privileges specific to prisons with a virtual network stack.
	 * There might be a duplicate entry here in case the privilege
	 * is only granted conditionally in the legacy jail case.
	 *
	 * Every case label below falls through to the single PR_VNET test
	 * at the bottom of this switch.
	 */
	switch (priv) {
#ifdef notyet
		/*
		 * NFS-specific privileges.
		 */
	case PRIV_NFS_DAEMON:
	case PRIV_NFS_LOCKD:
#endif
		/*
		 * Network stack privileges.
		 */
	case PRIV_NET_BRIDGE:
	case PRIV_NET_GRE:
	case PRIV_NET_BPF:
	case PRIV_NET_RAW:		/* Dup, cond. in legacy jail case. */
	case PRIV_NET_ROUTE:
	case PRIV_NET_TAP:
	case PRIV_NET_SETIFMTU:
	case PRIV_NET_SETIFFLAGS:
	case PRIV_NET_SETIFCAP:
	case PRIV_NET_SETIFDESCR:
	case PRIV_NET_SETIFNAME :
	case PRIV_NET_SETIFMETRIC:
	case PRIV_NET_SETIFPHYS:
	case PRIV_NET_SETIFMAC:
	case PRIV_NET_SETLANPCP:
	case PRIV_NET_ADDMULTI:
	case PRIV_NET_DELMULTI:
	case PRIV_NET_HWIOCTL:
	case PRIV_NET_SETLLADDR:
	case PRIV_NET_ADDIFGROUP:
	case PRIV_NET_DELIFGROUP:
	case PRIV_NET_IFCREATE:
	case PRIV_NET_IFDESTROY:
	case PRIV_NET_ADDIFADDR:
	case PRIV_NET_DELIFADDR:
	case PRIV_NET_LAGG:
	case PRIV_NET_GIF:
	case PRIV_NET_SETIFVNET:
	case PRIV_NET_SETIFFIB:
	case PRIV_NET_ME:
	case PRIV_NET_WG:

		/*
		 * 802.11-related privileges.
		 */
	case PRIV_NET80211_VAP_GETKEY:
	case PRIV_NET80211_VAP_MANAGE:

#ifdef notyet
		/*
		 * ATM privileges.
		 */
	case PRIV_NETATM_CFG:
	case PRIV_NETATM_ADD:
	case PRIV_NETATM_DEL:
	case PRIV_NETATM_SET:

		/*
		 * Bluetooth privileges.
		 */
	case PRIV_NETBLUETOOTH_RAW:
#endif

		/*
		 * Netgraph and netgraph module privileges.
		 */
	case PRIV_NETGRAPH_CONTROL:
#ifdef notyet
	case PRIV_NETGRAPH_TTY:
#endif

		/*
		 * IPv4 and IPv6 privileges.
		 */
	case PRIV_NETINET_IPFW:
	case PRIV_NETINET_DIVERT:
	case PRIV_NETINET_PF:
	case PRIV_NETINET_DUMMYNET:
	case PRIV_NETINET_CARP:
	case PRIV_NETINET_MROUTE:
	case PRIV_NETINET_RAW:
	case PRIV_NETINET_ADDRCTRL6:
	case PRIV_NETINET_ND6:
	case PRIV_NETINET_SCOPE6:
	case PRIV_NETINET_ALIFETIME6:
	case PRIV_NETINET_IPSEC:
	case PRIV_NETINET_BINDANY:

#ifdef notyet
		/*
		 * NCP privileges.
		 */
	case PRIV_NETNCP:

		/*
		 * SMB privileges.
		 */
	case PRIV_NETSMB:
#endif

	/*
	 * No default: or deny here.
	 * In case of no permit fall through to next switch().
	 */
		if (cred->cr_prison->pr_flags & PR_VNET)
			return (0);
	}
#endif /* VIMAGE */

	/*
	 * Privileges evaluated for every jail.  The first long run of case
	 * labels shares the unconditional "return (0)" after PRIV_KMEM_READ.
	 */
	switch (priv) {
		/*
		 * Allow ktrace privileges for root in jail.
		 */
	case PRIV_KTRACE:

#if 0
		/*
		 * Allow jailed processes to configure audit identity and
		 * submit audit records (login, etc).  In the future we may
		 * want to further refine the relationship between audit and
		 * jail.
		 */
	case PRIV_AUDIT_GETAUDIT:
	case PRIV_AUDIT_SETAUDIT:
	case PRIV_AUDIT_SUBMIT:
#endif

		/*
		 * Allow jailed processes to manipulate process UNIX
		 * credentials in any way they see fit.
		 */
	case PRIV_CRED_SETUID:
	case PRIV_CRED_SETEUID:
	case PRIV_CRED_SETGID:
	case PRIV_CRED_SETEGID:
	case PRIV_CRED_SETGROUPS:
	case PRIV_CRED_SETREUID:
	case PRIV_CRED_SETREGID:
	case PRIV_CRED_SETRESUID:
	case PRIV_CRED_SETRESGID:

		/*
		 * Jail implements visibility constraints already, so allow
		 * jailed root to override uid/gid-based constraints.
		 */
	case PRIV_SEEOTHERGIDS:
	case PRIV_SEEOTHERUIDS:

		/*
		 * Jail implements inter-process debugging limits already, so
		 * allow jailed root various debugging privileges.
		 */
	case PRIV_DEBUG_DIFFCRED:
	case PRIV_DEBUG_SUGID:
	case PRIV_DEBUG_UNPRIV:

		/*
		 * Allow jail to set various resource limits and login
		 * properties, and for now, exceed process resource limits.
		 */
	case PRIV_PROC_LIMIT:
	case PRIV_PROC_SETLOGIN:
	case PRIV_PROC_SETRLIMIT:

		/*
		 * System V and POSIX IPC privileges are granted in jail.
		 */
	case PRIV_IPC_READ:
	case PRIV_IPC_WRITE:
	case PRIV_IPC_ADMIN:
	case PRIV_IPC_MSGSIZE:
	case PRIV_MQ_ADMIN:

		/*
		 * Jail operations within a jail work on child jails.
		 */
	case PRIV_JAIL_ATTACH:
	case PRIV_JAIL_SET:
	case PRIV_JAIL_REMOVE:

		/*
		 * Jail implements its own inter-process limits, so allow
		 * root processes in jail to change scheduling on other
		 * processes in the same jail.  Likewise for signalling.
		 */
	case PRIV_SCHED_DIFFCRED:
	case PRIV_SCHED_CPUSET:
	case PRIV_SIGNAL_DIFFCRED:
	case PRIV_SIGNAL_SUGID:

		/*
		 * Allow jailed processes to write to sysctls marked as jail
		 * writable.
		 */
	case PRIV_SYSCTL_WRITEJAIL:

		/*
		 * Allow root in jail to manage a variety of quota
		 * properties.  These should likely be conditional on a
		 * configuration option.
		 */
	case PRIV_VFS_GETQUOTA:
	case PRIV_VFS_SETQUOTA:

		/*
		 * Since Jail relies on chroot() to implement file system
		 * protections, grant many VFS privileges to root in jail.
		 * Be careful to exclude mount-related and NFS-related
		 * privileges.
		 */
	case PRIV_VFS_READ:
	case PRIV_VFS_WRITE:
	case PRIV_VFS_ADMIN:
	case PRIV_VFS_EXEC:
	case PRIV_VFS_BLOCKRESERVE:	/* XXXRW: Slightly surprising. */
	case PRIV_VFS_CHFLAGS_DEV:
	case PRIV_VFS_CHOWN:
	case PRIV_VFS_CHROOT:
	case PRIV_VFS_RETAINSUGID:
	case PRIV_VFS_FCHROOT:
	case PRIV_VFS_LINK:
	case PRIV_VFS_SETGID:
	case PRIV_VFS_STAT:
	case PRIV_VFS_STICKYFILE:

		/*
		 * As in the non-jail case, non-root users are expected to be
		 * able to read kernel/physical memory (provided /dev/[k]mem
		 * exists in the jail and they have permission to access it).
		 */
	case PRIV_KMEM_READ:
		return (0);

		/*
		 * Depending on the global setting, allow privilege of
		 * setting system flags.
		 */
	case PRIV_VFS_SYSFLAGS:
		if (cred->cr_prison->pr_allow & PR_ALLOW_CHFLAGS)
			return (0);
		else
			return (EPERM);

		/*
		 * Depending on the global setting, allow privilege of
		 * mounting/unmounting file systems.  Both pr_allow and
		 * pr_enforce_statfs are read under the prison lock.
		 */
	case PRIV_VFS_MOUNT:
	case PRIV_VFS_UNMOUNT:
	case PRIV_VFS_MOUNT_NONUSER:
	case PRIV_VFS_MOUNT_OWNER:
		pr = cred->cr_prison;
		prison_lock(pr);
		if (pr->pr_allow & PR_ALLOW_MOUNT && pr->pr_enforce_statfs < 2)
			error = 0;
		else
			error = EPERM;
		prison_unlock(pr);
		return (error);

		/*
		 * Jails should hold no disposition on the PRIV_VFS_READ_DIR
		 * policy.  priv_check_cred will not specifically allow it, and
		 * we may want a MAC policy to allow it.
		 */
	case PRIV_VFS_READ_DIR:
		return (0);

		/*
		 * Conditionally allow locking (unlocking) physical pages
		 * in memory.
		 */
	case PRIV_VM_MLOCK:
	case PRIV_VM_MUNLOCK:
		if (cred->cr_prison->pr_allow & PR_ALLOW_MLOCK)
			return (0);
		else
			return (EPERM);

		/*
		 * Conditionally allow jailed root to bind reserved ports.
		 */
	case PRIV_NETINET_RESERVEDPORT:
		if (cred->cr_prison->pr_allow & PR_ALLOW_RESERVED_PORTS)
			return (0);
		else
			return (EPERM);

		/*
		 * Allow jailed root to reuse in-use ports.
		 */
	case PRIV_NETINET_REUSEPORT:
		return (0);

		/*
		 * Allow jailed root to set certain IPv4/6 (option) headers.
		 */
	case PRIV_NETINET_SETHDROPTS:
		return (0);

		/*
		 * Conditionally allow creating raw sockets in jail.
		 */
	case PRIV_NETINET_RAW:
		if (cred->cr_prison->pr_allow & PR_ALLOW_RAW_SOCKETS)
			return (0);
		else
			return (EPERM);

		/*
		 * Since jail implements its own visibility limits on netstat
		 * sysctls, allow getcred.  This allows identd to work in
		 * jail.
		 */
	case PRIV_NETINET_GETCRED:
		return (0);

		/*
		 * Allow jailed root to set loginclass.
		 */
	case PRIV_PROC_SETLOGINCLASS:
		return (0);

		/*
		 * Do not allow a process inside a jail to read the kernel
		 * message buffer unless explicitly permitted.
		 */
	case PRIV_MSGBUF:
		if (cred->cr_prison->pr_allow & PR_ALLOW_READ_MSGBUF)
			return (0);
		return (EPERM);

	default:
		/*
		 * In all remaining cases, deny the privilege request.  This
		 * includes almost all network privileges, many system
		 * configuration privileges.
		 */
		return (EPERM);
	}
}
 3774 
 3775 /*
 3776  * Return the part of pr2's name that is relative to pr1, or the whole name
 3777  * if it does not directly follow.
 3778  */
 3779 
 3780 char *
 3781 prison_name(struct prison *pr1, struct prison *pr2)
 3782 {
 3783         char *name;
 3784 
 3785         /* Jails see themselves as "" (if they see themselves at all). */
 3786         if (pr1 == pr2)
 3787                 return "";
 3788         name = pr2->pr_name;
 3789         if (prison_ischild(pr1, pr2)) {
 3790                 /*
 3791                  * pr1 isn't locked (and allprison_lock may not be either)
 3792                  * so its length can't be counted on.  But the number of dots
 3793                  * can be counted on - and counted.
 3794                  */
 3795                 for (; pr1 != &prison0; pr1 = pr1->pr_parent)
 3796                         name = strchr(name, '.') + 1;
 3797         }
 3798         return (name);
 3799 }
 3800 
 3801 /*
 3802  * Return the part of pr2's path that is relative to pr1, or the whole path
 3803  * if it does not directly follow.
 3804  */
 3805 static char *
 3806 prison_path(struct prison *pr1, struct prison *pr2)
 3807 {
 3808         char *path1, *path2;
 3809         int len1;
 3810 
 3811         path1 = pr1->pr_path;
 3812         path2 = pr2->pr_path;
 3813         if (!strcmp(path1, "/"))
 3814                 return (path2);
 3815         len1 = strlen(path1);
 3816         if (strncmp(path1, path2, len1))
 3817                 return (path2);
 3818         if (path2[len1] == '\0')
 3819                 return "/";
 3820         if (path2[len1] == '/')
 3821                 return (path2 + len1);
 3822         return (path2);
 3823 }
 3824 
/*
 * Jail-related sysctls.
 *
 * This declares the "jail" node beneath the _security parent.  The sysctls
 * that follow in this file attach to it through the _security_jail parent
 * name.
 */
static SYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Jails");
 3830 
/*
 * Handler for the security.jail.list sysctl.
 *
 * Visits the prisons produced by FOREACH_PRISON_DESCENDANT() starting from
 * the requesting thread's prison.  For each one it copies out a struct
 * xprison, followed by that prison's IPv4 addresses (INET) and then its IPv6
 * addresses (INET6).  Path and name are reported relative to the requesting
 * prison via prison_path() and prison_name().
 *
 * Returns 0, or the first error returned by SYSCTL_OUT().
 */
static int
sysctl_jail_list(SYSCTL_HANDLER_ARGS)
{
        struct xprison *xp;
        struct prison *pr, *cpr;
        /*
         * Scratch copies of the address arrays.  They only ever grow and are
         * reused for every prison visited.
         */
#ifdef INET
        struct in_addr *ip4 = NULL;
        int ip4s = 0;
#endif
#ifdef INET6
        struct in6_addr *ip6 = NULL;
        int ip6s = 0;
#endif
        int descend, error;

        xp = malloc(sizeof(*xp), M_TEMP, M_WAITOK);
        pr = req->td->td_ucred->cr_prison;
        error = 0;
        sx_slock(&allprison_lock);
        FOREACH_PRISON_DESCENDANT(pr, cpr, descend) {
#if defined(INET) || defined(INET6)
 again:
#endif
                mtx_lock(&cpr->pr_mtx);
#ifdef INET
                if (cpr->pr_ip4s > 0) {
                        /*
                         * Scratch buffer too small: drop the prison mutex,
                         * grow the buffer with an M_WAITOK realloc, then
                         * start over so the count is re-read under the lock.
                         */
                        if (ip4s < cpr->pr_ip4s) {
                                ip4s = cpr->pr_ip4s;
                                mtx_unlock(&cpr->pr_mtx);
                                ip4 = realloc(ip4, ip4s *
                                    sizeof(struct in_addr), M_TEMP, M_WAITOK);
                                goto again;
                        }
                        bcopy(cpr->pr_ip4, ip4,
                            cpr->pr_ip4s * sizeof(struct in_addr));
                }
#endif
#ifdef INET6
                if (cpr->pr_ip6s > 0) {
                        /* Same grow-and-retry scheme as for IPv4 above. */
                        if (ip6s < cpr->pr_ip6s) {
                                ip6s = cpr->pr_ip6s;
                                mtx_unlock(&cpr->pr_mtx);
                                ip6 = realloc(ip6, ip6s *
                                    sizeof(struct in6_addr), M_TEMP, M_WAITOK);
                                goto again;
                        }
                        bcopy(cpr->pr_ip6, ip6,
                            cpr->pr_ip6s * sizeof(struct in6_addr));
                }
#endif
                /* Snapshot the exported fields while cpr is still locked. */
                bzero(xp, sizeof(*xp));
                xp->pr_version = XPRISON_VERSION;
                xp->pr_id = cpr->pr_id;
                xp->pr_state = cpr->pr_state;
                strlcpy(xp->pr_path, prison_path(pr, cpr), sizeof(xp->pr_path));
                strlcpy(xp->pr_host, cpr->pr_hostname, sizeof(xp->pr_host));
                strlcpy(xp->pr_name, prison_name(pr, cpr), sizeof(xp->pr_name));
#ifdef INET
                xp->pr_ip4s = cpr->pr_ip4s;
#endif
#ifdef INET6
                xp->pr_ip6s = cpr->pr_ip6s;
#endif
                mtx_unlock(&cpr->pr_mtx);
                /*
                 * Copy out from the private snapshots only, now that the
                 * prison mutex has been released.
                 */
                error = SYSCTL_OUT(req, xp, sizeof(*xp));
                if (error)
                        break;
#ifdef INET
                if (xp->pr_ip4s > 0) {
                        error = SYSCTL_OUT(req, ip4,
                            xp->pr_ip4s * sizeof(struct in_addr));
                        if (error)
                                break;
                }
#endif
#ifdef INET6
                if (xp->pr_ip6s > 0) {
                        error = SYSCTL_OUT(req, ip6,
                            xp->pr_ip6s * sizeof(struct in6_addr));
                        if (error)
                                break;
                }
#endif
        }
        sx_sunlock(&allprison_lock);
        free(xp, M_TEMP);
#ifdef INET
        free(ip4, M_TEMP);
#endif
#ifdef INET6
        free(ip6, M_TEMP);
#endif
        return (error);
}

/* security.jail.list: read-only, struct-typed, served by the handler above. */
SYSCTL_OID(_security_jail, OID_AUTO, list,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
    sysctl_jail_list, "S", "List of active jails");
 3929 
 3930 static int
 3931 sysctl_jail_jailed(SYSCTL_HANDLER_ARGS)
 3932 {
 3933         int error, injail;
 3934 
 3935         injail = jailed(req->td->td_ucred);
 3936         error = SYSCTL_OUT(req, &injail, sizeof(injail));
 3937 
 3938         return (error);
 3939 }
 3940 
 3941 SYSCTL_PROC(_security_jail, OID_AUTO, jailed,
 3942     CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
 3943     sysctl_jail_jailed, "I", "Process in jail?");
 3944 
 3945 static int
 3946 sysctl_jail_vnet(SYSCTL_HANDLER_ARGS)
 3947 {
 3948         int error, havevnet;
 3949 #ifdef VIMAGE
 3950         struct ucred *cred = req->td->td_ucred;
 3951 
 3952         havevnet = jailed(cred) && prison_owns_vnet(cred);
 3953 #else
 3954         havevnet = 0;
 3955 #endif
 3956         error = SYSCTL_OUT(req, &havevnet, sizeof(havevnet));
 3957 
 3958         return (error);
 3959 }
 3960 
 3961 SYSCTL_PROC(_security_jail, OID_AUTO, vnet,
 3962     CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
 3963     sysctl_jail_vnet, "I", "Jail owns vnet?");
 3964 
#if defined(INET) || defined(INET6)
/*
 * security.jail.jail_max_af_ips: read-write unsigned knob backed directly by
 * the jail_max_af_ips variable.  It only exists when INET or INET6 is
 * compiled in, and its description marks it as deprecated.
 */
SYSCTL_UINT(_security_jail, OID_AUTO, jail_max_af_ips, CTLFLAG_RW,
    &jail_max_af_ips, 0,
    "Number of IP addresses a jail may have at most per address family (deprecated)");
#endif
 3970 
 3971 /*
 3972  * Default parameters for jail(2) compatibility.  For historical reasons,
 3973  * the sysctl names have varying similarity to the parameter names.  Prisons
 3974  * just see their own parameters, and can't change them.
 3975  */
 3976 static int
 3977 sysctl_jail_default_allow(SYSCTL_HANDLER_ARGS)
 3978 {
 3979         int error, i;
 3980 
 3981         /* Get the current flag value, and convert it to a boolean. */
 3982         if (req->td->td_ucred->cr_prison == &prison0) {
 3983                 mtx_lock(&prison0.pr_mtx);
 3984                 i = (jail_default_allow & arg2) != 0;
 3985                 mtx_unlock(&prison0.pr_mtx);
 3986         } else
 3987                 i = prison_allow(req->td->td_ucred, arg2);
 3988 
 3989         if (arg1 != NULL)
 3990                 i = !i;
 3991         error = sysctl_handle_int(oidp, &i, 0, req);
 3992         if (error || !req->newptr)
 3993                 return (error);
 3994         i = i ? arg2 : 0;
 3995         if (arg1 != NULL)
 3996                 i ^= arg2;
 3997         /*
 3998          * The sysctls don't have CTLFLAGS_PRISON, so assume prison0
 3999          * for writing.
 4000          */
 4001         mtx_lock(&prison0.pr_mtx);
 4002         jail_default_allow = (jail_default_allow & ~arg2) | i;
 4003         mtx_unlock(&prison0.pr_mtx);
 4004         return (0);
 4005 }
 4006 
/*
 * Deprecated global security.jail.* permission knobs, all served by
 * sysctl_jail_default_allow().  arg2 carries the PR_ALLOW_* bit each knob
 * controls.  socket_unixiproute_only is the only one passing a non-NULL
 * arg1, which makes the handler invert the sense: the knob reads as set
 * when PR_ALLOW_SOCKET_AF is clear.
 */
SYSCTL_PROC(_security_jail, OID_AUTO, set_hostname_allowed,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    NULL, PR_ALLOW_SET_HOSTNAME, sysctl_jail_default_allow, "I",
    "Processes in jail can set their hostnames (deprecated)");
SYSCTL_PROC(_security_jail, OID_AUTO, socket_unixiproute_only,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    (void *)1, PR_ALLOW_SOCKET_AF, sysctl_jail_default_allow, "I",
    "Processes in jail are limited to creating UNIX/IP/route sockets only (deprecated)");
SYSCTL_PROC(_security_jail, OID_AUTO, sysvipc_allowed,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    NULL, PR_ALLOW_SYSVIPC, sysctl_jail_default_allow, "I",
    "Processes in jail can use System V IPC primitives (deprecated)");
SYSCTL_PROC(_security_jail, OID_AUTO, allow_raw_sockets,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    NULL, PR_ALLOW_RAW_SOCKETS, sysctl_jail_default_allow, "I",
    "Prison root can create raw sockets (deprecated)");
SYSCTL_PROC(_security_jail, OID_AUTO, chflags_allowed,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    NULL, PR_ALLOW_CHFLAGS, sysctl_jail_default_allow, "I",
    "Processes in jail can alter system file flags (deprecated)");
SYSCTL_PROC(_security_jail, OID_AUTO, mount_allowed,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    NULL, PR_ALLOW_MOUNT, sysctl_jail_default_allow, "I",
    "Processes in jail can mount/unmount jail-friendly file systems (deprecated)");
 4031 
 4032 static int
 4033 sysctl_jail_default_level(SYSCTL_HANDLER_ARGS)
 4034 {
 4035         struct prison *pr;
 4036         int level, error;
 4037 
 4038         pr = req->td->td_ucred->cr_prison;
 4039         level = (pr == &prison0) ? *(int *)arg1 : *(int *)((char *)pr + arg2);
 4040         error = sysctl_handle_int(oidp, &level, 0, req);
 4041         if (error || !req->newptr)
 4042                 return (error);
 4043         *(int *)arg1 = level;
 4044         return (0);
 4045 }
 4046 
/*
 * Deprecated security.jail.* knobs served by sysctl_jail_default_level().
 * arg1 points at the global default and arg2 is the offset of the matching
 * field in struct prison.  enforce_statfs is read-write; devfs_ruleset is
 * read-only.
 */
SYSCTL_PROC(_security_jail, OID_AUTO, enforce_statfs,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    &jail_default_enforce_statfs, offsetof(struct prison, pr_enforce_statfs),
    sysctl_jail_default_level, "I",
    "Processes in jail cannot see all mounted file systems (deprecated)");

SYSCTL_PROC(_security_jail, OID_AUTO, devfs_ruleset,
    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
    &jail_default_devfs_rsnum, offsetof(struct prison, pr_devfs_rsnum),
    sysctl_jail_default_level, "I",
    "Ruleset for the devfs filesystem in jail (deprecated)");
 4058 
/*
 * Nodes to describe jail parameters.  Maximum length of string parameters
 * is returned in the string itself, and the other parameters exist merely
 * to make themselves and their types known.
 *
 * This declares the parent node, "param", beneath security.jail.
 */
SYSCTL_NODE(_security_jail, OID_AUTO, param, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Jail parameters");
 4066 
 4067 int
 4068 sysctl_jail_param(SYSCTL_HANDLER_ARGS)
 4069 {
 4070         int i;
 4071         long l;
 4072         size_t s;
 4073         char numbuf[12];
 4074 
 4075         switch (oidp->oid_kind & CTLTYPE)
 4076         {
 4077         case CTLTYPE_LONG:
 4078         case CTLTYPE_ULONG:
 4079                 l = 0;
 4080 #ifdef SCTL_MASK32
 4081                 if (!(req->flags & SCTL_MASK32))
 4082 #endif
 4083                         return (SYSCTL_OUT(req, &l, sizeof(l)));
 4084         case CTLTYPE_INT:
 4085         case CTLTYPE_UINT:
 4086                 i = 0;
 4087                 return (SYSCTL_OUT(req, &i, sizeof(i)));
 4088         case CTLTYPE_STRING:
 4089                 snprintf(numbuf, sizeof(numbuf), "%jd", (intmax_t)arg2);
 4090                 return
 4091                     (sysctl_handle_string(oidp, numbuf, sizeof(numbuf), req));
 4092         case CTLTYPE_STRUCT:
 4093                 s = (size_t)arg2;
 4094                 return (SYSCTL_OUT(req, &s, sizeof(s)));
 4095         }
 4096         return (0);
 4097 }
 4098 
/*
 * CTLFLAG_RDTUN in the following indicates jail parameters that can be set at
 * jail creation time but cannot be changed in an existing jail.
 *
 * Core per-jail parameters: identity (jid, parent, name, path), security
 * and OS-version settings, persistence and state.
 * NOTE(review): the SYSCTL_JAIL_PARAM* macros are defined outside this part
 * of the file; presumably they register each entry beneath
 * security.jail.param -- confirm against their definition.
 */
SYSCTL_JAIL_PARAM(, jid, CTLTYPE_INT | CTLFLAG_RDTUN, "I", "Jail ID");
SYSCTL_JAIL_PARAM(, parent, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail parent ID");
SYSCTL_JAIL_PARAM_STRING(, name, CTLFLAG_RW, MAXHOSTNAMELEN, "Jail name");
SYSCTL_JAIL_PARAM_STRING(, path, CTLFLAG_RDTUN, MAXPATHLEN, "Jail root path");
SYSCTL_JAIL_PARAM(, securelevel, CTLTYPE_INT | CTLFLAG_RW,
    "I", "Jail secure level");
SYSCTL_JAIL_PARAM(, osreldate, CTLTYPE_INT | CTLFLAG_RDTUN, "I",
    "Jail value for kern.osreldate and uname -K");
SYSCTL_JAIL_PARAM_STRING(, osrelease, CTLFLAG_RDTUN, OSRELEASELEN,
    "Jail value for kern.osrelease and uname -r");
SYSCTL_JAIL_PARAM(, enforce_statfs, CTLTYPE_INT | CTLFLAG_RW,
    "I", "Jail cannot see all mounted file systems");
SYSCTL_JAIL_PARAM(, devfs_ruleset, CTLTYPE_INT | CTLFLAG_RW,
    "I", "Ruleset for in-jail devfs mounts");
SYSCTL_JAIL_PARAM(, persist, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Jail persistence");
#ifdef VIMAGE
/* The vnet parameter is only described when VIMAGE is compiled in. */
SYSCTL_JAIL_PARAM(, vnet, CTLTYPE_INT | CTLFLAG_RDTUN,
    "E,jailsys", "Virtual network stack");
#endif
SYSCTL_JAIL_PARAM(, dying, CTLTYPE_INT | CTLFLAG_RD,
    "B", "Jail is in the process of shutting down");
 4125 
/* children.*: current (read-only) and maximum number of child jails. */
SYSCTL_JAIL_PARAM_NODE(children, "Number of child jails");
SYSCTL_JAIL_PARAM(_children, cur, CTLTYPE_INT | CTLFLAG_RD,
    "I", "Current number of child jails");
SYSCTL_JAIL_PARAM(_children, max, CTLTYPE_INT | CTLFLAG_RW,
    "I", "Maximum number of child jails");

/* host.*: hostname, NIS domain name, host UUID and host ID of the jail. */
SYSCTL_JAIL_PARAM_SYS_NODE(host, CTLFLAG_RW, "Jail host info");
SYSCTL_JAIL_PARAM_STRING(_host, hostname, CTLFLAG_RW, MAXHOSTNAMELEN,
    "Jail hostname");
SYSCTL_JAIL_PARAM_STRING(_host, domainname, CTLFLAG_RW, MAXHOSTNAMELEN,
    "Jail NIS domainname");
SYSCTL_JAIL_PARAM_STRING(_host, hostuuid, CTLFLAG_RW, HOSTUUIDLEN,
    "Jail host UUID");
SYSCTL_JAIL_PARAM(_host, hostid, CTLTYPE_ULONG | CTLFLAG_RW,
    "LU", "Jail host ID");

/* cpuset.id: read-only ID of the jail's cpuset. */
SYSCTL_JAIL_PARAM_NODE(cpuset, "Jail cpuset");
SYSCTL_JAIL_PARAM(_cpuset, id, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail cpuset ID");
 4144 
/*
 * ip4.* and ip6.*: per-family address parameters, each group only present
 * when its address family is compiled in.  The addr entries are struct
 * parameters whose element size is that of one address; saddrsel is a
 * boolean.
 */
#ifdef INET
SYSCTL_JAIL_PARAM_SYS_NODE(ip4, CTLFLAG_RDTUN,
    "Jail IPv4 address virtualization");
SYSCTL_JAIL_PARAM_STRUCT(_ip4, addr, CTLFLAG_RW, sizeof(struct in_addr),
    "S,in_addr,a", "Jail IPv4 addresses");
SYSCTL_JAIL_PARAM(_ip4, saddrsel, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Do (not) use IPv4 source address selection rather than the "
    "primary jail IPv4 address.");
#endif
#ifdef INET6
SYSCTL_JAIL_PARAM_SYS_NODE(ip6, CTLFLAG_RDTUN,
    "Jail IPv6 address virtualization");
SYSCTL_JAIL_PARAM_STRUCT(_ip6, addr, CTLFLAG_RW, sizeof(struct in6_addr),
    "S,in6_addr,a", "Jail IPv6 addresses");
SYSCTL_JAIL_PARAM(_ip6, saddrsel, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Do (not) use IPv6 source address selection rather than the "
    "primary jail IPv6 address.");
#endif
 4163 
/*
 * allow.*: the statically declared boolean permission parameters.  Further
 * allow.<name> and allow.<prefix>.<name> parameters are created at run time
 * by prison_add_allow().
 */
SYSCTL_JAIL_PARAM_NODE(allow, "Jail permission flags");
SYSCTL_JAIL_PARAM(_allow, set_hostname, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Jail may set hostname");
SYSCTL_JAIL_PARAM(_allow, sysvipc, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Jail may use SYSV IPC");
SYSCTL_JAIL_PARAM(_allow, raw_sockets, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Jail may create raw sockets");
SYSCTL_JAIL_PARAM(_allow, chflags, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Jail may alter system file flags");
SYSCTL_JAIL_PARAM(_allow, quotas, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Jail may set file quotas");
SYSCTL_JAIL_PARAM(_allow, socket_af, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Jail may create sockets other than just UNIX/IPv4/IPv6/route");
SYSCTL_JAIL_PARAM(_allow, mlock, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Jail may lock (unlock) physical pages in memory");
SYSCTL_JAIL_PARAM(_allow, reserved_ports, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Jail may bind sockets to reserved ports");
SYSCTL_JAIL_PARAM(_allow, read_msgbuf, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Jail may read the kernel message buffer");
SYSCTL_JAIL_PARAM(_allow, unprivileged_proc_debug, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Unprivileged processes may use process debugging facilities");
SYSCTL_JAIL_PARAM(_allow, suser, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Processes in jail with uid 0 have privilege");

/* allow.mount and the general allow.mount permission itself. */
SYSCTL_JAIL_PARAM_SUBNODE(allow, mount, "Jail mount/unmount permission flags");
SYSCTL_JAIL_PARAM(_allow_mount, , CTLTYPE_INT | CTLFLAG_RW,
    "B", "Jail may mount/unmount jail-friendly file systems in general");
 4191 
/*
 * Add a dynamic parameter allow.<name>, or allow.<prefix>.<name>.  Return
 * its associated bit in the pr_allow bitmask, or zero if the parameter was
 * not created.
 *
 * prefix       - optional sub-node name beneath "allow"; NULL for none.
 * name         - parameter name.
 * prefix_descr - description used when adding the prefix node.
 * descr        - description of the parameter's sysctl.
 *
 * If a parameter with the same name is already recorded in pr_flag_allow,
 * its existing bit is returned and nothing new is created.
 */
unsigned
prison_add_allow(const char *prefix, const char *name, const char *prefix_descr,
    const char *descr)
{
        struct bool_flags *bf;
        struct sysctl_oid *parent;
        char *allow_name, *allow_noname, *allowed;
#ifndef NO_SYSCTL_DESCR
        char *descr_deprecated;
#endif
        u_int allow_flag;

        /*
         * Build both spellings of the parameter, "allow.[prefix.]name" and
         * "allow.[prefix.]noname".  The || short-circuits, so when either
         * allocation fails only allow_name can need freeing.
         * NOTE(review): this relies on asprintf() leaving *allow_name in a
         * state that is safe to free() on failure -- confirm.
         */
        if (prefix
            ? asprintf(&allow_name, M_PRISON, "allow.%s.%s", prefix, name)
                < 0 ||
              asprintf(&allow_noname, M_PRISON, "allow.%s.no%s", prefix, name)
                < 0
            : asprintf(&allow_name, M_PRISON, "allow.%s", name) < 0 ||
              asprintf(&allow_noname, M_PRISON, "allow.no%s", name) < 0) {
                free(allow_name, M_PRISON);
                return 0;
        }

        /*
         * See if this parameter has already been added, i.e. a module was
         * previously loaded/unloaded.
         */
        mtx_lock(&prison0.pr_mtx);
        for (bf = pr_flag_allow;
             bf < pr_flag_allow + nitems(pr_flag_allow) &&
                atomic_load_int(&bf->flag) != 0;
             bf++) {
                if (strcmp(bf->name, allow_name) == 0) {
                        allow_flag = bf->flag;
                        goto no_add;
                }
        }

        /*
         * Find a free bit in pr_allow_all, failing if there are none
         * (which shouldn't happen as long as we keep track of how many
         * potential dynamic flags exist).
         */
        for (allow_flag = 1;; allow_flag <<= 1) {
                /* The bit was shifted out: every bit is taken. */
                if (allow_flag == 0)
                        goto no_add;
                if ((pr_allow_all & allow_flag) == 0)
                        break;
        }

        /* Note the parameter in the next open slot in pr_flag_allow. */
        for (bf = pr_flag_allow; ; bf++) {
                if (bf == pr_flag_allow + nitems(pr_flag_allow)) {
                        /* This should never happen, but is not fatal. */
                        allow_flag = 0;
                        goto no_add;
                }
                if (atomic_load_int(&bf->flag) == 0)
                        break;
        }
        /* The table entry takes ownership of both name strings. */
        bf->name = allow_name;
        bf->noname = allow_noname;
        pr_allow_all |= allow_flag;
        /*
         * prison0 always has permission for the new parameter.
         * Other jails must have it granted to them.
         */
        prison0.pr_allow |= allow_flag;
        /* The flag indicates a valid entry, so make sure it is set last. */
        atomic_store_rel_int(&bf->flag, allow_flag);
        mtx_unlock(&prison0.pr_mtx);

        /*
         * Create sysctls for the parameter, and the back-compat global
         * permission.
         */
        parent = prefix
            ? SYSCTL_ADD_NODE(NULL,
                  SYSCTL_CHILDREN(&sysctl___security_jail_param_allow),
                  OID_AUTO, prefix, CTLFLAG_MPSAFE, 0, prefix_descr)
            : &sysctl___security_jail_param_allow;
        (void)SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(parent), OID_AUTO,
            name, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
            NULL, 0, sysctl_jail_param, "B", descr);
        /*
         * The back-compat knob is named "[prefix_]name_allowed".  It is
         * skipped, without failing the call, if the name cannot be built.
         */
        if ((prefix
             ? asprintf(&allowed, M_TEMP, "%s_%s_allowed", prefix, name)
             : asprintf(&allowed, M_TEMP, "%s_allowed", name)) >= 0) {
#ifndef NO_SYSCTL_DESCR
                (void)asprintf(&descr_deprecated, M_TEMP, "%s (deprecated)",
                    descr);
#endif
                (void)SYSCTL_ADD_PROC(NULL,
                    SYSCTL_CHILDREN(&sysctl___security_jail), OID_AUTO, allowed,
                    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, allow_flag,
                    sysctl_jail_default_allow, "I", descr_deprecated);
#ifndef NO_SYSCTL_DESCR
                free(descr_deprecated, M_TEMP);
#endif
                free(allowed, M_TEMP);
        }
        return allow_flag;

        /*
         * Nothing new was recorded: release the lock and the unused name
         * strings.  allow_flag is the existing bit or zero at this point.
         */
 no_add:
        mtx_unlock(&prison0.pr_mtx);
        free(allow_name, M_PRISON);
        free(allow_noname, M_PRISON);
        return allow_flag;
}
 4305 
 4306 /*
 4307  * The VFS system will register jail-aware filesystems here.  They each get
 4308  * a parameter allow.mount.xxxfs and a flag to check when a jailed user
 4309  * attempts to mount.
 4310  */
 4311 void
 4312 prison_add_vfs(struct vfsconf *vfsp)
 4313 {
 4314 #ifdef NO_SYSCTL_DESCR
 4315 
 4316         vfsp->vfc_prison_flag = prison_add_allow("mount", vfsp->vfc_name,
 4317             NULL, NULL);
 4318 #else
 4319         char *descr;
 4320 
 4321         (void)asprintf(&descr, M_TEMP, "Jail may mount the %s file system",
 4322             vfsp->vfc_name);
 4323         vfsp->vfc_prison_flag = prison_add_allow("mount", vfsp->vfc_name,
 4324             NULL, descr);
 4325         free(descr, M_TEMP);
 4326 #endif
 4327 }
 4328 
 4329 #ifdef RACCT
 4330 void
 4331 prison_racct_foreach(void (*callback)(struct racct *racct,
 4332     void *arg2, void *arg3), void (*pre)(void), void (*post)(void),
 4333     void *arg2, void *arg3)
 4334 {
 4335         struct prison_racct *prr;
 4336 
 4337         ASSERT_RACCT_ENABLED();
 4338 
 4339         sx_slock(&allprison_lock);
 4340         if (pre != NULL)
 4341                 (pre)();
 4342         LIST_FOREACH(prr, &allprison_racct, prr_next)
 4343                 (callback)(prr->prr_racct, arg2, arg3);
 4344         if (post != NULL)
 4345                 (post)();
 4346         sx_sunlock(&allprison_lock);
 4347 }
 4348 
 4349 static struct prison_racct *
 4350 prison_racct_find_locked(const char *name)
 4351 {
 4352         struct prison_racct *prr;
 4353 
 4354         ASSERT_RACCT_ENABLED();
 4355         sx_assert(&allprison_lock, SA_XLOCKED);
 4356 
 4357         if (name[0] == '\0' || strlen(name) >= MAXHOSTNAMELEN)
 4358                 return (NULL);
 4359 
 4360         LIST_FOREACH(prr, &allprison_racct, prr_next) {
 4361                 if (strcmp(name, prr->prr_name) != 0)
 4362                         continue;
 4363 
 4364                 /* Found prison_racct with a matching name? */
 4365                 prison_racct_hold(prr);
 4366                 return (prr);
 4367         }
 4368 
 4369         /* Add new prison_racct. */
 4370         prr = malloc(sizeof(*prr), M_PRISON_RACCT, M_ZERO | M_WAITOK);
 4371         racct_create(&prr->prr_racct);
 4372 
 4373         strcpy(prr->prr_name, name);
 4374         refcount_init(&prr->prr_refcount, 1);
 4375         LIST_INSERT_HEAD(&allprison_racct, prr, prr_next);
 4376 
 4377         return (prr);
 4378 }
 4379 
 4380 struct prison_racct *
 4381 prison_racct_find(const char *name)
 4382 {
 4383         struct prison_racct *prr;
 4384 
 4385         ASSERT_RACCT_ENABLED();
 4386 
 4387         sx_xlock(&allprison_lock);
 4388         prr = prison_racct_find_locked(name);
 4389         sx_xunlock(&allprison_lock);
 4390         return (prr);
 4391 }
 4392 
/*
 * Take an additional reference on prr.
 */
void
prison_racct_hold(struct prison_racct *prr)
{

        ASSERT_RACCT_ENABLED();

        refcount_acquire(&prr->prr_refcount);
}
 4401 
 4402 static void
 4403 prison_racct_free_locked(struct prison_racct *prr)
 4404 {
 4405 
 4406         ASSERT_RACCT_ENABLED();
 4407         sx_assert(&allprison_lock, SA_XLOCKED);
 4408 
 4409         if (refcount_release(&prr->prr_refcount)) {
 4410                 racct_destroy(&prr->prr_racct);
 4411                 LIST_REMOVE(prr, prr_next);
 4412                 free(prr, M_PRISON_RACCT);
 4413         }
 4414 }
 4415 
 4416 void
 4417 prison_racct_free(struct prison_racct *prr)
 4418 {
 4419 
 4420         ASSERT_RACCT_ENABLED();
 4421         sx_assert(&allprison_lock, SA_UNLOCKED);
 4422 
 4423         if (refcount_release_if_not_last(&prr->prr_refcount))
 4424                 return;
 4425 
 4426         sx_xlock(&allprison_lock);
 4427         prison_racct_free_locked(prr);
 4428         sx_xunlock(&allprison_lock);
 4429 }
 4430 
 4431 static void
 4432 prison_racct_attach(struct prison *pr)
 4433 {
 4434         struct prison_racct *prr;
 4435 
 4436         ASSERT_RACCT_ENABLED();
 4437         sx_assert(&allprison_lock, SA_XLOCKED);
 4438 
 4439         prr = prison_racct_find_locked(pr->pr_name);
 4440         KASSERT(prr != NULL, ("cannot find prison_racct"));
 4441 
 4442         pr->pr_prison_racct = prr;
 4443 }
 4444 
/*
 * Handle jail renaming.  From the racct point of view, renaming means
 * moving from one prison_racct to another.
 *
 * Nothing is done when pr's name still matches the name of its current
 * prison_racct.  Otherwise pr is attached to the prison_racct for its new
 * name, the resource records are moved across with racct_move(), and the
 * reference on the old prison_racct is dropped.
 *
 * Lock order: allproc_lock (shared) is taken before allprison_lock
 * (exclusive) and is released first on the way out.
 */
static void
prison_racct_modify(struct prison *pr)
{
#ifdef RCTL
        struct proc *p;
        struct ucred *cred;
#endif
        struct prison_racct *oldprr;

        ASSERT_RACCT_ENABLED();

        sx_slock(&allproc_lock);
        sx_xlock(&allprison_lock);

        /* The name did not change, so there is nothing to move. */
        if (strcmp(pr->pr_name, pr->pr_prison_racct->prr_name) == 0) {
                sx_xunlock(&allprison_lock);
                sx_sunlock(&allproc_lock);
                return;
        }

        /* Keep the old container until its records have been moved. */
        oldprr = pr->pr_prison_racct;
        pr->pr_prison_racct = NULL;

        prison_racct_attach(pr);

        /*
         * Move resource utilisation records.
         */
        racct_move(pr->pr_prison_racct->prr_racct, oldprr->prr_racct);

#ifdef RCTL
        /*
         * Force rctl to reattach rules to processes.
         */
        FOREACH_PROC_IN_SYSTEM(p) {
                /* Hold the credential so it can be used after unlocking p. */
                PROC_LOCK(p);
                cred = crhold(p->p_ucred);
                PROC_UNLOCK(p);
                rctl_proc_ucred_changed(p, cred);
                crfree(cred);
        }
#endif

        sx_sunlock(&allproc_lock);
        prison_racct_free_locked(oldprr);
        sx_xunlock(&allprison_lock);
}
 4496 
 4497 static void
 4498 prison_racct_detach(struct prison *pr)
 4499 {
 4500 
 4501         ASSERT_RACCT_ENABLED();
 4502         sx_assert(&allprison_lock, SA_UNLOCKED);
 4503 
 4504         if (pr->pr_prison_racct == NULL)
 4505                 return;
 4506         prison_racct_free(pr->pr_prison_racct);
 4507         pr->pr_prison_racct = NULL;
 4508 }
 4509 #endif /* RACCT */
 4510 
 4511 #ifdef DDB
 4512 
 4513 static void
 4514 db_show_prison(struct prison *pr)
 4515 {
 4516         struct bool_flags *bf;
 4517         struct jailsys_flags *jsf;
 4518 #if defined(INET) || defined(INET6)
 4519         int ii;
 4520 #endif
 4521         unsigned f;
 4522 #ifdef INET
 4523         char ip4buf[INET_ADDRSTRLEN];
 4524 #endif
 4525 #ifdef INET6
 4526         char ip6buf[INET6_ADDRSTRLEN];
 4527 #endif
 4528 
 4529         db_printf("prison %p:\n", pr);
 4530         db_printf(" jid             = %d\n", pr->pr_id);
 4531         db_printf(" name            = %s\n", pr->pr_name);
 4532         db_printf(" parent          = %p\n", pr->pr_parent);
 4533         db_printf(" ref             = %d\n", pr->pr_ref);
 4534         db_printf(" uref            = %d\n", pr->pr_uref);
 4535         db_printf(" state           = %s\n",
 4536             pr->pr_state == PRISON_STATE_ALIVE ? "alive" :
 4537             pr->pr_state == PRISON_STATE_DYING ? "dying" :
 4538             "invalid");
 4539         db_printf(" path            = %s\n", pr->pr_path);
 4540         db_printf(" cpuset          = %d\n", pr->pr_cpuset
 4541             ? pr->pr_cpuset->cs_id : -1);
 4542 #ifdef VIMAGE
 4543         db_printf(" vnet            = %p\n", pr->pr_vnet);
 4544 #endif
 4545         db_printf(" root            = %p\n", pr->pr_root);
 4546         db_printf(" securelevel     = %d\n", pr->pr_securelevel);
 4547         db_printf(" devfs_rsnum     = %d\n", pr->pr_devfs_rsnum);
 4548         db_printf(" children.max    = %d\n", pr->pr_childmax);
 4549         db_printf(" children.cur    = %d\n", pr->pr_childcount);
 4550         db_printf(" child           = %p\n", LIST_FIRST(&pr->pr_children));
 4551         db_printf(" sibling         = %p\n", LIST_NEXT(pr, pr_sibling));
 4552         db_printf(" flags           = 0x%x", pr->pr_flags);
 4553         for (bf = pr_flag_bool; bf < pr_flag_bool + nitems(pr_flag_bool); bf++)
 4554                 if (pr->pr_flags & bf->flag)
 4555                         db_printf(" %s", bf->name);
 4556         for (jsf = pr_flag_jailsys;
 4557              jsf < pr_flag_jailsys + nitems(pr_flag_jailsys);
 4558              jsf++) {
 4559                 f = pr->pr_flags & (jsf->disable | jsf->new);
 4560                 db_printf(" %-16s= %s\n", jsf->name,
 4561                     (f != 0 && f == jsf->disable) ? "disable"
 4562                     : (f == jsf->new) ? "new"
 4563                     : "inherit");
 4564         }
 4565         db_printf(" allow           = 0x%x", pr->pr_allow);
 4566         for (bf = pr_flag_allow;
 4567              bf < pr_flag_allow + nitems(pr_flag_allow) &&
 4568                 atomic_load_int(&bf->flag) != 0;
 4569              bf++)
 4570                 if (pr->pr_allow & bf->flag)
 4571                         db_printf(" %s", bf->name);
 4572         db_printf("\n");
 4573         db_printf(" enforce_statfs  = %d\n", pr->pr_enforce_statfs);
 4574         db_printf(" host.hostname   = %s\n", pr->pr_hostname);
 4575         db_printf(" host.domainname = %s\n", pr->pr_domainname);
 4576         db_printf(" host.hostuuid   = %s\n", pr->pr_hostuuid);
 4577         db_printf(" host.hostid     = %lu\n", pr->pr_hostid);
 4578 #ifdef INET
 4579         db_printf(" ip4s            = %d\n", pr->pr_ip4s);
 4580         for (ii = 0; ii < pr->pr_ip4s; ii++)
 4581                 db_printf(" %s %s\n",
 4582                     ii == 0 ? "ip4.addr        =" : "                 ",
 4583                     inet_ntoa_r(pr->pr_ip4[ii], ip4buf));
 4584 #endif
 4585 #ifdef INET6
 4586         db_printf(" ip6s            = %d\n", pr->pr_ip6s);
 4587         for (ii = 0; ii < pr->pr_ip6s; ii++)
 4588                 db_printf(" %s %s\n",
 4589                     ii == 0 ? "ip6.addr        =" : "                 ",
 4590                     ip6_sprintf(ip6buf, &pr->pr_ip6[ii]));
 4591 #endif
 4592 }
 4593 
 4594 DB_SHOW_COMMAND(prison, db_show_prison_command)
 4595 {
 4596         struct prison *pr;
 4597 
 4598         if (!have_addr) {
 4599                 /*
 4600                  * Show all prisons in the list, and prison0 which is not
 4601                  * listed.
 4602                  */
 4603                 db_show_prison(&prison0);
 4604                 if (!db_pager_quit) {
 4605                         TAILQ_FOREACH(pr, &allprison, pr_list) {
 4606                                 db_show_prison(pr);
 4607                                 if (db_pager_quit)
 4608                                         break;
 4609                         }
 4610                 }
 4611                 return;
 4612         }
 4613 
 4614         if (addr == 0)
 4615                 pr = &prison0;
 4616         else {
 4617                 /* Look for a prison with the ID and with references. */
 4618                 TAILQ_FOREACH(pr, &allprison, pr_list)
 4619                         if (pr->pr_id == addr && pr->pr_ref > 0)
 4620                                 break;
 4621                 if (pr == NULL)
 4622                         /* Look again, without requiring a reference. */
 4623                         TAILQ_FOREACH(pr, &allprison, pr_list)
 4624                                 if (pr->pr_id == addr)
 4625                                         break;
 4626                 if (pr == NULL)
 4627                         /* Assume address points to a valid prison. */
 4628                         pr = (struct prison *)addr;
 4629         }
 4630         db_show_prison(pr);
 4631 }
 4632 
 4633 #endif /* DDB */

Cache object: e493a61e28233e32e51bc53061e5a9ad


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.