The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_jail.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1999 Poul-Henning Kamp.
    3  * Copyright (c) 2008 Bjoern A. Zeeb.
    4  * Copyright (c) 2009 James Gritton.
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   26  * SUCH DAMAGE.
   27  */
   28 
   29 #include <sys/cdefs.h>
   30 __FBSDID("$FreeBSD$");
   31 
   32 #include "opt_compat.h"
   33 #include "opt_ddb.h"
   34 #include "opt_inet.h"
   35 #include "opt_inet6.h"
   36 
   37 #include <sys/param.h>
   38 #include <sys/types.h>
   39 #include <sys/kernel.h>
   40 #include <sys/systm.h>
   41 #include <sys/errno.h>
   42 #include <sys/sysproto.h>
   43 #include <sys/malloc.h>
   44 #include <sys/osd.h>
   45 #include <sys/priv.h>
   46 #include <sys/proc.h>
   47 #include <sys/taskqueue.h>
   48 #include <sys/fcntl.h>
   49 #include <sys/jail.h>
   50 #include <sys/lock.h>
   51 #include <sys/mutex.h>
   52 #include <sys/racct.h>
   53 #include <sys/rctl.h>
   54 #include <sys/refcount.h>
   55 #include <sys/sx.h>
   56 #include <sys/sysent.h>
   57 #include <sys/namei.h>
   58 #include <sys/mount.h>
   59 #include <sys/queue.h>
   60 #include <sys/socket.h>
   61 #include <sys/syscallsubr.h>
   62 #include <sys/sysctl.h>
   63 #include <sys/vnode.h>
   64 
   65 #include <net/if.h>
   66 #include <net/vnet.h>
   67 
   68 #include <netinet/in.h>
   69 
   70 #ifdef DDB
   71 #include <ddb/ddb.h>
   72 #endif /* DDB */
   73 
   74 #include <security/mac/mac_framework.h>
   75 
      /* All-zero UUID string; used below as the initial pr_hostuuid of prison0. */
   76 #define DEFAULT_HOSTUUID        "00000000-0000-0000-0000-000000000000"
   77 
      /*
       * Allocation types for the jail code.  M_PRISON has external linkage;
       * M_PRISON_RACCT is private to this file.
       */
   78 MALLOC_DEFINE(M_PRISON, "prison", "Prison structures");
   79 static MALLOC_DEFINE(M_PRISON_RACCT, "prison_racct", "Prison racct structures");
   80 
   81 /* Keep struct prison prison0 and some code in kern_jail_set() readable. */
   82 #ifdef INET
   83 #ifdef INET6
   84 #define _PR_IP_SADDRSEL PR_IP4_SADDRSEL|PR_IP6_SADDRSEL
   85 #else
   86 #define _PR_IP_SADDRSEL PR_IP4_SADDRSEL
   87 #endif
   88 #else /* !INET */
   89 #ifdef INET6
   90 #define _PR_IP_SADDRSEL PR_IP6_SADDRSEL
   91 #else
   92 #define _PR_IP_SADDRSEL 0
   93 #endif
   94 #endif
   95 
   96 /* prison0 describes what is "real" about the system. */
   97 struct prison prison0 = {
   98         .pr_id          = 0,
   99         .pr_name        = "",
  100         .pr_ref         = 1,
  101         .pr_uref        = 1,
  102         .pr_path        = "/",
  103         .pr_securelevel = -1,
  104         .pr_devfs_rsnum = 0,
  105         .pr_childmax    = JAIL_MAX,
  106         .pr_hostuuid    = DEFAULT_HOSTUUID,
  107         .pr_children    = LIST_HEAD_INITIALIZER(prison0.pr_children),
  108 #ifdef VIMAGE
  109         .pr_flags       = PR_HOST|PR_VNET|_PR_IP_SADDRSEL,
  110 #else
  111         .pr_flags       = PR_HOST|_PR_IP_SADDRSEL,
  112 #endif
  113         .pr_allow       = PR_ALLOW_ALL,
  114 };
  115 MTX_SYSINIT(prison0, &prison0.pr_mtx, "jail mutex", MTX_DEF);
  116 
  117 /* allprison, allprison_racct and lastprid are protected by allprison_lock. */
  118 struct  sx allprison_lock;
  119 SX_SYSINIT(allprison_lock, &allprison_lock, "allprison");
      /* Tail queue head for the list of prisons; starts out empty. */
  120 struct  prisonlist allprison = TAILQ_HEAD_INITIALIZER(allprison);
      /* List head for prison_racct entries (no static initializer is given). */
  121 LIST_HEAD(, prison_racct) allprison_racct;
      /* NOTE(review): presumably the most recently assigned jail ID; confirm at its users. */
  122 int     lastprid = 0;
  123 
      /* Forward declarations of file-local (static) functions. */
  124 static int do_jail_attach(struct thread *td, struct prison *pr);
  125 static void prison_complete(void *context, int pending);
  126 static void prison_deref(struct prison *pr, int flags);
  127 static char *prison_path(struct prison *pr1, struct prison *pr2);
  128 static void prison_remove_one(struct prison *pr);
      /* The prison_racct_*() helpers are only declared when RACCT is configured. */
  129 #ifdef RACCT
  130 static void prison_racct_attach(struct prison *pr);
  131 static void prison_racct_modify(struct prison *pr);
  132 static void prison_racct_detach(struct prison *pr);
  133 #endif
  134 
  135 /* Flags for prison_deref(): each value is a distinct single bit of its int "flags" argument. */
  136 #define PD_DEREF        0x01
  137 #define PD_DEUREF       0x02
  138 #define PD_LOCKED       0x04
  139 #define PD_LIST_SLOCKED 0x08
  140 #define PD_LIST_XLOCKED 0x10
  141 
  142 /*
  143  * Parameter names corresponding to PR_* flag values.  Size values are for kvm
  144  * as we cannot figure out the size of a sparse array, or an array without a
  145  * terminating entry.
       *
       * The array index is the bit position of the flag: kern_jail_set() looks
       * up pr_flag_names[fi] and sets or clears (1 << fi).  Indices that are not
       * initialized are NULL and are skipped there.
  146  */
  147 static char *pr_flag_names[] = {
  148         [0] = "persist",
  149 #ifdef INET
  150         [7] = "ip4.saddrsel",
  151 #endif
  152 #ifdef INET6
  153         [8] = "ip6.saddrsel",
  154 #endif
  155 };
      /* Size of the array in bytes (not the element count). */
  156 const size_t pr_flag_names_size = sizeof(pr_flag_names);
  157 
      /*
       * Negated ("no...") spellings of the entries in pr_flag_names.  The two
       * arrays must stay index-aligned: kern_jail_set() only checks
       * pr_flag_names[fi] for NULL before using the same index in both.
       */
  158 static char *pr_flag_nonames[] = {
  159         [0] = "nopersist",
  160 #ifdef INET
  161         [7] = "ip4.nosaddrsel",
  162 #endif
  163 #ifdef INET6
  164         [8] = "ip6.nosaddrsel",
  165 #endif
  166 };
      /* Size of the array in bytes (not the element count). */
  167 const size_t pr_flag_nonames_size = sizeof(pr_flag_nonames);
  168 
  169 struct jailsys_flags {
  170         const char      *name;
  171         unsigned         disable;
  172         unsigned         new;
  173 } pr_flag_jailsys[] = {
  174         { "host", 0, PR_HOST },
  175 #ifdef VIMAGE
  176         { "vnet", 0, PR_VNET },
  177 #endif
  178 #ifdef INET
  179         { "ip4", PR_IP4_USER, PR_IP4_USER },
  180 #endif
  181 #ifdef INET6
  182         { "ip6", PR_IP6_USER, PR_IP6_USER },
  183 #endif
  184 };
  185 const size_t pr_flag_jailsys_size = sizeof(pr_flag_jailsys);
  186 
  187 static char *pr_allow_names[] = {
  188         "allow.set_hostname",
  189         "allow.sysvipc",
  190         "allow.raw_sockets",
  191         "allow.chflags",
  192         "allow.mount",
  193         "allow.quotas",
  194         "allow.socket_af",
  195         "allow.mount.devfs",
  196         "allow.mount.nullfs",
  197         "allow.mount.zfs",
  198         "allow.mount.procfs",
  199         "allow.mount.tmpfs",
  200         "allow.mount.fdescfs",
  201         "allow.mount.linprocfs",
  202         "allow.mount.linsysfs",
  203         "allow.read_msgbuf",
  204 };
  205 const size_t pr_allow_names_size = sizeof(pr_allow_names);
  206 
  207 static char *pr_allow_nonames[] = {
  208         "allow.noset_hostname",
  209         "allow.nosysvipc",
  210         "allow.noraw_sockets",
  211         "allow.nochflags",
  212         "allow.nomount",
  213         "allow.noquotas",
  214         "allow.nosocket_af",
  215         "allow.mount.nodevfs",
  216         "allow.mount.nonullfs",
  217         "allow.mount.nozfs",
  218         "allow.mount.noprocfs",
  219         "allow.mount.notmpfs",
  220         "allow.mount.nofdescfs",
  221         "allow.mount.nolinprocfs",
  222         "allow.mount.nolinsysfs",
  223         "allow.noread_msgbuf",
  224 };
  225 const size_t pr_allow_nonames_size = sizeof(pr_allow_nonames);
  226 
  227 #define JAIL_DEFAULT_ALLOW              PR_ALLOW_SET_HOSTNAME
  228 #define JAIL_DEFAULT_ENFORCE_STATFS     2
  229 #define JAIL_DEFAULT_DEVFS_RSNUM        0
  230 static unsigned jail_default_allow = JAIL_DEFAULT_ALLOW;
  231 static int jail_default_enforce_statfs = JAIL_DEFAULT_ENFORCE_STATFS;
  232 static int jail_default_devfs_rsnum = JAIL_DEFAULT_DEVFS_RSNUM;
  233 #if defined(INET) || defined(INET6)
  234 static unsigned jail_max_af_ips = 255;
  235 #endif
  236 
  237 /*
  238  * Initialize the parts of prison0 that can't be static-initialized with
  239  * constants.  This is called from proc0_init() after creating thread0 cpuset.
  240  */
  241 void
  242 prison0_init(void)
  243 {
  244 
  245         prison0.pr_cpuset = cpuset_ref(thread0.td_cpuset);
  246         prison0.pr_osreldate = osreldate;
  247         strlcpy(prison0.pr_osrelease, osrelease, sizeof(prison0.pr_osrelease));
  248 }
  249 
  250 /*
  251  * struct jail_args {
  252  *      struct jail *jail;
  253  * };
  254  */
  255 int
  256 sys_jail(struct thread *td, struct jail_args *uap)
  257 {
  258         uint32_t version;
  259         int error;
  260         struct jail j;
  261 
  262         error = copyin(uap->jail, &version, sizeof(uint32_t));
  263         if (error)
  264                 return (error);
  265 
  266         switch (version) {
  267         case 0:
  268         {
  269                 struct jail_v0 j0;
  270 
  271                 /* FreeBSD single IPv4 jails. */
  272                 bzero(&j, sizeof(struct jail));
  273                 error = copyin(uap->jail, &j0, sizeof(struct jail_v0));
  274                 if (error)
  275                         return (error);
  276                 j.version = j0.version;
  277                 j.path = j0.path;
  278                 j.hostname = j0.hostname;
  279                 j.ip4s = htonl(j0.ip_number);   /* jail_v0 is host order */
  280                 break;
  281         }
  282 
  283         case 1:
  284                 /*
  285                  * Version 1 was used by multi-IPv4 jail implementations
  286                  * that never made it into the official kernel.
  287                  */
  288                 return (EINVAL);
  289 
  290         case 2: /* JAIL_API_VERSION */
  291                 /* FreeBSD multi-IPv4/IPv6,noIP jails. */
  292                 error = copyin(uap->jail, &j, sizeof(struct jail));
  293                 if (error)
  294                         return (error);
  295                 break;
  296 
  297         default:
  298                 /* Sci-Fi jails are not supported, sorry. */
  299                 return (EINVAL);
  300         }
  301         return (kern_jail(td, &j));
  302 }
  303 
  304 int
  305 kern_jail(struct thread *td, struct jail *j)
  306 {
  307         struct iovec optiov[2 * (4 + nitems(pr_allow_names)
  308 #ifdef INET
  309                             + 1
  310 #endif
  311 #ifdef INET6
  312                             + 1
  313 #endif
  314                             )];
  315         struct uio opt;
  316         char *u_path, *u_hostname, *u_name;
  317 #ifdef INET
  318         uint32_t ip4s;
  319         struct in_addr *u_ip4;
  320 #endif
  321 #ifdef INET6
  322         struct in6_addr *u_ip6;
  323 #endif
  324         size_t tmplen;
  325         int error, enforce_statfs, fi;
  326 
  327         bzero(&optiov, sizeof(optiov));
  328         opt.uio_iov = optiov;
  329         opt.uio_iovcnt = 0;
  330         opt.uio_offset = -1;
  331         opt.uio_resid = -1;
  332         opt.uio_segflg = UIO_SYSSPACE;
  333         opt.uio_rw = UIO_READ;
  334         opt.uio_td = td;
  335 
  336         /* Set permissions for top-level jails from sysctls. */
  337         if (!jailed(td->td_ucred)) {
  338                 for (fi = 0; fi < nitems(pr_allow_names); fi++) {
  339                         optiov[opt.uio_iovcnt].iov_base =
  340                             (jail_default_allow & (1 << fi))
  341                             ? pr_allow_names[fi] : pr_allow_nonames[fi];
  342                         optiov[opt.uio_iovcnt].iov_len =
  343                             strlen(optiov[opt.uio_iovcnt].iov_base) + 1;
  344                         opt.uio_iovcnt += 2;
  345                 }
  346                 optiov[opt.uio_iovcnt].iov_base = "enforce_statfs";
  347                 optiov[opt.uio_iovcnt].iov_len = sizeof("enforce_statfs");
  348                 opt.uio_iovcnt++;
  349                 enforce_statfs = jail_default_enforce_statfs;
  350                 optiov[opt.uio_iovcnt].iov_base = &enforce_statfs;
  351                 optiov[opt.uio_iovcnt].iov_len = sizeof(enforce_statfs);
  352                 opt.uio_iovcnt++;
  353         }
  354 
  355         tmplen = MAXPATHLEN + MAXHOSTNAMELEN + MAXHOSTNAMELEN;
  356 #ifdef INET
  357         ip4s = (j->version == 0) ? 1 : j->ip4s;
  358         if (ip4s > jail_max_af_ips)
  359                 return (EINVAL);
  360         tmplen += ip4s * sizeof(struct in_addr);
  361 #else
  362         if (j->ip4s > 0)
  363                 return (EINVAL);
  364 #endif
  365 #ifdef INET6
  366         if (j->ip6s > jail_max_af_ips)
  367                 return (EINVAL);
  368         tmplen += j->ip6s * sizeof(struct in6_addr);
  369 #else
  370         if (j->ip6s > 0)
  371                 return (EINVAL);
  372 #endif
  373         u_path = malloc(tmplen, M_TEMP, M_WAITOK);
  374         u_hostname = u_path + MAXPATHLEN;
  375         u_name = u_hostname + MAXHOSTNAMELEN;
  376 #ifdef INET
  377         u_ip4 = (struct in_addr *)(u_name + MAXHOSTNAMELEN);
  378 #endif
  379 #ifdef INET6
  380 #ifdef INET
  381         u_ip6 = (struct in6_addr *)(u_ip4 + ip4s);
  382 #else
  383         u_ip6 = (struct in6_addr *)(u_name + MAXHOSTNAMELEN);
  384 #endif
  385 #endif
  386         optiov[opt.uio_iovcnt].iov_base = "path";
  387         optiov[opt.uio_iovcnt].iov_len = sizeof("path");
  388         opt.uio_iovcnt++;
  389         optiov[opt.uio_iovcnt].iov_base = u_path;
  390         error = copyinstr(j->path, u_path, MAXPATHLEN,
  391             &optiov[opt.uio_iovcnt].iov_len);
  392         if (error) {
  393                 free(u_path, M_TEMP);
  394                 return (error);
  395         }
  396         opt.uio_iovcnt++;
  397         optiov[opt.uio_iovcnt].iov_base = "host.hostname";
  398         optiov[opt.uio_iovcnt].iov_len = sizeof("host.hostname");
  399         opt.uio_iovcnt++;
  400         optiov[opt.uio_iovcnt].iov_base = u_hostname;
  401         error = copyinstr(j->hostname, u_hostname, MAXHOSTNAMELEN,
  402             &optiov[opt.uio_iovcnt].iov_len);
  403         if (error) {
  404                 free(u_path, M_TEMP);
  405                 return (error);
  406         }
  407         opt.uio_iovcnt++;
  408         if (j->jailname != NULL) {
  409                 optiov[opt.uio_iovcnt].iov_base = "name";
  410                 optiov[opt.uio_iovcnt].iov_len = sizeof("name");
  411                 opt.uio_iovcnt++;
  412                 optiov[opt.uio_iovcnt].iov_base = u_name;
  413                 error = copyinstr(j->jailname, u_name, MAXHOSTNAMELEN,
  414                     &optiov[opt.uio_iovcnt].iov_len);
  415                 if (error) {
  416                         free(u_path, M_TEMP);
  417                         return (error);
  418                 }
  419                 opt.uio_iovcnt++;
  420         }
  421 #ifdef INET
  422         optiov[opt.uio_iovcnt].iov_base = "ip4.addr";
  423         optiov[opt.uio_iovcnt].iov_len = sizeof("ip4.addr");
  424         opt.uio_iovcnt++;
  425         optiov[opt.uio_iovcnt].iov_base = u_ip4;
  426         optiov[opt.uio_iovcnt].iov_len = ip4s * sizeof(struct in_addr);
  427         if (j->version == 0)
  428                 u_ip4->s_addr = j->ip4s;
  429         else {
  430                 error = copyin(j->ip4, u_ip4, optiov[opt.uio_iovcnt].iov_len);
  431                 if (error) {
  432                         free(u_path, M_TEMP);
  433                         return (error);
  434                 }
  435         }
  436         opt.uio_iovcnt++;
  437 #endif
  438 #ifdef INET6
  439         optiov[opt.uio_iovcnt].iov_base = "ip6.addr";
  440         optiov[opt.uio_iovcnt].iov_len = sizeof("ip6.addr");
  441         opt.uio_iovcnt++;
  442         optiov[opt.uio_iovcnt].iov_base = u_ip6;
  443         optiov[opt.uio_iovcnt].iov_len = j->ip6s * sizeof(struct in6_addr);
  444         error = copyin(j->ip6, u_ip6, optiov[opt.uio_iovcnt].iov_len);
  445         if (error) {
  446                 free(u_path, M_TEMP);
  447                 return (error);
  448         }
  449         opt.uio_iovcnt++;
  450 #endif
  451         KASSERT(opt.uio_iovcnt <= nitems(optiov),
  452                 ("kern_jail: too many iovecs (%d)", opt.uio_iovcnt));
  453         error = kern_jail_set(td, &opt, JAIL_CREATE | JAIL_ATTACH);
  454         free(u_path, M_TEMP);
  455         return (error);
  456 }
  457 
  458 
  459 /*
  460  * struct jail_set_args {
  461  *      struct iovec *iovp;
  462  *      unsigned int iovcnt;
  463  *      int flags;
  464  * };
  465  */
  466 int
  467 sys_jail_set(struct thread *td, struct jail_set_args *uap)
  468 {
  469         struct uio *auio;
  470         int error;
  471 
  472         /* Check that we have an even number of iovecs. */
  473         if (uap->iovcnt & 1)
  474                 return (EINVAL);
  475 
  476         error = copyinuio(uap->iovp, uap->iovcnt, &auio);
  477         if (error)
  478                 return (error);
  479         error = kern_jail_set(td, auio, uap->flags);
  480         free(auio, M_IOV);
  481         return (error);
  482 }
  483 
  484 int
  485 kern_jail_set(struct thread *td, struct uio *optuio, int flags)
  486 {
  487         struct nameidata nd;
  488 #ifdef INET
  489         struct in_addr *ip4;
  490 #endif
  491 #ifdef INET6
  492         struct in6_addr *ip6;
  493 #endif
  494         struct vfsopt *opt;
  495         struct vfsoptlist *opts;
  496         struct prison *pr, *deadpr, *mypr, *ppr, *tpr;
  497         struct vnode *root;
  498         char *domain, *errmsg, *host, *name, *namelc, *p, *path, *uuid;
  499         char *g_path, *osrelstr;
  500 #if defined(INET) || defined(INET6)
  501         struct prison *tppr;
  502         void *op;
  503 #endif
  504         unsigned long hid;
  505         size_t namelen, onamelen, pnamelen;
  506         int born, created, cuflags, descend, enforce;
  507         int error, errmsg_len, errmsg_pos;
  508         int gotchildmax, gotenforce, gothid, gotrsnum, gotslevel;
  509         int fi, jid, jsys, len, level;
  510         int childmax, osreldt, rsnum, slevel;
  511         int fullpath_disabled;
  512 #if defined(INET) || defined(INET6)
  513         int ii, ij;
  514 #endif
  515 #ifdef INET
  516         int ip4s, redo_ip4;
  517 #endif
  518 #ifdef INET6
  519         int ip6s, redo_ip6;
  520 #endif
  521         uint64_t pr_allow, ch_allow, pr_flags, ch_flags;
  522         unsigned tallow;
  523         char numbuf[12];
  524 
  525         error = priv_check(td, PRIV_JAIL_SET);
  526         if (!error && (flags & JAIL_ATTACH))
  527                 error = priv_check(td, PRIV_JAIL_ATTACH);
  528         if (error)
  529                 return (error);
  530         mypr = td->td_ucred->cr_prison;
  531         if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0)
  532                 return (EPERM);
  533         if (flags & ~JAIL_SET_MASK)
  534                 return (EINVAL);
  535 
  536         /*
  537          * Check all the parameters before committing to anything.  Not all
  538          * errors can be caught early, but we may as well try.  Also, this
  539          * takes care of some expensive stuff (path lookup) before getting
  540          * the allprison lock.
  541          *
  542          * XXX Jails are not filesystems, and jail parameters are not mount
  543          *     options.  But it makes more sense to re-use the vfsopt code
  544          *     than duplicate it under a different name.
  545          */
  546         error = vfs_buildopts(optuio, &opts);
  547         if (error)
  548                 return (error);
  549 #ifdef INET
  550         ip4 = NULL;
  551 #endif
  552 #ifdef INET6
  553         ip6 = NULL;
  554 #endif
  555         g_path = NULL;
  556 
  557         cuflags = flags & (JAIL_CREATE | JAIL_UPDATE);
  558         if (!cuflags) {
  559                 error = EINVAL;
  560                 vfs_opterror(opts, "no valid operation (create or update)");
  561                 goto done_errmsg;
  562         }
  563 
  564         error = vfs_copyopt(opts, "jid", &jid, sizeof(jid));
  565         if (error == ENOENT)
  566                 jid = 0;
  567         else if (error != 0)
  568                 goto done_free;
  569 
  570         error = vfs_copyopt(opts, "securelevel", &slevel, sizeof(slevel));
  571         if (error == ENOENT)
  572                 gotslevel = 0;
  573         else if (error != 0)
  574                 goto done_free;
  575         else
  576                 gotslevel = 1;
  577 
  578         error =
  579             vfs_copyopt(opts, "children.max", &childmax, sizeof(childmax));
  580         if (error == ENOENT)
  581                 gotchildmax = 0;
  582         else if (error != 0)
  583                 goto done_free;
  584         else
  585                 gotchildmax = 1;
  586 
  587         error = vfs_copyopt(opts, "enforce_statfs", &enforce, sizeof(enforce));
  588         if (error == ENOENT)
  589                 gotenforce = 0;
  590         else if (error != 0)
  591                 goto done_free;
  592         else if (enforce < 0 || enforce > 2) {
  593                 error = EINVAL;
  594                 goto done_free;
  595         } else
  596                 gotenforce = 1;
  597 
  598         error = vfs_copyopt(opts, "devfs_ruleset", &rsnum, sizeof(rsnum));
  599         if (error == ENOENT)
  600                 gotrsnum = 0;
  601         else if (error != 0)
  602                 goto done_free;
  603         else
  604                 gotrsnum = 1;
  605 
  606         pr_flags = ch_flags = 0;
  607         for (fi = 0; fi < nitems(pr_flag_names); fi++) {
  608                 if (pr_flag_names[fi] == NULL)
  609                         continue;
  610                 vfs_flagopt(opts, pr_flag_names[fi], &pr_flags, 1 << fi);
  611                 vfs_flagopt(opts, pr_flag_nonames[fi], &ch_flags, 1 << fi);
  612         }
  613         ch_flags |= pr_flags;
  614         for (fi = 0; fi < nitems(pr_flag_jailsys); fi++) {
  615                 error = vfs_copyopt(opts, pr_flag_jailsys[fi].name, &jsys,
  616                     sizeof(jsys));
  617                 if (error == ENOENT)
  618                         continue;
  619                 if (error != 0)
  620                         goto done_free;
  621                 switch (jsys) {
  622                 case JAIL_SYS_DISABLE:
  623                         if (!pr_flag_jailsys[fi].disable) {
  624                                 error = EINVAL;
  625                                 goto done_free;
  626                         }
  627                         pr_flags |= pr_flag_jailsys[fi].disable;
  628                         break;
  629                 case JAIL_SYS_NEW:
  630                         pr_flags |= pr_flag_jailsys[fi].new;
  631                         break;
  632                 case JAIL_SYS_INHERIT:
  633                         break;
  634                 default:
  635                         error = EINVAL;
  636                         goto done_free;
  637                 }
  638                 ch_flags |=
  639                     pr_flag_jailsys[fi].new | pr_flag_jailsys[fi].disable;
  640         }
  641         if ((flags & (JAIL_CREATE | JAIL_UPDATE | JAIL_ATTACH)) == JAIL_CREATE
  642             && !(pr_flags & PR_PERSIST)) {
  643                 error = EINVAL;
  644                 vfs_opterror(opts, "new jail must persist or attach");
  645                 goto done_errmsg;
  646         }
  647 #ifdef VIMAGE
  648         if ((flags & JAIL_UPDATE) && (ch_flags & PR_VNET)) {
  649                 error = EINVAL;
  650                 vfs_opterror(opts, "vnet cannot be changed after creation");
  651                 goto done_errmsg;
  652         }
  653 #endif
  654 #ifdef INET
  655         if ((flags & JAIL_UPDATE) && (ch_flags & PR_IP4_USER)) {
  656                 error = EINVAL;
  657                 vfs_opterror(opts, "ip4 cannot be changed after creation");
  658                 goto done_errmsg;
  659         }
  660 #endif
  661 #ifdef INET6
  662         if ((flags & JAIL_UPDATE) && (ch_flags & PR_IP6_USER)) {
  663                 error = EINVAL;
  664                 vfs_opterror(opts, "ip6 cannot be changed after creation");
  665                 goto done_errmsg;
  666         }
  667 #endif
  668 
  669         pr_allow = ch_allow = 0;
  670         for (fi = 0; fi < nitems(pr_allow_names); fi++) {
  671                 vfs_flagopt(opts, pr_allow_names[fi], &pr_allow, 1 << fi);
  672                 vfs_flagopt(opts, pr_allow_nonames[fi], &ch_allow, 1 << fi);
  673         }
  674         ch_allow |= pr_allow;
  675 
  676         error = vfs_getopt(opts, "name", (void **)&name, &len);
  677         if (error == ENOENT)
  678                 name = NULL;
  679         else if (error != 0)
  680                 goto done_free;
  681         else {
  682                 if (len == 0 || name[len - 1] != '\0') {
  683                         error = EINVAL;
  684                         goto done_free;
  685                 }
  686                 if (len > MAXHOSTNAMELEN) {
  687                         error = ENAMETOOLONG;
  688                         goto done_free;
  689                 }
  690         }
  691 
  692         error = vfs_getopt(opts, "host.hostname", (void **)&host, &len);
  693         if (error == ENOENT)
  694                 host = NULL;
  695         else if (error != 0)
  696                 goto done_free;
  697         else {
  698                 ch_flags |= PR_HOST;
  699                 pr_flags |= PR_HOST;
  700                 if (len == 0 || host[len - 1] != '\0') {
  701                         error = EINVAL;
  702                         goto done_free;
  703                 }
  704                 if (len > MAXHOSTNAMELEN) {
  705                         error = ENAMETOOLONG;
  706                         goto done_free;
  707                 }
  708         }
  709 
  710         error = vfs_getopt(opts, "host.domainname", (void **)&domain, &len);
  711         if (error == ENOENT)
  712                 domain = NULL;
  713         else if (error != 0)
  714                 goto done_free;
  715         else {
  716                 ch_flags |= PR_HOST;
  717                 pr_flags |= PR_HOST;
  718                 if (len == 0 || domain[len - 1] != '\0') {
  719                         error = EINVAL;
  720                         goto done_free;
  721                 }
  722                 if (len > MAXHOSTNAMELEN) {
  723                         error = ENAMETOOLONG;
  724                         goto done_free;
  725                 }
  726         }
  727 
  728         error = vfs_getopt(opts, "host.hostuuid", (void **)&uuid, &len);
  729         if (error == ENOENT)
  730                 uuid = NULL;
  731         else if (error != 0)
  732                 goto done_free;
  733         else {
  734                 ch_flags |= PR_HOST;
  735                 pr_flags |= PR_HOST;
  736                 if (len == 0 || uuid[len - 1] != '\0') {
  737                         error = EINVAL;
  738                         goto done_free;
  739                 }
  740                 if (len > HOSTUUIDLEN) {
  741                         error = ENAMETOOLONG;
  742                         goto done_free;
  743                 }
  744         }
  745 
  746 #ifdef COMPAT_FREEBSD32
  747         if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
  748                 uint32_t hid32;
  749 
  750                 error = vfs_copyopt(opts, "host.hostid", &hid32, sizeof(hid32));
  751                 hid = hid32;
  752         } else
  753 #endif
  754                 error = vfs_copyopt(opts, "host.hostid", &hid, sizeof(hid));
  755         if (error == ENOENT)
  756                 gothid = 0;
  757         else if (error != 0)
  758                 goto done_free;
  759         else {
  760                 gothid = 1;
  761                 ch_flags |= PR_HOST;
  762                 pr_flags |= PR_HOST;
  763         }
  764 
  765 #ifdef INET
  766         error = vfs_getopt(opts, "ip4.addr", &op, &ip4s);
  767         if (error == ENOENT)
  768                 ip4s = 0;
  769         else if (error != 0)
  770                 goto done_free;
  771         else if (ip4s & (sizeof(*ip4) - 1)) {
  772                 error = EINVAL;
  773                 goto done_free;
  774         } else {
  775                 ch_flags |= PR_IP4_USER;
  776                 pr_flags |= PR_IP4_USER;
  777                 if (ip4s > 0) {
  778                         ip4s /= sizeof(*ip4);
  779                         if (ip4s > jail_max_af_ips) {
  780                                 error = EINVAL;
  781                                 vfs_opterror(opts, "too many IPv4 addresses");
  782                                 goto done_errmsg;
  783                         }
  784                         ip4 = malloc(ip4s * sizeof(*ip4), M_PRISON, M_WAITOK);
  785                         bcopy(op, ip4, ip4s * sizeof(*ip4));
  786                         /*
  787                          * IP addresses are all sorted but ip[0] to preserve
  788                          * the primary IP address as given from userland.
  789                          * This special IP is used for unbound outgoing
  790                          * connections as well for "loopback" traffic in case
  791                          * source address selection cannot find any more fitting
  792                          * address to connect from.
  793                          */
  794                         if (ip4s > 1)
  795                                 qsort(ip4 + 1, ip4s - 1, sizeof(*ip4),
  796                                     prison_qcmp_v4);
  797                         /*
  798                          * Check for duplicate addresses and do some simple
  799                          * zero and broadcast checks. If users give other bogus
  800                          * addresses it is their problem.
  801                          *
  802                          * We do not have to care about byte order for these
  803                          * checks so we will do them in NBO.
  804                          */
  805                         for (ii = 0; ii < ip4s; ii++) {
  806                                 if (ip4[ii].s_addr == INADDR_ANY ||
  807                                     ip4[ii].s_addr == INADDR_BROADCAST) {
  808                                         error = EINVAL;
  809                                         goto done_free;
  810                                 }
  811                                 if ((ii+1) < ip4s &&
  812                                     (ip4[0].s_addr == ip4[ii+1].s_addr ||
  813                                      ip4[ii].s_addr == ip4[ii+1].s_addr)) {
  814                                         error = EINVAL;
  815                                         goto done_free;
  816                                 }
  817                         }
  818                 }
  819         }
  820 #endif
  821 
  822 #ifdef INET6
  823         error = vfs_getopt(opts, "ip6.addr", &op, &ip6s);
  824         if (error == ENOENT)
  825                 ip6s = 0;
  826         else if (error != 0)
  827                 goto done_free;
  828         else if (ip6s & (sizeof(*ip6) - 1)) {
  829                 error = EINVAL;
  830                 goto done_free;
  831         } else {
  832                 ch_flags |= PR_IP6_USER;
  833                 pr_flags |= PR_IP6_USER;
  834                 if (ip6s > 0) {
  835                         ip6s /= sizeof(*ip6);
  836                         if (ip6s > jail_max_af_ips) {
  837                                 error = EINVAL;
  838                                 vfs_opterror(opts, "too many IPv6 addresses");
  839                                 goto done_errmsg;
  840                         }
  841                         ip6 = malloc(ip6s * sizeof(*ip6), M_PRISON, M_WAITOK);
  842                         bcopy(op, ip6, ip6s * sizeof(*ip6));
  843                         if (ip6s > 1)
  844                                 qsort(ip6 + 1, ip6s - 1, sizeof(*ip6),
  845                                     prison_qcmp_v6);
  846                         for (ii = 0; ii < ip6s; ii++) {
  847                                 if (IN6_IS_ADDR_UNSPECIFIED(&ip6[ii])) {
  848                                         error = EINVAL;
  849                                         goto done_free;
  850                                 }
  851                                 if ((ii+1) < ip6s &&
  852                                     (IN6_ARE_ADDR_EQUAL(&ip6[0], &ip6[ii+1]) ||
  853                                      IN6_ARE_ADDR_EQUAL(&ip6[ii], &ip6[ii+1])))
  854                                 {
  855                                         error = EINVAL;
  856                                         goto done_free;
  857                                 }
  858                         }
  859                 }
  860         }
  861 #endif
  862 
  863 #if defined(VIMAGE) && (defined(INET) || defined(INET6))
  864         if ((ch_flags & PR_VNET) && (ch_flags & (PR_IP4_USER | PR_IP6_USER))) {
  865                 error = EINVAL;
  866                 vfs_opterror(opts,
  867                     "vnet jails cannot have IP address restrictions");
  868                 goto done_errmsg;
  869         }
  870 #endif
  871 
  872         error = vfs_getopt(opts, "osrelease", (void **)&osrelstr, &len);
  873         if (error == ENOENT)
  874                 osrelstr = NULL;
  875         else if (error != 0)
  876                 goto done_free;
  877         else {
  878                 if (flags & JAIL_UPDATE) {
  879                         error = EINVAL;
  880                         vfs_opterror(opts,
  881                             "osrelease cannot be changed after creation");
  882                         goto done_errmsg;
  883                 }
  884                 if (len == 0 || osrelstr[len - 1] != '\0') {
  885                         error = EINVAL;
  886                         goto done_free;
  887                 }
  888                 if (len >= OSRELEASELEN) {
  889                         error = ENAMETOOLONG;
  890                         vfs_opterror(opts,
  891                             "osrelease string must be 1-%d bytes long",
  892                             OSRELEASELEN - 1);
  893                         goto done_errmsg;
  894                 }
  895         }
  896 
  897         error = vfs_copyopt(opts, "osreldate", &osreldt, sizeof(osreldt));
  898         if (error == ENOENT)
  899                 osreldt = 0;
  900         else if (error != 0)
  901                 goto done_free;
  902         else {
  903                 if (flags & JAIL_UPDATE) {
  904                         error = EINVAL;
  905                         vfs_opterror(opts,
  906                             "osreldate cannot be changed after creation");
  907                         goto done_errmsg;
  908                 }
  909                 if (osreldt == 0) {
  910                         error = EINVAL;
  911                         vfs_opterror(opts, "osreldate cannot be 0");
  912                         goto done_errmsg;
  913                 }
  914         }
  915 
  916         fullpath_disabled = 0;
  917         root = NULL;
  918         error = vfs_getopt(opts, "path", (void **)&path, &len);
  919         if (error == ENOENT)
  920                 path = NULL;
  921         else if (error != 0)
  922                 goto done_free;
  923         else {
  924                 if (flags & JAIL_UPDATE) {
  925                         error = EINVAL;
  926                         vfs_opterror(opts,
  927                             "path cannot be changed after creation");
  928                         goto done_errmsg;
  929                 }
  930                 if (len == 0 || path[len - 1] != '\0') {
  931                         error = EINVAL;
  932                         goto done_free;
  933                 }
  934                 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
  935                     path, td);
  936                 error = namei(&nd);
  937                 if (error)
  938                         goto done_free;
  939                 root = nd.ni_vp;
  940                 NDFREE(&nd, NDF_ONLY_PNBUF);
  941                 g_path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
  942                 strlcpy(g_path, path, MAXPATHLEN);
  943                 error = vn_path_to_global_path(td, root, g_path, MAXPATHLEN);
  944                 if (error == 0)
  945                         path = g_path;
  946                 else if (error == ENODEV) {
  947                         /* proceed if sysctl debug.disablefullpath == 1 */
  948                         fullpath_disabled = 1;
  949                         if (len < 2 || (len == 2 && path[0] == '/'))
  950                                 path = NULL;
  951                 } else {
  952                         /* exit on other errors */
  953                         goto done_free;
  954                 }
  955                 if (root->v_type != VDIR) {
  956                         error = ENOTDIR;
  957                         vput(root);
  958                         goto done_free;
  959                 }
  960                 VOP_UNLOCK(root, 0);
  961                 if (fullpath_disabled) {
  962                         /* Leave room for a real-root full pathname. */
  963                         if (len + (path[0] == '/' && strcmp(mypr->pr_path, "/")
  964                             ? strlen(mypr->pr_path) : 0) > MAXPATHLEN) {
  965                                 error = ENAMETOOLONG;
  966                                 vrele(root);
  967                                 goto done_free;
  968                         }
  969                 }
  970         }
  971 
  972         /*
  973          * Find the specified jail, or at least its parent.
  974          * This abuses the file error codes ENOENT and EEXIST.
  975          */
  976         pr = NULL;
  977         ppr = mypr;
  978         if (cuflags == JAIL_CREATE && jid == 0 && name != NULL) {
  979                 namelc = strrchr(name, '.');
  980                 jid = strtoul(namelc != NULL ? namelc + 1 : name, &p, 10);
  981                 if (*p != '\0')
  982                         jid = 0;
  983         }
  984         sx_xlock(&allprison_lock);
  985         if (jid != 0) {
  986                 /*
  987                  * See if a requested jid already exists.  There is an
  988                  * information leak here if the jid exists but is not within
  989                  * the caller's jail hierarchy.  Jail creators will get EEXIST
  990                  * even though they cannot see the jail, and CREATE | UPDATE
  991                  * will return ENOENT which is not normally a valid error.
  992                  */
  993                 if (jid < 0) {
  994                         error = EINVAL;
  995                         vfs_opterror(opts, "negative jid");
  996                         goto done_unlock_list;
  997                 }
  998                 pr = prison_find(jid);
  999                 if (pr != NULL) {
 1000                         ppr = pr->pr_parent;
 1001                         /* Create: jid must not exist. */
 1002                         if (cuflags == JAIL_CREATE) {
 1003                                 mtx_unlock(&pr->pr_mtx);
 1004                                 error = EEXIST;
 1005                                 vfs_opterror(opts, "jail %d already exists",
 1006                                     jid);
 1007                                 goto done_unlock_list;
 1008                         }
 1009                         if (!prison_ischild(mypr, pr)) {
 1010                                 mtx_unlock(&pr->pr_mtx);
 1011                                 pr = NULL;
 1012                         } else if (pr->pr_uref == 0) {
 1013                                 if (!(flags & JAIL_DYING)) {
 1014                                         mtx_unlock(&pr->pr_mtx);
 1015                                         error = ENOENT;
 1016                                         vfs_opterror(opts, "jail %d is dying",
 1017                                             jid);
 1018                                         goto done_unlock_list;
 1019                                 } else if ((flags & JAIL_ATTACH) ||
 1020                                     (pr_flags & PR_PERSIST)) {
 1021                                         /*
 1022                                          * A dying jail might be resurrected
 1023                                          * (via attach or persist), but first
 1024                                          * it must determine if another jail
 1025                                          * has claimed its name.  Accomplish
 1026                                          * this by implicitly re-setting the
 1027                                          * name.
 1028                                          */
 1029                                         if (name == NULL)
 1030                                                 name = prison_name(mypr, pr);
 1031                                 }
 1032                         }
 1033                 }
 1034                 if (pr == NULL) {
 1035                         /* Update: jid must exist. */
 1036                         if (cuflags == JAIL_UPDATE) {
 1037                                 error = ENOENT;
 1038                                 vfs_opterror(opts, "jail %d not found", jid);
 1039                                 goto done_unlock_list;
 1040                         }
 1041                 }
 1042         }
 1043         /*
 1044          * If the caller provided a name, look for a jail by that name.
 1045          * This has different semantics for creates and updates keyed by jid
 1046          * (where the name must not already exist in a different jail),
 1047          * and updates keyed by the name itself (where the name must exist
 1048          * because that is the jail being updated).
 1049          */
 1050         namelc = NULL;
 1051         if (name != NULL) {
 1052                 namelc = strrchr(name, '.');
 1053                 if (namelc == NULL)
 1054                         namelc = name;
 1055                 else {
 1056                         /*
 1057                          * This is a hierarchical name.  Split it into the
 1058                          * parent and child names, and make sure the parent
 1059                          * exists or matches an already found jail.
 1060                          */
 1061                         if (pr != NULL) {
 1062                                 if (strncmp(name, ppr->pr_name, namelc - name)
 1063                                     || ppr->pr_name[namelc - name] != '\0') {
 1064                                         mtx_unlock(&pr->pr_mtx);
 1065                                         error = EINVAL;
 1066                                         vfs_opterror(opts,
 1067                                             "cannot change jail's parent");
 1068                                         goto done_unlock_list;
 1069                                 }
 1070                         } else {
 1071                                 *namelc = '\0';
 1072                                 ppr = prison_find_name(mypr, name);
 1073                                 if (ppr == NULL) {
 1074                                         error = ENOENT;
 1075                                         vfs_opterror(opts,
 1076                                             "jail \"%s\" not found", name);
 1077                                         goto done_unlock_list;
 1078                                 }
 1079                                 mtx_unlock(&ppr->pr_mtx);
 1080                                 *namelc = '.';
 1081                         }
 1082                         namelc++;
 1083                 }
 1084                 if (namelc[0] != '\0') {
 1085                         pnamelen =
 1086                             (ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1;
 1087  name_again:
 1088                         deadpr = NULL;
 1089                         FOREACH_PRISON_CHILD(ppr, tpr) {
 1090                                 if (tpr != pr && tpr->pr_ref > 0 &&
 1091                                     !strcmp(tpr->pr_name + pnamelen, namelc)) {
 1092                                         if (pr == NULL &&
 1093                                             cuflags != JAIL_CREATE) {
 1094                                                 mtx_lock(&tpr->pr_mtx);
 1095                                                 if (tpr->pr_ref > 0) {
 1096                                                         /*
 1097                                                          * Use this jail
 1098                                                          * for updates.
 1099                                                          */
 1100                                                         if (tpr->pr_uref > 0) {
 1101                                                                 pr = tpr;
 1102                                                                 break;
 1103                                                         }
 1104                                                         deadpr = tpr;
 1105                                                 }
 1106                                                 mtx_unlock(&tpr->pr_mtx);
 1107                                         } else if (tpr->pr_uref > 0) {
 1108                                                 /*
 1109                                                  * Create, or update(jid):
 1110                                                  * name must not exist in an
 1111                                                  * active sibling jail.
 1112                                                  */
 1113                                                 error = EEXIST;
 1114                                                 if (pr != NULL)
 1115                                                         mtx_unlock(&pr->pr_mtx);
 1116                                                 vfs_opterror(opts,
 1117                                                    "jail \"%s\" already exists",
 1118                                                    name);
 1119                                                 goto done_unlock_list;
 1120                                         }
 1121                                 }
 1122                         }
 1123                         /* If no active jail is found, use a dying one. */
 1124                         if (deadpr != NULL && pr == NULL) {
 1125                                 if (flags & JAIL_DYING) {
 1126                                         mtx_lock(&deadpr->pr_mtx);
 1127                                         if (deadpr->pr_ref == 0) {
 1128                                                 mtx_unlock(&deadpr->pr_mtx);
 1129                                                 goto name_again;
 1130                                         }
 1131                                         pr = deadpr;
 1132                                 } else if (cuflags == JAIL_UPDATE) {
 1133                                         error = ENOENT;
 1134                                         vfs_opterror(opts,
 1135                                             "jail \"%s\" is dying", name);
 1136                                         goto done_unlock_list;
 1137                                 }
 1138                         }
 1139                         /* Update: name must exist if no jid. */
 1140                         else if (cuflags == JAIL_UPDATE && pr == NULL) {
 1141                                 error = ENOENT;
 1142                                 vfs_opterror(opts, "jail \"%s\" not found",
 1143                                     name);
 1144                                 goto done_unlock_list;
 1145                         }
 1146                 }
 1147         }
 1148         /* Update: must provide a jid or name. */
 1149         else if (cuflags == JAIL_UPDATE && pr == NULL) {
 1150                 error = ENOENT;
 1151                 vfs_opterror(opts, "update specified no jail");
 1152                 goto done_unlock_list;
 1153         }
 1154 
 1155         /* If there's no prison to update, create a new one and link it in. */
 1156         if (pr == NULL) {
 1157                 for (tpr = mypr; tpr != NULL; tpr = tpr->pr_parent)
 1158                         if (tpr->pr_childcount >= tpr->pr_childmax) {
 1159                                 error = EPERM;
 1160                                 vfs_opterror(opts, "prison limit exceeded");
 1161                                 goto done_unlock_list;
 1162                         }
 1163                 created = 1;
 1164                 mtx_lock(&ppr->pr_mtx);
 1165                 if (ppr->pr_ref == 0) {
 1166                         mtx_unlock(&ppr->pr_mtx);
 1167                         error = ENOENT;
 1168                         vfs_opterror(opts, "jail \"%s\" not found",
 1169                             prison_name(mypr, ppr));
 1170                         goto done_unlock_list;
 1171                 }
 1172                 ppr->pr_ref++;
 1173                 ppr->pr_uref++;
 1174                 mtx_unlock(&ppr->pr_mtx);
 1175                 pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
 1176                 if (jid == 0) {
 1177                         /* Find the next free jid. */
 1178                         jid = lastprid + 1;
 1179  findnext:
 1180                         if (jid == JAIL_MAX)
 1181                                 jid = 1;
 1182                         TAILQ_FOREACH(tpr, &allprison, pr_list) {
 1183                                 if (tpr->pr_id < jid)
 1184                                         continue;
 1185                                 if (tpr->pr_id > jid || tpr->pr_ref == 0) {
 1186                                         TAILQ_INSERT_BEFORE(tpr, pr, pr_list);
 1187                                         break;
 1188                                 }
 1189                                 if (jid == lastprid) {
 1190                                         error = EAGAIN;
 1191                                         vfs_opterror(opts,
 1192                                             "no available jail IDs");
 1193                                         free(pr, M_PRISON);
 1194                                         prison_deref(ppr, PD_DEREF |
 1195                                             PD_DEUREF | PD_LIST_XLOCKED);
 1196                                         goto done_releroot;
 1197                                 }
 1198                                 jid++;
 1199                                 goto findnext;
 1200                         }
 1201                         lastprid = jid;
 1202                 } else {
 1203                         /*
 1204                          * The jail already has a jid (that did not yet exist),
 1205                          * so just find where to insert it.
 1206                          */
 1207                         TAILQ_FOREACH(tpr, &allprison, pr_list)
 1208                                 if (tpr->pr_id >= jid) {
 1209                                         TAILQ_INSERT_BEFORE(tpr, pr, pr_list);
 1210                                         break;
 1211                                 }
 1212                 }
 1213                 if (tpr == NULL)
 1214                         TAILQ_INSERT_TAIL(&allprison, pr, pr_list);
 1215                 LIST_INSERT_HEAD(&ppr->pr_children, pr, pr_sibling);
 1216                 for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent)
 1217                         tpr->pr_childcount++;
 1218 
 1219                 pr->pr_parent = ppr;
 1220                 pr->pr_id = jid;
 1221 
 1222                 /* Set some default values, and inherit some from the parent. */
 1223                 if (namelc == NULL)
 1224                         namelc = "";
 1225                 if (path == NULL) {
 1226                         path = "/";
 1227                         root = mypr->pr_root;
 1228                         vref(root);
 1229                 }
 1230                 strlcpy(pr->pr_hostuuid, DEFAULT_HOSTUUID, HOSTUUIDLEN);
 1231                 pr->pr_flags |= PR_HOST;
 1232 #if defined(INET) || defined(INET6)
 1233 #ifdef VIMAGE
 1234                 if (!(pr_flags & PR_VNET))
 1235 #endif
 1236                 {
 1237 #ifdef INET
 1238                         if (!(ch_flags & PR_IP4_USER))
 1239                                 pr->pr_flags |= PR_IP4 | PR_IP4_USER;
 1240                         else if (!(pr_flags & PR_IP4_USER)) {
 1241                                 pr->pr_flags |= ppr->pr_flags & PR_IP4;
 1242                                 if (ppr->pr_ip4 != NULL) {
 1243                                         pr->pr_ip4s = ppr->pr_ip4s;
 1244                                         pr->pr_ip4 = malloc(pr->pr_ip4s *
 1245                                             sizeof(struct in_addr), M_PRISON,
 1246                                             M_WAITOK);
 1247                                         bcopy(ppr->pr_ip4, pr->pr_ip4,
 1248                                             pr->pr_ip4s * sizeof(*pr->pr_ip4));
 1249                                 }
 1250                         }
 1251 #endif
 1252 #ifdef INET6
 1253                         if (!(ch_flags & PR_IP6_USER))
 1254                                 pr->pr_flags |= PR_IP6 | PR_IP6_USER;
 1255                         else if (!(pr_flags & PR_IP6_USER)) {
 1256                                 pr->pr_flags |= ppr->pr_flags & PR_IP6;
 1257                                 if (ppr->pr_ip6 != NULL) {
 1258                                         pr->pr_ip6s = ppr->pr_ip6s;
 1259                                         pr->pr_ip6 = malloc(pr->pr_ip6s *
 1260                                             sizeof(struct in6_addr), M_PRISON,
 1261                                             M_WAITOK);
 1262                                         bcopy(ppr->pr_ip6, pr->pr_ip6,
 1263                                             pr->pr_ip6s * sizeof(*pr->pr_ip6));
 1264                                 }
 1265                         }
 1266 #endif
 1267                 }
 1268 #endif
 1269                 /* Source address selection is always on by default. */
 1270                 pr->pr_flags |= _PR_IP_SADDRSEL;
 1271 
 1272                 pr->pr_securelevel = ppr->pr_securelevel;
 1273                 pr->pr_allow = JAIL_DEFAULT_ALLOW & ppr->pr_allow;
 1274                 pr->pr_enforce_statfs = jail_default_enforce_statfs;
 1275                 pr->pr_devfs_rsnum = ppr->pr_devfs_rsnum;
 1276 
 1277                 pr->pr_osreldate = osreldt ? osreldt : ppr->pr_osreldate;
 1278                 if (osrelstr == NULL)
 1279                         strlcpy(pr->pr_osrelease, ppr->pr_osrelease,
 1280                             sizeof(pr->pr_osrelease));
 1281                 else
 1282                         strlcpy(pr->pr_osrelease, osrelstr,
 1283                             sizeof(pr->pr_osrelease));
 1284 
 1285                 LIST_INIT(&pr->pr_children);
 1286                 mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK);
 1287                 TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
 1288 
 1289 #ifdef VIMAGE
 1290                 /* Allocate a new vnet if specified. */
 1291                 pr->pr_vnet = (pr_flags & PR_VNET)
 1292                     ? vnet_alloc() : ppr->pr_vnet;
 1293 #endif
 1294                 /*
 1295                  * Allocate a dedicated cpuset for each jail.
  1296                  * Unlike other initial settings, this may return an error.
 1297                  */
 1298                 error = cpuset_create_root(ppr, &pr->pr_cpuset);
 1299                 if (error) {
 1300                         prison_deref(pr, PD_LIST_XLOCKED);
 1301                         goto done_releroot;
 1302                 }
 1303 
 1304                 mtx_lock(&pr->pr_mtx);
 1305                 /*
 1306                  * New prisons do not yet have a reference, because we do not
 1307                  * want others to see the incomplete prison once the
 1308                  * allprison_lock is downgraded.
 1309                  */
 1310         } else {
 1311                 created = 0;
 1312                 /*
 1313                  * Grab a reference for existing prisons, to ensure they
 1314                  * continue to exist for the duration of the call.
 1315                  */
 1316                 pr->pr_ref++;
 1317 #if defined(VIMAGE) && (defined(INET) || defined(INET6))
 1318                 if ((pr->pr_flags & PR_VNET) &&
 1319                     (ch_flags & (PR_IP4_USER | PR_IP6_USER))) {
 1320                         error = EINVAL;
 1321                         vfs_opterror(opts,
 1322                             "vnet jails cannot have IP address restrictions");
 1323                         goto done_deref_locked;
 1324                 }
 1325 #endif
 1326 #ifdef INET
 1327                 if (PR_IP4_USER & ch_flags & (pr_flags ^ pr->pr_flags)) {
 1328                         error = EINVAL;
 1329                         vfs_opterror(opts,
 1330                             "ip4 cannot be changed after creation");
 1331                         goto done_deref_locked;
 1332                 }
 1333 #endif
 1334 #ifdef INET6
 1335                 if (PR_IP6_USER & ch_flags & (pr_flags ^ pr->pr_flags)) {
 1336                         error = EINVAL;
 1337                         vfs_opterror(opts,
 1338                             "ip6 cannot be changed after creation");
 1339                         goto done_deref_locked;
 1340                 }
 1341 #endif
 1342         }
 1343 
 1344         /* Do final error checking before setting anything. */
 1345         if (gotslevel) {
 1346                 if (slevel < ppr->pr_securelevel) {
 1347                         error = EPERM;
 1348                         goto done_deref_locked;
 1349                 }
 1350         }
 1351         if (gotchildmax) {
 1352                 if (childmax >= ppr->pr_childmax) {
 1353                         error = EPERM;
 1354                         goto done_deref_locked;
 1355                 }
 1356         }
 1357         if (gotenforce) {
 1358                 if (enforce < ppr->pr_enforce_statfs) {
 1359                         error = EPERM;
 1360                         goto done_deref_locked;
 1361                 }
 1362         }
 1363         if (gotrsnum) {
 1364                 /*
 1365                  * devfs_rsnum is a uint16_t
 1366                  */
 1367                 if (rsnum < 0 || rsnum > 65535) {
 1368                         error = EINVAL;
 1369                         goto done_deref_locked;
 1370                 }
 1371                 /*
 1372                  * Nested jails always inherit parent's devfs ruleset
 1373                  */
 1374                 if (jailed(td->td_ucred)) {
 1375                         if (rsnum > 0 && rsnum != ppr->pr_devfs_rsnum) {
 1376                                 error = EPERM;
 1377                                 goto done_deref_locked;
 1378                         } else
 1379                                 rsnum = ppr->pr_devfs_rsnum;
 1380                 }
 1381         }
 1382 #ifdef INET
 1383         if (ip4s > 0) {
 1384                 if (ppr->pr_flags & PR_IP4) {
 1385                         /*
 1386                          * Make sure the new set of IP addresses is a
 1387                          * subset of the parent's list.  Don't worry
 1388                          * about the parent being unlocked, as any
 1389                          * setting is done with allprison_lock held.
 1390                          */
 1391                         for (ij = 0; ij < ppr->pr_ip4s; ij++)
 1392                                 if (ip4[0].s_addr == ppr->pr_ip4[ij].s_addr)
 1393                                         break;
 1394                         if (ij == ppr->pr_ip4s) {
 1395                                 error = EPERM;
 1396                                 goto done_deref_locked;
 1397                         }
 1398                         if (ip4s > 1) {
 1399                                 for (ii = ij = 1; ii < ip4s; ii++) {
 1400                                         if (ip4[ii].s_addr ==
 1401                                             ppr->pr_ip4[0].s_addr)
 1402                                                 continue;
 1403                                         for (; ij < ppr->pr_ip4s; ij++)
 1404                                                 if (ip4[ii].s_addr ==
 1405                                                     ppr->pr_ip4[ij].s_addr)
 1406                                                         break;
 1407                                         if (ij == ppr->pr_ip4s)
 1408                                                 break;
 1409                                 }
 1410                                 if (ij == ppr->pr_ip4s) {
 1411                                         error = EPERM;
 1412                                         goto done_deref_locked;
 1413                                 }
 1414                         }
 1415                 }
 1416                 /*
 1417                  * Check for conflicting IP addresses.  We permit them
 1418                  * if there is no more than one IP on each jail.  If
 1419                  * there is a duplicate on a jail with more than one
 1420                  * IP stop checking and return error.
 1421                  */
 1422 #ifdef VIMAGE
 1423                 for (tppr = ppr; tppr != &prison0; tppr = tppr->pr_parent)
 1424                         if (tppr->pr_flags & PR_VNET)
 1425                                 break;
 1426 #else
 1427                 tppr = &prison0;
 1428 #endif
 1429                 FOREACH_PRISON_DESCENDANT(tppr, tpr, descend) {
 1430                         if (tpr == pr ||
 1431 #ifdef VIMAGE
 1432                             (tpr != tppr && (tpr->pr_flags & PR_VNET)) ||
 1433 #endif
 1434                             tpr->pr_uref == 0) {
 1435                                 descend = 0;
 1436                                 continue;
 1437                         }
 1438                         if (!(tpr->pr_flags & PR_IP4_USER))
 1439                                 continue;
 1440                         descend = 0;
 1441                         if (tpr->pr_ip4 == NULL ||
 1442                             (ip4s == 1 && tpr->pr_ip4s == 1))
 1443                                 continue;
 1444                         for (ii = 0; ii < ip4s; ii++) {
 1445                                 if (prison_check_ip4_locked(tpr, &ip4[ii]) ==
 1446                                     0) {
 1447                                         error = EADDRINUSE;
 1448                                         vfs_opterror(opts,
 1449                                             "IPv4 addresses clash");
 1450                                         goto done_deref_locked;
 1451                                 }
 1452                         }
 1453                 }
 1454         }
 1455 #endif
 1456 #ifdef INET6
 1457         if (ip6s > 0) {
 1458                 if (ppr->pr_flags & PR_IP6) {
 1459                         /*
 1460                          * Make sure the new set of IP addresses is a
 1461                          * subset of the parent's list.
 1462                          */
 1463                         for (ij = 0; ij < ppr->pr_ip6s; ij++)
 1464                                 if (IN6_ARE_ADDR_EQUAL(&ip6[0],
 1465                                     &ppr->pr_ip6[ij]))
 1466                                         break;
 1467                         if (ij == ppr->pr_ip6s) {
 1468                                 error = EPERM;
 1469                                 goto done_deref_locked;
 1470                         }
 1471                         if (ip6s > 1) {
 1472                                 for (ii = ij = 1; ii < ip6s; ii++) {
 1473                                         if (IN6_ARE_ADDR_EQUAL(&ip6[ii],
 1474                                              &ppr->pr_ip6[0]))
 1475                                                 continue;
 1476                                         for (; ij < ppr->pr_ip6s; ij++)
 1477                                                 if (IN6_ARE_ADDR_EQUAL(
 1478                                                     &ip6[ii], &ppr->pr_ip6[ij]))
 1479                                                         break;
 1480                                         if (ij == ppr->pr_ip6s)
 1481                                                 break;
 1482                                 }
 1483                                 if (ij == ppr->pr_ip6s) {
 1484                                         error = EPERM;
 1485                                         goto done_deref_locked;
 1486                                 }
 1487                         }
 1488                 }
 1489                 /* Check for conflicting IP addresses. */
 1490 #ifdef VIMAGE
 1491                 for (tppr = ppr; tppr != &prison0; tppr = tppr->pr_parent)
 1492                         if (tppr->pr_flags & PR_VNET)
 1493                                 break;
 1494 #else
 1495                 tppr = &prison0;
 1496 #endif
 1497                 FOREACH_PRISON_DESCENDANT(tppr, tpr, descend) {
 1498                         if (tpr == pr ||
 1499 #ifdef VIMAGE
 1500                             (tpr != tppr && (tpr->pr_flags & PR_VNET)) ||
 1501 #endif
 1502                             tpr->pr_uref == 0) {
 1503                                 descend = 0;
 1504                                 continue;
 1505                         }
 1506                         if (!(tpr->pr_flags & PR_IP6_USER))
 1507                                 continue;
 1508                         descend = 0;
 1509                         if (tpr->pr_ip6 == NULL ||
 1510                             (ip6s == 1 && tpr->pr_ip6s == 1))
 1511                                 continue;
 1512                         for (ii = 0; ii < ip6s; ii++) {
 1513                                 if (prison_check_ip6_locked(tpr, &ip6[ii]) ==
 1514                                     0) {
 1515                                         error = EADDRINUSE;
 1516                                         vfs_opterror(opts,
 1517                                             "IPv6 addresses clash");
 1518                                         goto done_deref_locked;
 1519                                 }
 1520                         }
 1521                 }
 1522         }
 1523 #endif
 1524         onamelen = namelen = 0;
 1525         if (namelc != NULL) {
 1526                 /* Give a default name of the jid.  Also allow the name to be
 1527                  * explicitly the jid - but not any other number, and only in
 1528                  * normal form (no leading zero/etc).
 1529                  */
 1530                 if (namelc[0] == '\0')
 1531                         snprintf(namelc = numbuf, sizeof(numbuf), "%d", jid);
 1532                 else if ((strtoul(namelc, &p, 10) != jid ||
 1533                           namelc[0] < '1' || namelc[0] > '9') && *p == '\0') {
 1534                         error = EINVAL;
 1535                         vfs_opterror(opts,
 1536                             "name cannot be numeric (unless it is the jid)");
 1537                         goto done_deref_locked;
 1538                 }
 1539                 /*
 1540                  * Make sure the name isn't too long for the prison or its
 1541                  * children.
 1542                  */
 1543                 pnamelen = (ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1;
 1544                 onamelen = strlen(pr->pr_name + pnamelen);
 1545                 namelen = strlen(namelc);
 1546                 if (pnamelen + namelen + 1 > sizeof(pr->pr_name)) {
 1547                         error = ENAMETOOLONG;
 1548                         goto done_deref_locked;
 1549                 }
 1550                 FOREACH_PRISON_DESCENDANT(pr, tpr, descend) {
 1551                         if (strlen(tpr->pr_name) + (namelen - onamelen) >=
 1552                             sizeof(pr->pr_name)) {
 1553                                 error = ENAMETOOLONG;
 1554                                 goto done_deref_locked;
 1555                         }
 1556                 }
 1557         }
 1558         if (pr_allow & ~ppr->pr_allow) {
 1559                 error = EPERM;
 1560                 goto done_deref_locked;
 1561         }
 1562 
 1563         /*
 1564          * Let modules check their parameters.  This requires unlocking and
 1565          * then re-locking the prison, but this is still a valid state as long
 1566          * as allprison_lock remains xlocked.
 1567          */
 1568         mtx_unlock(&pr->pr_mtx);
 1569         error = osd_jail_call(pr, PR_METHOD_CHECK, opts);
 1570         if (error != 0) {
 1571                 prison_deref(pr, created
 1572                     ? PD_LIST_XLOCKED
 1573                     : PD_DEREF | PD_LIST_XLOCKED);
 1574                 goto done_releroot;
 1575         }
 1576         mtx_lock(&pr->pr_mtx);
 1577 
 1578         /* At this point, all valid parameters should have been noted. */
 1579         TAILQ_FOREACH(opt, opts, link) {
 1580                 if (!opt->seen && strcmp(opt->name, "errmsg")) {
 1581                         error = EINVAL;
 1582                         vfs_opterror(opts, "unknown parameter: %s", opt->name);
 1583                         goto done_deref_locked;
 1584                 }
 1585         }
 1586 
 1587         /* Set the parameters of the prison. */
 1588 #ifdef INET
 1589         redo_ip4 = 0;
 1590         if (pr_flags & PR_IP4_USER) {
 1591                 pr->pr_flags |= PR_IP4;
 1592                 free(pr->pr_ip4, M_PRISON);
 1593                 pr->pr_ip4s = ip4s;
 1594                 pr->pr_ip4 = ip4;
 1595                 ip4 = NULL;
 1596                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
 1597 #ifdef VIMAGE
 1598                         if (tpr->pr_flags & PR_VNET) {
 1599                                 descend = 0;
 1600                                 continue;
 1601                         }
 1602 #endif
 1603                         if (prison_restrict_ip4(tpr, NULL)) {
 1604                                 redo_ip4 = 1;
 1605                                 descend = 0;
 1606                         }
 1607                 }
 1608         }
 1609 #endif
 1610 #ifdef INET6
 1611         redo_ip6 = 0;
 1612         if (pr_flags & PR_IP6_USER) {
 1613                 pr->pr_flags |= PR_IP6;
 1614                 free(pr->pr_ip6, M_PRISON);
 1615                 pr->pr_ip6s = ip6s;
 1616                 pr->pr_ip6 = ip6;
 1617                 ip6 = NULL;
 1618                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
 1619 #ifdef VIMAGE
 1620                         if (tpr->pr_flags & PR_VNET) {
 1621                                 descend = 0;
 1622                                 continue;
 1623                         }
 1624 #endif
 1625                         if (prison_restrict_ip6(tpr, NULL)) {
 1626                                 redo_ip6 = 1;
 1627                                 descend = 0;
 1628                         }
 1629                 }
 1630         }
 1631 #endif
 1632         if (gotslevel) {
 1633                 pr->pr_securelevel = slevel;
 1634                 /* Set all child jails to be at least this level. */
 1635                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend)
 1636                         if (tpr->pr_securelevel < slevel)
 1637                                 tpr->pr_securelevel = slevel;
 1638         }
 1639         if (gotchildmax) {
 1640                 pr->pr_childmax = childmax;
 1641                 /* Set all child jails to under this limit. */
 1642                 FOREACH_PRISON_DESCENDANT_LOCKED_LEVEL(pr, tpr, descend, level)
 1643                         if (tpr->pr_childmax > childmax - level)
 1644                                 tpr->pr_childmax = childmax > level
 1645                                     ? childmax - level : 0;
 1646         }
 1647         if (gotenforce) {
 1648                 pr->pr_enforce_statfs = enforce;
 1649                 /* Pass this restriction on to the children. */
 1650                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend)
 1651                         if (tpr->pr_enforce_statfs < enforce)
 1652                                 tpr->pr_enforce_statfs = enforce;
 1653         }
 1654         if (gotrsnum) {
 1655                 pr->pr_devfs_rsnum = rsnum;
 1656                 /* Pass this restriction on to the children. */
 1657                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend)
 1658                         tpr->pr_devfs_rsnum = rsnum;
 1659         }
 1660         if (namelc != NULL) {
 1661                 if (ppr == &prison0)
 1662                         strlcpy(pr->pr_name, namelc, sizeof(pr->pr_name));
 1663                 else
 1664                         snprintf(pr->pr_name, sizeof(pr->pr_name), "%s.%s",
 1665                             ppr->pr_name, namelc);
 1666                 /* Change this component of child names. */
 1667                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
 1668                         bcopy(tpr->pr_name + onamelen, tpr->pr_name + namelen,
 1669                             strlen(tpr->pr_name + onamelen) + 1);
 1670                         bcopy(pr->pr_name, tpr->pr_name, namelen);
 1671                 }
 1672         }
 1673         if (path != NULL) {
 1674                 /* Try to keep a real-rooted full pathname. */
 1675                 if (fullpath_disabled && path[0] == '/' &&
 1676                     strcmp(mypr->pr_path, "/"))
 1677                         snprintf(pr->pr_path, sizeof(pr->pr_path), "%s%s",
 1678                             mypr->pr_path, path);
 1679                 else
 1680                         strlcpy(pr->pr_path, path, sizeof(pr->pr_path));
 1681                 pr->pr_root = root;
 1682         }
 1683         if (PR_HOST & ch_flags & ~pr_flags) {
 1684                 if (pr->pr_flags & PR_HOST) {
 1685                         /*
 1686                          * Copy the parent's host info.  As with pr_ip4 above,
 1687                          * the lack of a lock on the parent is not a problem;
 1688                          * it is always set with allprison_lock at least
 1689                          * shared, and is held exclusively here.
 1690                          */
 1691                         strlcpy(pr->pr_hostname, pr->pr_parent->pr_hostname,
 1692                             sizeof(pr->pr_hostname));
 1693                         strlcpy(pr->pr_domainname, pr->pr_parent->pr_domainname,
 1694                             sizeof(pr->pr_domainname));
 1695                         strlcpy(pr->pr_hostuuid, pr->pr_parent->pr_hostuuid,
 1696                             sizeof(pr->pr_hostuuid));
 1697                         pr->pr_hostid = pr->pr_parent->pr_hostid;
 1698                 }
 1699         } else if (host != NULL || domain != NULL || uuid != NULL || gothid) {
 1700                 /* Set this prison, and any descendants without PR_HOST. */
 1701                 if (host != NULL)
 1702                         strlcpy(pr->pr_hostname, host, sizeof(pr->pr_hostname));
 1703                 if (domain != NULL)
 1704                         strlcpy(pr->pr_domainname, domain, 
 1705                             sizeof(pr->pr_domainname));
 1706                 if (uuid != NULL)
 1707                         strlcpy(pr->pr_hostuuid, uuid, sizeof(pr->pr_hostuuid));
 1708                 if (gothid)
 1709                         pr->pr_hostid = hid;
 1710                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
 1711                         if (tpr->pr_flags & PR_HOST)
 1712                                 descend = 0;
 1713                         else {
 1714                                 if (host != NULL)
 1715                                         strlcpy(tpr->pr_hostname,
 1716                                             pr->pr_hostname,
 1717                                             sizeof(tpr->pr_hostname));
 1718                                 if (domain != NULL)
 1719                                         strlcpy(tpr->pr_domainname, 
 1720                                             pr->pr_domainname,
 1721                                             sizeof(tpr->pr_domainname));
 1722                                 if (uuid != NULL)
 1723                                         strlcpy(tpr->pr_hostuuid,
 1724                                             pr->pr_hostuuid,
 1725                                             sizeof(tpr->pr_hostuuid));
 1726                                 if (gothid)
 1727                                         tpr->pr_hostid = hid;
 1728                         }
 1729                 }
 1730         }
 1731         if ((tallow = ch_allow & ~pr_allow)) {
 1732                 /* Clear allow bits in all children. */
 1733                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend)
 1734                         tpr->pr_allow &= ~tallow;
 1735         }
 1736         pr->pr_allow = (pr->pr_allow & ~ch_allow) | pr_allow;
 1737         /*
 1738          * Persistent prisons get an extra reference, and prisons losing their
 1739          * persist flag lose that reference.  Only do this for existing prisons
 1740          * for now, so new ones will remain unseen until after the module
 1741          * handlers have completed.
 1742          */
 1743         born = pr->pr_uref == 0;
 1744         if (!created && (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags))) {
 1745                 if (pr_flags & PR_PERSIST) {
 1746                         pr->pr_ref++;
 1747                         pr->pr_uref++;
 1748                 } else {
 1749                         pr->pr_ref--;
 1750                         pr->pr_uref--;
 1751                 }
 1752         }
 1753         pr->pr_flags = (pr->pr_flags & ~ch_flags) | pr_flags;
 1754         pr->pr_flags &= ~PR_REMOVE;
 1755         mtx_unlock(&pr->pr_mtx);
 1756 
 1757 #ifdef RACCT
 1758         if (racct_enable && created)
 1759                 prison_racct_attach(pr);
 1760 #endif
 1761 
 1762         /* Locks may have prevented a complete restriction of child IP
 1763          * addresses.  If so, allocate some more memory and try again.
 1764          */
 1765 #ifdef INET
 1766         while (redo_ip4) {
 1767                 ip4s = pr->pr_ip4s;
 1768                 ip4 = malloc(ip4s * sizeof(*ip4), M_PRISON, M_WAITOK);
 1769                 mtx_lock(&pr->pr_mtx);
 1770                 redo_ip4 = 0;
 1771                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
 1772 #ifdef VIMAGE
 1773                         if (tpr->pr_flags & PR_VNET) {
 1774                                 descend = 0;
 1775                                 continue;
 1776                         }
 1777 #endif
 1778                         if (prison_restrict_ip4(tpr, ip4)) {
 1779                                 if (ip4 != NULL)
 1780                                         ip4 = NULL;
 1781                                 else
 1782                                         redo_ip4 = 1;
 1783                         }
 1784                 }
 1785                 mtx_unlock(&pr->pr_mtx);
 1786         }
 1787 #endif
 1788 #ifdef INET6
 1789         while (redo_ip6) {
 1790                 ip6s = pr->pr_ip6s;
 1791                 ip6 = malloc(ip6s * sizeof(*ip6), M_PRISON, M_WAITOK);
 1792                 mtx_lock(&pr->pr_mtx);
 1793                 redo_ip6 = 0;
 1794                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
 1795 #ifdef VIMAGE
 1796                         if (tpr->pr_flags & PR_VNET) {
 1797                                 descend = 0;
 1798                                 continue;
 1799                         }
 1800 #endif
 1801                         if (prison_restrict_ip6(tpr, ip6)) {
 1802                                 if (ip6 != NULL)
 1803                                         ip6 = NULL;
 1804                                 else
 1805                                         redo_ip6 = 1;
 1806                         }
 1807                 }
 1808                 mtx_unlock(&pr->pr_mtx);
 1809         }
 1810 #endif
 1811 
 1812         /* Let the modules do their work. */
 1813         sx_downgrade(&allprison_lock);
 1814         if (born) {
 1815                 error = osd_jail_call(pr, PR_METHOD_CREATE, opts);
 1816                 if (error) {
 1817                         (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
 1818                         prison_deref(pr, created
 1819                             ? PD_LIST_SLOCKED
 1820                             : PD_DEREF | PD_LIST_SLOCKED);
 1821                         goto done_errmsg;
 1822                 }
 1823         }
 1824         error = osd_jail_call(pr, PR_METHOD_SET, opts);
 1825         if (error) {
 1826                 if (born)
 1827                         (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
 1828                 prison_deref(pr, created
 1829                     ? PD_LIST_SLOCKED
 1830                     : PD_DEREF | PD_LIST_SLOCKED);
 1831                 goto done_errmsg;
 1832         }
 1833 
 1834         /* Attach this process to the prison if requested. */
 1835         if (flags & JAIL_ATTACH) {
 1836                 mtx_lock(&pr->pr_mtx);
 1837                 error = do_jail_attach(td, pr);
 1838                 if (error) {
 1839                         vfs_opterror(opts, "attach failed");
 1840                         if (!created)
 1841                                 prison_deref(pr, PD_DEREF);
 1842                         goto done_errmsg;
 1843                 }
 1844         }
 1845 
 1846 #ifdef RACCT
 1847         if (racct_enable && !created) {
 1848                 if (!(flags & JAIL_ATTACH))
 1849                         sx_sunlock(&allprison_lock);
 1850                 prison_racct_modify(pr);
 1851                 if (!(flags & JAIL_ATTACH))
 1852                         sx_slock(&allprison_lock);
 1853         }
 1854 #endif
 1855 
 1856         td->td_retval[0] = pr->pr_id;
 1857 
 1858         /*
 1859          * Now that it is all there, drop the temporary reference from existing
 1860          * prisons.  Or add a reference to newly created persistent prisons
 1861          * (which was not done earlier so that the prison would not be publicly
 1862          * visible).
 1863          */
 1864         if (!created) {
 1865                 prison_deref(pr, (flags & JAIL_ATTACH)
 1866                     ? PD_DEREF
 1867                     : PD_DEREF | PD_LIST_SLOCKED);
 1868         } else {
 1869                 if (pr_flags & PR_PERSIST) {
 1870                         mtx_lock(&pr->pr_mtx);
 1871                         pr->pr_ref++;
 1872                         pr->pr_uref++;
 1873                         mtx_unlock(&pr->pr_mtx);
 1874                 }
 1875                 if (!(flags & JAIL_ATTACH))
 1876                         sx_sunlock(&allprison_lock);
 1877         }
 1878 
 1879         goto done_free;
 1880 
 1881  done_deref_locked:
 1882         prison_deref(pr, created
 1883             ? PD_LOCKED | PD_LIST_XLOCKED
 1884             : PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED);
 1885         goto done_releroot;
 1886  done_unlock_list:
 1887         sx_xunlock(&allprison_lock);
 1888  done_releroot:
 1889         if (root != NULL)
 1890                 vrele(root);
 1891  done_errmsg:
 1892         if (error) {
 1893                 if (vfs_getopt(opts, "errmsg", (void **)&errmsg,
 1894                     &errmsg_len) == 0 && errmsg_len > 0) {
 1895                         errmsg_pos = 2 * vfs_getopt_pos(opts, "errmsg") + 1;
 1896                         if (optuio->uio_segflg == UIO_SYSSPACE)
 1897                                 bcopy(errmsg,
 1898                                     optuio->uio_iov[errmsg_pos].iov_base,
 1899                                     errmsg_len);
 1900                         else
 1901                                 copyout(errmsg,
 1902                                     optuio->uio_iov[errmsg_pos].iov_base,
 1903                                     errmsg_len);
 1904                 }
 1905         }
 1906  done_free:
 1907 #ifdef INET
 1908         free(ip4, M_PRISON);
 1909 #endif
 1910 #ifdef INET6
 1911         free(ip6, M_PRISON);
 1912 #endif
 1913         if (g_path != NULL)
 1914                 free(g_path, M_TEMP);
 1915         vfs_freeopts(opts);
 1916         return (error);
 1917 }
 1918 
 1919 
 1920 /*
 1921  * struct jail_get_args {
 1922  *      struct iovec *iovp;
 1923  *      unsigned int iovcnt;
 1924  *      int flags;
 1925  * };
 1926  */
 1927 int
 1928 sys_jail_get(struct thread *td, struct jail_get_args *uap)
 1929 {
 1930         struct uio *auio;
 1931         int error;
 1932 
 1933         /* Check that we have an even number of iovecs. */
 1934         if (uap->iovcnt & 1)
 1935                 return (EINVAL);
 1936 
 1937         error = copyinuio(uap->iovp, uap->iovcnt, &auio);
 1938         if (error)
 1939                 return (error);
 1940         error = kern_jail_get(td, auio, uap->flags);
 1941         if (error == 0)
 1942                 error = copyout(auio->uio_iov, uap->iovp,
 1943                     uap->iovcnt * sizeof (struct iovec));
 1944         free(auio, M_IOV);
 1945         return (error);
 1946 }
 1947 
 1948 int
 1949 kern_jail_get(struct thread *td, struct uio *optuio, int flags)
 1950 {
 1951         struct prison *pr, *mypr;
 1952         struct vfsopt *opt;
 1953         struct vfsoptlist *opts;
 1954         char *errmsg, *name;
 1955         int error, errmsg_len, errmsg_pos, fi, i, jid, len, locked, pos;
 1956 
 1957         if (flags & ~JAIL_GET_MASK)
 1958                 return (EINVAL);
 1959 
 1960         /* Get the parameter list. */
 1961         error = vfs_buildopts(optuio, &opts);
 1962         if (error)
 1963                 return (error);
 1964         errmsg_pos = vfs_getopt_pos(opts, "errmsg");
 1965         mypr = td->td_ucred->cr_prison;
 1966 
 1967         /*
 1968          * Find the prison specified by one of: lastjid, jid, name.
 1969          */
 1970         sx_slock(&allprison_lock);
 1971         error = vfs_copyopt(opts, "lastjid", &jid, sizeof(jid));
 1972         if (error == 0) {
 1973                 TAILQ_FOREACH(pr, &allprison, pr_list) {
 1974                         if (pr->pr_id > jid && prison_ischild(mypr, pr)) {
 1975                                 mtx_lock(&pr->pr_mtx);
 1976                                 if (pr->pr_ref > 0 &&
 1977                                     (pr->pr_uref > 0 || (flags & JAIL_DYING)))
 1978                                         break;
 1979                                 mtx_unlock(&pr->pr_mtx);
 1980                         }
 1981                 }
 1982                 if (pr != NULL)
 1983                         goto found_prison;
 1984                 error = ENOENT;
 1985                 vfs_opterror(opts, "no jail after %d", jid);
 1986                 goto done_unlock_list;
 1987         } else if (error != ENOENT)
 1988                 goto done_unlock_list;
 1989 
 1990         error = vfs_copyopt(opts, "jid", &jid, sizeof(jid));
 1991         if (error == 0) {
 1992                 if (jid != 0) {
 1993                         pr = prison_find_child(mypr, jid);
 1994                         if (pr != NULL) {
 1995                                 if (pr->pr_uref == 0 && !(flags & JAIL_DYING)) {
 1996                                         mtx_unlock(&pr->pr_mtx);
 1997                                         error = ENOENT;
 1998                                         vfs_opterror(opts, "jail %d is dying",
 1999                                             jid);
 2000                                         goto done_unlock_list;
 2001                                 }
 2002                                 goto found_prison;
 2003                         }
 2004                         error = ENOENT;
 2005                         vfs_opterror(opts, "jail %d not found", jid);
 2006                         goto done_unlock_list;
 2007                 }
 2008         } else if (error != ENOENT)
 2009                 goto done_unlock_list;
 2010 
 2011         error = vfs_getopt(opts, "name", (void **)&name, &len);
 2012         if (error == 0) {
 2013                 if (len == 0 || name[len - 1] != '\0') {
 2014                         error = EINVAL;
 2015                         goto done_unlock_list;
 2016                 }
 2017                 pr = prison_find_name(mypr, name);
 2018                 if (pr != NULL) {
 2019                         if (pr->pr_uref == 0 && !(flags & JAIL_DYING)) {
 2020                                 mtx_unlock(&pr->pr_mtx);
 2021                                 error = ENOENT;
 2022                                 vfs_opterror(opts, "jail \"%s\" is dying",
 2023                                     name);
 2024                                 goto done_unlock_list;
 2025                         }
 2026                         goto found_prison;
 2027                 }
 2028                 error = ENOENT;
 2029                 vfs_opterror(opts, "jail \"%s\" not found", name);
 2030                 goto done_unlock_list;
 2031         } else if (error != ENOENT)
 2032                 goto done_unlock_list;
 2033 
 2034         vfs_opterror(opts, "no jail specified");
 2035         error = ENOENT;
 2036         goto done_unlock_list;
 2037 
 2038  found_prison:
 2039         /* Get the parameters of the prison. */
 2040         pr->pr_ref++;
 2041         locked = PD_LOCKED;
 2042         td->td_retval[0] = pr->pr_id;
 2043         error = vfs_setopt(opts, "jid", &pr->pr_id, sizeof(pr->pr_id));
 2044         if (error != 0 && error != ENOENT)
 2045                 goto done_deref;
 2046         i = (pr->pr_parent == mypr) ? 0 : pr->pr_parent->pr_id;
 2047         error = vfs_setopt(opts, "parent", &i, sizeof(i));
 2048         if (error != 0 && error != ENOENT)
 2049                 goto done_deref;
 2050         error = vfs_setopts(opts, "name", prison_name(mypr, pr));
 2051         if (error != 0 && error != ENOENT)
 2052                 goto done_deref;
 2053         error = vfs_setopt(opts, "cpuset.id", &pr->pr_cpuset->cs_id,
 2054             sizeof(pr->pr_cpuset->cs_id));
 2055         if (error != 0 && error != ENOENT)
 2056                 goto done_deref;
 2057         error = vfs_setopts(opts, "path", prison_path(mypr, pr));
 2058         if (error != 0 && error != ENOENT)
 2059                 goto done_deref;
 2060 #ifdef INET
 2061         error = vfs_setopt_part(opts, "ip4.addr", pr->pr_ip4,
 2062             pr->pr_ip4s * sizeof(*pr->pr_ip4));
 2063         if (error != 0 && error != ENOENT)
 2064                 goto done_deref;
 2065 #endif
 2066 #ifdef INET6
 2067         error = vfs_setopt_part(opts, "ip6.addr", pr->pr_ip6,
 2068             pr->pr_ip6s * sizeof(*pr->pr_ip6));
 2069         if (error != 0 && error != ENOENT)
 2070                 goto done_deref;
 2071 #endif
 2072         error = vfs_setopt(opts, "securelevel", &pr->pr_securelevel,
 2073             sizeof(pr->pr_securelevel));
 2074         if (error != 0 && error != ENOENT)
 2075                 goto done_deref;
 2076         error = vfs_setopt(opts, "children.cur", &pr->pr_childcount,
 2077             sizeof(pr->pr_childcount));
 2078         if (error != 0 && error != ENOENT)
 2079                 goto done_deref;
 2080         error = vfs_setopt(opts, "children.max", &pr->pr_childmax,
 2081             sizeof(pr->pr_childmax));
 2082         if (error != 0 && error != ENOENT)
 2083                 goto done_deref;
 2084         error = vfs_setopts(opts, "host.hostname", pr->pr_hostname);
 2085         if (error != 0 && error != ENOENT)
 2086                 goto done_deref;
 2087         error = vfs_setopts(opts, "host.domainname", pr->pr_domainname);
 2088         if (error != 0 && error != ENOENT)
 2089                 goto done_deref;
 2090         error = vfs_setopts(opts, "host.hostuuid", pr->pr_hostuuid);
 2091         if (error != 0 && error != ENOENT)
 2092                 goto done_deref;
 2093 #ifdef COMPAT_FREEBSD32
 2094         if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
 2095                 uint32_t hid32 = pr->pr_hostid;
 2096 
 2097                 error = vfs_setopt(opts, "host.hostid", &hid32, sizeof(hid32));
 2098         } else
 2099 #endif
 2100         error = vfs_setopt(opts, "host.hostid", &pr->pr_hostid,
 2101             sizeof(pr->pr_hostid));
 2102         if (error != 0 && error != ENOENT)
 2103                 goto done_deref;
 2104         error = vfs_setopt(opts, "enforce_statfs", &pr->pr_enforce_statfs,
 2105             sizeof(pr->pr_enforce_statfs));
 2106         if (error != 0 && error != ENOENT)
 2107                 goto done_deref;
 2108         error = vfs_setopt(opts, "devfs_ruleset", &pr->pr_devfs_rsnum,
 2109             sizeof(pr->pr_devfs_rsnum));
 2110         if (error != 0 && error != ENOENT)
 2111                 goto done_deref;
 2112         for (fi = 0; fi < nitems(pr_flag_names); fi++) {
 2113                 if (pr_flag_names[fi] == NULL)
 2114                         continue;
 2115                 i = (pr->pr_flags & (1 << fi)) ? 1 : 0;
 2116                 error = vfs_setopt(opts, pr_flag_names[fi], &i, sizeof(i));
 2117                 if (error != 0 && error != ENOENT)
 2118                         goto done_deref;
 2119                 i = !i;
 2120                 error = vfs_setopt(opts, pr_flag_nonames[fi], &i, sizeof(i));
 2121                 if (error != 0 && error != ENOENT)
 2122                         goto done_deref;
 2123         }
 2124         for (fi = 0; fi < nitems(pr_flag_jailsys); fi++) {
 2125                 i = pr->pr_flags &
 2126                     (pr_flag_jailsys[fi].disable | pr_flag_jailsys[fi].new);
 2127                 i = pr_flag_jailsys[fi].disable &&
 2128                       (i == pr_flag_jailsys[fi].disable) ? JAIL_SYS_DISABLE
 2129                     : (i == pr_flag_jailsys[fi].new) ? JAIL_SYS_NEW
 2130                     : JAIL_SYS_INHERIT;
 2131                 error =
 2132                     vfs_setopt(opts, pr_flag_jailsys[fi].name, &i, sizeof(i));
 2133                 if (error != 0 && error != ENOENT)
 2134                         goto done_deref;
 2135         }
 2136         for (fi = 0; fi < nitems(pr_allow_names); fi++) {
 2137                 if (pr_allow_names[fi] == NULL)
 2138                         continue;
 2139                 i = (pr->pr_allow & (1 << fi)) ? 1 : 0;
 2140                 error = vfs_setopt(opts, pr_allow_names[fi], &i, sizeof(i));
 2141                 if (error != 0 && error != ENOENT)
 2142                         goto done_deref;
 2143                 i = !i;
 2144                 error = vfs_setopt(opts, pr_allow_nonames[fi], &i, sizeof(i));
 2145                 if (error != 0 && error != ENOENT)
 2146                         goto done_deref;
 2147         }
 2148         i = (pr->pr_uref == 0);
 2149         error = vfs_setopt(opts, "dying", &i, sizeof(i));
 2150         if (error != 0 && error != ENOENT)
 2151                 goto done_deref;
 2152         i = !i;
 2153         error = vfs_setopt(opts, "nodying", &i, sizeof(i));
 2154         if (error != 0 && error != ENOENT)
 2155                 goto done_deref;
 2156         error = vfs_setopt(opts, "osreldate", &pr->pr_osreldate,
 2157             sizeof(pr->pr_osreldate));
 2158         if (error != 0 && error != ENOENT)
 2159                 goto done_deref;
 2160         error = vfs_setopts(opts, "osrelease", pr->pr_osrelease);
 2161         if (error != 0 && error != ENOENT)
 2162                 goto done_deref;
 2163 
 2164         /* Get the module parameters. */
 2165         mtx_unlock(&pr->pr_mtx);
 2166         locked = 0;
 2167         error = osd_jail_call(pr, PR_METHOD_GET, opts);
 2168         if (error)
 2169                 goto done_deref;
 2170         prison_deref(pr, PD_DEREF | PD_LIST_SLOCKED);
 2171 
 2172         /* By now, all parameters should have been noted. */
 2173         TAILQ_FOREACH(opt, opts, link) {
 2174                 if (!opt->seen && strcmp(opt->name, "errmsg")) {
 2175                         error = EINVAL;
 2176                         vfs_opterror(opts, "unknown parameter: %s", opt->name);
 2177                         goto done_errmsg;
 2178                 }
 2179         }
 2180 
 2181         /* Write the fetched parameters back to userspace. */
 2182         error = 0;
 2183         TAILQ_FOREACH(opt, opts, link) {
 2184                 if (opt->pos >= 0 && opt->pos != errmsg_pos) {
 2185                         pos = 2 * opt->pos + 1;
 2186                         optuio->uio_iov[pos].iov_len = opt->len;
 2187                         if (opt->value != NULL) {
 2188                                 if (optuio->uio_segflg == UIO_SYSSPACE) {
 2189                                         bcopy(opt->value,
 2190                                             optuio->uio_iov[pos].iov_base,
 2191                                             opt->len);
 2192                                 } else {
 2193                                         error = copyout(opt->value,
 2194                                             optuio->uio_iov[pos].iov_base,
 2195                                             opt->len);
 2196                                         if (error)
 2197                                                 break;
 2198                                 }
 2199                         }
 2200                 }
 2201         }
 2202         goto done_errmsg;
 2203 
 2204  done_deref:
 2205         prison_deref(pr, locked | PD_DEREF | PD_LIST_SLOCKED);
 2206         goto done_errmsg;
 2207 
 2208  done_unlock_list:
 2209         sx_sunlock(&allprison_lock);
 2210  done_errmsg:
 2211         if (error && errmsg_pos >= 0) {
 2212                 vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len);
 2213                 errmsg_pos = 2 * errmsg_pos + 1;
 2214                 if (errmsg_len > 0) {
 2215                         if (optuio->uio_segflg == UIO_SYSSPACE)
 2216                                 bcopy(errmsg,
 2217                                     optuio->uio_iov[errmsg_pos].iov_base,
 2218                                     errmsg_len);
 2219                         else
 2220                                 copyout(errmsg,
 2221                                     optuio->uio_iov[errmsg_pos].iov_base,
 2222                                     errmsg_len);
 2223                 }
 2224         }
 2225         vfs_freeopts(opts);
 2226         return (error);
 2227 }
 2228 
 2229 
/*
 * struct jail_remove_args {
 *      int jid;
 * };
 *
 * Remove the prison identified by uap->jid, after first removing every one
 * of its descendants.  The prison is looked up among the descendants of the
 * calling thread's own prison.
 *
 * Returns 0 on success, the priv_check() error if PRIV_JAIL_REMOVE is not
 * granted, or EINVAL if no matching prison is found.
 */
int
sys_jail_remove(struct thread *td, struct jail_remove_args *uap)
{
	struct prison *pr, *cpr, *lpr, *tpr;
	int descend, error;

	error = priv_check(td, PRIV_JAIL_REMOVE);
	if (error)
		return (error);

	sx_xlock(&allprison_lock);
	/* On success the prison is returned with its mutex held. */
	pr = prison_find_child(td->td_ucred->cr_prison, uap->jid);
	if (pr == NULL) {
		sx_xunlock(&allprison_lock);
		return (EINVAL);
	}

	/* Remove all descendants of this prison, then remove this prison. */
	/* This is the reference that prison_remove_one() later drops. */
	pr->pr_ref++;
	if (!LIST_EMPTY(&pr->pr_children)) {
		mtx_unlock(&pr->pr_mtx);
		/*
		 * lpr trails the iterator by one step: each descendant is
		 * referenced on the iteration that visits it and removed on
		 * the following one.  Presumably this keeps the iterator's
		 * current prison alive while the previous one is torn down.
		 */
		lpr = NULL;
		FOREACH_PRISON_DESCENDANT(pr, cpr, descend) {
			mtx_lock(&cpr->pr_mtx);
			if (cpr->pr_ref > 0) {
				tpr = cpr;
				cpr->pr_ref++;
			} else {
				/* Already removed - do not do it again. */
				tpr = NULL;
			}
			mtx_unlock(&cpr->pr_mtx);
			if (lpr != NULL) {
				mtx_lock(&lpr->pr_mtx);
				prison_remove_one(lpr);
				/*
				 * prison_remove_one() released allprison_lock;
				 * take it again before iterating further.
				 */
				sx_xlock(&allprison_lock);
			}
			lpr = tpr;
		}
		/* Remove the last descendant that was visited, if any. */
		if (lpr != NULL) {
			mtx_lock(&lpr->pr_mtx);
			prison_remove_one(lpr);
			sx_xlock(&allprison_lock);
		}
		mtx_lock(&pr->pr_mtx);
	}
	/* Releases both pr's mutex and allprison_lock before returning. */
	prison_remove_one(pr);
	return (0);
}
 2284 
/*
 * Remove a single prison on behalf of jail_remove: flag it PR_REMOVE, strip
 * its persistence, and then either release it outright or send SIGKILL to
 * the processes attached to it.
 *
 * Entered with pr's mutex held, allprison_lock held exclusively, and a
 * reference on pr added by the caller.  The mutex, the list lock and that
 * reference are all released before returning.
 */
static void
prison_remove_one(struct prison *pr)
{
	struct proc *p;
	int deuref;

	/*
	 * Mark the prison as doomed, so it doesn't accidentally come back
	 * to life.  It may still be explicitly brought back by jail_set(2).
	 */
	pr->pr_flags |= PR_REMOVE;

	/* If the prison was persistent, it is not anymore. */
	deuref = 0;
	if (pr->pr_flags & PR_PERSIST) {
		/*
		 * The reference is dropped here; the matching user
		 * reference is dropped through prison_deref() below.
		 */
		pr->pr_ref--;
		deuref = PD_DEUREF;
		pr->pr_flags &= ~PR_PERSIST;
	}

	/*
	 * jail_remove added a reference.  If that's the only one, remove
	 * the prison now.
	 */
	KASSERT(pr->pr_ref > 0,
	    ("prison_remove_one removing a dead prison (jid=%d)", pr->pr_id));
	if (pr->pr_ref == 1) {
		/* prison_deref() consumes both locks that are held here. */
		prison_deref(pr,
		    deuref | PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED);
		return;
	}

	/* Both locks are dropped before the process list is walked. */
	mtx_unlock(&pr->pr_mtx);
	sx_xunlock(&allprison_lock);
	/*
	 * Kill all processes unfortunate enough to be attached to this prison.
	 */
	sx_slock(&allproc_lock);
	LIST_FOREACH(p, &allproc, p_list) {
		PROC_LOCK(p);
		/* Only processes whose credential names exactly this prison. */
		if (p->p_state != PRS_NEW && p->p_ucred &&
		    p->p_ucred->cr_prison == pr)
			kern_psignal(p, SIGKILL);
		PROC_UNLOCK(p);
	}
	sx_sunlock(&allproc_lock);
	/* Remove the temporary reference added by jail_remove. */
	prison_deref(pr, deuref | PD_DEREF);
}
 2334 
 2335 
 2336 /*
 2337  * struct jail_attach_args {
 2338  *      int jid;
 2339  * };
 2340  */
 2341 int
 2342 sys_jail_attach(struct thread *td, struct jail_attach_args *uap)
 2343 {
 2344         struct prison *pr;
 2345         int error;
 2346 
 2347         error = priv_check(td, PRIV_JAIL_ATTACH);
 2348         if (error)
 2349                 return (error);
 2350 
 2351         /*
 2352          * Start with exclusive hold on allprison_lock to ensure that a possible
 2353          * PR_METHOD_REMOVE call isn't concurrent with jail_set or jail_remove.
 2354          * But then immediately downgrade it since we don't need to stop
 2355          * readers.
 2356          */
 2357         sx_xlock(&allprison_lock);
 2358         sx_downgrade(&allprison_lock);
 2359         pr = prison_find_child(td->td_ucred->cr_prison, uap->jid);
 2360         if (pr == NULL) {
 2361                 sx_sunlock(&allprison_lock);
 2362                 return (EINVAL);
 2363         }
 2364 
 2365         /*
 2366          * Do not allow a process to attach to a prison that is not
 2367          * considered to be "alive".
 2368          */
 2369         if (pr->pr_uref == 0) {
 2370                 mtx_unlock(&pr->pr_mtx);
 2371                 sx_sunlock(&allprison_lock);
 2372                 return (EINVAL);
 2373         }
 2374 
 2375         return (do_jail_attach(td, pr));
 2376 }
 2377 
/*
 * Move the calling process into prison pr: notify the OSD modules, adopt
 * the prison's cpuset and root directory, and install a new credential
 * whose cr_prison is pr.
 *
 * Entered with pr's mutex held and allprison_lock held shared; both are
 * released on every path.  Returns 0 on success, otherwise the error from
 * the step that failed, in which case the references taken on pr here are
 * dropped again.
 */
static int
do_jail_attach(struct thread *td, struct prison *pr)
{
	struct proc *p;
	struct ucred *newcred, *oldcred;
	int error;

	/*
	 * XXX: Note that there is a slight race here if two threads
	 * in the same privileged process attempt to attach to two
	 * different jails at the same time.  It is important for
	 * user processes not to do this, or they might end up with
	 * a process root from one prison, but attached to the jail
	 * of another.
	 */
	/* Take the reference and user reference the new credential will own. */
	pr->pr_ref++;
	pr->pr_uref++;
	mtx_unlock(&pr->pr_mtx);

	/* Let modules do whatever they need to prepare for attaching. */
	error = osd_jail_call(pr, PR_METHOD_ATTACH, td);
	if (error) {
		/* prison_deref() also releases the shared list lock. */
		prison_deref(pr, PD_DEREF | PD_DEUREF | PD_LIST_SLOCKED);
		return (error);
	}
	sx_sunlock(&allprison_lock);

	/*
	 * Reparent the newly attached process to this jail.
	 */
	p = td->td_proc;
	error = cpuset_setproc_update_set(p, pr->pr_cpuset);
	if (error)
		goto e_revert_osd;

	/* The root vnode stays locked only for the two checks below. */
	vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY);
	if ((error = change_dir(pr->pr_root, td)) != 0)
		goto e_unlock;
#ifdef MAC
	if ((error = mac_vnode_check_chroot(td->td_ucred, pr->pr_root)))
		goto e_unlock;
#endif
	VOP_UNLOCK(pr->pr_root, 0);
	if ((error = pwd_chroot_chdir(td, pr->pr_root)))
		goto e_revert_osd;

	/* Swap in a copy of the process credential that points at pr. */
	newcred = crget();
	PROC_LOCK(p);
	oldcred = crcopysafe(p, newcred);
	newcred->cr_prison = pr;
	proc_set_cred(p, newcred);
	setsugid(p);
#ifdef RACCT
	racct_proc_ucred_changed(p, oldcred, newcred);
	/*
	 * NOTE(review): this hold is only released inside the RCTL block
	 * below - confirm RACCT is never configured without RCTL.
	 */
	crhold(newcred);
#endif
	PROC_UNLOCK(p);
#ifdef RCTL
	rctl_proc_ucred_changed(p, newcred);
	crfree(newcred);
#endif
	/* Drop the references that the old credential's prison was given. */
	prison_deref(oldcred->cr_prison, PD_DEREF | PD_DEUREF);
	crfree(oldcred);

	/*
	 * If the prison was killed while changing credentials, die along
	 * with it.
	 */
	if (pr->pr_flags & PR_REMOVE) {
		PROC_LOCK(p);
		kern_psignal(p, SIGKILL);
		PROC_UNLOCK(p);
	}

	return (0);

 e_unlock:
	VOP_UNLOCK(pr->pr_root, 0);
 e_revert_osd:
	/* Tell modules this thread is still in its old jail after all. */
	(void)osd_jail_call(td->td_ucred->cr_prison, PR_METHOD_ATTACH, td);
	/* Give back the reference and user reference taken at the top. */
	prison_deref(pr, PD_DEREF | PD_DEUREF);
	return (error);
}
 2462 
 2463 
 2464 /*
 2465  * Returns a locked prison instance, or NULL on failure.
 2466  */
 2467 struct prison *
 2468 prison_find(int prid)
 2469 {
 2470         struct prison *pr;
 2471 
 2472         sx_assert(&allprison_lock, SX_LOCKED);
 2473         TAILQ_FOREACH(pr, &allprison, pr_list) {
 2474                 if (pr->pr_id == prid) {
 2475                         mtx_lock(&pr->pr_mtx);
 2476                         if (pr->pr_ref > 0)
 2477                                 return (pr);
 2478                         mtx_unlock(&pr->pr_mtx);
 2479                 }
 2480         }
 2481         return (NULL);
 2482 }
 2483 
 2484 /*
 2485  * Find a prison that is a descendant of mypr.  Returns a locked prison or NULL.
 2486  */
 2487 struct prison *
 2488 prison_find_child(struct prison *mypr, int prid)
 2489 {
 2490         struct prison *pr;
 2491         int descend;
 2492 
 2493         sx_assert(&allprison_lock, SX_LOCKED);
 2494         FOREACH_PRISON_DESCENDANT(mypr, pr, descend) {
 2495                 if (pr->pr_id == prid) {
 2496                         mtx_lock(&pr->pr_mtx);
 2497                         if (pr->pr_ref > 0)
 2498                                 return (pr);
 2499                         mtx_unlock(&pr->pr_mtx);
 2500                 }
 2501         }
 2502         return (NULL);
 2503 }
 2504 
 2505 /*
 2506  * Look for the name relative to mypr.  Returns a locked prison or NULL.
 2507  */
 2508 struct prison *
 2509 prison_find_name(struct prison *mypr, const char *name)
 2510 {
 2511         struct prison *pr, *deadpr;
 2512         size_t mylen;
 2513         int descend;
 2514 
 2515         sx_assert(&allprison_lock, SX_LOCKED);
 2516         mylen = (mypr == &prison0) ? 0 : strlen(mypr->pr_name) + 1;
 2517  again:
 2518         deadpr = NULL;
 2519         FOREACH_PRISON_DESCENDANT(mypr, pr, descend) {
 2520                 if (!strcmp(pr->pr_name + mylen, name)) {
 2521                         mtx_lock(&pr->pr_mtx);
 2522                         if (pr->pr_ref > 0) {
 2523                                 if (pr->pr_uref > 0)
 2524                                         return (pr);
 2525                                 deadpr = pr;
 2526                         }
 2527                         mtx_unlock(&pr->pr_mtx);
 2528                 }
 2529         }
 2530         /* There was no valid prison - perhaps there was a dying one. */
 2531         if (deadpr != NULL) {
 2532                 mtx_lock(&deadpr->pr_mtx);
 2533                 if (deadpr->pr_ref == 0) {
 2534                         mtx_unlock(&deadpr->pr_mtx);
 2535                         goto again;
 2536                 }
 2537         }
 2538         return (deadpr);
 2539 }
 2540 
 2541 /*
 2542  * See if a prison has the specific flag set.
 2543  */
 2544 int
 2545 prison_flag(struct ucred *cred, unsigned flag)
 2546 {
 2547 
 2548         /* This is an atomic read, so no locking is necessary. */
 2549         return (cred->cr_prison->pr_flags & flag);
 2550 }
 2551 
 2552 int
 2553 prison_allow(struct ucred *cred, unsigned flag)
 2554 {
 2555 
 2556         /* This is an atomic read, so no locking is necessary. */
 2557         return (cred->cr_prison->pr_allow & flag);
 2558 }
 2559 
 2560 /*
 2561  * Remove a prison reference.  If that was the last reference, remove the
 2562  * prison itself - but not in this context in case there are locks held.
 2563  */
 2564 void
 2565 prison_free_locked(struct prison *pr)
 2566 {
 2567         int ref;
 2568 
 2569         mtx_assert(&pr->pr_mtx, MA_OWNED);
 2570         ref = --pr->pr_ref;
 2571         mtx_unlock(&pr->pr_mtx);
 2572         if (ref == 0)
 2573                 taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
 2574 }
 2575 
 2576 void
 2577 prison_free(struct prison *pr)
 2578 {
 2579 
 2580         mtx_lock(&pr->pr_mtx);
 2581         prison_free_locked(pr);
 2582 }
 2583 
 2584 /*
 2585  * Complete a call to either prison_free or prison_proc_free.
 2586  */
 2587 static void
 2588 prison_complete(void *context, int pending)
 2589 {
 2590         struct prison *pr = context;
 2591 
 2592         sx_xlock(&allprison_lock);
 2593         mtx_lock(&pr->pr_mtx);
 2594         prison_deref(pr, pr->pr_uref
 2595             ? PD_DEREF | PD_DEUREF | PD_LOCKED | PD_LIST_XLOCKED
 2596             : PD_LOCKED | PD_LIST_XLOCKED);
 2597 }
 2598 
/*
 * Remove a prison reference (usually).  This internal version assumes no
 * mutexes are held, except perhaps the prison itself.  If there are no more
 * references, release and delist the prison.  On completion, the prison lock
 * and the allprison lock are both unlocked.
 *
 * Flags, as used below:
 *	PD_DEREF	 drop one reference (pr_ref).
 *	PD_DEUREF	 drop one user reference (pr_uref).
 *	PD_LOCKED	 the caller already holds pr's mutex.
 *	PD_LIST_SLOCKED	 the caller holds allprison_lock shared.
 *	PD_LIST_XLOCKED	 the caller holds allprison_lock exclusively.
 *
 * Freeing a prison repeats the same steps on its parent, so a single call
 * may release a whole chain of ancestors.
 */
static void
prison_deref(struct prison *pr, int flags)
{
	struct prison *ppr, *tpr;
	int ref, lasturef;

	if (!(flags & PD_LOCKED))
		mtx_lock(&pr->pr_mtx);
	for (;;) {
		if (flags & PD_DEUREF) {
			KASSERT(pr->pr_uref > 0,
			    ("prison_deref PD_DEUREF on a dead prison (jid=%d)",
			     pr->pr_id));
			pr->pr_uref--;
			lasturef = pr->pr_uref == 0;
			/*
			 * Temporary reference, dropped again after the
			 * PR_METHOD_REMOVE call below.
			 */
			if (lasturef)
				pr->pr_ref++;
			KASSERT(prison0.pr_uref != 0, ("prison0 pr_uref=0"));
		} else
			lasturef = 0;
		if (flags & PD_DEREF) {
			KASSERT(pr->pr_ref > 0,
			    ("prison_deref PD_DEREF on a dead prison (jid=%d)",
			     pr->pr_id));
			pr->pr_ref--;
		}
		ref = pr->pr_ref;
		mtx_unlock(&pr->pr_mtx);

		/*
		 * Tell the modules if the last user reference was removed
		 * (even if it sticks around in dying state).
		 */
		if (lasturef) {
			/* The call is made with allprison_lock held. */
			if (!(flags & (PD_LIST_SLOCKED | PD_LIST_XLOCKED))) {
				sx_xlock(&allprison_lock);
				flags |= PD_LIST_XLOCKED;
			}
			(void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
			mtx_lock(&pr->pr_mtx);
			ref = --pr->pr_ref;
			mtx_unlock(&pr->pr_mtx);
		}

		/* If the prison still has references, nothing else to do. */
		if (ref > 0) {
			if (flags & PD_LIST_SLOCKED)
				sx_sunlock(&allprison_lock);
			else if (flags & PD_LIST_XLOCKED)
				sx_xunlock(&allprison_lock);
			return;
		}

		/* Delisting needs allprison_lock held exclusively. */
		if (flags & PD_LIST_SLOCKED) {
			if (!sx_try_upgrade(&allprison_lock)) {
				/* Upgrade failed: drop and retake exclusively. */
				sx_sunlock(&allprison_lock);
				sx_xlock(&allprison_lock);
			}
		} else if (!(flags & PD_LIST_XLOCKED))
			sx_xlock(&allprison_lock);

		TAILQ_REMOVE(&allprison, pr, pr_list);
		LIST_REMOVE(pr, pr_sibling);
		ppr = pr->pr_parent;
		/* Every ancestor counted this prison among its children. */
		for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent)
			tpr->pr_childcount--;
		sx_xunlock(&allprison_lock);

		/* Release what the prison owns, then the prison itself. */
#ifdef VIMAGE
		if (pr->pr_vnet != ppr->pr_vnet)
			vnet_destroy(pr->pr_vnet);
#endif
		if (pr->pr_root != NULL)
			vrele(pr->pr_root);
		mtx_destroy(&pr->pr_mtx);
#ifdef INET
		free(pr->pr_ip4, M_PRISON);
#endif
#ifdef INET6
		free(pr->pr_ip6, M_PRISON);
#endif
		if (pr->pr_cpuset != NULL)
			cpuset_rel(pr->pr_cpuset);
		osd_jail_exit(pr);
#ifdef RACCT
		if (racct_enable)
			prison_racct_detach(pr);
#endif
		free(pr, M_PRISON);

		/* Removing a prison frees a reference on its parent. */
		pr = ppr;
		mtx_lock(&pr->pr_mtx);
		/*
		 * allprison_lock was released above, so the next pass
		 * starts with no PD_LIST_* flag set.
		 */
		flags = PD_DEREF | PD_DEUREF;
	}
}
 2701 
 2702 void
 2703 prison_hold_locked(struct prison *pr)
 2704 {
 2705 
 2706         mtx_assert(&pr->pr_mtx, MA_OWNED);
 2707         KASSERT(pr->pr_ref > 0,
 2708             ("Trying to hold dead prison (jid=%d).", pr->pr_id));
 2709         pr->pr_ref++;
 2710 }
 2711 
 2712 void
 2713 prison_hold(struct prison *pr)
 2714 {
 2715 
 2716         mtx_lock(&pr->pr_mtx);
 2717         prison_hold_locked(pr);
 2718         mtx_unlock(&pr->pr_mtx);
 2719 }
 2720 
 2721 void
 2722 prison_proc_hold(struct prison *pr)
 2723 {
 2724 
 2725         mtx_lock(&pr->pr_mtx);
 2726         KASSERT(pr->pr_uref > 0,
 2727             ("Cannot add a process to a non-alive prison (jid=%d)", pr->pr_id));
 2728         pr->pr_uref++;
 2729         mtx_unlock(&pr->pr_mtx);
 2730 }
 2731 
 2732 void
 2733 prison_proc_free(struct prison *pr)
 2734 {
 2735 
 2736         mtx_lock(&pr->pr_mtx);
 2737         KASSERT(pr->pr_uref > 0,
 2738             ("Trying to kill a process in a dead prison (jid=%d)", pr->pr_id));
 2739         if (pr->pr_uref > 1)
 2740                 pr->pr_uref--;
 2741         else {
 2742                 /*
 2743                  * Don't remove the last user reference in this context, which
 2744                  * is expected to be a process that is not only locked, but
 2745                  * also half dead.
 2746                  */
 2747                 pr->pr_ref++;
 2748                 mtx_unlock(&pr->pr_mtx);
 2749                 taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
 2750                 return;
 2751         }
 2752         mtx_unlock(&pr->pr_mtx);
 2753 }
 2754 
 2755 /*
 2756  * Check if a jail supports the given address family.
 2757  *
 2758  * Returns 0 if not jailed or the address family is supported, EAFNOSUPPORT
 2759  * if not.
 2760  */
 2761 int
 2762 prison_check_af(struct ucred *cred, int af)
 2763 {
 2764         struct prison *pr;
 2765         int error;
 2766 
 2767         KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
 2768 
 2769         pr = cred->cr_prison;
 2770 #ifdef VIMAGE
 2771         /* Prisons with their own network stack are not limited. */
 2772         if (prison_owns_vnet(cred))
 2773                 return (0);
 2774 #endif
 2775 
 2776         error = 0;
 2777         switch (af)
 2778         {
 2779 #ifdef INET
 2780         case AF_INET:
 2781                 if (pr->pr_flags & PR_IP4)
 2782                 {
 2783                         mtx_lock(&pr->pr_mtx);
 2784                         if ((pr->pr_flags & PR_IP4) && pr->pr_ip4 == NULL)
 2785                                 error = EAFNOSUPPORT;
 2786                         mtx_unlock(&pr->pr_mtx);
 2787                 }
 2788                 break;
 2789 #endif
 2790 #ifdef INET6
 2791         case AF_INET6:
 2792                 if (pr->pr_flags & PR_IP6)
 2793                 {
 2794                         mtx_lock(&pr->pr_mtx);
 2795                         if ((pr->pr_flags & PR_IP6) && pr->pr_ip6 == NULL)
 2796                                 error = EAFNOSUPPORT;
 2797                         mtx_unlock(&pr->pr_mtx);
 2798                 }
 2799                 break;
 2800 #endif
 2801         case AF_LOCAL:
 2802         case AF_ROUTE:
 2803                 break;
 2804         default:
 2805                 if (!(pr->pr_allow & PR_ALLOW_SOCKET_AF))
 2806                         error = EAFNOSUPPORT;
 2807         }
 2808         return (error);
 2809 }
 2810 
 2811 /*
 2812  * Check if given address belongs to the jail referenced by cred (wrapper to
 2813  * prison_check_ip[46]).
 2814  *
 2815  * Returns 0 if jail doesn't restrict the address family or if address belongs
 2816  * to jail, EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if
 2817  * the jail doesn't allow the address family.  IPv4 Address passed in in NBO.
 2818  */
 2819 int
 2820 prison_if(struct ucred *cred, struct sockaddr *sa)
 2821 {
 2822 #ifdef INET
 2823         struct sockaddr_in *sai;
 2824 #endif
 2825 #ifdef INET6
 2826         struct sockaddr_in6 *sai6;
 2827 #endif
 2828         int error;
 2829 
 2830         KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
 2831         KASSERT(sa != NULL, ("%s: sa is NULL", __func__));
 2832 
 2833 #ifdef VIMAGE
 2834         if (prison_owns_vnet(cred))
 2835                 return (0);
 2836 #endif
 2837 
 2838         error = 0;
 2839         switch (sa->sa_family)
 2840         {
 2841 #ifdef INET
 2842         case AF_INET:
 2843                 sai = (struct sockaddr_in *)sa;
 2844                 error = prison_check_ip4(cred, &sai->sin_addr);
 2845                 break;
 2846 #endif
 2847 #ifdef INET6
 2848         case AF_INET6:
 2849                 sai6 = (struct sockaddr_in6 *)sa;
 2850                 error = prison_check_ip6(cred, &sai6->sin6_addr);
 2851                 break;
 2852 #endif
 2853         default:
 2854                 if (!(cred->cr_prison->pr_allow & PR_ALLOW_SOCKET_AF))
 2855                         error = EAFNOSUPPORT;
 2856         }
 2857         return (error);
 2858 }
 2859 
 2860 /*
 2861  * Return 0 if jails permit p1 to frob p2, otherwise ESRCH.
 2862  */
 2863 int
 2864 prison_check(struct ucred *cred1, struct ucred *cred2)
 2865 {
 2866 
 2867         return ((cred1->cr_prison == cred2->cr_prison ||
 2868             prison_ischild(cred1->cr_prison, cred2->cr_prison)) ? 0 : ESRCH);
 2869 }
 2870 
 2871 /*
 2872  * Return 1 if p2 is a child of p1, otherwise 0.
 2873  */
 2874 int
 2875 prison_ischild(struct prison *pr1, struct prison *pr2)
 2876 {
 2877 
 2878         for (pr2 = pr2->pr_parent; pr2 != NULL; pr2 = pr2->pr_parent)
 2879                 if (pr1 == pr2)
 2880                         return (1);
 2881         return (0);
 2882 }
 2883 
 2884 /*
 2885  * Return 1 if the passed credential is in a jail, otherwise 0.
 2886  */
 2887 int
 2888 jailed(struct ucred *cred)
 2889 {
 2890 
 2891         return (cred->cr_prison != &prison0);
 2892 }
 2893 
 2894 /*
 2895  * Return 1 if the passed credential is in a jail and that jail does not
 2896  * have its own virtual network stack, otherwise 0.
 2897  */
 2898 int
 2899 jailed_without_vnet(struct ucred *cred)
 2900 {
 2901 
 2902         if (!jailed(cred))
 2903                 return (0);
 2904 #ifdef VIMAGE
 2905         if (prison_owns_vnet(cred))
 2906                 return (0);
 2907 #endif
 2908 
 2909         return (1);
 2910 }
 2911 
 2912 /*
 2913  * Return the correct hostname (domainname, et al) for the passed credential.
 2914  */
 2915 void
 2916 getcredhostname(struct ucred *cred, char *buf, size_t size)
 2917 {
 2918         struct prison *pr;
 2919 
 2920         /*
 2921          * A NULL credential can be used to shortcut to the physical
 2922          * system's hostname.
 2923          */
 2924         pr = (cred != NULL) ? cred->cr_prison : &prison0;
 2925         mtx_lock(&pr->pr_mtx);
 2926         strlcpy(buf, pr->pr_hostname, size);
 2927         mtx_unlock(&pr->pr_mtx);
 2928 }
 2929 
 2930 void
 2931 getcreddomainname(struct ucred *cred, char *buf, size_t size)
 2932 {
 2933 
 2934         mtx_lock(&cred->cr_prison->pr_mtx);
 2935         strlcpy(buf, cred->cr_prison->pr_domainname, size);
 2936         mtx_unlock(&cred->cr_prison->pr_mtx);
 2937 }
 2938 
 2939 void
 2940 getcredhostuuid(struct ucred *cred, char *buf, size_t size)
 2941 {
 2942 
 2943         mtx_lock(&cred->cr_prison->pr_mtx);
 2944         strlcpy(buf, cred->cr_prison->pr_hostuuid, size);
 2945         mtx_unlock(&cred->cr_prison->pr_mtx);
 2946 }
 2947 
 2948 void
 2949 getcredhostid(struct ucred *cred, unsigned long *hostid)
 2950 {
 2951 
 2952         mtx_lock(&cred->cr_prison->pr_mtx);
 2953         *hostid = cred->cr_prison->pr_hostid;
 2954         mtx_unlock(&cred->cr_prison->pr_mtx);
 2955 }
 2956 
 2957 void
 2958 getjailname(struct ucred *cred, char *name, size_t len)
 2959 {
 2960 
 2961         mtx_lock(&cred->cr_prison->pr_mtx);
 2962         strlcpy(name, cred->cr_prison->pr_name, len);
 2963         mtx_unlock(&cred->cr_prison->pr_mtx);
 2964 }
 2965 
 2966 #ifdef VIMAGE
 2967 /*
 2968  * Determine whether the prison represented by cred owns
 2969  * its vnet rather than having it inherited.
 2970  *
 2971  * Returns 1 in case the prison owns the vnet, 0 otherwise.
 2972  */
 2973 int
 2974 prison_owns_vnet(struct ucred *cred)
 2975 {
 2976 
 2977         /*
 2978          * vnets cannot be added/removed after jail creation,
 2979          * so no need to lock here.
 2980          */
 2981         return (cred->cr_prison->pr_flags & PR_VNET ? 1 : 0);
 2982 }
 2983 #endif
 2984 
 2985 /*
 2986  * Determine whether the subject represented by cred can "see"
 2987  * status of a mount point.
 2988  * Returns: 0 for permitted, ENOENT otherwise.
 2989  * XXX: This function should be called cr_canseemount() and should be
 2990  *      placed in kern_prot.c.
 2991  */
 2992 int
 2993 prison_canseemount(struct ucred *cred, struct mount *mp)
 2994 {
 2995         struct prison *pr;
 2996         struct statfs *sp;
 2997         size_t len;
 2998 
 2999         pr = cred->cr_prison;
 3000         if (pr->pr_enforce_statfs == 0)
 3001                 return (0);
 3002         if (pr->pr_root->v_mount == mp)
 3003                 return (0);
 3004         if (pr->pr_enforce_statfs == 2)
 3005                 return (ENOENT);
 3006         /*
 3007          * If jail's chroot directory is set to "/" we should be able to see
 3008          * all mount-points from inside a jail.
 3009          * This is ugly check, but this is the only situation when jail's
 3010          * directory ends with '/'.
 3011          */
 3012         if (strcmp(pr->pr_path, "/") == 0)
 3013                 return (0);
 3014         len = strlen(pr->pr_path);
 3015         sp = &mp->mnt_stat;
 3016         if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0)
 3017                 return (ENOENT);
 3018         /*
 3019          * Be sure that we don't have situation where jail's root directory
 3020          * is "/some/path" and mount point is "/some/pathpath".
 3021          */
 3022         if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/')
 3023                 return (ENOENT);
 3024         return (0);
 3025 }
 3026 
 3027 void
 3028 prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp)
 3029 {
 3030         char jpath[MAXPATHLEN];
 3031         struct prison *pr;
 3032         size_t len;
 3033 
 3034         pr = cred->cr_prison;
 3035         if (pr->pr_enforce_statfs == 0)
 3036                 return;
 3037         if (prison_canseemount(cred, mp) != 0) {
 3038                 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
 3039                 strlcpy(sp->f_mntonname, "[restricted]",
 3040                     sizeof(sp->f_mntonname));
 3041                 return;
 3042         }
 3043         if (pr->pr_root->v_mount == mp) {
 3044                 /*
 3045                  * Clear current buffer data, so we are sure nothing from
 3046                  * the valid path left there.
 3047                  */
 3048                 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
 3049                 *sp->f_mntonname = '/';
 3050                 return;
 3051         }
 3052         /*
 3053          * If jail's chroot directory is set to "/" we should be able to see
 3054          * all mount-points from inside a jail.
 3055          */
 3056         if (strcmp(pr->pr_path, "/") == 0)
 3057                 return;
 3058         len = strlen(pr->pr_path);
 3059         strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath));
 3060         /*
 3061          * Clear current buffer data, so we are sure nothing from
 3062          * the valid path left there.
 3063          */
 3064         bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
 3065         if (*jpath == '\0') {
 3066                 /* Should never happen. */
 3067                 *sp->f_mntonname = '/';
 3068         } else {
 3069                 strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname));
 3070         }
 3071 }
 3072 
 3073 /*
 3074  * Check with permission for a specific privilege is granted within jail.  We
 3075  * have a specific list of accepted privileges; the rest are denied.
 3076  */
 3077 int
 3078 prison_priv_check(struct ucred *cred, int priv)
 3079 {
 3080 
 3081         if (!jailed(cred))
 3082                 return (0);
 3083 
 3084 #ifdef VIMAGE
 3085         /*
 3086          * Privileges specific to prisons with a virtual network stack.
 3087          * There might be a duplicate entry here in case the privilege
 3088          * is only granted conditionally in the legacy jail case.
 3089          */
 3090         switch (priv) {
 3091 #ifdef notyet
 3092                 /*
 3093                  * NFS-specific privileges.
 3094                  */
 3095         case PRIV_NFS_DAEMON:
 3096         case PRIV_NFS_LOCKD:
 3097 #endif
 3098                 /*
 3099                  * Network stack privileges.
 3100                  */
 3101         case PRIV_NET_BRIDGE:
 3102         case PRIV_NET_GRE:
 3103         case PRIV_NET_BPF:
 3104         case PRIV_NET_RAW:              /* Dup, cond. in legacy jail case. */
 3105         case PRIV_NET_ROUTE:
 3106         case PRIV_NET_TAP:
 3107         case PRIV_NET_SETIFMTU:
 3108         case PRIV_NET_SETIFFLAGS:
 3109         case PRIV_NET_SETIFCAP:
 3110         case PRIV_NET_SETIFDESCR:
 3111         case PRIV_NET_SETIFNAME :
 3112         case PRIV_NET_SETIFMETRIC:
 3113         case PRIV_NET_SETIFPHYS:
 3114         case PRIV_NET_SETIFMAC:
 3115         case PRIV_NET_ADDMULTI:
 3116         case PRIV_NET_DELMULTI:
 3117         case PRIV_NET_HWIOCTL:
 3118         case PRIV_NET_SETLLADDR:
 3119         case PRIV_NET_ADDIFGROUP:
 3120         case PRIV_NET_DELIFGROUP:
 3121         case PRIV_NET_IFCREATE:
 3122         case PRIV_NET_IFDESTROY:
 3123         case PRIV_NET_ADDIFADDR:
 3124         case PRIV_NET_DELIFADDR:
 3125         case PRIV_NET_LAGG:
 3126         case PRIV_NET_GIF:
 3127         case PRIV_NET_SETIFVNET:
 3128         case PRIV_NET_SETIFFIB:
 3129 
 3130                 /*
 3131                  * 802.11-related privileges.
 3132                  */
 3133         case PRIV_NET80211_GETKEY:
 3134 #ifdef notyet
 3135         case PRIV_NET80211_MANAGE:              /* XXX-BZ discuss with sam@ */
 3136 #endif
 3137 
 3138 #ifdef notyet
 3139                 /*
 3140                  * ATM privileges.
 3141                  */
 3142         case PRIV_NETATM_CFG:
 3143         case PRIV_NETATM_ADD:
 3144         case PRIV_NETATM_DEL:
 3145         case PRIV_NETATM_SET:
 3146 
 3147                 /*
 3148                  * Bluetooth privileges.
 3149                  */
 3150         case PRIV_NETBLUETOOTH_RAW:
 3151 #endif
 3152 
 3153                 /*
 3154                  * Netgraph and netgraph module privileges.
 3155                  */
 3156         case PRIV_NETGRAPH_CONTROL:
 3157 #ifdef notyet
 3158         case PRIV_NETGRAPH_TTY:
 3159 #endif
 3160 
 3161                 /*
 3162                  * IPv4 and IPv6 privileges.
 3163                  */
 3164         case PRIV_NETINET_IPFW:
 3165         case PRIV_NETINET_DIVERT:
 3166         case PRIV_NETINET_PF:
 3167         case PRIV_NETINET_DUMMYNET:
 3168         case PRIV_NETINET_CARP:
 3169         case PRIV_NETINET_MROUTE:
 3170         case PRIV_NETINET_RAW:
 3171         case PRIV_NETINET_ADDRCTRL6:
 3172         case PRIV_NETINET_ND6:
 3173         case PRIV_NETINET_SCOPE6:
 3174         case PRIV_NETINET_ALIFETIME6:
 3175         case PRIV_NETINET_IPSEC:
 3176         case PRIV_NETINET_BINDANY:
 3177 
 3178 #ifdef notyet
 3179                 /*
 3180                  * NCP privileges.
 3181                  */
 3182         case PRIV_NETNCP:
 3183 
 3184                 /*
 3185                  * SMB privileges.
 3186                  */
 3187         case PRIV_NETSMB:
 3188 #endif
 3189 
 3190         /*
 3191          * No default: or deny here.
 3192          * In case of no permit fall through to next switch().
 3193          */
 3194                 if (cred->cr_prison->pr_flags & PR_VNET)
 3195                         return (0);
 3196         }
 3197 #endif /* VIMAGE */
 3198 
 3199         switch (priv) {
 3200 
 3201                 /*
 3202                  * Allow ktrace privileges for root in jail.
 3203                  */
 3204         case PRIV_KTRACE:
 3205 
 3206 #if 0
 3207                 /*
 3208                  * Allow jailed processes to configure audit identity and
 3209                  * submit audit records (login, etc).  In the future we may
 3210                  * want to further refine the relationship between audit and
 3211                  * jail.
 3212                  */
 3213         case PRIV_AUDIT_GETAUDIT:
 3214         case PRIV_AUDIT_SETAUDIT:
 3215         case PRIV_AUDIT_SUBMIT:
 3216 #endif
 3217 
 3218                 /*
 3219                  * Allow jailed processes to manipulate process UNIX
 3220                  * credentials in any way they see fit.
 3221                  */
 3222         case PRIV_CRED_SETUID:
 3223         case PRIV_CRED_SETEUID:
 3224         case PRIV_CRED_SETGID:
 3225         case PRIV_CRED_SETEGID:
 3226         case PRIV_CRED_SETGROUPS:
 3227         case PRIV_CRED_SETREUID:
 3228         case PRIV_CRED_SETREGID:
 3229         case PRIV_CRED_SETRESUID:
 3230         case PRIV_CRED_SETRESGID:
 3231 
 3232                 /*
 3233                  * Jail implements visibility constraints already, so allow
 3234                  * jailed root to override uid/gid-based constraints.
 3235                  */
 3236         case PRIV_SEEOTHERGIDS:
 3237         case PRIV_SEEOTHERUIDS:
 3238 
 3239                 /*
 3240                  * Jail implements inter-process debugging limits already, so
 3241                  * allow jailed root various debugging privileges.
 3242                  */
 3243         case PRIV_DEBUG_DIFFCRED:
 3244         case PRIV_DEBUG_SUGID:
 3245         case PRIV_DEBUG_UNPRIV:
 3246 
 3247                 /*
 3248                  * Allow jail to set various resource limits and login
 3249                  * properties, and for now, exceed process resource limits.
 3250                  */
 3251         case PRIV_PROC_LIMIT:
 3252         case PRIV_PROC_SETLOGIN:
 3253         case PRIV_PROC_SETRLIMIT:
 3254 
 3255                 /*
 3256                  * System V and POSIX IPC privileges are granted in jail.
 3257                  */
 3258         case PRIV_IPC_READ:
 3259         case PRIV_IPC_WRITE:
 3260         case PRIV_IPC_ADMIN:
 3261         case PRIV_IPC_MSGSIZE:
 3262         case PRIV_MQ_ADMIN:
 3263 
 3264                 /*
 3265                  * Jail operations within a jail work on child jails.
 3266                  */
 3267         case PRIV_JAIL_ATTACH:
 3268         case PRIV_JAIL_SET:
 3269         case PRIV_JAIL_REMOVE:
 3270 
 3271                 /*
 3272                  * Jail implements its own inter-process limits, so allow
 3273                  * root processes in jail to change scheduling on other
 3274                  * processes in the same jail.  Likewise for signalling.
 3275                  */
 3276         case PRIV_SCHED_DIFFCRED:
 3277         case PRIV_SCHED_CPUSET:
 3278         case PRIV_SIGNAL_DIFFCRED:
 3279         case PRIV_SIGNAL_SUGID:
 3280 
 3281                 /*
 3282                  * Allow jailed processes to write to sysctls marked as jail
 3283                  * writable.
 3284                  */
 3285         case PRIV_SYSCTL_WRITEJAIL:
 3286 
 3287                 /*
 3288                  * Allow root in jail to manage a variety of quota
 3289                  * properties.  These should likely be conditional on a
 3290                  * configuration option.
 3291                  */
 3292         case PRIV_VFS_GETQUOTA:
 3293         case PRIV_VFS_SETQUOTA:
 3294 
 3295                 /*
 3296                  * Since Jail relies on chroot() to implement file system
 3297                  * protections, grant many VFS privileges to root in jail.
 3298                  * Be careful to exclude mount-related and NFS-related
 3299                  * privileges.
 3300                  */
 3301         case PRIV_VFS_READ:
 3302         case PRIV_VFS_WRITE:
 3303         case PRIV_VFS_ADMIN:
 3304         case PRIV_VFS_EXEC:
 3305         case PRIV_VFS_LOOKUP:
 3306         case PRIV_VFS_BLOCKRESERVE:     /* XXXRW: Slightly surprising. */
 3307         case PRIV_VFS_CHFLAGS_DEV:
 3308         case PRIV_VFS_CHOWN:
 3309         case PRIV_VFS_CHROOT:
 3310         case PRIV_VFS_RETAINSUGID:
 3311         case PRIV_VFS_FCHROOT:
 3312         case PRIV_VFS_LINK:
 3313         case PRIV_VFS_SETGID:
 3314         case PRIV_VFS_STAT:
 3315         case PRIV_VFS_STICKYFILE:
 3316 
 3317                 /*
 3318                  * As in the non-jail case, non-root users are expected to be
 3319                  * able to read kernel/phyiscal memory (provided /dev/[k]mem
 3320                  * exists in the jail and they have permission to access it).
 3321                  */
 3322         case PRIV_KMEM_READ:
 3323                 return (0);
 3324 
 3325                 /*
 3326                  * Depending on the global setting, allow privilege of
 3327                  * setting system flags.
 3328                  */
 3329         case PRIV_VFS_SYSFLAGS:
 3330                 if (cred->cr_prison->pr_allow & PR_ALLOW_CHFLAGS)
 3331                         return (0);
 3332                 else
 3333                         return (EPERM);
 3334 
 3335                 /*
 3336                  * Depending on the global setting, allow privilege of
 3337                  * mounting/unmounting file systems.
 3338                  */
 3339         case PRIV_VFS_MOUNT:
 3340         case PRIV_VFS_UNMOUNT:
 3341         case PRIV_VFS_MOUNT_NONUSER:
 3342         case PRIV_VFS_MOUNT_OWNER:
 3343                 if (cred->cr_prison->pr_allow & PR_ALLOW_MOUNT &&
 3344                     cred->cr_prison->pr_enforce_statfs < 2)
 3345                         return (0);
 3346                 else
 3347                         return (EPERM);
 3348 
 3349                 /*
 3350                  * Allow jailed root to bind reserved ports and reuse in-use
 3351                  * ports.
 3352                  */
 3353         case PRIV_NETINET_RESERVEDPORT:
 3354         case PRIV_NETINET_REUSEPORT:
 3355                 return (0);
 3356 
 3357                 /*
 3358                  * Allow jailed root to set certain IPv4/6 (option) headers.
 3359                  */
 3360         case PRIV_NETINET_SETHDROPTS:
 3361                 return (0);
 3362 
 3363                 /*
 3364                  * Conditionally allow creating raw sockets in jail.
 3365                  */
 3366         case PRIV_NETINET_RAW:
 3367                 if (cred->cr_prison->pr_allow & PR_ALLOW_RAW_SOCKETS)
 3368                         return (0);
 3369                 else
 3370                         return (EPERM);
 3371 
 3372                 /*
 3373                  * Since jail implements its own visibility limits on netstat
 3374                  * sysctls, allow getcred.  This allows identd to work in
 3375                  * jail.
 3376                  */
 3377         case PRIV_NETINET_GETCRED:
 3378                 return (0);
 3379 
 3380                 /*
 3381                  * Allow jailed root to set loginclass.
 3382                  */
 3383         case PRIV_PROC_SETLOGINCLASS:
 3384                 return (0);
 3385 
 3386                 /*
 3387                  * Do not allow a process inside a jail to read the kernel
 3388                  * message buffer unless explicitly permitted.
 3389                  */
 3390         case PRIV_MSGBUF:
 3391                 if (cred->cr_prison->pr_allow & PR_ALLOW_READ_MSGBUF)
 3392                         return (0);
 3393                 return (EPERM);
 3394 
 3395         default:
 3396                 /*
 3397                  * In all remaining cases, deny the privilege request.  This
 3398                  * includes almost all network privileges, many system
 3399                  * configuration privileges.
 3400                  */
 3401                 return (EPERM);
 3402         }
 3403 }
 3404 
 3405 /*
 3406  * Return the part of pr2's name that is relative to pr1, or the whole name
 3407  * if it does not directly follow.
 3408  */
 3409 
 3410 char *
 3411 prison_name(struct prison *pr1, struct prison *pr2)
 3412 {
 3413         char *name;
 3414 
 3415         /* Jails see themselves as "" (if they see themselves at all). */
 3416         if (pr1 == pr2)
 3417                 return "";
 3418         name = pr2->pr_name;
 3419         if (prison_ischild(pr1, pr2)) {
 3420                 /*
 3421                  * pr1 isn't locked (and allprison_lock may not be either)
 3422                  * so its length can't be counted on.  But the number of dots
 3423                  * can be counted on - and counted.
 3424                  */
 3425                 for (; pr1 != &prison0; pr1 = pr1->pr_parent)
 3426                         name = strchr(name, '.') + 1;
 3427         }
 3428         return (name);
 3429 }
 3430 
 3431 /*
 3432  * Return the part of pr2's path that is relative to pr1, or the whole path
 3433  * if it does not directly follow.
 3434  */
 3435 static char *
 3436 prison_path(struct prison *pr1, struct prison *pr2)
 3437 {
 3438         char *path1, *path2;
 3439         int len1;
 3440 
 3441         path1 = pr1->pr_path;
 3442         path2 = pr2->pr_path;
 3443         if (!strcmp(path1, "/"))
 3444                 return (path2);
 3445         len1 = strlen(path1);
 3446         if (strncmp(path1, path2, len1))
 3447                 return (path2);
 3448         if (path2[len1] == '\0')
 3449                 return "/";
 3450         if (path2[len1] == '/')
 3451                 return (path2 + len1);
 3452         return (path2);
 3453 }
 3454 
 3455 
 3456 /*
 3457  * Jail-related sysctls.
 3458  */
 3459 static SYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0,
 3460     "Jails");
 3461 
 3462 static int
 3463 sysctl_jail_list(SYSCTL_HANDLER_ARGS)
 3464 {
 3465         struct xprison *xp;
 3466         struct prison *pr, *cpr;
 3467 #ifdef INET
 3468         struct in_addr *ip4 = NULL;
 3469         int ip4s = 0;
 3470 #endif
 3471 #ifdef INET6
 3472         struct in6_addr *ip6 = NULL;
 3473         int ip6s = 0;
 3474 #endif
 3475         int descend, error;
 3476 
 3477         xp = malloc(sizeof(*xp), M_TEMP, M_WAITOK);
 3478         pr = req->td->td_ucred->cr_prison;
 3479         error = 0;
 3480         sx_slock(&allprison_lock);
 3481         FOREACH_PRISON_DESCENDANT(pr, cpr, descend) {
 3482 #if defined(INET) || defined(INET6)
 3483  again:
 3484 #endif
 3485                 mtx_lock(&cpr->pr_mtx);
 3486 #ifdef INET
 3487                 if (cpr->pr_ip4s > 0) {
 3488                         if (ip4s < cpr->pr_ip4s) {
 3489                                 ip4s = cpr->pr_ip4s;
 3490                                 mtx_unlock(&cpr->pr_mtx);
 3491                                 ip4 = realloc(ip4, ip4s *
 3492                                     sizeof(struct in_addr), M_TEMP, M_WAITOK);
 3493                                 goto again;
 3494                         }
 3495                         bcopy(cpr->pr_ip4, ip4,
 3496                             cpr->pr_ip4s * sizeof(struct in_addr));
 3497                 }
 3498 #endif
 3499 #ifdef INET6
 3500                 if (cpr->pr_ip6s > 0) {
 3501                         if (ip6s < cpr->pr_ip6s) {
 3502                                 ip6s = cpr->pr_ip6s;
 3503                                 mtx_unlock(&cpr->pr_mtx);
 3504                                 ip6 = realloc(ip6, ip6s *
 3505                                     sizeof(struct in6_addr), M_TEMP, M_WAITOK);
 3506                                 goto again;
 3507                         }
 3508                         bcopy(cpr->pr_ip6, ip6,
 3509                             cpr->pr_ip6s * sizeof(struct in6_addr));
 3510                 }
 3511 #endif
 3512                 if (cpr->pr_ref == 0) {
 3513                         mtx_unlock(&cpr->pr_mtx);
 3514                         continue;
 3515                 }
 3516                 bzero(xp, sizeof(*xp));
 3517                 xp->pr_version = XPRISON_VERSION;
 3518                 xp->pr_id = cpr->pr_id;
 3519                 xp->pr_state = cpr->pr_uref > 0
 3520                     ? PRISON_STATE_ALIVE : PRISON_STATE_DYING;
 3521                 strlcpy(xp->pr_path, prison_path(pr, cpr), sizeof(xp->pr_path));
 3522                 strlcpy(xp->pr_host, cpr->pr_hostname, sizeof(xp->pr_host));
 3523                 strlcpy(xp->pr_name, prison_name(pr, cpr), sizeof(xp->pr_name));
 3524 #ifdef INET
 3525                 xp->pr_ip4s = cpr->pr_ip4s;
 3526 #endif
 3527 #ifdef INET6
 3528                 xp->pr_ip6s = cpr->pr_ip6s;
 3529 #endif
 3530                 mtx_unlock(&cpr->pr_mtx);
 3531                 error = SYSCTL_OUT(req, xp, sizeof(*xp));
 3532                 if (error)
 3533                         break;
 3534 #ifdef INET
 3535                 if (xp->pr_ip4s > 0) {
 3536                         error = SYSCTL_OUT(req, ip4,
 3537                             xp->pr_ip4s * sizeof(struct in_addr));
 3538                         if (error)
 3539                                 break;
 3540                 }
 3541 #endif
 3542 #ifdef INET6
 3543                 if (xp->pr_ip6s > 0) {
 3544                         error = SYSCTL_OUT(req, ip6,
 3545                             xp->pr_ip6s * sizeof(struct in6_addr));
 3546                         if (error)
 3547                                 break;
 3548                 }
 3549 #endif
 3550         }
 3551         sx_sunlock(&allprison_lock);
 3552         free(xp, M_TEMP);
 3553 #ifdef INET
 3554         free(ip4, M_TEMP);
 3555 #endif
 3556 #ifdef INET6
 3557         free(ip6, M_TEMP);
 3558 #endif
 3559         return (error);
 3560 }
 3561 
 3562 SYSCTL_OID(_security_jail, OID_AUTO, list,
 3563     CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
 3564     sysctl_jail_list, "S", "List of active jails");
 3565 
 3566 static int
 3567 sysctl_jail_jailed(SYSCTL_HANDLER_ARGS)
 3568 {
 3569         int error, injail;
 3570 
 3571         injail = jailed(req->td->td_ucred);
 3572         error = SYSCTL_OUT(req, &injail, sizeof(injail));
 3573 
 3574         return (error);
 3575 }
 3576 
 3577 SYSCTL_PROC(_security_jail, OID_AUTO, jailed,
 3578     CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
 3579     sysctl_jail_jailed, "I", "Process in jail?");
 3580 
 3581 static int
 3582 sysctl_jail_vnet(SYSCTL_HANDLER_ARGS)
 3583 {
 3584         int error, havevnet;
 3585 #ifdef VIMAGE
 3586         struct ucred *cred = req->td->td_ucred;
 3587 
 3588         havevnet = jailed(cred) && prison_owns_vnet(cred);
 3589 #else
 3590         havevnet = 0;
 3591 #endif
 3592         error = SYSCTL_OUT(req, &havevnet, sizeof(havevnet));
 3593 
 3594         return (error);
 3595 }
 3596 
 3597 SYSCTL_PROC(_security_jail, OID_AUTO, vnet,
 3598     CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
 3599     sysctl_jail_vnet, "I", "Jail owns VNET?");
 3600 
 3601 #if defined(INET) || defined(INET6)
 3602 SYSCTL_UINT(_security_jail, OID_AUTO, jail_max_af_ips, CTLFLAG_RW,
 3603     &jail_max_af_ips, 0,
 3604     "Number of IP addresses a jail may have at most per address family (deprecated)");
 3605 #endif
 3606 
 3607 /*
 3608  * Default parameters for jail(2) compatibility.  For historical reasons,
 3609  * the sysctl names have varying similarity to the parameter names.  Prisons
 3610  * just see their own parameters, and can't change them.
 3611  */
 3612 static int
 3613 sysctl_jail_default_allow(SYSCTL_HANDLER_ARGS)
 3614 {
 3615         struct prison *pr;
 3616         int allow, error, i;
 3617 
 3618         pr = req->td->td_ucred->cr_prison;
 3619         allow = (pr == &prison0) ? jail_default_allow : pr->pr_allow;
 3620 
 3621         /* Get the current flag value, and convert it to a boolean. */
 3622         i = (allow & arg2) ? 1 : 0;
 3623         if (arg1 != NULL)
 3624                 i = !i;
 3625         error = sysctl_handle_int(oidp, &i, 0, req);
 3626         if (error || !req->newptr)
 3627                 return (error);
 3628         i = i ? arg2 : 0;
 3629         if (arg1 != NULL)
 3630                 i ^= arg2;
 3631         /*
 3632          * The sysctls don't have CTLFLAGS_PRISON, so assume prison0
 3633          * for writing.
 3634          */
 3635         mtx_lock(&prison0.pr_mtx);
 3636         jail_default_allow = (jail_default_allow & ~arg2) | i;
 3637         mtx_unlock(&prison0.pr_mtx);
 3638         return (0);
 3639 }
 3640 
 3641 SYSCTL_PROC(_security_jail, OID_AUTO, set_hostname_allowed,
 3642     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
 3643     NULL, PR_ALLOW_SET_HOSTNAME, sysctl_jail_default_allow, "I",
 3644     "Processes in jail can set their hostnames (deprecated)");
 3645 SYSCTL_PROC(_security_jail, OID_AUTO, socket_unixiproute_only,
 3646     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
 3647     (void *)1, PR_ALLOW_SOCKET_AF, sysctl_jail_default_allow, "I",
 3648     "Processes in jail are limited to creating UNIX/IP/route sockets only (deprecated)");
 3649 SYSCTL_PROC(_security_jail, OID_AUTO, sysvipc_allowed,
 3650     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
 3651     NULL, PR_ALLOW_SYSVIPC, sysctl_jail_default_allow, "I",
 3652     "Processes in jail can use System V IPC primitives (deprecated)");
 3653 SYSCTL_PROC(_security_jail, OID_AUTO, allow_raw_sockets,
 3654     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
 3655     NULL, PR_ALLOW_RAW_SOCKETS, sysctl_jail_default_allow, "I",
 3656     "Prison root can create raw sockets (deprecated)");
 3657 SYSCTL_PROC(_security_jail, OID_AUTO, chflags_allowed,
 3658     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
 3659     NULL, PR_ALLOW_CHFLAGS, sysctl_jail_default_allow, "I",
 3660     "Processes in jail can alter system file flags (deprecated)");
 3661 SYSCTL_PROC(_security_jail, OID_AUTO, mount_allowed,
 3662     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
 3663     NULL, PR_ALLOW_MOUNT, sysctl_jail_default_allow, "I",
 3664     "Processes in jail can mount/unmount jail-friendly file systems (deprecated)");
 3665 SYSCTL_PROC(_security_jail, OID_AUTO, mount_devfs_allowed,
 3666     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
 3667     NULL, PR_ALLOW_MOUNT_DEVFS, sysctl_jail_default_allow, "I",
 3668     "Processes in jail can mount the devfs file system (deprecated)");
 3669 SYSCTL_PROC(_security_jail, OID_AUTO, mount_fdescfs_allowed,
 3670     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
 3671     NULL, PR_ALLOW_MOUNT_FDESCFS, sysctl_jail_default_allow, "I",
 3672     "Processes in jail can mount the fdescfs file system (deprecated)");
 3673 SYSCTL_PROC(_security_jail, OID_AUTO, mount_nullfs_allowed,
 3674     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
 3675     NULL, PR_ALLOW_MOUNT_NULLFS, sysctl_jail_default_allow, "I",
 3676     "Processes in jail can mount the nullfs file system (deprecated)");
 3677 SYSCTL_PROC(_security_jail, OID_AUTO, mount_procfs_allowed,
 3678     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
 3679     NULL, PR_ALLOW_MOUNT_PROCFS, sysctl_jail_default_allow, "I",
 3680     "Processes in jail can mount the procfs file system (deprecated)");
 3681 SYSCTL_PROC(_security_jail, OID_AUTO, mount_linprocfs_allowed,
 3682     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
 3683     NULL, PR_ALLOW_MOUNT_LINPROCFS, sysctl_jail_default_allow, "I",
 3684     "Processes in jail can mount the linprocfs file system (deprecated)");
 3685 SYSCTL_PROC(_security_jail, OID_AUTO, mount_linsysfs_allowed,
 3686     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
 3687     NULL, PR_ALLOW_MOUNT_LINSYSFS, sysctl_jail_default_allow, "I",
 3688     "Processes in jail can mount the linsysfs file system (deprecated)");
 3689 SYSCTL_PROC(_security_jail, OID_AUTO, mount_tmpfs_allowed,
 3690     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
 3691     NULL, PR_ALLOW_MOUNT_TMPFS, sysctl_jail_default_allow, "I",
 3692     "Processes in jail can mount the tmpfs file system (deprecated)");
 3693 SYSCTL_PROC(_security_jail, OID_AUTO, mount_zfs_allowed,
 3694     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
 3695     NULL, PR_ALLOW_MOUNT_ZFS, sysctl_jail_default_allow, "I",
 3696     "Processes in jail can mount the zfs file system (deprecated)");
 3697 
 3698 static int
 3699 sysctl_jail_default_level(SYSCTL_HANDLER_ARGS)
 3700 {
 3701         struct prison *pr;
 3702         int level, error;
 3703 
 3704         pr = req->td->td_ucred->cr_prison;
 3705         level = (pr == &prison0) ? *(int *)arg1 : *(int *)((char *)pr + arg2);
 3706         error = sysctl_handle_int(oidp, &level, 0, req);
 3707         if (error || !req->newptr)
 3708                 return (error);
 3709         *(int *)arg1 = level;
 3710         return (0);
 3711 }
 3712 
 3713 SYSCTL_PROC(_security_jail, OID_AUTO, enforce_statfs,
 3714     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
 3715     &jail_default_enforce_statfs, offsetof(struct prison, pr_enforce_statfs),
 3716     sysctl_jail_default_level, "I",
 3717     "Processes in jail cannot see all mounted file systems (deprecated)");
 3718 
 3719 SYSCTL_PROC(_security_jail, OID_AUTO, devfs_ruleset,
 3720     CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
 3721     &jail_default_devfs_rsnum, offsetof(struct prison, pr_devfs_rsnum),
 3722     sysctl_jail_default_level, "I",
 3723     "Ruleset for the devfs filesystem in jail (deprecated)");
 3724 
 3725 /*
 3726  * Nodes to describe jail parameters.  Maximum length of string parameters
 3727  * is returned in the string itself, and the other parameters exist merely
 3728  * to make themselves and their types known.
 3729  */
 3730 SYSCTL_NODE(_security_jail, OID_AUTO, param, CTLFLAG_RW, 0,
 3731     "Jail parameters");
 3732 
 3733 int
 3734 sysctl_jail_param(SYSCTL_HANDLER_ARGS)
 3735 {
 3736         int i;
 3737         long l;
 3738         size_t s;
 3739         char numbuf[12];
 3740 
 3741         switch (oidp->oid_kind & CTLTYPE)
 3742         {
 3743         case CTLTYPE_LONG:
 3744         case CTLTYPE_ULONG:
 3745                 l = 0;
 3746 #ifdef SCTL_MASK32
 3747                 if (!(req->flags & SCTL_MASK32))
 3748 #endif
 3749                         return (SYSCTL_OUT(req, &l, sizeof(l)));
 3750         case CTLTYPE_INT:
 3751         case CTLTYPE_UINT:
 3752                 i = 0;
 3753                 return (SYSCTL_OUT(req, &i, sizeof(i)));
 3754         case CTLTYPE_STRING:
 3755                 snprintf(numbuf, sizeof(numbuf), "%jd", (intmax_t)arg2);
 3756                 return
 3757                     (sysctl_handle_string(oidp, numbuf, sizeof(numbuf), req));
 3758         case CTLTYPE_STRUCT:
 3759                 s = (size_t)arg2;
 3760                 return (SYSCTL_OUT(req, &s, sizeof(s)));
 3761         }
 3762         return (0);
 3763 }
 3764 
 3765 /*
 3766  * CTLFLAG_RDTUN in the following indicates jail parameters that can be set at
 3767  * jail creation time but cannot be changed in an existing jail.
 3768  */
 3769 SYSCTL_JAIL_PARAM(, jid, CTLTYPE_INT | CTLFLAG_RDTUN, "I", "Jail ID");
 3770 SYSCTL_JAIL_PARAM(, parent, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail parent ID");
 3771 SYSCTL_JAIL_PARAM_STRING(, name, CTLFLAG_RW, MAXHOSTNAMELEN, "Jail name");
 3772 SYSCTL_JAIL_PARAM_STRING(, path, CTLFLAG_RDTUN, MAXPATHLEN, "Jail root path");
 3773 SYSCTL_JAIL_PARAM(, securelevel, CTLTYPE_INT | CTLFLAG_RW,
 3774     "I", "Jail secure level");
 3775 SYSCTL_JAIL_PARAM(, osreldate, CTLTYPE_INT | CTLFLAG_RDTUN, "I", 
 3776     "Jail value for kern.osreldate and uname -K");
 3777 SYSCTL_JAIL_PARAM_STRING(, osrelease, CTLFLAG_RDTUN, OSRELEASELEN, 
 3778     "Jail value for kern.osrelease and uname -r");
 3779 SYSCTL_JAIL_PARAM(, enforce_statfs, CTLTYPE_INT | CTLFLAG_RW,
 3780     "I", "Jail cannot see all mounted file systems");
 3781 SYSCTL_JAIL_PARAM(, devfs_ruleset, CTLTYPE_INT | CTLFLAG_RW,
 3782     "I", "Ruleset for in-jail devfs mounts");
 3783 SYSCTL_JAIL_PARAM(, persist, CTLTYPE_INT | CTLFLAG_RW,
 3784     "B", "Jail persistence");
 3785 #ifdef VIMAGE
 3786 SYSCTL_JAIL_PARAM(, vnet, CTLTYPE_INT | CTLFLAG_RDTUN,
 3787     "E,jailsys", "Virtual network stack");
 3788 #endif
 3789 SYSCTL_JAIL_PARAM(, dying, CTLTYPE_INT | CTLFLAG_RD,
 3790     "B", "Jail is in the process of shutting down");
 3791 
      /*
       * "children" node: child-jail accounting.  The current count is declared
       * CTLFLAG_RD, the maximum CTLFLAG_RW.
       */
 3792 SYSCTL_JAIL_PARAM_NODE(children, "Number of child jails");
 3793 SYSCTL_JAIL_PARAM(_children, cur, CTLTYPE_INT | CTLFLAG_RD,
 3794     "I", "Current number of child jails");
 3795 SYSCTL_JAIL_PARAM(_children, max, CTLTYPE_INT | CTLFLAG_RW,
 3796     "I", "Maximum number of child jails");
 3797 
      /*
       * "host" node: per-jail host identity (hostname, NIS domain name, host
       * UUID and host ID).  All four parameters are declared CTLFLAG_RW.
       */
 3798 SYSCTL_JAIL_PARAM_SYS_NODE(host, CTLFLAG_RW, "Jail host info");
 3799 SYSCTL_JAIL_PARAM_STRING(_host, hostname, CTLFLAG_RW, MAXHOSTNAMELEN,
 3800     "Jail hostname");
 3801 SYSCTL_JAIL_PARAM_STRING(_host, domainname, CTLFLAG_RW, MAXHOSTNAMELEN,
 3802     "Jail NIS domainname");
 3803 SYSCTL_JAIL_PARAM_STRING(_host, hostuuid, CTLFLAG_RW, HOSTUUIDLEN,
 3804     "Jail host UUID");
 3805 SYSCTL_JAIL_PARAM(_host, hostid, CTLTYPE_ULONG | CTLFLAG_RW,
 3806     "LU", "Jail host ID");
 3807 
      /* "cpuset" node: exposes the jail's cpuset ID, declared CTLFLAG_RD. */
 3808 SYSCTL_JAIL_PARAM_NODE(cpuset, "Jail cpuset");
 3809 SYSCTL_JAIL_PARAM(_cpuset, id, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail cpuset ID");
 3810 
      /*
       * "ip4" and "ip6" nodes: per-address-family parameters, each compiled only
       * when the kernel is built with INET or INET6 respectively.  Both nodes
       * carry an address list (declared as a struct parameter sized by one
       * in_addr / in6_addr) and a source-address-selection switch.
       */
 3811 #ifdef INET
 3812 SYSCTL_JAIL_PARAM_SYS_NODE(ip4, CTLFLAG_RDTUN,
 3813     "Jail IPv4 address virtualization");
 3814 SYSCTL_JAIL_PARAM_STRUCT(_ip4, addr, CTLFLAG_RW, sizeof(struct in_addr),
 3815     "S,in_addr,a", "Jail IPv4 addresses");
 3816 SYSCTL_JAIL_PARAM(_ip4, saddrsel, CTLTYPE_INT | CTLFLAG_RW,
 3817     "B", "Do (not) use IPv4 source address selection rather than the "
 3818     "primary jail IPv4 address.");
 3819 #endif
 3820 #ifdef INET6
 3821 SYSCTL_JAIL_PARAM_SYS_NODE(ip6, CTLFLAG_RDTUN,
 3822     "Jail IPv6 address virtualization");
 3823 SYSCTL_JAIL_PARAM_STRUCT(_ip6, addr, CTLFLAG_RW, sizeof(struct in6_addr),
 3824     "S,in6_addr,a", "Jail IPv6 addresses");
 3825 SYSCTL_JAIL_PARAM(_ip6, saddrsel, CTLTYPE_INT | CTLFLAG_RW,
 3826     "B", "Do (not) use IPv6 source address selection rather than the "
 3827     "primary jail IPv6 address.");
 3828 #endif
 3829 
      /*
       * "allow" node: per-jail permission switches.  Every entry uses the "B"
       * format (presumably boolean) and is declared CTLFLAG_RW.
       */
 3830 SYSCTL_JAIL_PARAM_NODE(allow, "Jail permission flags");
 3831 SYSCTL_JAIL_PARAM(_allow, set_hostname, CTLTYPE_INT | CTLFLAG_RW,
 3832     "B", "Jail may set hostname");
 3833 SYSCTL_JAIL_PARAM(_allow, sysvipc, CTLTYPE_INT | CTLFLAG_RW,
 3834     "B", "Jail may use SYSV IPC");
 3835 SYSCTL_JAIL_PARAM(_allow, raw_sockets, CTLTYPE_INT | CTLFLAG_RW,
 3836     "B", "Jail may create raw sockets");
 3837 SYSCTL_JAIL_PARAM(_allow, chflags, CTLTYPE_INT | CTLFLAG_RW,
 3838     "B", "Jail may alter system file flags");
 3839 SYSCTL_JAIL_PARAM(_allow, quotas, CTLTYPE_INT | CTLFLAG_RW,
 3840     "B", "Jail may set file quotas");
 3841 SYSCTL_JAIL_PARAM(_allow, socket_af, CTLTYPE_INT | CTLFLAG_RW,
 3842     "B", "Jail may create sockets other than just UNIX/IPv4/IPv6/route");
 3843 SYSCTL_JAIL_PARAM(_allow, read_msgbuf, CTLTYPE_INT | CTLFLAG_RW,
 3844     "B", "Jail may read the kernel message buffer");
 3845 
      /*
       * "allow.mount" sub-node: mount permissions.  The entry with the empty
       * name is the general mount/unmount permission; the remaining entries
       * name one file system type each.
       */
 3846 SYSCTL_JAIL_PARAM_SUBNODE(allow, mount, "Jail mount/unmount permission flags");
 3847 SYSCTL_JAIL_PARAM(_allow_mount, , CTLTYPE_INT | CTLFLAG_RW,
 3848     "B", "Jail may mount/unmount jail-friendly file systems in general");
 3849 SYSCTL_JAIL_PARAM(_allow_mount, devfs, CTLTYPE_INT | CTLFLAG_RW,
 3850     "B", "Jail may mount the devfs file system");
 3851 SYSCTL_JAIL_PARAM(_allow_mount, fdescfs, CTLTYPE_INT | CTLFLAG_RW,
 3852     "B", "Jail may mount the fdescfs file system");
 3853 SYSCTL_JAIL_PARAM(_allow_mount, nullfs, CTLTYPE_INT | CTLFLAG_RW,
 3854     "B", "Jail may mount the nullfs file system");
 3855 SYSCTL_JAIL_PARAM(_allow_mount, procfs, CTLTYPE_INT | CTLFLAG_RW,
 3856     "B", "Jail may mount the procfs file system");
 3857 SYSCTL_JAIL_PARAM(_allow_mount, linprocfs, CTLTYPE_INT | CTLFLAG_RW,
 3858     "B", "Jail may mount the linprocfs file system");
 3859 SYSCTL_JAIL_PARAM(_allow_mount, linsysfs, CTLTYPE_INT | CTLFLAG_RW,
 3860     "B", "Jail may mount the linsysfs file system");
 3861 SYSCTL_JAIL_PARAM(_allow_mount, tmpfs, CTLTYPE_INT | CTLFLAG_RW,
 3862     "B", "Jail may mount the tmpfs file system");
 3863 SYSCTL_JAIL_PARAM(_allow_mount, zfs, CTLTYPE_INT | CTLFLAG_RW,
 3864     "B", "Jail may mount the zfs file system");
 3865 
 3866 #ifdef RACCT
      /*
       * Invoke callback on the racct of every entry in the allprison_racct list,
       * passing arg2 and arg3 through unchanged.
       *
       * The optional pre and post hooks (either may be NULL) run once before and
       * once after the walk.  allprison_lock is held shared across both hooks
       * and every callback invocation.
       */
 3867 void
 3868 prison_racct_foreach(void (*callback)(struct racct *racct,
 3869     void *arg2, void *arg3), void (*pre)(void), void (*post)(void),
 3870     void *arg2, void *arg3)
 3871 {
 3872         struct prison_racct *prr;
 3873 
 3874         ASSERT_RACCT_ENABLED();
 3875 
 3876         sx_slock(&allprison_lock);
 3877         if (pre != NULL)
 3878                 (pre)();
 3879         LIST_FOREACH(prr, &allprison_racct, prr_next)
 3880                 (callback)(prr->prr_racct, arg2, arg3);
 3881         if (post != NULL)
 3882                 (post)();
 3883         sx_sunlock(&allprison_lock);
 3884 }
 3885 
      /*
       * Look up the prison_racct named "name", creating it if it does not exist.
       *
       * The caller must hold allprison_lock exclusively.  Returns NULL when the
       * name is empty or its length is MAXHOSTNAMELEN or more.  Otherwise the
       * returned prison_racct carries one reference for the caller: an existing
       * entry is held, a new entry starts with a reference count of 1.
       */
 3886 static struct prison_racct *
 3887 prison_racct_find_locked(const char *name)
 3888 {
 3889         struct prison_racct *prr;
 3890 
 3891         ASSERT_RACCT_ENABLED();
 3892         sx_assert(&allprison_lock, SA_XLOCKED);
 3893 
 3894         if (name[0] == '\0' || strlen(name) >= MAXHOSTNAMELEN)
 3895                 return (NULL);
 3896 
 3897         LIST_FOREACH(prr, &allprison_racct, prr_next) {
 3898                 if (strcmp(name, prr->prr_name) != 0)
 3899                         continue;
 3900 
 3901                 /* Found a prison_racct with a matching name; take a reference. */
 3902                 prison_racct_hold(prr);
 3903                 return (prr);
 3904         }
 3905 
 3906         /* Add new prison_racct. */
 3907         prr = malloc(sizeof(*prr), M_PRISON_RACCT, M_ZERO | M_WAITOK);
 3908         racct_create(&prr->prr_racct);
 3909 
              /*
               * The length check above bounds this copy; it assumes prr_name
               * holds at least MAXHOSTNAMELEN bytes -- TODO confirm against the
               * struct prison_racct definition.
               */
 3910         strcpy(prr->prr_name, name);
 3911         refcount_init(&prr->prr_refcount, 1);
 3912         LIST_INSERT_HEAD(&allprison_racct, prr, prr_next);
 3913 
 3914         return (prr);
 3915 }
 3916 
      /*
       * Locking wrapper around prison_racct_find_locked(): takes allprison_lock
       * exclusively for the duration of the lookup and returns its result
       * unchanged (NULL for a name that prison_racct_find_locked() rejects).
       */
 3917 struct prison_racct *
 3918 prison_racct_find(const char *name)
 3919 {
 3920         struct prison_racct *prr;
 3921 
 3922         ASSERT_RACCT_ENABLED();
 3923 
 3924         sx_xlock(&allprison_lock);
 3925         prr = prison_racct_find_locked(name);
 3926         sx_xunlock(&allprison_lock);
 3927         return (prr);
 3928 }
 3929 
      /*
       * Take an additional reference on prr.  No lock is acquired or asserted
       * here.
       */
 3930 void
 3931 prison_racct_hold(struct prison_racct *prr)
 3932 {
 3933 
 3934         ASSERT_RACCT_ENABLED();
 3935 
 3936         refcount_acquire(&prr->prr_refcount);
 3937 }
 3938 
      /*
       * Drop one reference on prr with allprison_lock held exclusively by the
       * caller.  When refcount_release() reports success (presumably the last
       * reference going away), the racct is destroyed, prr is unlinked from the
       * allprison_racct list and its memory is freed.
       */
 3939 static void
 3940 prison_racct_free_locked(struct prison_racct *prr)
 3941 {
 3942 
 3943         ASSERT_RACCT_ENABLED();
 3944         sx_assert(&allprison_lock, SA_XLOCKED);
 3945 
 3946         if (refcount_release(&prr->prr_refcount)) {
 3947                 racct_destroy(&prr->prr_racct);
 3948                 LIST_REMOVE(prr, prr_next);
 3949                 free(prr, M_PRISON_RACCT);
 3950         }
 3951 }
 3952 
      /*
       * Drop one reference on prr.  Must be called without allprison_lock held.
       *
       * Fast path: when the observed count is greater than 1, a single
       * compare-and-set tries to decrement it without taking the lock.  If the
       * count was 1 or less, or the compare-and-set fails, the slow path takes
       * allprison_lock exclusively and releases through
       * prison_racct_free_locked().
       */
 3953 void
 3954 prison_racct_free(struct prison_racct *prr)
 3955 {
 3956         int old;
 3957 
 3958         ASSERT_RACCT_ENABLED();
 3959         sx_assert(&allprison_lock, SA_UNLOCKED);
 3960 
 3961         old = prr->prr_refcount;
 3962         if (old > 1 && atomic_cmpset_int(&prr->prr_refcount, old, old - 1))
 3963                 return;
 3964 
 3965         sx_xlock(&allprison_lock);
 3966         prison_racct_free_locked(prr);
 3967         sx_xunlock(&allprison_lock);
 3968 }
 3969 
      /*
       * Point pr at the prison_racct matching its current name, looking it up
       * (or creating it) with prison_racct_find_locked().  The caller must hold
       * allprison_lock exclusively.
       *
       * NOTE(review): prison_racct_find_locked() returns NULL for an empty or
       * over-long name; that case is only caught here by the KASSERT.
       */
 3970 static void
 3971 prison_racct_attach(struct prison *pr)
 3972 {
 3973         struct prison_racct *prr;
 3974 
 3975         ASSERT_RACCT_ENABLED();
 3976         sx_assert(&allprison_lock, SA_XLOCKED);
 3977 
 3978         prr = prison_racct_find_locked(pr->pr_name);
 3979         KASSERT(prr != NULL, ("cannot find prison_racct"));
 3980 
 3981         pr->pr_prison_racct = prr;
 3982 }
 3983 
 3984 /*
 3985  * Handle jail renaming.  From the racct point of view, renaming means
 3986  * moving from one prison_racct to another.
       *
       * Nothing is done when pr's name still matches its prison_racct's name.
       * Otherwise pr is attached to the prison_racct for the new name, the
       * resource records are moved over with racct_move(), and the reference on
       * the old prison_racct is dropped.
       *
       * Locking: allproc_lock is taken shared first, then allprison_lock
       * exclusively.  allproc_lock is released before the old reference is
       * dropped; allprison_lock is released last.
 3987  */
 3988 static void
 3989 prison_racct_modify(struct prison *pr)
 3990 {
 3991 #ifdef RCTL
 3992         struct proc *p;
 3993         struct ucred *cred;
 3994 #endif
 3995         struct prison_racct *oldprr;
 3996 
 3997         ASSERT_RACCT_ENABLED();
 3998 
 3999         sx_slock(&allproc_lock);
 4000         sx_xlock(&allprison_lock);
 4001 
              /* Name unchanged: nothing to move. */
 4002         if (strcmp(pr->pr_name, pr->pr_prison_racct->prr_name) == 0) {
 4003                 sx_xunlock(&allprison_lock);
 4004                 sx_sunlock(&allproc_lock);
 4005                 return;
 4006         }
 4007 
              /* Keep the old prison_racct until its records have been moved. */
 4008         oldprr = pr->pr_prison_racct;
 4009         pr->pr_prison_racct = NULL;
 4010 
 4011         prison_racct_attach(pr);
 4012 
 4013         /*
 4014          * Move resource utilisation records.
 4015          */
 4016         racct_move(pr->pr_prison_racct->prr_racct, oldprr->prr_racct);
 4017 
 4018 #ifdef RCTL
 4019         /*
 4020          * Force rctl to reattach rules to processes.
               * Each process's credential is held under the process lock and
               * released again after the rctl call.
 4021          */
 4022         FOREACH_PROC_IN_SYSTEM(p) {
 4023                 PROC_LOCK(p);
 4024                 cred = crhold(p->p_ucred);
 4025                 PROC_UNLOCK(p);
 4026                 rctl_proc_ucred_changed(p, cred);
 4027                 crfree(cred);
 4028         }
 4029 #endif
 4030 
 4031         sx_sunlock(&allproc_lock);
 4032         prison_racct_free_locked(oldprr);
 4033         sx_xunlock(&allprison_lock);
 4034 }
 4035 
      /*
       * Release pr's reference on its prison_racct, if it has one, and clear the
       * pointer.  Must be called without allprison_lock held, because
       * prison_racct_free() asserts that the lock is not held.
       */
 4036 static void
 4037 prison_racct_detach(struct prison *pr)
 4038 {
 4039 
 4040         ASSERT_RACCT_ENABLED();
 4041         sx_assert(&allprison_lock, SA_UNLOCKED);
 4042 
 4043         if (pr->pr_prison_racct == NULL)
 4044                 return;
 4045         prison_racct_free(pr->pr_prison_racct);
 4046         pr->pr_prison_racct = NULL;
 4047 }
 4048 #endif /* RACCT */
 4049 
 4050 #ifdef DDB
 4051 
      /*
       * Print the fields of one prison to the kernel debugger console, one
       * "name = value" line per field.
       *
       * The "flags" and "allow" lines show the raw value followed by the names
       * of the set bits that have a non-NULL entry in pr_flag_names or
       * pr_allow_names.  Each pr_flag_jailsys entry is printed on its own line
       * as "disable", "new" or "inherit".  IPv4 and IPv6 address lists are
       * printed only when the kernel is built with INET / INET6.
       */
 4052 static void
 4053 db_show_prison(struct prison *pr)
 4054 {
 4055         int fi;
 4056 #if defined(INET) || defined(INET6)
 4057         int ii;
 4058 #endif
 4059         unsigned jsf;
 4060 #ifdef INET
 4061         char ip4buf[INET_ADDRSTRLEN];
 4062 #endif
 4063 #ifdef INET6
 4064         char ip6buf[INET6_ADDRSTRLEN];
 4065 #endif
 4066 
 4067         db_printf("prison %p:\n", pr);
 4068         db_printf(" jid             = %d\n", pr->pr_id);
 4069         db_printf(" name            = %s\n", pr->pr_name);
 4070         db_printf(" parent          = %p\n", pr->pr_parent);
 4071         db_printf(" ref             = %d\n", pr->pr_ref);
 4072         db_printf(" uref            = %d\n", pr->pr_uref);
 4073         db_printf(" path            = %s\n", pr->pr_path);
 4074         db_printf(" cpuset          = %d\n", pr->pr_cpuset
 4075             ? pr->pr_cpuset->cs_id : -1);
 4076 #ifdef VIMAGE
 4077         db_printf(" vnet            = %p\n", pr->pr_vnet);
 4078 #endif
 4079         db_printf(" root            = %p\n", pr->pr_root);
 4080         db_printf(" securelevel     = %d\n", pr->pr_securelevel);
 4081         db_printf(" devfs_rsnum     = %d\n", pr->pr_devfs_rsnum);
 4082         db_printf(" children.max    = %d\n", pr->pr_childmax);
 4083         db_printf(" children.cur    = %d\n", pr->pr_childcount);
 4084         db_printf(" child           = %p\n", LIST_FIRST(&pr->pr_children));
 4085         db_printf(" sibling         = %p\n", LIST_NEXT(pr, pr_sibling));
 4086         db_printf(" flags           = 0x%x", pr->pr_flags);
 4087         for (fi = 0; fi < nitems(pr_flag_names); fi++)
 4088                 if (pr_flag_names[fi] != NULL && (pr->pr_flags & (1U << fi)))
 4089                         db_printf(" %s", pr_flag_names[fi]);
              /*
               * Terminate the "flags" line, as the "allow" line is below;
               * otherwise the first jailsys entry is appended to it.
               */
              db_printf("\n");
 4090         for (fi = 0; fi < nitems(pr_flag_jailsys); fi++) {
                      /* Keep only this entry's "disable" and "new" bits. */
 4091                 jsf = pr->pr_flags &
 4092                     (pr_flag_jailsys[fi].disable | pr_flag_jailsys[fi].new);
 4093                 db_printf(" %-16s= %s\n", pr_flag_jailsys[fi].name,
 4094                     pr_flag_jailsys[fi].disable && 
 4095                       (jsf == pr_flag_jailsys[fi].disable) ? "disable"
 4096                     : (jsf == pr_flag_jailsys[fi].new) ? "new"
 4097                     : "inherit");
 4098         }
 4099         db_printf(" allow           = 0x%x", pr->pr_allow);
 4100         for (fi = 0; fi < nitems(pr_allow_names); fi++)
 4101                 if (pr_allow_names[fi] != NULL && (pr->pr_allow & (1U << fi)))
 4102                         db_printf(" %s", pr_allow_names[fi]);
 4103         db_printf("\n");
 4104         db_printf(" enforce_statfs  = %d\n", pr->pr_enforce_statfs);
 4105         db_printf(" host.hostname   = %s\n", pr->pr_hostname);
 4106         db_printf(" host.domainname = %s\n", pr->pr_domainname);
 4107         db_printf(" host.hostuuid   = %s\n", pr->pr_hostuuid);
 4108         db_printf(" host.hostid     = %lu\n", pr->pr_hostid);
 4109 #ifdef INET
 4110         db_printf(" ip4s            = %d\n", pr->pr_ip4s);
              /* Only the first address line carries the "ip4.addr" label. */
 4111         for (ii = 0; ii < pr->pr_ip4s; ii++)
 4112                 db_printf(" %s %s\n",
 4113                     ii == 0 ? "ip4.addr        =" : "                 ",
 4114                     inet_ntoa_r(pr->pr_ip4[ii], ip4buf));
 4115 #endif
 4116 #ifdef INET6
 4117         db_printf(" ip6s            = %d\n", pr->pr_ip6s);
              /* Only the first address line carries the "ip6.addr" label. */
 4118         for (ii = 0; ii < pr->pr_ip6s; ii++)
 4119                 db_printf(" %s %s\n",
 4120                     ii == 0 ? "ip6.addr        =" : "                 ",
 4121                     ip6_sprintf(ip6buf, &pr->pr_ip6[ii]));
 4122 #endif
 4123 }
 4124 
      /*
       * Debugger "show prison" command.
       *
       * Without an address argument, prison0 and then every prison on the
       * allprison list are shown, stopping early once db_pager_quit is set.
       * With an argument, 0 selects prison0; any other value is first tried as a
       * jail ID and, failing that, is used as a pointer to a struct prison.
       *
       * have_addr and addr are not declared in this body; presumably they are
       * supplied by the DB_SHOW_COMMAND macro -- confirm against its definition.
       */
 4125 DB_SHOW_COMMAND(prison, db_show_prison_command)
 4126 {
 4127         struct prison *pr;
 4128 
 4129         if (!have_addr) {
 4130                 /*
 4131                  * Show all prisons in the list, and prison0 which is not
 4132                  * listed.
 4133                  */
 4134                 db_show_prison(&prison0);
 4135                 if (!db_pager_quit) {
 4136                         TAILQ_FOREACH(pr, &allprison, pr_list) {
 4137                                 db_show_prison(pr);
 4138                                 if (db_pager_quit)
 4139                                         break;
 4140                         }
 4141                 }
 4142                 return;
 4143         }
 4144 
 4145         if (addr == 0)
 4146                 pr = &prison0;
 4147         else {
 4148                 /* Look for a prison with the ID and with references. */
 4149                 TAILQ_FOREACH(pr, &allprison, pr_list)
 4150                         if (pr->pr_id == addr && pr->pr_ref > 0)
 4151                                 break;
 4152                 if (pr == NULL)
 4153                         /* Look again, without requiring a reference. */
 4154                         TAILQ_FOREACH(pr, &allprison, pr_list)
 4155                                 if (pr->pr_id == addr)
 4156                                         break;
 4157                 if (pr == NULL)
 4158                         /* Assume address points to a valid prison. */
 4159                         pr = (struct prison *)addr;
 4160         }
 4161         db_show_prison(pr);
 4162 }
 4163 
 4164 #endif /* DDB */

Cache object: b0b1c02313458736b2adceb6f8cc8fef


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.