The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_jail.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1999 Poul-Henning Kamp.
    3  * Copyright (c) 2008 Bjoern A. Zeeb.
    4  * Copyright (c) 2009 James Gritton.
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   26  * SUCH DAMAGE.
   27  */
   28 
   29 #include <sys/cdefs.h>
   30 __FBSDID("$FreeBSD: releng/10.2/sys/kern/kern_jail.c 284665 2015-06-21 06:28:26Z trasz $");
   31 
   32 #include "opt_compat.h"
   33 #include "opt_ddb.h"
   34 #include "opt_inet.h"
   35 #include "opt_inet6.h"
   36 
   37 #include <sys/param.h>
   38 #include <sys/types.h>
   39 #include <sys/kernel.h>
   40 #include <sys/systm.h>
   41 #include <sys/errno.h>
   42 #include <sys/sysproto.h>
   43 #include <sys/malloc.h>
   44 #include <sys/osd.h>
   45 #include <sys/priv.h>
   46 #include <sys/proc.h>
   47 #include <sys/taskqueue.h>
   48 #include <sys/fcntl.h>
   49 #include <sys/jail.h>
   50 #include <sys/lock.h>
   51 #include <sys/mutex.h>
   52 #include <sys/racct.h>
   53 #include <sys/refcount.h>
   54 #include <sys/sx.h>
   55 #include <sys/sysent.h>
   56 #include <sys/namei.h>
   57 #include <sys/mount.h>
   58 #include <sys/queue.h>
   59 #include <sys/socket.h>
   60 #include <sys/syscallsubr.h>
   61 #include <sys/sysctl.h>
   62 #include <sys/vnode.h>
   63 
   64 #include <net/if.h>
   65 #include <net/vnet.h>
   66 
   67 #include <netinet/in.h>
   68 
   69 #ifdef DDB
   70 #include <ddb/ddb.h>
   71 #ifdef INET6
   72 #include <netinet6/in6_var.h>
   73 #endif /* INET6 */
   74 #endif /* DDB */
   75 
   76 #include <security/mac/mac_framework.h>
   77 
   78 #define DEFAULT_HOSTUUID        "00000000-0000-0000-0000-000000000000"
   79 
   80 MALLOC_DEFINE(M_PRISON, "prison", "Prison structures");
   81 static MALLOC_DEFINE(M_PRISON_RACCT, "prison_racct", "Prison racct structures");
   82 
   83 /* Keep struct prison prison0 and some code in kern_jail_set() readable. */
   84 #ifdef INET
   85 #ifdef INET6
   86 #define _PR_IP_SADDRSEL PR_IP4_SADDRSEL|PR_IP6_SADDRSEL
   87 #else
   88 #define _PR_IP_SADDRSEL PR_IP4_SADDRSEL
   89 #endif
   90 #else /* !INET */
   91 #ifdef INET6
   92 #define _PR_IP_SADDRSEL PR_IP6_SADDRSEL
   93 #else
   94 #define _PR_IP_SADDRSEL 0
   95 #endif
   96 #endif
   97 
   98 /*
       * prison0 describes what is "real" about the system.
       *
       * Only the members with constant initial values are set here.  The
       * members pr_cpuset, pr_osreldate and pr_osrelease cannot be initialized
       * statically and are filled in at run time by prison0_init().
       */
   99 struct prison prison0 = {
  100         .pr_id          = 0,
  101         .pr_name        = "",
  102         .pr_ref         = 1,
  103         .pr_uref        = 1,
  104         .pr_path        = "/",
  105         .pr_securelevel = -1,
  106         .pr_devfs_rsnum = 0,
  107         .pr_childmax    = JAIL_MAX,
  108         .pr_hostuuid    = DEFAULT_HOSTUUID,
  109         .pr_children    = LIST_HEAD_INITIALIZER(prison0.pr_children),
              /* PR_VNET is part of the flags only on kernels built with VIMAGE. */
  110 #ifdef VIMAGE
  111         .pr_flags       = PR_HOST|PR_VNET|_PR_IP_SADDRSEL,
  112 #else
  113         .pr_flags       = PR_HOST|_PR_IP_SADDRSEL,
  114 #endif
  115         .pr_allow       = PR_ALLOW_ALL,
  116 };
      /* Set up prison0.pr_mtx as a MTX_DEF mutex named "jail mutex". */
  117 MTX_SYSINIT(prison0, &prison0.pr_mtx, "jail mutex", MTX_DEF);
  118 
  119 /*
       * allprison, allprison_racct and lastprid are protected by allprison_lock.
       *
       * allprison_lock is an sx lock set up through SX_SYSINIT.  allprison
       * starts out as an empty tail queue and lastprid starts at 0.
       * NOTE(review): lastprid is presumably the most recently assigned jail
       * ID; confirm against the allocation code in kern_jail_set().
       */
  120 struct  sx allprison_lock;
  121 SX_SYSINIT(allprison_lock, &allprison_lock, "allprison");
  122 struct  prisonlist allprison = TAILQ_HEAD_INITIALIZER(allprison);
  123 LIST_HEAD(, prison_racct) allprison_racct;
  124 int     lastprid = 0;
  125 
  126 static int do_jail_attach(struct thread *td, struct prison *pr);
  127 static void prison_complete(void *context, int pending);
  128 static void prison_deref(struct prison *pr, int flags);
  129 static char *prison_path(struct prison *pr1, struct prison *pr2);
  130 static void prison_remove_one(struct prison *pr);
  131 #ifdef RACCT
  132 static void prison_racct_attach(struct prison *pr);
  133 static void prison_racct_modify(struct prison *pr);
  134 static void prison_racct_detach(struct prison *pr);
  135 #endif
  136 #ifdef INET
  137 static int _prison_check_ip4(struct prison *pr, struct in_addr *ia);
  138 static int prison_restrict_ip4(struct prison *pr, struct in_addr *newip4);
  139 #endif
  140 #ifdef INET6
  141 static int _prison_check_ip6(struct prison *pr, struct in6_addr *ia6);
  142 static int prison_restrict_ip6(struct prison *pr, struct in6_addr *newip6);
  143 #endif
  144 
  145 /*
       * Flags for prison_deref.  Each flag is a distinct bit, so they can be
       * OR-ed together in the function's "flags" argument.
       *
       * NOTE(review): prison_deref() is not visible in this part of the file.
       * Judging by the names only, PD_DEREF/PD_DEUREF presumably select which
       * reference count (pr_ref/pr_uref) to drop, and the PD_*LOCKED flags
       * presumably describe locks the caller already holds -- confirm against
       * the function body before relying on this.
       */
  146 #define PD_DEREF        0x01
  147 #define PD_DEUREF       0x02
  148 #define PD_LOCKED       0x04
  149 #define PD_LIST_SLOCKED 0x08
  150 #define PD_LIST_XLOCKED 0x10
  151 
  152 /*
       * Parameter names corresponding to PR_* flag values.
       *
       * The array index is the bit position of the flag: kern_jail_set() pairs
       * entry [fi] with the mask (1 << fi) and skips slots that are NULL, so
       * the arrays may be sparse.  pr_flag_names holds the name that sets a
       * flag and pr_flag_nonames the name that clears it; both arrays must use
       * the same indices.
       *
       * Size values are for kvm as we cannot figure out the size of a sparse
       * array, or an array without a terminating entry.
       */
  157 static char *pr_flag_names[] = {
  158         [0] = "persist",
  159 #ifdef INET
  160         [7] = "ip4.saddrsel",
  161 #endif
  162 #ifdef INET6
  163         [8] = "ip6.saddrsel",
  164 #endif
  165 };
  166 const size_t pr_flag_names_size = sizeof(pr_flag_names);
  167 
      /* Negated ("no...") spellings, index-for-index with pr_flag_names. */
  168 static char *pr_flag_nonames[] = {
  169         [0] = "nopersist",
  170 #ifdef INET
  171         [7] = "ip4.nosaddrsel",
  172 #endif
  173 #ifdef INET6
  174         [8] = "ip6.nosaddrsel",
  175 #endif
  176 };
  177 const size_t pr_flag_nonames_size = sizeof(pr_flag_nonames);
  178 
  179 struct jailsys_flags {
              /*
               * One entry per "jailsys" parameter, i.e. a parameter whose integer
               * value is JAIL_SYS_DISABLE, JAIL_SYS_NEW or JAIL_SYS_INHERIT.
               * kern_jail_set() looks each name up and translates the value into
               * PR_* flag bits using the two masks below.
               */
  180         const char      *name;          /* parameter name */
  181         unsigned         disable;       /* flags set for JAIL_SYS_DISABLE;
                                                 0 makes that value EINVAL */
  182         unsigned         new;           /* flags set for JAIL_SYS_NEW */
  183 } pr_flag_jailsys[] = {
  184         { "host", 0, PR_HOST },
  185 #ifdef VIMAGE
  186         { "vnet", 0, PR_VNET },
  187 #endif
  188 #ifdef INET
  189         { "ip4", PR_IP4_USER | PR_IP4_DISABLE, PR_IP4_USER },
  190 #endif
  191 #ifdef INET6
  192         { "ip6", PR_IP6_USER | PR_IP6_DISABLE, PR_IP6_USER },
  193 #endif
  194 };
      /* Size in bytes, exported like the other *_size values in this file. */
  195 const size_t pr_flag_jailsys_size = sizeof(pr_flag_jailsys);
  196 
  197 static char *pr_allow_names[] = {
              /*
               * Names of the boolean "allow.*" parameters.  The array is dense and
               * the index is the bit position in the allow mask: both kern_jail()
               * and kern_jail_set() pair entry [fi] with (1 << fi).
               * NOTE(review): the order presumably has to match the PR_ALLOW_*
               * bit definitions in sys/jail.h -- confirm when adding entries.
               */
  198         "allow.set_hostname",
  199         "allow.sysvipc",
  200         "allow.raw_sockets",
  201         "allow.chflags",
  202         "allow.mount",
  203         "allow.quotas",
  204         "allow.socket_af",
  205         "allow.mount.devfs",
  206         "allow.mount.nullfs",
  207         "allow.mount.zfs",
  208         "allow.mount.procfs",
  209         "allow.mount.tmpfs",
  210         "allow.mount.fdescfs",
  211 };
  212 const size_t pr_allow_names_size = sizeof(pr_allow_names);
  213 
      /*
       * Negated spellings of the parameters above, index-for-index with
       * pr_allow_names; the two arrays must have the same number of entries
       * because they are walked with a single loop bound.
       */
  214 static char *pr_allow_nonames[] = {
  215         "allow.noset_hostname",
  216         "allow.nosysvipc",
  217         "allow.noraw_sockets",
  218         "allow.nochflags",
  219         "allow.nomount",
  220         "allow.noquotas",
  221         "allow.nosocket_af",
  222         "allow.mount.nodevfs",
  223         "allow.mount.nonullfs",
  224         "allow.mount.nozfs",
  225         "allow.mount.noprocfs",
  226         "allow.mount.notmpfs",
  227         "allow.mount.nofdescfs",
  228 };
  229 const size_t pr_allow_nonames_size = sizeof(pr_allow_nonames);
  230 
  231 #define JAIL_DEFAULT_ALLOW              PR_ALLOW_SET_HOSTNAME
  232 #define JAIL_DEFAULT_ENFORCE_STATFS     2
  233 #define JAIL_DEFAULT_DEVFS_RSNUM        0
      /*
       * Run-time copies of the defaults above.  kern_jail() applies
       * jail_default_allow and jail_default_enforce_statfs to jails created by
       * a caller that is not itself jailed.
       */
  234 static unsigned jail_default_allow = JAIL_DEFAULT_ALLOW;
  235 static int jail_default_enforce_statfs = JAIL_DEFAULT_ENFORCE_STATFS;
  236 static int jail_default_devfs_rsnum = JAIL_DEFAULT_DEVFS_RSNUM;
  237 #if defined(INET) || defined(INET6)
      /*
       * Upper bound on the number of addresses a jail may be given per address
       * family; larger requests are rejected with EINVAL.
       */
  238 static unsigned jail_max_af_ips = 255;
  239 #endif
  240 
  241 /*
  242  * Initialize the parts of prison0 that can't be static-initialized with
  243  * constants.  This is called from proc0_init() after creating thread0 cpuset.
  244  */
  245 void
  246 prison0_init(void)
  247 {
  248 
  249         prison0.pr_cpuset = cpuset_ref(thread0.td_cpuset);
  250         prison0.pr_osreldate = osreldate;
  251         strlcpy(prison0.pr_osrelease, osrelease, sizeof(prison0.pr_osrelease));
  252 }
  253 
  254 #ifdef INET
  255 static int
  256 qcmp_v4(const void *ip1, const void *ip2)
  257 {
  258         in_addr_t iaa, iab;
  259 
  260         /*
  261          * We need to compare in HBO here to get the list sorted as expected
  262          * by the result of the code.  Sorting NBO addresses gives you
  263          * interesting results.  If you do not understand, do not try.
  264          */
  265         iaa = ntohl(((const struct in_addr *)ip1)->s_addr);
  266         iab = ntohl(((const struct in_addr *)ip2)->s_addr);
  267 
  268         /*
  269          * Do not simply return the difference of the two numbers, the int is
  270          * not wide enough.
  271          */
  272         if (iaa > iab)
  273                 return (1);
  274         else if (iaa < iab)
  275                 return (-1);
  276         else
  277                 return (0);
  278 }
  279 #endif
  280 
  281 #ifdef INET6
  282 static int
  283 qcmp_v6(const void *ip1, const void *ip2)
  284 {
  285         const struct in6_addr *ia6a, *ia6b;
  286         int i, rc;
  287 
  288         ia6a = (const struct in6_addr *)ip1;
  289         ia6b = (const struct in6_addr *)ip2;
  290 
  291         rc = 0;
  292         for (i = 0; rc == 0 && i < sizeof(struct in6_addr); i++) {
  293                 if (ia6a->s6_addr[i] > ia6b->s6_addr[i])
  294                         rc = 1;
  295                 else if (ia6a->s6_addr[i] < ia6b->s6_addr[i])
  296                         rc = -1;
  297         }
  298         return (rc);
  299 }
  300 #endif
  301 
  302 /*
  303  * struct jail_args {
  304  *      struct jail *jail;
  305  * };
  306  */
  307 int
  308 sys_jail(struct thread *td, struct jail_args *uap)
  309 {
  310         uint32_t version;
  311         int error;
  312         struct jail j;
  313 
  314         error = copyin(uap->jail, &version, sizeof(uint32_t));
  315         if (error)
  316                 return (error);
  317 
  318         switch (version) {
  319         case 0:
  320         {
  321                 struct jail_v0 j0;
  322 
  323                 /* FreeBSD single IPv4 jails. */
  324                 bzero(&j, sizeof(struct jail));
  325                 error = copyin(uap->jail, &j0, sizeof(struct jail_v0));
  326                 if (error)
  327                         return (error);
  328                 j.version = j0.version;
  329                 j.path = j0.path;
  330                 j.hostname = j0.hostname;
  331                 j.ip4s = htonl(j0.ip_number);   /* jail_v0 is host order */
  332                 break;
  333         }
  334 
  335         case 1:
  336                 /*
  337                  * Version 1 was used by multi-IPv4 jail implementations
  338                  * that never made it into the official kernel.
  339                  */
  340                 return (EINVAL);
  341 
  342         case 2: /* JAIL_API_VERSION */
  343                 /* FreeBSD multi-IPv4/IPv6,noIP jails. */
  344                 error = copyin(uap->jail, &j, sizeof(struct jail));
  345                 if (error)
  346                         return (error);
  347                 break;
  348 
  349         default:
  350                 /* Sci-Fi jails are not supported, sorry. */
  351                 return (EINVAL);
  352         }
  353         return (kern_jail(td, &j));
  354 }
  355 
  356 int
  357 kern_jail(struct thread *td, struct jail *j)
  358 {
  359         struct iovec optiov[2 * (4
  360                             + sizeof(pr_allow_names) / sizeof(pr_allow_names[0])
  361 #ifdef INET
  362                             + 1
  363 #endif
  364 #ifdef INET6
  365                             + 1
  366 #endif
  367                             )];
  368         struct uio opt;
  369         char *u_path, *u_hostname, *u_name;
  370 #ifdef INET
  371         uint32_t ip4s;
  372         struct in_addr *u_ip4;
  373 #endif
  374 #ifdef INET6
  375         struct in6_addr *u_ip6;
  376 #endif
  377         size_t tmplen;
  378         int error, enforce_statfs, fi;
  379 
  380         bzero(&optiov, sizeof(optiov));
  381         opt.uio_iov = optiov;
  382         opt.uio_iovcnt = 0;
  383         opt.uio_offset = -1;
  384         opt.uio_resid = -1;
  385         opt.uio_segflg = UIO_SYSSPACE;
  386         opt.uio_rw = UIO_READ;
  387         opt.uio_td = td;
  388 
  389         /* Set permissions for top-level jails from sysctls. */
  390         if (!jailed(td->td_ucred)) {
  391                 for (fi = 0; fi < sizeof(pr_allow_names) /
  392                      sizeof(pr_allow_names[0]); fi++) {
  393                         optiov[opt.uio_iovcnt].iov_base =
  394                             (jail_default_allow & (1 << fi))
  395                             ? pr_allow_names[fi] : pr_allow_nonames[fi];
  396                         optiov[opt.uio_iovcnt].iov_len =
  397                             strlen(optiov[opt.uio_iovcnt].iov_base) + 1;
  398                         opt.uio_iovcnt += 2;
  399                 }
  400                 optiov[opt.uio_iovcnt].iov_base = "enforce_statfs";
  401                 optiov[opt.uio_iovcnt].iov_len = sizeof("enforce_statfs");
  402                 opt.uio_iovcnt++;
  403                 enforce_statfs = jail_default_enforce_statfs;
  404                 optiov[opt.uio_iovcnt].iov_base = &enforce_statfs;
  405                 optiov[opt.uio_iovcnt].iov_len = sizeof(enforce_statfs);
  406                 opt.uio_iovcnt++;
  407         }
  408 
  409         tmplen = MAXPATHLEN + MAXHOSTNAMELEN + MAXHOSTNAMELEN;
  410 #ifdef INET
  411         ip4s = (j->version == 0) ? 1 : j->ip4s;
  412         if (ip4s > jail_max_af_ips)
  413                 return (EINVAL);
  414         tmplen += ip4s * sizeof(struct in_addr);
  415 #else
  416         if (j->ip4s > 0)
  417                 return (EINVAL);
  418 #endif
  419 #ifdef INET6
  420         if (j->ip6s > jail_max_af_ips)
  421                 return (EINVAL);
  422         tmplen += j->ip6s * sizeof(struct in6_addr);
  423 #else
  424         if (j->ip6s > 0)
  425                 return (EINVAL);
  426 #endif
  427         u_path = malloc(tmplen, M_TEMP, M_WAITOK);
  428         u_hostname = u_path + MAXPATHLEN;
  429         u_name = u_hostname + MAXHOSTNAMELEN;
  430 #ifdef INET
  431         u_ip4 = (struct in_addr *)(u_name + MAXHOSTNAMELEN);
  432 #endif
  433 #ifdef INET6
  434 #ifdef INET
  435         u_ip6 = (struct in6_addr *)(u_ip4 + ip4s);
  436 #else
  437         u_ip6 = (struct in6_addr *)(u_name + MAXHOSTNAMELEN);
  438 #endif
  439 #endif
  440         optiov[opt.uio_iovcnt].iov_base = "path";
  441         optiov[opt.uio_iovcnt].iov_len = sizeof("path");
  442         opt.uio_iovcnt++;
  443         optiov[opt.uio_iovcnt].iov_base = u_path;
  444         error = copyinstr(j->path, u_path, MAXPATHLEN,
  445             &optiov[opt.uio_iovcnt].iov_len);
  446         if (error) {
  447                 free(u_path, M_TEMP);
  448                 return (error);
  449         }
  450         opt.uio_iovcnt++;
  451         optiov[opt.uio_iovcnt].iov_base = "host.hostname";
  452         optiov[opt.uio_iovcnt].iov_len = sizeof("host.hostname");
  453         opt.uio_iovcnt++;
  454         optiov[opt.uio_iovcnt].iov_base = u_hostname;
  455         error = copyinstr(j->hostname, u_hostname, MAXHOSTNAMELEN,
  456             &optiov[opt.uio_iovcnt].iov_len);
  457         if (error) {
  458                 free(u_path, M_TEMP);
  459                 return (error);
  460         }
  461         opt.uio_iovcnt++;
  462         if (j->jailname != NULL) {
  463                 optiov[opt.uio_iovcnt].iov_base = "name";
  464                 optiov[opt.uio_iovcnt].iov_len = sizeof("name");
  465                 opt.uio_iovcnt++;
  466                 optiov[opt.uio_iovcnt].iov_base = u_name;
  467                 error = copyinstr(j->jailname, u_name, MAXHOSTNAMELEN,
  468                     &optiov[opt.uio_iovcnt].iov_len);
  469                 if (error) {
  470                         free(u_path, M_TEMP);
  471                         return (error);
  472                 }
  473                 opt.uio_iovcnt++;
  474         }
  475 #ifdef INET
  476         optiov[opt.uio_iovcnt].iov_base = "ip4.addr";
  477         optiov[opt.uio_iovcnt].iov_len = sizeof("ip4.addr");
  478         opt.uio_iovcnt++;
  479         optiov[opt.uio_iovcnt].iov_base = u_ip4;
  480         optiov[opt.uio_iovcnt].iov_len = ip4s * sizeof(struct in_addr);
  481         if (j->version == 0)
  482                 u_ip4->s_addr = j->ip4s;
  483         else {
  484                 error = copyin(j->ip4, u_ip4, optiov[opt.uio_iovcnt].iov_len);
  485                 if (error) {
  486                         free(u_path, M_TEMP);
  487                         return (error);
  488                 }
  489         }
  490         opt.uio_iovcnt++;
  491 #endif
  492 #ifdef INET6
  493         optiov[opt.uio_iovcnt].iov_base = "ip6.addr";
  494         optiov[opt.uio_iovcnt].iov_len = sizeof("ip6.addr");
  495         opt.uio_iovcnt++;
  496         optiov[opt.uio_iovcnt].iov_base = u_ip6;
  497         optiov[opt.uio_iovcnt].iov_len = j->ip6s * sizeof(struct in6_addr);
  498         error = copyin(j->ip6, u_ip6, optiov[opt.uio_iovcnt].iov_len);
  499         if (error) {
  500                 free(u_path, M_TEMP);
  501                 return (error);
  502         }
  503         opt.uio_iovcnt++;
  504 #endif
  505         KASSERT(opt.uio_iovcnt <= sizeof(optiov) / sizeof(optiov[0]),
  506             ("kern_jail: too many iovecs (%d)", opt.uio_iovcnt));
  507         error = kern_jail_set(td, &opt, JAIL_CREATE | JAIL_ATTACH);
  508         free(u_path, M_TEMP);
  509         return (error);
  510 }
  511 
  512 
  513 /*
  514  * struct jail_set_args {
  515  *      struct iovec *iovp;
  516  *      unsigned int iovcnt;
  517  *      int flags;
  518  * };
  519  */
  520 int
  521 sys_jail_set(struct thread *td, struct jail_set_args *uap)
  522 {
  523         struct uio *auio;
  524         int error;
  525 
  526         /* Check that we have an even number of iovecs. */
  527         if (uap->iovcnt & 1)
  528                 return (EINVAL);
  529 
  530         error = copyinuio(uap->iovp, uap->iovcnt, &auio);
  531         if (error)
  532                 return (error);
  533         error = kern_jail_set(td, auio, uap->flags);
  534         free(auio, M_IOV);
  535         return (error);
  536 }
  537 
  538 int
  539 kern_jail_set(struct thread *td, struct uio *optuio, int flags)
  540 {
  541         struct nameidata nd;
  542 #ifdef INET
  543         struct in_addr *ip4;
  544 #endif
  545 #ifdef INET6
  546         struct in6_addr *ip6;
  547 #endif
  548         struct vfsopt *opt;
  549         struct vfsoptlist *opts;
  550         struct prison *pr, *deadpr, *mypr, *ppr, *tpr;
  551         struct vnode *root;
  552         char *domain, *errmsg, *host, *name, *namelc, *p, *path, *uuid;
  553         char *g_path, *osrelstr;
  554 #if defined(INET) || defined(INET6)
  555         struct prison *tppr;
  556         void *op;
  557 #endif
  558         unsigned long hid;
  559         size_t namelen, onamelen;
  560         int created, cuflags, descend, enforce, error, errmsg_len, errmsg_pos;
  561         int gotchildmax, gotenforce, gothid, gotrsnum, gotslevel;
  562         int fi, jid, jsys, len, level;
  563         int childmax, osreldt, rsnum, slevel;
  564         int fullpath_disabled;
  565 #if defined(INET) || defined(INET6)
  566         int ii, ij;
  567 #endif
  568 #ifdef INET
  569         int ip4s, redo_ip4;
  570 #endif
  571 #ifdef INET6
  572         int ip6s, redo_ip6;
  573 #endif
  574         uint64_t pr_allow, ch_allow, pr_flags, ch_flags;
  575         unsigned tallow;
  576         char numbuf[12];
  577 
  578         error = priv_check(td, PRIV_JAIL_SET);
  579         if (!error && (flags & JAIL_ATTACH))
  580                 error = priv_check(td, PRIV_JAIL_ATTACH);
  581         if (error)
  582                 return (error);
  583         mypr = ppr = td->td_ucred->cr_prison;
  584         if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0)
  585                 return (EPERM);
  586         if (flags & ~JAIL_SET_MASK)
  587                 return (EINVAL);
  588 
  589         /*
  590          * Check all the parameters before committing to anything.  Not all
  591          * errors can be caught early, but we may as well try.  Also, this
  592          * takes care of some expensive stuff (path lookup) before getting
  593          * the allprison lock.
  594          *
  595          * XXX Jails are not filesystems, and jail parameters are not mount
  596          *     options.  But it makes more sense to re-use the vfsopt code
  597          *     than duplicate it under a different name.
  598          */
  599         error = vfs_buildopts(optuio, &opts);
  600         if (error)
  601                 return (error);
  602 #ifdef INET
  603         ip4 = NULL;
  604 #endif
  605 #ifdef INET6
  606         ip6 = NULL;
  607 #endif
  608         g_path = NULL;
  609 
  610         error = vfs_copyopt(opts, "jid", &jid, sizeof(jid));
  611         if (error == ENOENT)
  612                 jid = 0;
  613         else if (error != 0)
  614                 goto done_free;
  615 
  616         error = vfs_copyopt(opts, "securelevel", &slevel, sizeof(slevel));
  617         if (error == ENOENT)
  618                 gotslevel = 0;
  619         else if (error != 0)
  620                 goto done_free;
  621         else
  622                 gotslevel = 1;
  623 
  624         error =
  625             vfs_copyopt(opts, "children.max", &childmax, sizeof(childmax));
  626         if (error == ENOENT)
  627                 gotchildmax = 0;
  628         else if (error != 0)
  629                 goto done_free;
  630         else
  631                 gotchildmax = 1;
  632 
  633         error = vfs_copyopt(opts, "enforce_statfs", &enforce, sizeof(enforce));
  634         if (error == ENOENT)
  635                 gotenforce = 0;
  636         else if (error != 0)
  637                 goto done_free;
  638         else if (enforce < 0 || enforce > 2) {
  639                 error = EINVAL;
  640                 goto done_free;
  641         } else
  642                 gotenforce = 1;
  643 
  644         error = vfs_copyopt(opts, "devfs_ruleset", &rsnum, sizeof(rsnum));
  645         if (error == ENOENT)
  646                 gotrsnum = 0;
  647         else if (error != 0)
  648                 goto done_free;
  649         else
  650                 gotrsnum = 1;
  651 
  652         pr_flags = ch_flags = 0;
  653         for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]);
  654             fi++) {
  655                 if (pr_flag_names[fi] == NULL)
  656                         continue;
  657                 vfs_flagopt(opts, pr_flag_names[fi], &pr_flags, 1 << fi);
  658                 vfs_flagopt(opts, pr_flag_nonames[fi], &ch_flags, 1 << fi);
  659         }
  660         ch_flags |= pr_flags;
  661         for (fi = 0; fi < sizeof(pr_flag_jailsys) / sizeof(pr_flag_jailsys[0]);
  662             fi++) {
  663                 error = vfs_copyopt(opts, pr_flag_jailsys[fi].name, &jsys,
  664                     sizeof(jsys));
  665                 if (error == ENOENT)
  666                         continue;
  667                 if (error != 0)
  668                         goto done_free;
  669                 switch (jsys) {
  670                 case JAIL_SYS_DISABLE:
  671                         if (!pr_flag_jailsys[fi].disable) {
  672                                 error = EINVAL;
  673                                 goto done_free;
  674                         }
  675                         pr_flags |= pr_flag_jailsys[fi].disable;
  676                         break;
  677                 case JAIL_SYS_NEW:
  678                         pr_flags |= pr_flag_jailsys[fi].new;
  679                         break;
  680                 case JAIL_SYS_INHERIT:
  681                         break;
  682                 default:
  683                         error = EINVAL;
  684                         goto done_free;
  685                 }
  686                 ch_flags |=
  687                     pr_flag_jailsys[fi].new | pr_flag_jailsys[fi].disable;
  688         }
  689         if ((flags & (JAIL_CREATE | JAIL_UPDATE | JAIL_ATTACH)) == JAIL_CREATE
  690             && !(pr_flags & PR_PERSIST)) {
  691                 error = EINVAL;
  692                 vfs_opterror(opts, "new jail must persist or attach");
  693                 goto done_errmsg;
  694         }
  695 #ifdef VIMAGE
  696         if ((flags & JAIL_UPDATE) && (ch_flags & PR_VNET)) {
  697                 error = EINVAL;
  698                 vfs_opterror(opts, "vnet cannot be changed after creation");
  699                 goto done_errmsg;
  700         }
  701 #endif
  702 #ifdef INET
  703         if ((flags & JAIL_UPDATE) && (ch_flags & PR_IP4_USER)) {
  704                 error = EINVAL;
  705                 vfs_opterror(opts, "ip4 cannot be changed after creation");
  706                 goto done_errmsg;
  707         }
  708 #endif
  709 #ifdef INET6
  710         if ((flags & JAIL_UPDATE) && (ch_flags & PR_IP6_USER)) {
  711                 error = EINVAL;
  712                 vfs_opterror(opts, "ip6 cannot be changed after creation");
  713                 goto done_errmsg;
  714         }
  715 #endif
  716 
  717         pr_allow = ch_allow = 0;
  718         for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]);
  719             fi++) {
  720                 vfs_flagopt(opts, pr_allow_names[fi], &pr_allow, 1 << fi);
  721                 vfs_flagopt(opts, pr_allow_nonames[fi], &ch_allow, 1 << fi);
  722         }
  723         ch_allow |= pr_allow;
  724 
  725         error = vfs_getopt(opts, "name", (void **)&name, &len);
  726         if (error == ENOENT)
  727                 name = NULL;
  728         else if (error != 0)
  729                 goto done_free;
  730         else {
  731                 if (len == 0 || name[len - 1] != '\0') {
  732                         error = EINVAL;
  733                         goto done_free;
  734                 }
  735                 if (len > MAXHOSTNAMELEN) {
  736                         error = ENAMETOOLONG;
  737                         goto done_free;
  738                 }
  739         }
  740 
  741         error = vfs_getopt(opts, "host.hostname", (void **)&host, &len);
  742         if (error == ENOENT)
  743                 host = NULL;
  744         else if (error != 0)
  745                 goto done_free;
  746         else {
  747                 ch_flags |= PR_HOST;
  748                 pr_flags |= PR_HOST;
  749                 if (len == 0 || host[len - 1] != '\0') {
  750                         error = EINVAL;
  751                         goto done_free;
  752                 }
  753                 if (len > MAXHOSTNAMELEN) {
  754                         error = ENAMETOOLONG;
  755                         goto done_free;
  756                 }
  757         }
  758 
  759         error = vfs_getopt(opts, "host.domainname", (void **)&domain, &len);
  760         if (error == ENOENT)
  761                 domain = NULL;
  762         else if (error != 0)
  763                 goto done_free;
  764         else {
  765                 ch_flags |= PR_HOST;
  766                 pr_flags |= PR_HOST;
  767                 if (len == 0 || domain[len - 1] != '\0') {
  768                         error = EINVAL;
  769                         goto done_free;
  770                 }
  771                 if (len > MAXHOSTNAMELEN) {
  772                         error = ENAMETOOLONG;
  773                         goto done_free;
  774                 }
  775         }
  776 
  777         error = vfs_getopt(opts, "host.hostuuid", (void **)&uuid, &len);
  778         if (error == ENOENT)
  779                 uuid = NULL;
  780         else if (error != 0)
  781                 goto done_free;
  782         else {
  783                 ch_flags |= PR_HOST;
  784                 pr_flags |= PR_HOST;
  785                 if (len == 0 || uuid[len - 1] != '\0') {
  786                         error = EINVAL;
  787                         goto done_free;
  788                 }
  789                 if (len > HOSTUUIDLEN) {
  790                         error = ENAMETOOLONG;
  791                         goto done_free;
  792                 }
  793         }
  794 
  795 #ifdef COMPAT_FREEBSD32
  796         if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
  797                 uint32_t hid32;
  798 
  799                 error = vfs_copyopt(opts, "host.hostid", &hid32, sizeof(hid32));
  800                 hid = hid32;
  801         } else
  802 #endif
  803                 error = vfs_copyopt(opts, "host.hostid", &hid, sizeof(hid));
  804         if (error == ENOENT)
  805                 gothid = 0;
  806         else if (error != 0)
  807                 goto done_free;
  808         else {
  809                 gothid = 1;
  810                 ch_flags |= PR_HOST;
  811                 pr_flags |= PR_HOST;
  812         }
  813 
  814 #ifdef INET
  815         error = vfs_getopt(opts, "ip4.addr", &op, &ip4s);
  816         if (error == ENOENT)
  817                 ip4s = 0;
  818         else if (error != 0)
  819                 goto done_free;
  820         else if (ip4s & (sizeof(*ip4) - 1)) {
  821                 error = EINVAL;
  822                 goto done_free;
  823         } else {
  824                 ch_flags |= PR_IP4_USER | PR_IP4_DISABLE;
  825                 if (ip4s == 0)
  826                         pr_flags |= PR_IP4_USER | PR_IP4_DISABLE;
  827                 else {
  828                         pr_flags = (pr_flags & ~PR_IP4_DISABLE) | PR_IP4_USER;
  829                         ip4s /= sizeof(*ip4);
  830                         if (ip4s > jail_max_af_ips) {
  831                                 error = EINVAL;
  832                                 vfs_opterror(opts, "too many IPv4 addresses");
  833                                 goto done_errmsg;
  834                         }
  835                         ip4 = malloc(ip4s * sizeof(*ip4), M_PRISON, M_WAITOK);
  836                         bcopy(op, ip4, ip4s * sizeof(*ip4));
  837                         /*
  838                          * IP addresses are all sorted but ip[0] to preserve
  839                          * the primary IP address as given from userland.
  840                          * This special IP is used for unbound outgoing
  841                          * connections as well for "loopback" traffic in case
  842                          * source address selection cannot find any more fitting
  843                          * address to connect from.
  844                          */
  845                         if (ip4s > 1)
  846                                 qsort(ip4 + 1, ip4s - 1, sizeof(*ip4), qcmp_v4);
  847                         /*
  848                          * Check for duplicate addresses and do some simple
  849                          * zero and broadcast checks. If users give other bogus
  850                          * addresses it is their problem.
  851                          *
  852                          * We do not have to care about byte order for these
  853                          * checks so we will do them in NBO.
  854                          */
  855                         for (ii = 0; ii < ip4s; ii++) {
  856                                 if (ip4[ii].s_addr == INADDR_ANY ||
  857                                     ip4[ii].s_addr == INADDR_BROADCAST) {
  858                                         error = EINVAL;
  859                                         goto done_free;
  860                                 }
  861                                 if ((ii+1) < ip4s &&
  862                                     (ip4[0].s_addr == ip4[ii+1].s_addr ||
  863                                      ip4[ii].s_addr == ip4[ii+1].s_addr)) {
  864                                         error = EINVAL;
  865                                         goto done_free;
  866                                 }
  867                         }
  868                 }
  869         }
  870 #endif
  871 
  872 #ifdef INET6
  873         error = vfs_getopt(opts, "ip6.addr", &op, &ip6s);
  874         if (error == ENOENT)
  875                 ip6s = 0;
  876         else if (error != 0)
  877                 goto done_free;
  878         else if (ip6s & (sizeof(*ip6) - 1)) {
  879                 error = EINVAL;
  880                 goto done_free;
  881         } else {
  882                 ch_flags |= PR_IP6_USER | PR_IP6_DISABLE;
  883                 if (ip6s == 0)
  884                         pr_flags |= PR_IP6_USER | PR_IP6_DISABLE;
  885                 else {
  886                         pr_flags = (pr_flags & ~PR_IP6_DISABLE) | PR_IP6_USER;
  887                         ip6s /= sizeof(*ip6);
  888                         if (ip6s > jail_max_af_ips) {
  889                                 error = EINVAL;
  890                                 vfs_opterror(opts, "too many IPv6 addresses");
  891                                 goto done_errmsg;
  892                         }
  893                         ip6 = malloc(ip6s * sizeof(*ip6), M_PRISON, M_WAITOK);
  894                         bcopy(op, ip6, ip6s * sizeof(*ip6));
  895                         if (ip6s > 1)
  896                                 qsort(ip6 + 1, ip6s - 1, sizeof(*ip6), qcmp_v6);
  897                         for (ii = 0; ii < ip6s; ii++) {
  898                                 if (IN6_IS_ADDR_UNSPECIFIED(&ip6[ii])) {
  899                                         error = EINVAL;
  900                                         goto done_free;
  901                                 }
  902                                 if ((ii+1) < ip6s &&
  903                                     (IN6_ARE_ADDR_EQUAL(&ip6[0], &ip6[ii+1]) ||
  904                                      IN6_ARE_ADDR_EQUAL(&ip6[ii], &ip6[ii+1])))
  905                                 {
  906                                         error = EINVAL;
  907                                         goto done_free;
  908                                 }
  909                         }
  910                 }
  911         }
  912 #endif
  913 
  914 #if defined(VIMAGE) && (defined(INET) || defined(INET6))
  915         if ((ch_flags & PR_VNET) && (ch_flags & (PR_IP4_USER | PR_IP6_USER))) {
  916                 error = EINVAL;
  917                 vfs_opterror(opts,
  918                     "vnet jails cannot have IP address restrictions");
  919                 goto done_errmsg;
  920         }
  921 #endif
  922 
  923         fullpath_disabled = 0;
  924         root = NULL;
  925         error = vfs_getopt(opts, "path", (void **)&path, &len);
  926         if (error == ENOENT)
  927                 path = NULL;
  928         else if (error != 0)
  929                 goto done_free;
  930         else {
  931                 if (flags & JAIL_UPDATE) {
  932                         error = EINVAL;
  933                         vfs_opterror(opts,
  934                             "path cannot be changed after creation");
  935                         goto done_errmsg;
  936                 }
  937                 if (len == 0 || path[len - 1] != '\0') {
  938                         error = EINVAL;
  939                         goto done_free;
  940                 }
  941                 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
  942                     path, td);
  943                 error = namei(&nd);
  944                 if (error)
  945                         goto done_free;
  946                 root = nd.ni_vp;
  947                 NDFREE(&nd, NDF_ONLY_PNBUF);
  948                 g_path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
  949                 strlcpy(g_path, path, MAXPATHLEN);
  950                 error = vn_path_to_global_path(td, root, g_path, MAXPATHLEN);
  951                 if (error == 0)
  952                         path = g_path;
  953                 else if (error == ENODEV) {
  954                         /* proceed if sysctl debug.disablefullpath == 1 */
  955                         fullpath_disabled = 1;
  956                         if (len < 2 || (len == 2 && path[0] == '/'))
  957                                 path = NULL;
  958                 } else {
  959                         /* exit on other errors */
  960                         goto done_free;
  961                 }
  962                 if (root->v_type != VDIR) {
  963                         error = ENOTDIR;
  964                         vput(root);
  965                         goto done_free;
  966                 }
  967                 VOP_UNLOCK(root, 0);
  968                 if (fullpath_disabled) {
  969                         /* Leave room for a real-root full pathname. */
  970                         if (len + (path[0] == '/' && strcmp(mypr->pr_path, "/")
  971                             ? strlen(mypr->pr_path) : 0) > MAXPATHLEN) {
  972                                 error = ENAMETOOLONG;
  973                                 goto done_free;
  974                         }
  975                 }
  976         }
  977 
  978         error = vfs_getopt(opts, "osrelease", (void **)&osrelstr, &len);
  979         if (error == ENOENT)
  980                 osrelstr = NULL;
  981         else if (error != 0)
  982                 goto done_free;
  983         else {
  984                 if (flags & JAIL_UPDATE) {
  985                         error = EINVAL;
  986                         vfs_opterror(opts,
  987                             "osrelease cannot be changed after creation");
  988                         goto done_errmsg;
  989                 }
  990                 if (len == 0 || len >= OSRELEASELEN) {
  991                         error = EINVAL;
  992                         vfs_opterror(opts,
  993                             "osrelease string must be 1-%d bytes long",
  994                             OSRELEASELEN - 1);
  995                         goto done_errmsg;
  996                 }
  997         }
  998 
  999         error = vfs_copyopt(opts, "osreldate", &osreldt, sizeof(osreldt));
 1000         if (error == ENOENT)
 1001                 osreldt = 0;
 1002         else if (error != 0)
 1003                 goto done_free;
 1004         else {
 1005                 if (flags & JAIL_UPDATE) {
 1006                         error = EINVAL;
 1007                         vfs_opterror(opts,
 1008                             "osreldate cannot be changed after creation");
 1009                         goto done_errmsg;
 1010                 }
 1011                 if (osreldt == 0) {
 1012                         error = EINVAL;
 1013                         vfs_opterror(opts, "osreldate cannot be 0");
 1014                         goto done_errmsg;
 1015                 }
 1016         }
 1017 
 1018         /*
 1019          * Grab the allprison lock before letting modules check their
 1020          * parameters.  Once we have it, do not let go so we'll have a
 1021          * consistent view of the OSD list.
 1022          */
 1023         sx_xlock(&allprison_lock);
 1024         error = osd_jail_call(NULL, PR_METHOD_CHECK, opts);
 1025         if (error)
 1026                 goto done_unlock_list;
 1027 
 1028         /* By now, all parameters should have been noted. */
 1029         TAILQ_FOREACH(opt, opts, link) {
 1030                 if (!opt->seen && strcmp(opt->name, "errmsg")) {
 1031                         error = EINVAL;
 1032                         vfs_opterror(opts, "unknown parameter: %s", opt->name);
 1033                         goto done_unlock_list;
 1034                 }
 1035         }
 1036 
 1037         /*
 1038          * See if we are creating a new record or updating an existing one.
 1039          * This abuses the file error codes ENOENT and EEXIST.
 1040          */
 1041         cuflags = flags & (JAIL_CREATE | JAIL_UPDATE);
 1042         if (!cuflags) {
 1043                 error = EINVAL;
 1044                 vfs_opterror(opts, "no valid operation (create or update)");
 1045                 goto done_unlock_list;
 1046         }
 1047         pr = NULL;
 1048         namelc = NULL;
 1049         if (cuflags == JAIL_CREATE && jid == 0 && name != NULL) {
 1050                 namelc = strrchr(name, '.');
 1051                 jid = strtoul(namelc != NULL ? namelc + 1 : name, &p, 10);
 1052                 if (*p != '\0')
 1053                         jid = 0;
 1054         }
 1055         if (jid != 0) {
 1056                 /*
 1057                  * See if a requested jid already exists.  There is an
 1058                  * information leak here if the jid exists but is not within
 1059                  * the caller's jail hierarchy.  Jail creators will get EEXIST
 1060                  * even though they cannot see the jail, and CREATE | UPDATE
 1061                  * will return ENOENT which is not normally a valid error.
 1062                  */
 1063                 if (jid < 0) {
 1064                         error = EINVAL;
 1065                         vfs_opterror(opts, "negative jid");
 1066                         goto done_unlock_list;
 1067                 }
 1068                 pr = prison_find(jid);
 1069                 if (pr != NULL) {
 1070                         ppr = pr->pr_parent;
 1071                         /* Create: jid must not exist. */
 1072                         if (cuflags == JAIL_CREATE) {
 1073                                 mtx_unlock(&pr->pr_mtx);
 1074                                 error = EEXIST;
 1075                                 vfs_opterror(opts, "jail %d already exists",
 1076                                     jid);
 1077                                 goto done_unlock_list;
 1078                         }
 1079                         if (!prison_ischild(mypr, pr)) {
 1080                                 mtx_unlock(&pr->pr_mtx);
 1081                                 pr = NULL;
 1082                         } else if (pr->pr_uref == 0) {
 1083                                 if (!(flags & JAIL_DYING)) {
 1084                                         mtx_unlock(&pr->pr_mtx);
 1085                                         error = ENOENT;
 1086                                         vfs_opterror(opts, "jail %d is dying",
 1087                                             jid);
 1088                                         goto done_unlock_list;
 1089                                 } else if ((flags & JAIL_ATTACH) ||
 1090                                     (pr_flags & PR_PERSIST)) {
 1091                                         /*
 1092                                          * A dying jail might be resurrected
 1093                                          * (via attach or persist), but first
 1094                                          * it must determine if another jail
 1095                                          * has claimed its name.  Accomplish
 1096                                          * this by implicitly re-setting the
 1097                                          * name.
 1098                                          */
 1099                                         if (name == NULL)
 1100                                                 name = prison_name(mypr, pr);
 1101                                 }
 1102                         }
 1103                 }
 1104                 if (pr == NULL) {
 1105                         /* Update: jid must exist. */
 1106                         if (cuflags == JAIL_UPDATE) {
 1107                                 error = ENOENT;
 1108                                 vfs_opterror(opts, "jail %d not found", jid);
 1109                                 goto done_unlock_list;
 1110                         }
 1111                 }
 1112         }
 1113         /*
 1114          * If the caller provided a name, look for a jail by that name.
 1115          * This has different semantics for creates and updates keyed by jid
 1116          * (where the name must not already exist in a different jail),
 1117          * and updates keyed by the name itself (where the name must exist
 1118          * because that is the jail being updated).
 1119          */
 1120         if (name != NULL) {
 1121                 namelc = strrchr(name, '.');
 1122                 if (namelc == NULL)
 1123                         namelc = name;
 1124                 else {
 1125                         /*
 1126                          * This is a hierarchical name.  Split it into the
 1127                          * parent and child names, and make sure the parent
 1128                          * exists or matches an already found jail.
 1129                          */
 1130                         *namelc = '\0';
 1131                         if (pr != NULL) {
 1132                                 if (strncmp(name, ppr->pr_name, namelc - name)
 1133                                     || ppr->pr_name[namelc - name] != '\0') {
 1134                                         mtx_unlock(&pr->pr_mtx);
 1135                                         error = EINVAL;
 1136                                         vfs_opterror(opts,
 1137                                             "cannot change jail's parent");
 1138                                         goto done_unlock_list;
 1139                                 }
 1140                         } else {
 1141                                 ppr = prison_find_name(mypr, name);
 1142                                 if (ppr == NULL) {
 1143                                         error = ENOENT;
 1144                                         vfs_opterror(opts,
 1145                                             "jail \"%s\" not found", name);
 1146                                         goto done_unlock_list;
 1147                                 }
 1148                                 mtx_unlock(&ppr->pr_mtx);
 1149                         }
 1150                         name = ++namelc;
 1151                 }
 1152                 if (name[0] != '\0') {
 1153                         namelen =
 1154                             (ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1;
 1155  name_again:
 1156                         deadpr = NULL;
 1157                         FOREACH_PRISON_CHILD(ppr, tpr) {
 1158                                 if (tpr != pr && tpr->pr_ref > 0 &&
 1159                                     !strcmp(tpr->pr_name + namelen, name)) {
 1160                                         if (pr == NULL &&
 1161                                             cuflags != JAIL_CREATE) {
 1162                                                 mtx_lock(&tpr->pr_mtx);
 1163                                                 if (tpr->pr_ref > 0) {
 1164                                                         /*
 1165                                                          * Use this jail
 1166                                                          * for updates.
 1167                                                          */
 1168                                                         if (tpr->pr_uref > 0) {
 1169                                                                 pr = tpr;
 1170                                                                 break;
 1171                                                         }
 1172                                                         deadpr = tpr;
 1173                                                 }
 1174                                                 mtx_unlock(&tpr->pr_mtx);
 1175                                         } else if (tpr->pr_uref > 0) {
 1176                                                 /*
 1177                                                  * Create, or update(jid):
 1178                                                  * name must not exist in an
 1179                                                  * active sibling jail.
 1180                                                  */
 1181                                                 error = EEXIST;
 1182                                                 if (pr != NULL)
 1183                                                         mtx_unlock(&pr->pr_mtx);
 1184                                                 vfs_opterror(opts,
 1185                                                    "jail \"%s\" already exists",
 1186                                                    name);
 1187                                                 goto done_unlock_list;
 1188                                         }
 1189                                 }
 1190                         }
 1191                         /* If no active jail is found, use a dying one. */
 1192                         if (deadpr != NULL && pr == NULL) {
 1193                                 if (flags & JAIL_DYING) {
 1194                                         mtx_lock(&deadpr->pr_mtx);
 1195                                         if (deadpr->pr_ref == 0) {
 1196                                                 mtx_unlock(&deadpr->pr_mtx);
 1197                                                 goto name_again;
 1198                                         }
 1199                                         pr = deadpr;
 1200                                 } else if (cuflags == JAIL_UPDATE) {
 1201                                         error = ENOENT;
 1202                                         vfs_opterror(opts,
 1203                                             "jail \"%s\" is dying", name);
 1204                                         goto done_unlock_list;
 1205                                 }
 1206                         }
 1207                         /* Update: name must exist if no jid. */
 1208                         else if (cuflags == JAIL_UPDATE && pr == NULL) {
 1209                                 error = ENOENT;
 1210                                 vfs_opterror(opts, "jail \"%s\" not found",
 1211                                     name);
 1212                                 goto done_unlock_list;
 1213                         }
 1214                 }
 1215         }
 1216         /* Update: must provide a jid or name. */
 1217         else if (cuflags == JAIL_UPDATE && pr == NULL) {
 1218                 error = ENOENT;
 1219                 vfs_opterror(opts, "update specified no jail");
 1220                 goto done_unlock_list;
 1221         }
 1222 
 1223         /* If there's no prison to update, create a new one and link it in. */
 1224         if (pr == NULL) {
 1225                 for (tpr = mypr; tpr != NULL; tpr = tpr->pr_parent)
 1226                         if (tpr->pr_childcount >= tpr->pr_childmax) {
 1227                                 error = EPERM;
 1228                                 vfs_opterror(opts, "prison limit exceeded");
 1229                                 goto done_unlock_list;
 1230                         }
 1231                 created = 1;
 1232                 mtx_lock(&ppr->pr_mtx);
 1233                 if (ppr->pr_ref == 0 || (ppr->pr_flags & PR_REMOVE)) {
 1234                         mtx_unlock(&ppr->pr_mtx);
 1235                         error = ENOENT;
 1236                         vfs_opterror(opts, "parent jail went away!");
 1237                         goto done_unlock_list;
 1238                 }
 1239                 ppr->pr_ref++;
 1240                 ppr->pr_uref++;
 1241                 mtx_unlock(&ppr->pr_mtx);
 1242                 pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
 1243                 if (jid == 0) {
 1244                         /* Find the next free jid. */
 1245                         jid = lastprid + 1;
 1246  findnext:
 1247                         if (jid == JAIL_MAX)
 1248                                 jid = 1;
 1249                         TAILQ_FOREACH(tpr, &allprison, pr_list) {
 1250                                 if (tpr->pr_id < jid)
 1251                                         continue;
 1252                                 if (tpr->pr_id > jid || tpr->pr_ref == 0) {
 1253                                         TAILQ_INSERT_BEFORE(tpr, pr, pr_list);
 1254                                         break;
 1255                                 }
 1256                                 if (jid == lastprid) {
 1257                                         error = EAGAIN;
 1258                                         vfs_opterror(opts,
 1259                                             "no available jail IDs");
 1260                                         free(pr, M_PRISON);
 1261                                         prison_deref(ppr, PD_DEREF |
 1262                                             PD_DEUREF | PD_LIST_XLOCKED);
 1263                                         goto done_releroot;
 1264                                 }
 1265                                 jid++;
 1266                                 goto findnext;
 1267                         }
 1268                         lastprid = jid;
 1269                 } else {
 1270                         /*
 1271                          * The jail already has a jid (that did not yet exist),
 1272                          * so just find where to insert it.
 1273                          */
 1274                         TAILQ_FOREACH(tpr, &allprison, pr_list)
 1275                                 if (tpr->pr_id >= jid) {
 1276                                         TAILQ_INSERT_BEFORE(tpr, pr, pr_list);
 1277                                         break;
 1278                                 }
 1279                 }
 1280                 if (tpr == NULL)
 1281                         TAILQ_INSERT_TAIL(&allprison, pr, pr_list);
 1282                 LIST_INSERT_HEAD(&ppr->pr_children, pr, pr_sibling);
 1283                 for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent)
 1284                         tpr->pr_childcount++;
 1285 
 1286                 pr->pr_parent = ppr;
 1287                 pr->pr_id = jid;
 1288 
 1289                 /* Set some default values, and inherit some from the parent. */
 1290                 if (name == NULL)
 1291                         name = "";
 1292                 if (path == NULL) {
 1293                         path = "/";
 1294                         root = mypr->pr_root;
 1295                         vref(root);
 1296                 }
 1297                 strlcpy(pr->pr_hostuuid, DEFAULT_HOSTUUID, HOSTUUIDLEN);
 1298                 pr->pr_flags |= PR_HOST;
 1299 #if defined(INET) || defined(INET6)
 1300 #ifdef VIMAGE
 1301                 if (!(pr_flags & PR_VNET))
 1302 #endif
 1303                 {
 1304 #ifdef INET
 1305                         if (!(ch_flags & PR_IP4_USER))
 1306                                 pr->pr_flags |=
 1307                                     PR_IP4 | PR_IP4_USER | PR_IP4_DISABLE;
 1308                         else if (!(pr_flags & PR_IP4_USER)) {
 1309                                 pr->pr_flags |= ppr->pr_flags & PR_IP4;
 1310                                 if (ppr->pr_ip4 != NULL) {
 1311                                         pr->pr_ip4s = ppr->pr_ip4s;
 1312                                         pr->pr_ip4 = malloc(pr->pr_ip4s *
 1313                                             sizeof(struct in_addr), M_PRISON,
 1314                                             M_WAITOK);
 1315                                         bcopy(ppr->pr_ip4, pr->pr_ip4,
 1316                                             pr->pr_ip4s * sizeof(*pr->pr_ip4));
 1317                                 }
 1318                         }
 1319 #endif
 1320 #ifdef INET6
 1321                         if (!(ch_flags & PR_IP6_USER))
 1322                                 pr->pr_flags |=
 1323                                     PR_IP6 | PR_IP6_USER | PR_IP6_DISABLE;
 1324                         else if (!(pr_flags & PR_IP6_USER)) {
 1325                                 pr->pr_flags |= ppr->pr_flags & PR_IP6;
 1326                                 if (ppr->pr_ip6 != NULL) {
 1327                                         pr->pr_ip6s = ppr->pr_ip6s;
 1328                                         pr->pr_ip6 = malloc(pr->pr_ip6s *
 1329                                             sizeof(struct in6_addr), M_PRISON,
 1330                                             M_WAITOK);
 1331                                         bcopy(ppr->pr_ip6, pr->pr_ip6,
 1332                                             pr->pr_ip6s * sizeof(*pr->pr_ip6));
 1333                                 }
 1334                         }
 1335 #endif
 1336                 }
 1337 #endif
 1338                 /* Source address selection is always on by default. */
 1339                 pr->pr_flags |= _PR_IP_SADDRSEL;
 1340 
 1341                 pr->pr_securelevel = ppr->pr_securelevel;
 1342                 pr->pr_allow = JAIL_DEFAULT_ALLOW & ppr->pr_allow;
 1343                 pr->pr_enforce_statfs = JAIL_DEFAULT_ENFORCE_STATFS;
 1344                 pr->pr_devfs_rsnum = ppr->pr_devfs_rsnum;
 1345 
 1346                 pr->pr_osreldate = osreldt ? osreldt : ppr->pr_osreldate;
 1347                 if (osrelstr == NULL)
 1348                     strcpy(pr->pr_osrelease, ppr->pr_osrelease);
 1349                 else
 1350                     strcpy(pr->pr_osrelease, osrelstr);
 1351 
 1352                 LIST_INIT(&pr->pr_children);
 1353                 mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK);
 1354 
 1355 #ifdef VIMAGE
 1356                 /* Allocate a new vnet if specified. */
 1357                 pr->pr_vnet = (pr_flags & PR_VNET)
 1358                     ? vnet_alloc() : ppr->pr_vnet;
 1359 #endif
 1360                 /*
 1361                  * Allocate a dedicated cpuset for each jail.
  1362                  * Unlike other initial settings, this may return an error.
 1363                  */
 1364                 error = cpuset_create_root(ppr, &pr->pr_cpuset);
 1365                 if (error) {
 1366                         prison_deref(pr, PD_LIST_XLOCKED);
 1367                         goto done_releroot;
 1368                 }
 1369 
 1370                 mtx_lock(&pr->pr_mtx);
 1371                 /*
 1372                  * New prisons do not yet have a reference, because we do not
  1373                  * want others to see the incomplete prison once the
 1374                  * allprison_lock is downgraded.
 1375                  */
 1376         } else {
 1377                 created = 0;
 1378                 /*
 1379                  * Grab a reference for existing prisons, to ensure they
 1380                  * continue to exist for the duration of the call.
 1381                  */
 1382                 pr->pr_ref++;
 1383 #if defined(VIMAGE) && (defined(INET) || defined(INET6))
 1384                 if ((pr->pr_flags & PR_VNET) &&
 1385                     (ch_flags & (PR_IP4_USER | PR_IP6_USER))) {
 1386                         error = EINVAL;
 1387                         vfs_opterror(opts,
 1388                             "vnet jails cannot have IP address restrictions");
 1389                         goto done_deref_locked;
 1390                 }
 1391 #endif
 1392 #ifdef INET
 1393                 if (PR_IP4_USER & ch_flags & (pr_flags ^ pr->pr_flags)) {
 1394                         error = EINVAL;
 1395                         vfs_opterror(opts,
 1396                             "ip4 cannot be changed after creation");
 1397                         goto done_deref_locked;
 1398                 }
 1399 #endif
 1400 #ifdef INET6
 1401                 if (PR_IP6_USER & ch_flags & (pr_flags ^ pr->pr_flags)) {
 1402                         error = EINVAL;
 1403                         vfs_opterror(opts,
 1404                             "ip6 cannot be changed after creation");
 1405                         goto done_deref_locked;
 1406                 }
 1407 #endif
 1408         }
 1409 
 1410         /* Do final error checking before setting anything. */
 1411         if (gotslevel) {
 1412                 if (slevel < ppr->pr_securelevel) {
 1413                         error = EPERM;
 1414                         goto done_deref_locked;
 1415                 }
 1416         }
 1417         if (gotchildmax) {
 1418                 if (childmax >= ppr->pr_childmax) {
 1419                         error = EPERM;
 1420                         goto done_deref_locked;
 1421                 }
 1422         }
 1423         if (gotenforce) {
 1424                 if (enforce < ppr->pr_enforce_statfs) {
 1425                         error = EPERM;
 1426                         goto done_deref_locked;
 1427                 }
 1428         }
 1429         if (gotrsnum) {
 1430                 /*
 1431                  * devfs_rsnum is a uint16_t
 1432                  */
 1433                 if (rsnum < 0 || rsnum > 65535) {
 1434                         error = EINVAL;
 1435                         goto done_deref_locked;
 1436                 }
 1437                 /*
 1438                  * Nested jails always inherit parent's devfs ruleset
 1439                  */
 1440                 if (jailed(td->td_ucred)) {
 1441                         if (rsnum > 0 && rsnum != ppr->pr_devfs_rsnum) {
 1442                                 error = EPERM;
 1443                                 goto done_deref_locked;
 1444                         } else
 1445                                 rsnum = ppr->pr_devfs_rsnum;
 1446                 }
 1447         }
 1448 #ifdef INET
 1449         if (ip4s > 0) {
 1450                 if (ppr->pr_flags & PR_IP4) {
 1451                         /*
 1452                          * Make sure the new set of IP addresses is a
 1453                          * subset of the parent's list.  Don't worry
 1454                          * about the parent being unlocked, as any
 1455                          * setting is done with allprison_lock held.
 1456                          */
 1457                         for (ij = 0; ij < ppr->pr_ip4s; ij++)
 1458                                 if (ip4[0].s_addr == ppr->pr_ip4[ij].s_addr)
 1459                                         break;
 1460                         if (ij == ppr->pr_ip4s) {
 1461                                 error = EPERM;
 1462                                 goto done_deref_locked;
 1463                         }
 1464                         if (ip4s > 1) {
 1465                                 for (ii = ij = 1; ii < ip4s; ii++) {
 1466                                         if (ip4[ii].s_addr ==
 1467                                             ppr->pr_ip4[0].s_addr)
 1468                                                 continue;
 1469                                         for (; ij < ppr->pr_ip4s; ij++)
 1470                                                 if (ip4[ii].s_addr ==
 1471                                                     ppr->pr_ip4[ij].s_addr)
 1472                                                         break;
 1473                                         if (ij == ppr->pr_ip4s)
 1474                                                 break;
 1475                                 }
 1476                                 if (ij == ppr->pr_ip4s) {
 1477                                         error = EPERM;
 1478                                         goto done_deref_locked;
 1479                                 }
 1480                         }
 1481                 }
 1482                 /*
 1483                  * Check for conflicting IP addresses.  We permit them
 1484                  * if there is no more than one IP on each jail.  If
 1485                  * there is a duplicate on a jail with more than one
 1486                  * IP stop checking and return error.
 1487                  */
 1488                 tppr = ppr;
 1489 #ifdef VIMAGE
 1490                 for (; tppr != &prison0; tppr = tppr->pr_parent)
 1491                         if (tppr->pr_flags & PR_VNET)
 1492                                 break;
 1493 #endif
 1494                 FOREACH_PRISON_DESCENDANT(tppr, tpr, descend) {
 1495                         if (tpr == pr ||
 1496 #ifdef VIMAGE
 1497                             (tpr != tppr && (tpr->pr_flags & PR_VNET)) ||
 1498 #endif
 1499                             tpr->pr_uref == 0) {
 1500                                 descend = 0;
 1501                                 continue;
 1502                         }
 1503                         if (!(tpr->pr_flags & PR_IP4_USER))
 1504                                 continue;
 1505                         descend = 0;
 1506                         if (tpr->pr_ip4 == NULL ||
 1507                             (ip4s == 1 && tpr->pr_ip4s == 1))
 1508                                 continue;
 1509                         for (ii = 0; ii < ip4s; ii++) {
 1510                                 if (_prison_check_ip4(tpr, &ip4[ii]) == 0) {
 1511                                         error = EADDRINUSE;
 1512                                         vfs_opterror(opts,
 1513                                             "IPv4 addresses clash");
 1514                                         goto done_deref_locked;
 1515                                 }
 1516                         }
 1517                 }
 1518         }
 1519 #endif
 1520 #ifdef INET6
 1521         if (ip6s > 0) {
 1522                 if (ppr->pr_flags & PR_IP6) {
 1523                         /*
 1524                          * Make sure the new set of IP addresses is a
 1525                          * subset of the parent's list.
 1526                          */
 1527                         for (ij = 0; ij < ppr->pr_ip6s; ij++)
 1528                                 if (IN6_ARE_ADDR_EQUAL(&ip6[0],
 1529                                     &ppr->pr_ip6[ij]))
 1530                                         break;
 1531                         if (ij == ppr->pr_ip6s) {
 1532                                 error = EPERM;
 1533                                 goto done_deref_locked;
 1534                         }
 1535                         if (ip6s > 1) {
 1536                                 for (ii = ij = 1; ii < ip6s; ii++) {
 1537                                         if (IN6_ARE_ADDR_EQUAL(&ip6[ii],
 1538                                              &ppr->pr_ip6[0]))
 1539                                                 continue;
 1540                                         for (; ij < ppr->pr_ip6s; ij++)
 1541                                                 if (IN6_ARE_ADDR_EQUAL(
 1542                                                     &ip6[ii], &ppr->pr_ip6[ij]))
 1543                                                         break;
 1544                                         if (ij == ppr->pr_ip6s)
 1545                                                 break;
 1546                                 }
 1547                                 if (ij == ppr->pr_ip6s) {
 1548                                         error = EPERM;
 1549                                         goto done_deref_locked;
 1550                                 }
 1551                         }
 1552                 }
 1553                 /* Check for conflicting IP addresses. */
 1554                 tppr = ppr;
 1555 #ifdef VIMAGE
 1556                 for (; tppr != &prison0; tppr = tppr->pr_parent)
 1557                         if (tppr->pr_flags & PR_VNET)
 1558                                 break;
 1559 #endif
 1560                 FOREACH_PRISON_DESCENDANT(tppr, tpr, descend) {
 1561                         if (tpr == pr ||
 1562 #ifdef VIMAGE
 1563                             (tpr != tppr && (tpr->pr_flags & PR_VNET)) ||
 1564 #endif
 1565                             tpr->pr_uref == 0) {
 1566                                 descend = 0;
 1567                                 continue;
 1568                         }
 1569                         if (!(tpr->pr_flags & PR_IP6_USER))
 1570                                 continue;
 1571                         descend = 0;
 1572                         if (tpr->pr_ip6 == NULL ||
 1573                             (ip6s == 1 && tpr->pr_ip6s == 1))
 1574                                 continue;
 1575                         for (ii = 0; ii < ip6s; ii++) {
 1576                                 if (_prison_check_ip6(tpr, &ip6[ii]) == 0) {
 1577                                         error = EADDRINUSE;
 1578                                         vfs_opterror(opts,
 1579                                             "IPv6 addresses clash");
 1580                                         goto done_deref_locked;
 1581                                 }
 1582                         }
 1583                 }
 1584         }
 1585 #endif
 1586         onamelen = namelen = 0;
 1587         if (name != NULL) {
 1588                 /* Give a default name of the jid. */
 1589                 if (name[0] == '\0')
 1590                         snprintf(name = numbuf, sizeof(numbuf), "%d", jid);
 1591                 else if (*namelc == '' || (strtoul(namelc, &p, 10) != jid &&
 1592                     *p == '\0')) {
 1593                         error = EINVAL;
 1594                         vfs_opterror(opts,
 1595                             "name cannot be numeric (unless it is the jid)");
 1596                         goto done_deref_locked;
 1597                 }
 1598                 /*
 1599                  * Make sure the name isn't too long for the prison or its
 1600                  * children.
 1601                  */
 1602                 onamelen = strlen(pr->pr_name);
 1603                 namelen = strlen(name);
 1604                 if (strlen(ppr->pr_name) + namelen + 2 > sizeof(pr->pr_name)) {
 1605                         error = ENAMETOOLONG;
 1606                         goto done_deref_locked;
 1607                 }
 1608                 FOREACH_PRISON_DESCENDANT(pr, tpr, descend) {
 1609                         if (strlen(tpr->pr_name) + (namelen - onamelen) >=
 1610                             sizeof(pr->pr_name)) {
 1611                                 error = ENAMETOOLONG;
 1612                                 goto done_deref_locked;
 1613                         }
 1614                 }
 1615         }
 1616         if (pr_allow & ~ppr->pr_allow) {
 1617                 error = EPERM;
 1618                 goto done_deref_locked;
 1619         }
 1620 
 1621         /* Set the parameters of the prison. */
 1622 #ifdef INET
 1623         redo_ip4 = 0;
 1624         if (pr_flags & PR_IP4_USER) {
 1625                 pr->pr_flags |= PR_IP4;
 1626                 free(pr->pr_ip4, M_PRISON);
 1627                 pr->pr_ip4s = ip4s;
 1628                 pr->pr_ip4 = ip4;
 1629                 ip4 = NULL;
 1630                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
 1631 #ifdef VIMAGE
 1632                         if (tpr->pr_flags & PR_VNET) {
 1633                                 descend = 0;
 1634                                 continue;
 1635                         }
 1636 #endif
 1637                         if (prison_restrict_ip4(tpr, NULL)) {
 1638                                 redo_ip4 = 1;
 1639                                 descend = 0;
 1640                         }
 1641                 }
 1642         }
 1643 #endif
 1644 #ifdef INET6
 1645         redo_ip6 = 0;
 1646         if (pr_flags & PR_IP6_USER) {
 1647                 pr->pr_flags |= PR_IP6;
 1648                 free(pr->pr_ip6, M_PRISON);
 1649                 pr->pr_ip6s = ip6s;
 1650                 pr->pr_ip6 = ip6;
 1651                 ip6 = NULL;
 1652                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
 1653 #ifdef VIMAGE
 1654                         if (tpr->pr_flags & PR_VNET) {
 1655                                 descend = 0;
 1656                                 continue;
 1657                         }
 1658 #endif
 1659                         if (prison_restrict_ip6(tpr, NULL)) {
 1660                                 redo_ip6 = 1;
 1661                                 descend = 0;
 1662                         }
 1663                 }
 1664         }
 1665 #endif
 1666         if (gotslevel) {
 1667                 pr->pr_securelevel = slevel;
 1668                 /* Set all child jails to be at least this level. */
 1669                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend)
 1670                         if (tpr->pr_securelevel < slevel)
 1671                                 tpr->pr_securelevel = slevel;
 1672         }
 1673         if (gotchildmax) {
 1674                 pr->pr_childmax = childmax;
 1675                 /* Set all child jails to under this limit. */
 1676                 FOREACH_PRISON_DESCENDANT_LOCKED_LEVEL(pr, tpr, descend, level)
 1677                         if (tpr->pr_childmax > childmax - level)
 1678                                 tpr->pr_childmax = childmax > level
 1679                                     ? childmax - level : 0;
 1680         }
 1681         if (gotenforce) {
 1682                 pr->pr_enforce_statfs = enforce;
 1683                 /* Pass this restriction on to the children. */
 1684                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend)
 1685                         if (tpr->pr_enforce_statfs < enforce)
 1686                                 tpr->pr_enforce_statfs = enforce;
 1687         }
 1688         if (gotrsnum) {
 1689                 pr->pr_devfs_rsnum = rsnum;
 1690                 /* Pass this restriction on to the children. */
 1691                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend)
 1692                         tpr->pr_devfs_rsnum = rsnum;
 1693         }
 1694         if (name != NULL) {
 1695                 if (ppr == &prison0)
 1696                         strlcpy(pr->pr_name, name, sizeof(pr->pr_name));
 1697                 else
 1698                         snprintf(pr->pr_name, sizeof(pr->pr_name), "%s.%s",
 1699                             ppr->pr_name, name);
 1700                 /* Change this component of child names. */
 1701                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
 1702                         bcopy(tpr->pr_name + onamelen, tpr->pr_name + namelen,
 1703                             strlen(tpr->pr_name + onamelen) + 1);
 1704                         bcopy(pr->pr_name, tpr->pr_name, namelen);
 1705                 }
 1706         }
 1707         if (path != NULL) {
 1708                 /* Try to keep a real-rooted full pathname. */
 1709                 if (fullpath_disabled && path[0] == '/' &&
 1710                     strcmp(mypr->pr_path, "/"))
 1711                         snprintf(pr->pr_path, sizeof(pr->pr_path), "%s%s",
 1712                             mypr->pr_path, path);
 1713                 else
 1714                         strlcpy(pr->pr_path, path, sizeof(pr->pr_path));
 1715                 pr->pr_root = root;
 1716         }
 1717         if (PR_HOST & ch_flags & ~pr_flags) {
 1718                 if (pr->pr_flags & PR_HOST) {
 1719                         /*
 1720                          * Copy the parent's host info.  As with pr_ip4 above,
 1721                          * the lack of a lock on the parent is not a problem;
 1722                          * it is always set with allprison_lock at least
 1723                          * shared, and is held exclusively here.
 1724                          */
 1725                         strlcpy(pr->pr_hostname, pr->pr_parent->pr_hostname,
 1726                             sizeof(pr->pr_hostname));
 1727                         strlcpy(pr->pr_domainname, pr->pr_parent->pr_domainname,
 1728                             sizeof(pr->pr_domainname));
 1729                         strlcpy(pr->pr_hostuuid, pr->pr_parent->pr_hostuuid,
 1730                             sizeof(pr->pr_hostuuid));
 1731                         pr->pr_hostid = pr->pr_parent->pr_hostid;
 1732                 }
 1733         } else if (host != NULL || domain != NULL || uuid != NULL || gothid) {
 1734                 /* Set this prison, and any descendants without PR_HOST. */
 1735                 if (host != NULL)
 1736                         strlcpy(pr->pr_hostname, host, sizeof(pr->pr_hostname));
 1737                 if (domain != NULL)
 1738                         strlcpy(pr->pr_domainname, domain, 
 1739                             sizeof(pr->pr_domainname));
 1740                 if (uuid != NULL)
 1741                         strlcpy(pr->pr_hostuuid, uuid, sizeof(pr->pr_hostuuid));
 1742                 if (gothid)
 1743                         pr->pr_hostid = hid;
 1744                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
 1745                         if (tpr->pr_flags & PR_HOST)
 1746                                 descend = 0;
 1747                         else {
 1748                                 if (host != NULL)
 1749                                         strlcpy(tpr->pr_hostname,
 1750                                             pr->pr_hostname,
 1751                                             sizeof(tpr->pr_hostname));
 1752                                 if (domain != NULL)
 1753                                         strlcpy(tpr->pr_domainname, 
 1754                                             pr->pr_domainname,
 1755                                             sizeof(tpr->pr_domainname));
 1756                                 if (uuid != NULL)
 1757                                         strlcpy(tpr->pr_hostuuid,
 1758                                             pr->pr_hostuuid,
 1759                                             sizeof(tpr->pr_hostuuid));
 1760                                 if (gothid)
 1761                                         tpr->pr_hostid = hid;
 1762                         }
 1763                 }
 1764         }
 1765         if ((tallow = ch_allow & ~pr_allow)) {
 1766                 /* Clear allow bits in all children. */
 1767                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend)
 1768                         tpr->pr_allow &= ~tallow;
 1769         }
 1770         pr->pr_allow = (pr->pr_allow & ~ch_allow) | pr_allow;
 1771         /*
 1772          * Persistent prisons get an extra reference, and prisons losing their
 1773          * persist flag lose that reference.  Only do this for existing prisons
 1774          * for now, so new ones will remain unseen until after the module
 1775          * handlers have completed.
 1776          */
 1777         if (!created && (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags))) {
 1778                 if (pr_flags & PR_PERSIST) {
 1779                         pr->pr_ref++;
 1780                         pr->pr_uref++;
 1781                 } else {
 1782                         pr->pr_ref--;
 1783                         pr->pr_uref--;
 1784                 }
 1785         }
 1786         pr->pr_flags = (pr->pr_flags & ~ch_flags) | pr_flags;
 1787         mtx_unlock(&pr->pr_mtx);
 1788 
 1789 #ifdef RACCT
 1790         if (racct_enable && created)
 1791                 prison_racct_attach(pr);
 1792 #endif
 1793 
 1794         /* Locks may have prevented a complete restriction of child IP
 1795          * addresses.  If so, allocate some more memory and try again.
 1796          */
 1797 #ifdef INET
 1798         while (redo_ip4) {
 1799                 ip4s = pr->pr_ip4s;
 1800                 ip4 = malloc(ip4s * sizeof(*ip4), M_PRISON, M_WAITOK);
 1801                 mtx_lock(&pr->pr_mtx);
 1802                 redo_ip4 = 0;
 1803                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
 1804 #ifdef VIMAGE
 1805                         if (tpr->pr_flags & PR_VNET) {
 1806                                 descend = 0;
 1807                                 continue;
 1808                         }
 1809 #endif
 1810                         if (prison_restrict_ip4(tpr, ip4)) {
 1811                                 if (ip4 != NULL)
 1812                                         ip4 = NULL;
 1813                                 else
 1814                                         redo_ip4 = 1;
 1815                         }
 1816                 }
 1817                 mtx_unlock(&pr->pr_mtx);
 1818         }
 1819 #endif
 1820 #ifdef INET6
 1821         while (redo_ip6) {
 1822                 ip6s = pr->pr_ip6s;
 1823                 ip6 = malloc(ip6s * sizeof(*ip6), M_PRISON, M_WAITOK);
 1824                 mtx_lock(&pr->pr_mtx);
 1825                 redo_ip6 = 0;
 1826                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
 1827 #ifdef VIMAGE
 1828                         if (tpr->pr_flags & PR_VNET) {
 1829                                 descend = 0;
 1830                                 continue;
 1831                         }
 1832 #endif
 1833                         if (prison_restrict_ip6(tpr, ip6)) {
 1834                                 if (ip6 != NULL)
 1835                                         ip6 = NULL;
 1836                                 else
 1837                                         redo_ip6 = 1;
 1838                         }
 1839                 }
 1840                 mtx_unlock(&pr->pr_mtx);
 1841         }
 1842 #endif
 1843 
 1844         /* Let the modules do their work. */
 1845         sx_downgrade(&allprison_lock);
 1846         if (created) {
 1847                 error = osd_jail_call(pr, PR_METHOD_CREATE, opts);
 1848                 if (error) {
 1849                         prison_deref(pr, PD_LIST_SLOCKED);
 1850                         goto done_errmsg;
 1851                 }
 1852         }
 1853         error = osd_jail_call(pr, PR_METHOD_SET, opts);
 1854         if (error) {
 1855                 prison_deref(pr, created
 1856                     ? PD_LIST_SLOCKED
 1857                     : PD_DEREF | PD_LIST_SLOCKED);
 1858                 goto done_errmsg;
 1859         }
 1860 
 1861         /* Attach this process to the prison if requested. */
 1862         if (flags & JAIL_ATTACH) {
 1863                 mtx_lock(&pr->pr_mtx);
 1864                 error = do_jail_attach(td, pr);
 1865                 if (error) {
 1866                         vfs_opterror(opts, "attach failed");
 1867                         if (!created)
 1868                                 prison_deref(pr, PD_DEREF);
 1869                         goto done_errmsg;
 1870                 }
 1871         }
 1872 
 1873 #ifdef RACCT
 1874         if (racct_enable && !created) {
 1875                 if (!(flags & JAIL_ATTACH))
 1876                         sx_sunlock(&allprison_lock);
 1877                 prison_racct_modify(pr);
 1878                 if (!(flags & JAIL_ATTACH))
 1879                         sx_slock(&allprison_lock);
 1880         }
 1881 #endif
 1882 
 1883         td->td_retval[0] = pr->pr_id;
 1884 
 1885         /*
 1886          * Now that it is all there, drop the temporary reference from existing
 1887          * prisons.  Or add a reference to newly created persistent prisons
 1888          * (which was not done earlier so that the prison would not be publicly
 1889          * visible).
 1890          */
 1891         if (!created) {
 1892                 prison_deref(pr, (flags & JAIL_ATTACH)
 1893                     ? PD_DEREF
 1894                     : PD_DEREF | PD_LIST_SLOCKED);
 1895         } else {
 1896                 if (pr_flags & PR_PERSIST) {
 1897                         mtx_lock(&pr->pr_mtx);
 1898                         pr->pr_ref++;
 1899                         pr->pr_uref++;
 1900                         mtx_unlock(&pr->pr_mtx);
 1901                 }
 1902                 if (!(flags & JAIL_ATTACH))
 1903                         sx_sunlock(&allprison_lock);
 1904         }
 1905 
 1906         goto done_errmsg;
 1907 
 1908  done_deref_locked:
 1909         prison_deref(pr, created
 1910             ? PD_LOCKED | PD_LIST_XLOCKED
 1911             : PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED);
 1912         goto done_releroot;
 1913  done_unlock_list:
 1914         sx_xunlock(&allprison_lock);
 1915  done_releroot:
 1916         if (root != NULL)
 1917                 vrele(root);
 1918  done_errmsg:
 1919         if (error) {
 1920                 vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len);
 1921                 if (errmsg_len > 0) {
 1922                         errmsg_pos = 2 * vfs_getopt_pos(opts, "errmsg") + 1;
 1923                         if (errmsg_pos > 0) {
 1924                                 if (optuio->uio_segflg == UIO_SYSSPACE)
 1925                                         bcopy(errmsg,
 1926                                            optuio->uio_iov[errmsg_pos].iov_base,
 1927                                            errmsg_len);
 1928                                 else
 1929                                         copyout(errmsg,
 1930                                            optuio->uio_iov[errmsg_pos].iov_base,
 1931                                            errmsg_len);
 1932                         }
 1933                 }
 1934         }
 1935  done_free:
 1936 #ifdef INET
 1937         free(ip4, M_PRISON);
 1938 #endif
 1939 #ifdef INET6
 1940         free(ip6, M_PRISON);
 1941 #endif
 1942         if (g_path != NULL)
 1943                 free(g_path, M_TEMP);
 1944         vfs_freeopts(opts);
 1945         return (error);
 1946 }
 1947 
 1948 
 1949 /*
 1950  * Copy in the caller's iovec array, hand it to kern_jail_get(), and
 1951  * copy the iovec array back out to the caller if that call succeeds.
 1952  * struct jail_get_args {
 1953  *      struct iovec *iovp; unsigned int iovcnt; int flags;
 1954  * };
 1955  */
 1956 int
 1957 sys_jail_get(struct thread *td, struct jail_get_args *uap)
 1958 {
 1959         struct uio *optuio;
 1960         int rc;
 1961 
 1962         /* An odd iovec count is rejected before anything is copied in. */
 1963         if ((uap->iovcnt & 1) != 0)
 1964                 return (EINVAL);
 1965 
 1966         rc = copyinuio(uap->iovp, uap->iovcnt, &optuio);
 1967         if (rc == 0) {
 1968                 rc = kern_jail_get(td, optuio, uap->flags);
 1969                 if (rc == 0)
 1970                         rc = copyout(optuio->uio_iov, uap->iovp,
 1971                             uap->iovcnt * sizeof(struct iovec));
 1972                 free(optuio, M_IOV);
 1973         }
 1974         return (rc);
 1975 }
 1976 
 1977 int
 1978 kern_jail_get(struct thread *td, struct uio *optuio, int flags)
 1979 {
 1980         struct prison *pr, *mypr;
 1981         struct vfsopt *opt;
 1982         struct vfsoptlist *opts;
 1983         char *errmsg, *name;
 1984         int error, errmsg_len, errmsg_pos, fi, i, jid, len, locked, pos;
 1985 
 1986         if (flags & ~JAIL_GET_MASK)
 1987                 return (EINVAL);
 1988 
 1989         /* Get the parameter list. */
 1990         error = vfs_buildopts(optuio, &opts);
 1991         if (error)
 1992                 return (error);
 1993         errmsg_pos = vfs_getopt_pos(opts, "errmsg");
 1994         mypr = td->td_ucred->cr_prison;
 1995 
 1996         /*
 1997          * Find the prison specified by one of: lastjid, jid, name.
 1998          */
 1999         sx_slock(&allprison_lock);
 2000         error = vfs_copyopt(opts, "lastjid", &jid, sizeof(jid));
 2001         if (error == 0) {
 2002                 TAILQ_FOREACH(pr, &allprison, pr_list) {
 2003                         if (pr->pr_id > jid && prison_ischild(mypr, pr)) {
 2004                                 mtx_lock(&pr->pr_mtx);
 2005                                 if (pr->pr_ref > 0 &&
 2006                                     (pr->pr_uref > 0 || (flags & JAIL_DYING)))
 2007                                         break;
 2008                                 mtx_unlock(&pr->pr_mtx);
 2009                         }
 2010                 }
 2011                 if (pr != NULL)
 2012                         goto found_prison;
 2013                 error = ENOENT;
 2014                 vfs_opterror(opts, "no jail after %d", jid);
 2015                 goto done_unlock_list;
 2016         } else if (error != ENOENT)
 2017                 goto done_unlock_list;
 2018 
 2019         error = vfs_copyopt(opts, "jid", &jid, sizeof(jid));
 2020         if (error == 0) {
 2021                 if (jid != 0) {
 2022                         pr = prison_find_child(mypr, jid);
 2023                         if (pr != NULL) {
 2024                                 if (pr->pr_uref == 0 && !(flags & JAIL_DYING)) {
 2025                                         mtx_unlock(&pr->pr_mtx);
 2026                                         error = ENOENT;
 2027                                         vfs_opterror(opts, "jail %d is dying",
 2028                                             jid);
 2029                                         goto done_unlock_list;
 2030                                 }
 2031                                 goto found_prison;
 2032                         }
 2033                         error = ENOENT;
 2034                         vfs_opterror(opts, "jail %d not found", jid);
 2035                         goto done_unlock_list;
 2036                 }
 2037         } else if (error != ENOENT)
 2038                 goto done_unlock_list;
 2039 
 2040         error = vfs_getopt(opts, "name", (void **)&name, &len);
 2041         if (error == 0) {
 2042                 if (len == 0 || name[len - 1] != '\0') {
 2043                         error = EINVAL;
 2044                         goto done_unlock_list;
 2045                 }
 2046                 pr = prison_find_name(mypr, name);
 2047                 if (pr != NULL) {
 2048                         if (pr->pr_uref == 0 && !(flags & JAIL_DYING)) {
 2049                                 mtx_unlock(&pr->pr_mtx);
 2050                                 error = ENOENT;
 2051                                 vfs_opterror(opts, "jail \"%s\" is dying",
 2052                                     name);
 2053                                 goto done_unlock_list;
 2054                         }
 2055                         goto found_prison;
 2056                 }
 2057                 error = ENOENT;
 2058                 vfs_opterror(opts, "jail \"%s\" not found", name);
 2059                 goto done_unlock_list;
 2060         } else if (error != ENOENT)
 2061                 goto done_unlock_list;
 2062 
 2063         vfs_opterror(opts, "no jail specified");
 2064         error = ENOENT;
 2065         goto done_unlock_list;
 2066 
 2067  found_prison:
 2068         /* Get the parameters of the prison. */
 2069         pr->pr_ref++;
 2070         locked = PD_LOCKED;
 2071         td->td_retval[0] = pr->pr_id;
 2072         error = vfs_setopt(opts, "jid", &pr->pr_id, sizeof(pr->pr_id));
 2073         if (error != 0 && error != ENOENT)
 2074                 goto done_deref;
 2075         i = (pr->pr_parent == mypr) ? 0 : pr->pr_parent->pr_id;
 2076         error = vfs_setopt(opts, "parent", &i, sizeof(i));
 2077         if (error != 0 && error != ENOENT)
 2078                 goto done_deref;
 2079         error = vfs_setopts(opts, "name", prison_name(mypr, pr));
 2080         if (error != 0 && error != ENOENT)
 2081                 goto done_deref;
 2082         error = vfs_setopt(opts, "cpuset.id", &pr->pr_cpuset->cs_id,
 2083             sizeof(pr->pr_cpuset->cs_id));
 2084         if (error != 0 && error != ENOENT)
 2085                 goto done_deref;
 2086         error = vfs_setopts(opts, "path", prison_path(mypr, pr));
 2087         if (error != 0 && error != ENOENT)
 2088                 goto done_deref;
 2089 #ifdef INET
 2090         error = vfs_setopt_part(opts, "ip4.addr", pr->pr_ip4,
 2091             pr->pr_ip4s * sizeof(*pr->pr_ip4));
 2092         if (error != 0 && error != ENOENT)
 2093                 goto done_deref;
 2094 #endif
 2095 #ifdef INET6
 2096         error = vfs_setopt_part(opts, "ip6.addr", pr->pr_ip6,
 2097             pr->pr_ip6s * sizeof(*pr->pr_ip6));
 2098         if (error != 0 && error != ENOENT)
 2099                 goto done_deref;
 2100 #endif
 2101         error = vfs_setopt(opts, "securelevel", &pr->pr_securelevel,
 2102             sizeof(pr->pr_securelevel));
 2103         if (error != 0 && error != ENOENT)
 2104                 goto done_deref;
 2105         error = vfs_setopt(opts, "children.cur", &pr->pr_childcount,
 2106             sizeof(pr->pr_childcount));
 2107         if (error != 0 && error != ENOENT)
 2108                 goto done_deref;
 2109         error = vfs_setopt(opts, "children.max", &pr->pr_childmax,
 2110             sizeof(pr->pr_childmax));
 2111         if (error != 0 && error != ENOENT)
 2112                 goto done_deref;
 2113         error = vfs_setopts(opts, "host.hostname", pr->pr_hostname);
 2114         if (error != 0 && error != ENOENT)
 2115                 goto done_deref;
 2116         error = vfs_setopts(opts, "host.domainname", pr->pr_domainname);
 2117         if (error != 0 && error != ENOENT)
 2118                 goto done_deref;
 2119         error = vfs_setopts(opts, "host.hostuuid", pr->pr_hostuuid);
 2120         if (error != 0 && error != ENOENT)
 2121                 goto done_deref;
 2122 #ifdef COMPAT_FREEBSD32
 2123         if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
 2124                 uint32_t hid32 = pr->pr_hostid;
 2125 
 2126                 error = vfs_setopt(opts, "host.hostid", &hid32, sizeof(hid32));
 2127         } else
 2128 #endif
 2129         error = vfs_setopt(opts, "host.hostid", &pr->pr_hostid,
 2130             sizeof(pr->pr_hostid));
 2131         if (error != 0 && error != ENOENT)
 2132                 goto done_deref;
 2133         error = vfs_setopt(opts, "enforce_statfs", &pr->pr_enforce_statfs,
 2134             sizeof(pr->pr_enforce_statfs));
 2135         if (error != 0 && error != ENOENT)
 2136                 goto done_deref;
 2137         error = vfs_setopt(opts, "devfs_ruleset", &pr->pr_devfs_rsnum,
 2138             sizeof(pr->pr_devfs_rsnum));
 2139         if (error != 0 && error != ENOENT)
 2140                 goto done_deref;
 2141         for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]);
 2142             fi++) {
 2143                 if (pr_flag_names[fi] == NULL)
 2144                         continue;
 2145                 i = (pr->pr_flags & (1 << fi)) ? 1 : 0;
 2146                 error = vfs_setopt(opts, pr_flag_names[fi], &i, sizeof(i));
 2147                 if (error != 0 && error != ENOENT)
 2148                         goto done_deref;
 2149                 i = !i;
 2150                 error = vfs_setopt(opts, pr_flag_nonames[fi], &i, sizeof(i));
 2151                 if (error != 0 && error != ENOENT)
 2152                         goto done_deref;
 2153         }
 2154         for (fi = 0; fi < sizeof(pr_flag_jailsys) / sizeof(pr_flag_jailsys[0]);
 2155             fi++) {
 2156                 i = pr->pr_flags &
 2157                     (pr_flag_jailsys[fi].disable | pr_flag_jailsys[fi].new);
 2158                 i = pr_flag_jailsys[fi].disable &&
 2159                       (i == pr_flag_jailsys[fi].disable) ? JAIL_SYS_DISABLE
 2160                     : (i == pr_flag_jailsys[fi].new) ? JAIL_SYS_NEW
 2161                     : JAIL_SYS_INHERIT;
 2162                 error =
 2163                     vfs_setopt(opts, pr_flag_jailsys[fi].name, &i, sizeof(i));
 2164                 if (error != 0 && error != ENOENT)
 2165                         goto done_deref;
 2166         }
 2167         for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]);
 2168             fi++) {
 2169                 if (pr_allow_names[fi] == NULL)
 2170                         continue;
 2171                 i = (pr->pr_allow & (1 << fi)) ? 1 : 0;
 2172                 error = vfs_setopt(opts, pr_allow_names[fi], &i, sizeof(i));
 2173                 if (error != 0 && error != ENOENT)
 2174                         goto done_deref;
 2175                 i = !i;
 2176                 error = vfs_setopt(opts, pr_allow_nonames[fi], &i, sizeof(i));
 2177                 if (error != 0 && error != ENOENT)
 2178                         goto done_deref;
 2179         }
 2180         i = (pr->pr_uref == 0);
 2181         error = vfs_setopt(opts, "dying", &i, sizeof(i));
 2182         if (error != 0 && error != ENOENT)
 2183                 goto done_deref;
 2184         i = !i;
 2185         error = vfs_setopt(opts, "nodying", &i, sizeof(i));
 2186         if (error != 0 && error != ENOENT)
 2187                 goto done_deref;
 2188         error = vfs_setopt(opts, "osreldate", &pr->pr_osreldate,
 2189             sizeof(pr->pr_osreldate));
 2190         if (error != 0 && error != ENOENT)
 2191                 goto done_deref;
 2192         error = vfs_setopts(opts, "osrelease", pr->pr_osrelease);
 2193         if (error != 0 && error != ENOENT)
 2194                 goto done_deref;
 2195 
 2196         /* Get the module parameters. */
 2197         mtx_unlock(&pr->pr_mtx);
 2198         locked = 0;
 2199         error = osd_jail_call(pr, PR_METHOD_GET, opts);
 2200         if (error)
 2201                 goto done_deref;
 2202         prison_deref(pr, PD_DEREF | PD_LIST_SLOCKED);
 2203 
 2204         /* By now, all parameters should have been noted. */
 2205         TAILQ_FOREACH(opt, opts, link) {
 2206                 if (!opt->seen && strcmp(opt->name, "errmsg")) {
 2207                         error = EINVAL;
 2208                         vfs_opterror(opts, "unknown parameter: %s", opt->name);
 2209                         goto done_errmsg;
 2210                 }
 2211         }
 2212 
 2213         /* Write the fetched parameters back to userspace. */
 2214         error = 0;
 2215         TAILQ_FOREACH(opt, opts, link) {
 2216                 if (opt->pos >= 0 && opt->pos != errmsg_pos) {
 2217                         pos = 2 * opt->pos + 1;
 2218                         optuio->uio_iov[pos].iov_len = opt->len;
 2219                         if (opt->value != NULL) {
 2220                                 if (optuio->uio_segflg == UIO_SYSSPACE) {
 2221                                         bcopy(opt->value,
 2222                                             optuio->uio_iov[pos].iov_base,
 2223                                             opt->len);
 2224                                 } else {
 2225                                         error = copyout(opt->value,
 2226                                             optuio->uio_iov[pos].iov_base,
 2227                                             opt->len);
 2228                                         if (error)
 2229                                                 break;
 2230                                 }
 2231                         }
 2232                 }
 2233         }
 2234         goto done_errmsg;
 2235 
 2236  done_deref:
 2237         prison_deref(pr, locked | PD_DEREF | PD_LIST_SLOCKED);
 2238         goto done_errmsg;
 2239 
 2240  done_unlock_list:
 2241         sx_sunlock(&allprison_lock);
 2242  done_errmsg:
 2243         if (error && errmsg_pos >= 0) {
 2244                 vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len);
 2245                 errmsg_pos = 2 * errmsg_pos + 1;
 2246                 if (errmsg_len > 0) {
 2247                         if (optuio->uio_segflg == UIO_SYSSPACE)
 2248                                 bcopy(errmsg,
 2249                                     optuio->uio_iov[errmsg_pos].iov_base,
 2250                                     errmsg_len);
 2251                         else
 2252                                 copyout(errmsg,
 2253                                     optuio->uio_iov[errmsg_pos].iov_base,
 2254                                     errmsg_len);
 2255                 }
 2256         }
 2257         vfs_freeopts(opts);
 2258         return (error);
 2259 }
 2260 
 2261 
 2262 /*
 2263  * struct jail_remove_args {
 2264  *      int jid;
 2265  * };
 2266  */
 2267 int
 2268 sys_jail_remove(struct thread *td, struct jail_remove_args *uap)
 2269 {
 2270         struct prison *pr, *cpr, *lpr, *tpr;
 2271         int descend, error;
 2272 
 2273         error = priv_check(td, PRIV_JAIL_REMOVE);
 2274         if (error)
 2275                 return (error);
 2276 
 2277         sx_xlock(&allprison_lock);
 2278         pr = prison_find_child(td->td_ucred->cr_prison, uap->jid);
 2279         if (pr == NULL) {
 2280                 sx_xunlock(&allprison_lock);
 2281                 return (EINVAL);
 2282         }
 2283 
 2284         /* Remove all descendants of this prison, then remove this prison. */
 2285         pr->pr_ref++;
 2286         pr->pr_flags |= PR_REMOVE;
 2287         if (!LIST_EMPTY(&pr->pr_children)) {
 2288                 mtx_unlock(&pr->pr_mtx);
 2289                 lpr = NULL;
 2290                 FOREACH_PRISON_DESCENDANT(pr, cpr, descend) {
 2291                         mtx_lock(&cpr->pr_mtx);
 2292                         if (cpr->pr_ref > 0) {
 2293                                 tpr = cpr;
 2294                                 cpr->pr_ref++;
 2295                                 cpr->pr_flags |= PR_REMOVE;
 2296                         } else {
 2297                                 /* Already removed - do not do it again. */
 2298                                 tpr = NULL;
 2299                         }
 2300                         mtx_unlock(&cpr->pr_mtx);
 2301                         if (lpr != NULL) {
 2302                                 mtx_lock(&lpr->pr_mtx);
 2303                                 prison_remove_one(lpr);
 2304                                 sx_xlock(&allprison_lock);
 2305                         }
 2306                         lpr = tpr;
 2307                 }
 2308                 if (lpr != NULL) {
 2309                         mtx_lock(&lpr->pr_mtx);
 2310                         prison_remove_one(lpr);
 2311                         sx_xlock(&allprison_lock);
 2312                 }
 2313                 mtx_lock(&pr->pr_mtx);
 2314         }
 2315         prison_remove_one(pr);
 2316         return (0);
 2317 }
 2318 
 2319 static void
 2320 prison_remove_one(struct prison *pr)
 2321 {
 2322         struct proc *p;
 2323         int deuref;
 2324 
 2325         /* If the prison was persistent, it is not anymore. */
 2326         deuref = 0;
 2327         if (pr->pr_flags & PR_PERSIST) {
 2328                 pr->pr_ref--;
 2329                 deuref = PD_DEUREF;
 2330                 pr->pr_flags &= ~PR_PERSIST;
 2331         }
 2332 
 2333         /*
 2334          * jail_remove added a reference.  If that's the only one, remove
 2335          * the prison now.
 2336          */
 2337         KASSERT(pr->pr_ref > 0,
 2338             ("prison_remove_one removing a dead prison (jid=%d)", pr->pr_id));
 2339         if (pr->pr_ref == 1) {
 2340                 prison_deref(pr,
 2341                     deuref | PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED);
 2342                 return;
 2343         }
 2344 
 2345         mtx_unlock(&pr->pr_mtx);
 2346         sx_xunlock(&allprison_lock);
 2347         /*
 2348          * Kill all processes unfortunate enough to be attached to this prison.
 2349          */
 2350         sx_slock(&allproc_lock);
 2351         LIST_FOREACH(p, &allproc, p_list) {
 2352                 PROC_LOCK(p);
 2353                 if (p->p_state != PRS_NEW && p->p_ucred &&
 2354                     p->p_ucred->cr_prison == pr)
 2355                         kern_psignal(p, SIGKILL);
 2356                 PROC_UNLOCK(p);
 2357         }
 2358         sx_sunlock(&allproc_lock);
 2359         /* Remove the temporary reference added by jail_remove. */
 2360         prison_deref(pr, deuref | PD_DEREF);
 2361 }
 2362 
 2363 
 2364 /*
 2365  * struct jail_attach_args {
 2366  *      int jid;
 2367  * };
 2368  */
 2369 int
 2370 sys_jail_attach(struct thread *td, struct jail_attach_args *uap)
 2371 {
 2372         struct prison *pr;
 2373         int error;
 2374 
 2375         error = priv_check(td, PRIV_JAIL_ATTACH);
 2376         if (error)
 2377                 return (error);
 2378 
 2379         sx_slock(&allprison_lock);
 2380         pr = prison_find_child(td->td_ucred->cr_prison, uap->jid);
 2381         if (pr == NULL) {
 2382                 sx_sunlock(&allprison_lock);
 2383                 return (EINVAL);
 2384         }
 2385 
 2386         /*
 2387          * Do not allow a process to attach to a prison that is not
 2388          * considered to be "alive".
 2389          */
 2390         if (pr->pr_uref == 0) {
 2391                 mtx_unlock(&pr->pr_mtx);
 2392                 sx_sunlock(&allprison_lock);
 2393                 return (EINVAL);
 2394         }
 2395 
 2396         return (do_jail_attach(td, pr));
 2397 }
 2398 
 2399 static int
 2400 do_jail_attach(struct thread *td, struct prison *pr)
 2401 {
 2402         struct prison *ppr;
 2403         struct proc *p;
 2404         struct ucred *newcred, *oldcred;
 2405         int error;
 2406 
 2407         /*
 2408          * XXX: Note that there is a slight race here if two threads
 2409          * in the same privileged process attempt to attach to two
 2410          * different jails at the same time.  It is important for
 2411          * user processes not to do this, or they might end up with
 2412          * a process root from one prison, but attached to the jail
 2413          * of another.
 2414          */
 2415         pr->pr_ref++;
 2416         pr->pr_uref++;
 2417         mtx_unlock(&pr->pr_mtx);
 2418 
 2419         /* Let modules do whatever they need to prepare for attaching. */
 2420         error = osd_jail_call(pr, PR_METHOD_ATTACH, td);
 2421         if (error) {
 2422                 prison_deref(pr, PD_DEREF | PD_DEUREF | PD_LIST_SLOCKED);
 2423                 return (error);
 2424         }
 2425         sx_sunlock(&allprison_lock);
 2426 
 2427         /*
 2428          * Reparent the newly attached process to this jail.
 2429          */
 2430         ppr = td->td_ucred->cr_prison;
 2431         p = td->td_proc;
 2432         error = cpuset_setproc_update_set(p, pr->pr_cpuset);
 2433         if (error)
 2434                 goto e_revert_osd;
 2435 
 2436         vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY);
 2437         if ((error = change_dir(pr->pr_root, td)) != 0)
 2438                 goto e_unlock;
 2439 #ifdef MAC
 2440         if ((error = mac_vnode_check_chroot(td->td_ucred, pr->pr_root)))
 2441                 goto e_unlock;
 2442 #endif
 2443         VOP_UNLOCK(pr->pr_root, 0);
 2444         if ((error = change_root(pr->pr_root, td)))
 2445                 goto e_revert_osd;
 2446 
 2447         newcred = crget();
 2448         PROC_LOCK(p);
 2449         oldcred = p->p_ucred;
 2450         setsugid(p);
 2451         crcopy(newcred, oldcred);
 2452         newcred->cr_prison = pr;
 2453         p->p_ucred = newcred;
 2454         PROC_UNLOCK(p);
 2455 #ifdef RACCT
 2456         racct_proc_ucred_changed(p, oldcred, newcred);
 2457 #endif
 2458         crfree(oldcred);
 2459         prison_deref(ppr, PD_DEREF | PD_DEUREF);
 2460         return (0);
 2461  e_unlock:
 2462         VOP_UNLOCK(pr->pr_root, 0);
 2463  e_revert_osd:
 2464         /* Tell modules this thread is still in its old jail after all. */
 2465         (void)osd_jail_call(ppr, PR_METHOD_ATTACH, td);
 2466         prison_deref(pr, PD_DEREF | PD_DEUREF);
 2467         return (error);
 2468 }
 2469 
 2470 
 2471 /*
 2472  * Returns a locked prison instance, or NULL on failure.
 2473  */
 2474 struct prison *
 2475 prison_find(int prid)
 2476 {
 2477         struct prison *pr;
 2478 
 2479         sx_assert(&allprison_lock, SX_LOCKED);
 2480         TAILQ_FOREACH(pr, &allprison, pr_list) {
 2481                 if (pr->pr_id == prid) {
 2482                         mtx_lock(&pr->pr_mtx);
 2483                         if (pr->pr_ref > 0)
 2484                                 return (pr);
 2485                         mtx_unlock(&pr->pr_mtx);
 2486                 }
 2487         }
 2488         return (NULL);
 2489 }
 2490 
 2491 /*
 2492  * Find a prison that is a descendant of mypr.  Returns a locked prison or NULL.
 2493  */
 2494 struct prison *
 2495 prison_find_child(struct prison *mypr, int prid)
 2496 {
 2497         struct prison *pr;
 2498         int descend;
 2499 
 2500         sx_assert(&allprison_lock, SX_LOCKED);
 2501         FOREACH_PRISON_DESCENDANT(mypr, pr, descend) {
 2502                 if (pr->pr_id == prid) {
 2503                         mtx_lock(&pr->pr_mtx);
 2504                         if (pr->pr_ref > 0)
 2505                                 return (pr);
 2506                         mtx_unlock(&pr->pr_mtx);
 2507                 }
 2508         }
 2509         return (NULL);
 2510 }
 2511 
 2512 /*
 2513  * Look for the name relative to mypr.  Returns a locked prison or NULL.
 2514  */
 2515 struct prison *
 2516 prison_find_name(struct prison *mypr, const char *name)
 2517 {
 2518         struct prison *pr, *deadpr;
 2519         size_t mylen;
 2520         int descend;
 2521 
 2522         sx_assert(&allprison_lock, SX_LOCKED);
 2523         mylen = (mypr == &prison0) ? 0 : strlen(mypr->pr_name) + 1;
 2524  again:
 2525         deadpr = NULL;
 2526         FOREACH_PRISON_DESCENDANT(mypr, pr, descend) {
 2527                 if (!strcmp(pr->pr_name + mylen, name)) {
 2528                         mtx_lock(&pr->pr_mtx);
 2529                         if (pr->pr_ref > 0) {
 2530                                 if (pr->pr_uref > 0)
 2531                                         return (pr);
 2532                                 deadpr = pr;
 2533                         }
 2534                         mtx_unlock(&pr->pr_mtx);
 2535                 }
 2536         }
 2537         /* There was no valid prison - perhaps there was a dying one. */
 2538         if (deadpr != NULL) {
 2539                 mtx_lock(&deadpr->pr_mtx);
 2540                 if (deadpr->pr_ref == 0) {
 2541                         mtx_unlock(&deadpr->pr_mtx);
 2542                         goto again;
 2543                 }
 2544         }
 2545         return (deadpr);
 2546 }
 2547 
 2548 /*
 2549  * See if a prison has the specific flag set.
 2550  */
 2551 int
 2552 prison_flag(struct ucred *cred, unsigned flag)
 2553 {
 2554 
 2555         /* This is an atomic read, so no locking is necessary. */
 2556         return (cred->cr_prison->pr_flags & flag);
 2557 }
 2558 
 2559 int
 2560 prison_allow(struct ucred *cred, unsigned flag)
 2561 {
 2562 
 2563         /* This is an atomic read, so no locking is necessary. */
 2564         return (cred->cr_prison->pr_allow & flag);
 2565 }
 2566 
 2567 /*
 2568  * Remove a prison reference.  If that was the last reference, remove the
 2569  * prison itself - but not in this context in case there are locks held.
 2570  */
 2571 void
 2572 prison_free_locked(struct prison *pr)
 2573 {
 2574 
 2575         mtx_assert(&pr->pr_mtx, MA_OWNED);
 2576         pr->pr_ref--;
 2577         if (pr->pr_ref == 0) {
 2578                 mtx_unlock(&pr->pr_mtx);
 2579                 TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
 2580                 taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
 2581                 return;
 2582         }
 2583         mtx_unlock(&pr->pr_mtx);
 2584 }
 2585 
 2586 void
 2587 prison_free(struct prison *pr)
 2588 {
 2589 
 2590         mtx_lock(&pr->pr_mtx);
 2591         prison_free_locked(pr);
 2592 }
 2593 
 2594 static void
 2595 prison_complete(void *context, int pending)
 2596 {
 2597 
 2598         prison_deref((struct prison *)context, 0);
 2599 }
 2600 
 2601 /*
 2602  * Remove a prison reference (usually).  This internal version assumes no
 2603  * mutexes are held, except perhaps the prison itself.  If there are no more
 2604  * references, release and delist the prison.  On completion, the prison lock
 2605  * and the allprison lock are both unlocked.
 2606  */
 2607 static void
 2608 prison_deref(struct prison *pr, int flags)
 2609 {
 2610         struct prison *ppr, *tpr;
 2611 
 2612         if (!(flags & PD_LOCKED))
 2613                 mtx_lock(&pr->pr_mtx);
 2614         for (;;) {
 2615                 if (flags & PD_DEUREF) {
 2616                         pr->pr_uref--;
 2617                         KASSERT(prison0.pr_uref != 0, ("prison0 pr_uref=0"));
 2618                 }
 2619                 if (flags & PD_DEREF)
 2620                         pr->pr_ref--;
 2621                 /* If the prison still has references, nothing else to do. */
 2622                 if (pr->pr_ref > 0) {
 2623                         mtx_unlock(&pr->pr_mtx);
 2624                         if (flags & PD_LIST_SLOCKED)
 2625                                 sx_sunlock(&allprison_lock);
 2626                         else if (flags & PD_LIST_XLOCKED)
 2627                                 sx_xunlock(&allprison_lock);
 2628                         return;
 2629                 }
 2630 
 2631                 mtx_unlock(&pr->pr_mtx);
 2632                 if (flags & PD_LIST_SLOCKED) {
 2633                         if (!sx_try_upgrade(&allprison_lock)) {
 2634                                 sx_sunlock(&allprison_lock);
 2635                                 sx_xlock(&allprison_lock);
 2636                         }
 2637                 } else if (!(flags & PD_LIST_XLOCKED))
 2638                         sx_xlock(&allprison_lock);
 2639 
 2640                 TAILQ_REMOVE(&allprison, pr, pr_list);
 2641                 LIST_REMOVE(pr, pr_sibling);
 2642                 ppr = pr->pr_parent;
 2643                 for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent)
 2644                         tpr->pr_childcount--;
 2645                 sx_xunlock(&allprison_lock);
 2646 
 2647 #ifdef VIMAGE
 2648                 if (pr->pr_vnet != ppr->pr_vnet)
 2649                         vnet_destroy(pr->pr_vnet);
 2650 #endif
 2651                 if (pr->pr_root != NULL)
 2652                         vrele(pr->pr_root);
 2653                 mtx_destroy(&pr->pr_mtx);
 2654 #ifdef INET
 2655                 free(pr->pr_ip4, M_PRISON);
 2656 #endif
 2657 #ifdef INET6
 2658                 free(pr->pr_ip6, M_PRISON);
 2659 #endif
 2660                 if (pr->pr_cpuset != NULL)
 2661                         cpuset_rel(pr->pr_cpuset);
 2662                 osd_jail_exit(pr);
 2663 #ifdef RACCT
 2664                 if (racct_enable)
 2665                         prison_racct_detach(pr);
 2666 #endif
 2667                 free(pr, M_PRISON);
 2668 
 2669                 /* Removing a prison frees a reference on its parent. */
 2670                 pr = ppr;
 2671                 mtx_lock(&pr->pr_mtx);
 2672                 flags = PD_DEREF | PD_DEUREF;
 2673         }
 2674 }
 2675 
 2676 void
 2677 prison_hold_locked(struct prison *pr)
 2678 {
 2679 
 2680         mtx_assert(&pr->pr_mtx, MA_OWNED);
 2681         KASSERT(pr->pr_ref > 0,
 2682             ("Trying to hold dead prison (jid=%d).", pr->pr_id));
 2683         pr->pr_ref++;
 2684 }
 2685 
 2686 void
 2687 prison_hold(struct prison *pr)
 2688 {
 2689 
 2690         mtx_lock(&pr->pr_mtx);
 2691         prison_hold_locked(pr);
 2692         mtx_unlock(&pr->pr_mtx);
 2693 }
 2694 
 2695 void
 2696 prison_proc_hold(struct prison *pr)
 2697 {
 2698 
 2699         mtx_lock(&pr->pr_mtx);
 2700         KASSERT(pr->pr_uref > 0,
 2701             ("Cannot add a process to a non-alive prison (jid=%d)", pr->pr_id));
 2702         pr->pr_uref++;
 2703         mtx_unlock(&pr->pr_mtx);
 2704 }
 2705 
 2706 void
 2707 prison_proc_free(struct prison *pr)
 2708 {
 2709 
 2710         mtx_lock(&pr->pr_mtx);
 2711         KASSERT(pr->pr_uref > 0,
 2712             ("Trying to kill a process in a dead prison (jid=%d)", pr->pr_id));
 2713         prison_deref(pr, PD_DEUREF | PD_LOCKED);
 2714 }
 2715 
 2716 
 2717 #ifdef INET
 2718 /*
 2719  * Restrict a prison's IP address list with its parent's, possibly replacing
 2720  * it.  Return true if the replacement buffer was used (or would have been).
 2721  */
 2722 static int
 2723 prison_restrict_ip4(struct prison *pr, struct in_addr *newip4)
 2724 {
 2725         int ii, ij, used;
 2726         struct prison *ppr;
 2727 
 2728         ppr = pr->pr_parent;
 2729         if (!(pr->pr_flags & PR_IP4_USER)) {
 2730                 /* This has no user settings, so just copy the parent's list. */
 2731                 if (pr->pr_ip4s < ppr->pr_ip4s) {
 2732                         /*
 2733                          * There's no room for the parent's list.  Use the
 2734                          * new list buffer, which is assumed to be big enough
 2735                          * (if it was passed).  If there's no buffer, try to
 2736                          * allocate one.
 2737                          */
 2738                         used = 1;
 2739                         if (newip4 == NULL) {
 2740                                 newip4 = malloc(ppr->pr_ip4s * sizeof(*newip4),
 2741                                     M_PRISON, M_NOWAIT);
 2742                                 if (newip4 != NULL)
 2743                                         used = 0;
 2744                         }
 2745                         if (newip4 != NULL) {
 2746                                 bcopy(ppr->pr_ip4, newip4,
 2747                                     ppr->pr_ip4s * sizeof(*newip4));
 2748                                 free(pr->pr_ip4, M_PRISON);
 2749                                 pr->pr_ip4 = newip4;
 2750                                 pr->pr_ip4s = ppr->pr_ip4s;
 2751                         }
 2752                         return (used);
 2753                 }
 2754                 pr->pr_ip4s = ppr->pr_ip4s;
 2755                 if (pr->pr_ip4s > 0)
 2756                         bcopy(ppr->pr_ip4, pr->pr_ip4,
 2757                             pr->pr_ip4s * sizeof(*newip4));
 2758                 else if (pr->pr_ip4 != NULL) {
 2759                         free(pr->pr_ip4, M_PRISON);
 2760                         pr->pr_ip4 = NULL;
 2761                 }
 2762         } else if (pr->pr_ip4s > 0) {
 2763                 /* Remove addresses that aren't in the parent. */
 2764                 for (ij = 0; ij < ppr->pr_ip4s; ij++)
 2765                         if (pr->pr_ip4[0].s_addr == ppr->pr_ip4[ij].s_addr)
 2766                                 break;
 2767                 if (ij < ppr->pr_ip4s)
 2768                         ii = 1;
 2769                 else {
 2770                         bcopy(pr->pr_ip4 + 1, pr->pr_ip4,
 2771                             --pr->pr_ip4s * sizeof(*pr->pr_ip4));
 2772                         ii = 0;
 2773                 }
 2774                 for (ij = 1; ii < pr->pr_ip4s; ) {
 2775                         if (pr->pr_ip4[ii].s_addr == ppr->pr_ip4[0].s_addr) {
 2776                                 ii++;
 2777                                 continue;
 2778                         }
 2779                         switch (ij >= ppr->pr_ip4s ? -1 :
 2780                                 qcmp_v4(&pr->pr_ip4[ii], &ppr->pr_ip4[ij])) {
 2781                         case -1:
 2782                                 bcopy(pr->pr_ip4 + ii + 1, pr->pr_ip4 + ii,
 2783                                     (--pr->pr_ip4s - ii) * sizeof(*pr->pr_ip4));
 2784                                 break;
 2785                         case 0:
 2786                                 ii++;
 2787                                 ij++;
 2788                                 break;
 2789                         case 1:
 2790                                 ij++;
 2791                                 break;
 2792                         }
 2793                 }
 2794                 if (pr->pr_ip4s == 0) {
 2795                         pr->pr_flags |= PR_IP4_DISABLE;
 2796                         free(pr->pr_ip4, M_PRISON);
 2797                         pr->pr_ip4 = NULL;
 2798                 }
 2799         }
 2800         return (0);
 2801 }
 2802 
 2803 /*
 2804  * Pass back primary IPv4 address of this jail.
 2805  *
 2806  * If not restricted return success but do not alter the address.  Caller has
 2807  * to make sure to initialize it correctly (e.g. INADDR_ANY).
 2808  *
 2809  * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4.
 2810  * Address returned in NBO.
 2811  */
 2812 int
 2813 prison_get_ip4(struct ucred *cred, struct in_addr *ia)
 2814 {
 2815         struct prison *pr;
 2816 
 2817         KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
 2818         KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
 2819 
 2820         pr = cred->cr_prison;
 2821         if (!(pr->pr_flags & PR_IP4))
 2822                 return (0);
 2823         mtx_lock(&pr->pr_mtx);
 2824         if (!(pr->pr_flags & PR_IP4)) {
 2825                 mtx_unlock(&pr->pr_mtx);
 2826                 return (0);
 2827         }
 2828         if (pr->pr_ip4 == NULL) {
 2829                 mtx_unlock(&pr->pr_mtx);
 2830                 return (EAFNOSUPPORT);
 2831         }
 2832 
 2833         ia->s_addr = pr->pr_ip4[0].s_addr;
 2834         mtx_unlock(&pr->pr_mtx);
 2835         return (0);
 2836 }
 2837 
 2838 /*
 2839  * Return 1 if we should do proper source address selection or are not jailed.
 2840  * We will return 0 if we should bypass source address selection in favour
 2841  * of the primary jail IPv4 address. Only in this case *ia will be updated and
 2842  * returned in NBO.
 2843  * Return EAFNOSUPPORT, in case this jail does not allow IPv4.
 2844  */
 2845 int
 2846 prison_saddrsel_ip4(struct ucred *cred, struct in_addr *ia)
 2847 {
 2848         struct prison *pr;
 2849         struct in_addr lia;
 2850         int error;
 2851 
 2852         KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
 2853         KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
 2854 
 2855         if (!jailed(cred))
 2856                 return (1);
 2857 
 2858         pr = cred->cr_prison;
 2859         if (pr->pr_flags & PR_IP4_SADDRSEL)
 2860                 return (1);
 2861 
 2862         lia.s_addr = INADDR_ANY;
 2863         error = prison_get_ip4(cred, &lia);
 2864         if (error)
 2865                 return (error);
 2866         if (lia.s_addr == INADDR_ANY)
 2867                 return (1);
 2868 
 2869         ia->s_addr = lia.s_addr;
 2870         return (0);
 2871 }
 2872 
 2873 /*
 2874  * Return true if pr1 and pr2 have the same IPv4 address restrictions.
 2875  */
 2876 int
 2877 prison_equal_ip4(struct prison *pr1, struct prison *pr2)
 2878 {
 2879 
 2880         if (pr1 == pr2)
 2881                 return (1);
 2882 
 2883         /*
 2884          * No need to lock since the PR_IP4_USER flag can't be altered for
 2885          * existing prisons.
 2886          */
 2887         while (pr1 != &prison0 &&
 2888 #ifdef VIMAGE
 2889                !(pr1->pr_flags & PR_VNET) &&
 2890 #endif
 2891                !(pr1->pr_flags & PR_IP4_USER))
 2892                 pr1 = pr1->pr_parent;
 2893         while (pr2 != &prison0 &&
 2894 #ifdef VIMAGE
 2895                !(pr2->pr_flags & PR_VNET) &&
 2896 #endif
 2897                !(pr2->pr_flags & PR_IP4_USER))
 2898                 pr2 = pr2->pr_parent;
 2899         return (pr1 == pr2);
 2900 }
 2901 
 2902 /*
 2903  * Make sure our (source) address is set to something meaningful to this
 2904  * jail.
 2905  *
 2906  * Returns 0 if jail doesn't restrict IPv4 or if address belongs to jail,
 2907  * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail
 2908  * doesn't allow IPv4.  Address passed in in NBO and returned in NBO.
 2909  */
 2910 int
 2911 prison_local_ip4(struct ucred *cred, struct in_addr *ia)
 2912 {
 2913         struct prison *pr;
 2914         struct in_addr ia0;
 2915         int error;
 2916 
 2917         KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
 2918         KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
 2919 
 2920         pr = cred->cr_prison;
 2921         if (!(pr->pr_flags & PR_IP4))
 2922                 return (0);
 2923         mtx_lock(&pr->pr_mtx);
 2924         if (!(pr->pr_flags & PR_IP4)) {
 2925                 mtx_unlock(&pr->pr_mtx);
 2926                 return (0);
 2927         }
 2928         if (pr->pr_ip4 == NULL) {
 2929                 mtx_unlock(&pr->pr_mtx);
 2930                 return (EAFNOSUPPORT);
 2931         }
 2932 
 2933         ia0.s_addr = ntohl(ia->s_addr);
 2934         if (ia0.s_addr == INADDR_LOOPBACK) {
 2935                 ia->s_addr = pr->pr_ip4[0].s_addr;
 2936                 mtx_unlock(&pr->pr_mtx);
 2937                 return (0);
 2938         }
 2939 
 2940         if (ia0.s_addr == INADDR_ANY) {
 2941                 /*
 2942                  * In case there is only 1 IPv4 address, bind directly.
 2943                  */
 2944                 if (pr->pr_ip4s == 1)
 2945                         ia->s_addr = pr->pr_ip4[0].s_addr;
 2946                 mtx_unlock(&pr->pr_mtx);
 2947                 return (0);
 2948         }
 2949 
 2950         error = _prison_check_ip4(pr, ia);
 2951         mtx_unlock(&pr->pr_mtx);
 2952         return (error);
 2953 }
 2954 
 2955 /*
 2956  * Rewrite destination address in case we will connect to loopback address.
 2957  *
 2958  * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4.
 2959  * Address passed in in NBO and returned in NBO.
 2960  */
 2961 int
 2962 prison_remote_ip4(struct ucred *cred, struct in_addr *ia)
 2963 {
 2964         struct prison *pr;
 2965 
 2966         KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
 2967         KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
 2968 
 2969         pr = cred->cr_prison;
 2970         if (!(pr->pr_flags & PR_IP4))
 2971                 return (0);
 2972         mtx_lock(&pr->pr_mtx);
 2973         if (!(pr->pr_flags & PR_IP4)) {
 2974                 mtx_unlock(&pr->pr_mtx);
 2975                 return (0);
 2976         }
 2977         if (pr->pr_ip4 == NULL) {
 2978                 mtx_unlock(&pr->pr_mtx);
 2979                 return (EAFNOSUPPORT);
 2980         }
 2981 
 2982         if (ntohl(ia->s_addr) == INADDR_LOOPBACK) {
 2983                 ia->s_addr = pr->pr_ip4[0].s_addr;
 2984                 mtx_unlock(&pr->pr_mtx);
 2985                 return (0);
 2986         }
 2987 
 2988         /*
 2989          * Return success because nothing had to be changed.
 2990          */
 2991         mtx_unlock(&pr->pr_mtx);
 2992         return (0);
 2993 }
 2994 
 2995 /*
 2996  * Check if given address belongs to the jail referenced by cred/prison.
 2997  *
 2998  * Returns 0 if jail doesn't restrict IPv4 or if address belongs to jail,
 2999  * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail
 3000  * doesn't allow IPv4.  Address passed in in NBO.
 3001  */
 3002 static int
 3003 _prison_check_ip4(struct prison *pr, struct in_addr *ia)
 3004 {
 3005         int i, a, z, d;
 3006 
 3007         /*
 3008          * Check the primary IP.
 3009          */
 3010         if (pr->pr_ip4[0].s_addr == ia->s_addr)
 3011                 return (0);
 3012 
 3013         /*
 3014          * All the other IPs are sorted so we can do a binary search.
 3015          */
 3016         a = 0;
 3017         z = pr->pr_ip4s - 2;
 3018         while (a <= z) {
 3019                 i = (a + z) / 2;
 3020                 d = qcmp_v4(&pr->pr_ip4[i+1], ia);
 3021                 if (d > 0)
 3022                         z = i - 1;
 3023                 else if (d < 0)
 3024                         a = i + 1;
 3025                 else
 3026                         return (0);
 3027         }
 3028 
 3029         return (EADDRNOTAVAIL);
 3030 }
 3031 
 3032 int
 3033 prison_check_ip4(struct ucred *cred, struct in_addr *ia)
 3034 {
 3035         struct prison *pr;
 3036         int error;
 3037 
 3038         KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
 3039         KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
 3040 
 3041         pr = cred->cr_prison;
 3042         if (!(pr->pr_flags & PR_IP4))
 3043                 return (0);
 3044         mtx_lock(&pr->pr_mtx);
 3045         if (!(pr->pr_flags & PR_IP4)) {
 3046                 mtx_unlock(&pr->pr_mtx);
 3047                 return (0);
 3048         }
 3049         if (pr->pr_ip4 == NULL) {
 3050                 mtx_unlock(&pr->pr_mtx);
 3051                 return (EAFNOSUPPORT);
 3052         }
 3053 
 3054         error = _prison_check_ip4(pr, ia);
 3055         mtx_unlock(&pr->pr_mtx);
 3056         return (error);
 3057 }
 3058 #endif
 3059 
 3060 #ifdef INET6
/*
 * Bring the IPv6 address list of prison pr in line with the list of its
 * parent.
 *
 * If pr has no user-set addresses (PR_IP6_USER clear) the parent's list is
 * copied wholesale.  Otherwise every address of pr that is not also in the
 * parent's list is removed; if nothing is left, PR_IP6_DISABLE is set and
 * the list is freed.
 *
 * newip6 is an optional caller-supplied buffer that is only used when pr's
 * current list is too small to hold the parent's list; it is assumed to be
 * large enough for that.
 *
 * Returns 1 if a caller-supplied newip6 was installed as pr's list, and
 * also if no buffer was supplied and the M_NOWAIT allocation failed (in
 * which case pr is left untouched).  Returns 0 in every other case.
 *
 * NOTE(review): locking requirements are not visible here; presumably the
 * caller serializes access to both prisons -- confirm against callers.
 */
static int
prison_restrict_ip6(struct prison *pr, struct in6_addr *newip6)
{
        int ii, ij, used;
        struct prison *ppr;

        ppr = pr->pr_parent;
        if (!(pr->pr_flags & PR_IP6_USER)) {
                /* This has no user settings, so just copy the parent's list. */
                if (pr->pr_ip6s < ppr->pr_ip6s) {
                        /*
                         * There's no room for the parent's list.  Use the
                         * new list buffer, which is assumed to be big enough
                         * (if it was passed).  If there's no buffer, try to
                         * allocate one.
                         */
                        used = 1;
                        if (newip6 == NULL) {
                                newip6 = malloc(ppr->pr_ip6s * sizeof(*newip6),
                                    M_PRISON, M_NOWAIT);
                                /* A locally allocated buffer is not "used". */
                                if (newip6 != NULL)
                                        used = 0;
                        }
                        if (newip6 != NULL) {
                                bcopy(ppr->pr_ip6, newip6,
                                    ppr->pr_ip6s * sizeof(*newip6));
                                free(pr->pr_ip6, M_PRISON);
                                pr->pr_ip6 = newip6;
                                pr->pr_ip6s = ppr->pr_ip6s;
                        }
                        return (used);
                }
                /* The existing buffer is big enough; reuse it. */
                pr->pr_ip6s = ppr->pr_ip6s;
                if (pr->pr_ip6s > 0)
                        bcopy(ppr->pr_ip6, pr->pr_ip6,
                            pr->pr_ip6s * sizeof(*newip6));
                else if (pr->pr_ip6 != NULL) {
                        /* The parent has no addresses; drop ours as well. */
                        free(pr->pr_ip6, M_PRISON);
                        pr->pr_ip6 = NULL;
                }
        } else if (pr->pr_ip6s > 0) {
                /* Remove addresses that aren't in the parent. */
                /* First see whether our primary address is in the parent. */
                for (ij = 0; ij < ppr->pr_ip6s; ij++)
                        if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[0],
                            &ppr->pr_ip6[ij]))
                                break;
                if (ij < ppr->pr_ip6s)
                        ii = 1;
                else {
                        /*
                         * It is not: shift the list down over slot 0 and
                         * re-examine from the new slot 0.
                         */
                        bcopy(pr->pr_ip6 + 1, pr->pr_ip6,
                            --pr->pr_ip6s * sizeof(*pr->pr_ip6));
                        ii = 0;
                }
                /*
                 * Walk our remaining entries (ii) against the parent's
                 * entries after its primary (ij).
                 */
                for (ij = 1; ii < pr->pr_ip6s; ) {
                        /* The parent's primary address is always allowed. */
                        if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[ii],
                            &ppr->pr_ip6[0])) {
                                ii++;
                                continue;
                        }
                        /*
                         * An exhausted parent list is treated like "ours
                         * sorts first".  NOTE(review): the cases below
                         * assume qcmp_v6() returns exactly -1, 0 or 1 --
                         * confirm.
                         */
                        switch (ij >= ppr->pr_ip6s ? -1 :
                                qcmp_v6(&pr->pr_ip6[ii], &ppr->pr_ip6[ij])) {
                        case -1:
                                /* Not in the parent: remove entry ii. */
                                bcopy(pr->pr_ip6 + ii + 1, pr->pr_ip6 + ii,
                                    (--pr->pr_ip6s - ii) * sizeof(*pr->pr_ip6));
                                break;
                        case 0:
                                /* Present in both: keep it, advance both. */
                                ii++;
                                ij++;
                                break;
                        case 1:
                                /* Try the parent's next entry. */
                                ij++;
                                break;
                        }
                }
                if (pr->pr_ip6s == 0) {
                        /* Nothing survived: IPv6 is disabled for this jail. */
                        pr->pr_flags |= PR_IP6_DISABLE;
                        free(pr->pr_ip6, M_PRISON);
                        pr->pr_ip6 = NULL;
                }
        }
        return 0;
}
 3143 
 3144 /*
 3145  * Pass back primary IPv6 address for this jail.
 3146  *
 3147  * If not restricted return success but do not alter the address.  Caller has
 3148  * to make sure to initialize it correctly (e.g. IN6ADDR_ANY_INIT).
 3149  *
 3150  * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv6.
 3151  */
 3152 int
 3153 prison_get_ip6(struct ucred *cred, struct in6_addr *ia6)
 3154 {
 3155         struct prison *pr;
 3156 
 3157         KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
 3158         KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
 3159 
 3160         pr = cred->cr_prison;
 3161         if (!(pr->pr_flags & PR_IP6))
 3162                 return (0);
 3163         mtx_lock(&pr->pr_mtx);
 3164         if (!(pr->pr_flags & PR_IP6)) {
 3165                 mtx_unlock(&pr->pr_mtx);
 3166                 return (0);
 3167         }
 3168         if (pr->pr_ip6 == NULL) {
 3169                 mtx_unlock(&pr->pr_mtx);
 3170                 return (EAFNOSUPPORT);
 3171         }
 3172 
 3173         bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr));
 3174         mtx_unlock(&pr->pr_mtx);
 3175         return (0);
 3176 }
 3177 
 3178 /*
 3179  * Return 1 if we should do proper source address selection or are not jailed.
 3180  * We will return 0 if we should bypass source address selection in favour
 3181  * of the primary jail IPv6 address. Only in this case *ia will be updated and
 3182  * returned in NBO.
 3183  * Return EAFNOSUPPORT, in case this jail does not allow IPv6.
 3184  */
 3185 int
 3186 prison_saddrsel_ip6(struct ucred *cred, struct in6_addr *ia6)
 3187 {
 3188         struct prison *pr;
 3189         struct in6_addr lia6;
 3190         int error;
 3191 
 3192         KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
 3193         KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
 3194 
 3195         if (!jailed(cred))
 3196                 return (1);
 3197 
 3198         pr = cred->cr_prison;
 3199         if (pr->pr_flags & PR_IP6_SADDRSEL)
 3200                 return (1);
 3201 
 3202         lia6 = in6addr_any;
 3203         error = prison_get_ip6(cred, &lia6);
 3204         if (error)
 3205                 return (error);
 3206         if (IN6_IS_ADDR_UNSPECIFIED(&lia6))
 3207                 return (1);
 3208 
 3209         bcopy(&lia6, ia6, sizeof(struct in6_addr));
 3210         return (0);
 3211 }
 3212 
 3213 /*
 3214  * Return true if pr1 and pr2 have the same IPv6 address restrictions.
 3215  */
 3216 int
 3217 prison_equal_ip6(struct prison *pr1, struct prison *pr2)
 3218 {
 3219 
 3220         if (pr1 == pr2)
 3221                 return (1);
 3222 
 3223         while (pr1 != &prison0 &&
 3224 #ifdef VIMAGE
 3225                !(pr1->pr_flags & PR_VNET) &&
 3226 #endif
 3227                !(pr1->pr_flags & PR_IP6_USER))
 3228                 pr1 = pr1->pr_parent;
 3229         while (pr2 != &prison0 &&
 3230 #ifdef VIMAGE
 3231                !(pr2->pr_flags & PR_VNET) &&
 3232 #endif
 3233                !(pr2->pr_flags & PR_IP6_USER))
 3234                 pr2 = pr2->pr_parent;
 3235         return (pr1 == pr2);
 3236 }
 3237 
 3238 /*
 3239  * Make sure our (source) address is set to something meaningful to this jail.
 3240  *
 3241  * v6only should be set based on (inp->inp_flags & IN6P_IPV6_V6ONLY != 0)
 3242  * when needed while binding.
 3243  *
 3244  * Returns 0 if jail doesn't restrict IPv6 or if address belongs to jail,
 3245  * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail
 3246  * doesn't allow IPv6.
 3247  */
 3248 int
 3249 prison_local_ip6(struct ucred *cred, struct in6_addr *ia6, int v6only)
 3250 {
 3251         struct prison *pr;
 3252         int error;
 3253 
 3254         KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
 3255         KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
 3256 
 3257         pr = cred->cr_prison;
 3258         if (!(pr->pr_flags & PR_IP6))
 3259                 return (0);
 3260         mtx_lock(&pr->pr_mtx);
 3261         if (!(pr->pr_flags & PR_IP6)) {
 3262                 mtx_unlock(&pr->pr_mtx);
 3263                 return (0);
 3264         }
 3265         if (pr->pr_ip6 == NULL) {
 3266                 mtx_unlock(&pr->pr_mtx);
 3267                 return (EAFNOSUPPORT);
 3268         }
 3269 
 3270         if (IN6_IS_ADDR_LOOPBACK(ia6)) {
 3271                 bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr));
 3272                 mtx_unlock(&pr->pr_mtx);
 3273                 return (0);
 3274         }
 3275 
 3276         if (IN6_IS_ADDR_UNSPECIFIED(ia6)) {
 3277                 /*
 3278                  * In case there is only 1 IPv6 address, and v6only is true,
 3279                  * then bind directly.
 3280                  */
 3281                 if (v6only != 0 && pr->pr_ip6s == 1)
 3282                         bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr));
 3283                 mtx_unlock(&pr->pr_mtx);
 3284                 return (0);
 3285         }
 3286 
 3287         error = _prison_check_ip6(pr, ia6);
 3288         mtx_unlock(&pr->pr_mtx);
 3289         return (error);
 3290 }
 3291 
 3292 /*
 3293  * Rewrite destination address in case we will connect to loopback address.
 3294  *
 3295  * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv6.
 3296  */
 3297 int
 3298 prison_remote_ip6(struct ucred *cred, struct in6_addr *ia6)
 3299 {
 3300         struct prison *pr;
 3301 
 3302         KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
 3303         KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
 3304 
 3305         pr = cred->cr_prison;
 3306         if (!(pr->pr_flags & PR_IP6))
 3307                 return (0);
 3308         mtx_lock(&pr->pr_mtx);
 3309         if (!(pr->pr_flags & PR_IP6)) {
 3310                 mtx_unlock(&pr->pr_mtx);
 3311                 return (0);
 3312         }
 3313         if (pr->pr_ip6 == NULL) {
 3314                 mtx_unlock(&pr->pr_mtx);
 3315                 return (EAFNOSUPPORT);
 3316         }
 3317 
 3318         if (IN6_IS_ADDR_LOOPBACK(ia6)) {
 3319                 bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr));
 3320                 mtx_unlock(&pr->pr_mtx);
 3321                 return (0);
 3322         }
 3323 
 3324         /*
 3325          * Return success because nothing had to be changed.
 3326          */
 3327         mtx_unlock(&pr->pr_mtx);
 3328         return (0);
 3329 }
 3330 
 3331 /*
 3332  * Check if given address belongs to the jail referenced by cred/prison.
 3333  *
 3334  * Returns 0 if jail doesn't restrict IPv6 or if address belongs to jail,
 3335  * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail
 3336  * doesn't allow IPv6.
 3337  */
 3338 static int
 3339 _prison_check_ip6(struct prison *pr, struct in6_addr *ia6)
 3340 {
 3341         int i, a, z, d;
 3342 
 3343         /*
 3344          * Check the primary IP.
 3345          */
 3346         if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[0], ia6))
 3347                 return (0);
 3348 
 3349         /*
 3350          * All the other IPs are sorted so we can do a binary search.
 3351          */
 3352         a = 0;
 3353         z = pr->pr_ip6s - 2;
 3354         while (a <= z) {
 3355                 i = (a + z) / 2;
 3356                 d = qcmp_v6(&pr->pr_ip6[i+1], ia6);
 3357                 if (d > 0)
 3358                         z = i - 1;
 3359                 else if (d < 0)
 3360                         a = i + 1;
 3361                 else
 3362                         return (0);
 3363         }
 3364 
 3365         return (EADDRNOTAVAIL);
 3366 }
 3367 
 3368 int
 3369 prison_check_ip6(struct ucred *cred, struct in6_addr *ia6)
 3370 {
 3371         struct prison *pr;
 3372         int error;
 3373 
 3374         KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
 3375         KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
 3376 
 3377         pr = cred->cr_prison;
 3378         if (!(pr->pr_flags & PR_IP6))
 3379                 return (0);
 3380         mtx_lock(&pr->pr_mtx);
 3381         if (!(pr->pr_flags & PR_IP6)) {
 3382                 mtx_unlock(&pr->pr_mtx);
 3383                 return (0);
 3384         }
 3385         if (pr->pr_ip6 == NULL) {
 3386                 mtx_unlock(&pr->pr_mtx);
 3387                 return (EAFNOSUPPORT);
 3388         }
 3389 
 3390         error = _prison_check_ip6(pr, ia6);
 3391         mtx_unlock(&pr->pr_mtx);
 3392         return (error);
 3393 }
 3394 #endif
 3395 
 3396 /*
 3397  * Check if a jail supports the given address family.
 3398  *
 3399  * Returns 0 if not jailed or the address family is supported, EAFNOSUPPORT
 3400  * if not.
 3401  */
 3402 int
 3403 prison_check_af(struct ucred *cred, int af)
 3404 {
 3405         struct prison *pr;
 3406         int error;
 3407 
 3408         KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
 3409 
 3410         pr = cred->cr_prison;
 3411 #ifdef VIMAGE
 3412         /* Prisons with their own network stack are not limited. */
 3413         if (prison_owns_vnet(cred))
 3414                 return (0);
 3415 #endif
 3416 
 3417         error = 0;
 3418         switch (af)
 3419         {
 3420 #ifdef INET
 3421         case AF_INET:
 3422                 if (pr->pr_flags & PR_IP4)
 3423                 {
 3424                         mtx_lock(&pr->pr_mtx);
 3425                         if ((pr->pr_flags & PR_IP4) && pr->pr_ip4 == NULL)
 3426                                 error = EAFNOSUPPORT;
 3427                         mtx_unlock(&pr->pr_mtx);
 3428                 }
 3429                 break;
 3430 #endif
 3431 #ifdef INET6
 3432         case AF_INET6:
 3433                 if (pr->pr_flags & PR_IP6)
 3434                 {
 3435                         mtx_lock(&pr->pr_mtx);
 3436                         if ((pr->pr_flags & PR_IP6) && pr->pr_ip6 == NULL)
 3437                                 error = EAFNOSUPPORT;
 3438                         mtx_unlock(&pr->pr_mtx);
 3439                 }
 3440                 break;
 3441 #endif
 3442         case AF_LOCAL:
 3443         case AF_ROUTE:
 3444                 break;
 3445         default:
 3446                 if (!(pr->pr_allow & PR_ALLOW_SOCKET_AF))
 3447                         error = EAFNOSUPPORT;
 3448         }
 3449         return (error);
 3450 }
 3451 
 3452 /*
 3453  * Check if given address belongs to the jail referenced by cred (wrapper to
 3454  * prison_check_ip[46]).
 3455  *
 3456  * Returns 0 if jail doesn't restrict the address family or if address belongs
 3457  * to jail, EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if
 3458  * the jail doesn't allow the address family.  IPv4 Address passed in in NBO.
 3459  */
 3460 int
 3461 prison_if(struct ucred *cred, struct sockaddr *sa)
 3462 {
 3463 #ifdef INET
 3464         struct sockaddr_in *sai;
 3465 #endif
 3466 #ifdef INET6
 3467         struct sockaddr_in6 *sai6;
 3468 #endif
 3469         int error;
 3470 
 3471         KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
 3472         KASSERT(sa != NULL, ("%s: sa is NULL", __func__));
 3473 
 3474 #ifdef VIMAGE
 3475         if (prison_owns_vnet(cred))
 3476                 return (0);
 3477 #endif
 3478 
 3479         error = 0;
 3480         switch (sa->sa_family)
 3481         {
 3482 #ifdef INET
 3483         case AF_INET:
 3484                 sai = (struct sockaddr_in *)sa;
 3485                 error = prison_check_ip4(cred, &sai->sin_addr);
 3486                 break;
 3487 #endif
 3488 #ifdef INET6
 3489         case AF_INET6:
 3490                 sai6 = (struct sockaddr_in6 *)sa;
 3491                 error = prison_check_ip6(cred, &sai6->sin6_addr);
 3492                 break;
 3493 #endif
 3494         default:
 3495                 if (!(cred->cr_prison->pr_allow & PR_ALLOW_SOCKET_AF))
 3496                         error = EAFNOSUPPORT;
 3497         }
 3498         return (error);
 3499 }
 3500 
 3501 /*
 3502  * Return 0 if jails permit p1 to frob p2, otherwise ESRCH.
 3503  */
 3504 int
 3505 prison_check(struct ucred *cred1, struct ucred *cred2)
 3506 {
 3507 
 3508         return ((cred1->cr_prison == cred2->cr_prison ||
 3509             prison_ischild(cred1->cr_prison, cred2->cr_prison)) ? 0 : ESRCH);
 3510 }
 3511 
 3512 /*
 3513  * Return 1 if p2 is a child of p1, otherwise 0.
 3514  */
 3515 int
 3516 prison_ischild(struct prison *pr1, struct prison *pr2)
 3517 {
 3518 
 3519         for (pr2 = pr2->pr_parent; pr2 != NULL; pr2 = pr2->pr_parent)
 3520                 if (pr1 == pr2)
 3521                         return (1);
 3522         return (0);
 3523 }
 3524 
 3525 /*
 3526  * Return 1 if the passed credential is in a jail, otherwise 0.
 3527  */
 3528 int
 3529 jailed(struct ucred *cred)
 3530 {
 3531 
 3532         return (cred->cr_prison != &prison0);
 3533 }
 3534 
 3535 /*
 3536  * Return 1 if the passed credential is in a jail and that jail does not
 3537  * have its own virtual network stack, otherwise 0.
 3538  */
 3539 int
 3540 jailed_without_vnet(struct ucred *cred)
 3541 {
 3542 
 3543         if (!jailed(cred))
 3544                 return (0);
 3545 #ifdef VIMAGE
 3546         if (prison_owns_vnet(cred))
 3547                 return (0);
 3548 #endif
 3549 
 3550         return (1);
 3551 }
 3552 
 3553 /*
 3554  * Return the correct hostname (domainname, et al) for the passed credential.
 3555  */
 3556 void
 3557 getcredhostname(struct ucred *cred, char *buf, size_t size)
 3558 {
 3559         struct prison *pr;
 3560 
 3561         /*
 3562          * A NULL credential can be used to shortcut to the physical
 3563          * system's hostname.
 3564          */
 3565         pr = (cred != NULL) ? cred->cr_prison : &prison0;
 3566         mtx_lock(&pr->pr_mtx);
 3567         strlcpy(buf, pr->pr_hostname, size);
 3568         mtx_unlock(&pr->pr_mtx);
 3569 }
 3570 
 3571 void
 3572 getcreddomainname(struct ucred *cred, char *buf, size_t size)
 3573 {
 3574 
 3575         mtx_lock(&cred->cr_prison->pr_mtx);
 3576         strlcpy(buf, cred->cr_prison->pr_domainname, size);
 3577         mtx_unlock(&cred->cr_prison->pr_mtx);
 3578 }
 3579 
 3580 void
 3581 getcredhostuuid(struct ucred *cred, char *buf, size_t size)
 3582 {
 3583 
 3584         mtx_lock(&cred->cr_prison->pr_mtx);
 3585         strlcpy(buf, cred->cr_prison->pr_hostuuid, size);
 3586         mtx_unlock(&cred->cr_prison->pr_mtx);
 3587 }
 3588 
 3589 void
 3590 getcredhostid(struct ucred *cred, unsigned long *hostid)
 3591 {
 3592 
 3593         mtx_lock(&cred->cr_prison->pr_mtx);
 3594         *hostid = cred->cr_prison->pr_hostid;
 3595         mtx_unlock(&cred->cr_prison->pr_mtx);
 3596 }
 3597 
 3598 #ifdef VIMAGE
 3599 /*
 3600  * Determine whether the prison represented by cred owns
 3601  * its vnet rather than having it inherited.
 3602  *
 3603  * Returns 1 in case the prison owns the vnet, 0 otherwise.
 3604  */
 3605 int
 3606 prison_owns_vnet(struct ucred *cred)
 3607 {
 3608 
 3609         /*
 3610          * vnets cannot be added/removed after jail creation,
 3611          * so no need to lock here.
 3612          */
 3613         return (cred->cr_prison->pr_flags & PR_VNET ? 1 : 0);
 3614 }
 3615 #endif
 3616 
 3617 /*
 3618  * Determine whether the subject represented by cred can "see"
 3619  * status of a mount point.
 3620  * Returns: 0 for permitted, ENOENT otherwise.
 3621  * XXX: This function should be called cr_canseemount() and should be
 3622  *      placed in kern_prot.c.
 3623  */
 3624 int
 3625 prison_canseemount(struct ucred *cred, struct mount *mp)
 3626 {
 3627         struct prison *pr;
 3628         struct statfs *sp;
 3629         size_t len;
 3630 
 3631         pr = cred->cr_prison;
 3632         if (pr->pr_enforce_statfs == 0)
 3633                 return (0);
 3634         if (pr->pr_root->v_mount == mp)
 3635                 return (0);
 3636         if (pr->pr_enforce_statfs == 2)
 3637                 return (ENOENT);
 3638         /*
 3639          * If jail's chroot directory is set to "/" we should be able to see
 3640          * all mount-points from inside a jail.
 3641          * This is ugly check, but this is the only situation when jail's
 3642          * directory ends with '/'.
 3643          */
 3644         if (strcmp(pr->pr_path, "/") == 0)
 3645                 return (0);
 3646         len = strlen(pr->pr_path);
 3647         sp = &mp->mnt_stat;
 3648         if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0)
 3649                 return (ENOENT);
 3650         /*
 3651          * Be sure that we don't have situation where jail's root directory
 3652          * is "/some/path" and mount point is "/some/pathpath".
 3653          */
 3654         if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/')
 3655                 return (ENOENT);
 3656         return (0);
 3657 }
 3658 
 3659 void
 3660 prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp)
 3661 {
 3662         char jpath[MAXPATHLEN];
 3663         struct prison *pr;
 3664         size_t len;
 3665 
 3666         pr = cred->cr_prison;
 3667         if (pr->pr_enforce_statfs == 0)
 3668                 return;
 3669         if (prison_canseemount(cred, mp) != 0) {
 3670                 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
 3671                 strlcpy(sp->f_mntonname, "[restricted]",
 3672                     sizeof(sp->f_mntonname));
 3673                 return;
 3674         }
 3675         if (pr->pr_root->v_mount == mp) {
 3676                 /*
 3677                  * Clear current buffer data, so we are sure nothing from
 3678                  * the valid path left there.
 3679                  */
 3680                 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
 3681                 *sp->f_mntonname = '/';
 3682                 return;
 3683         }
 3684         /*
 3685          * If jail's chroot directory is set to "/" we should be able to see
 3686          * all mount-points from inside a jail.
 3687          */
 3688         if (strcmp(pr->pr_path, "/") == 0)
 3689                 return;
 3690         len = strlen(pr->pr_path);
 3691         strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath));
 3692         /*
 3693          * Clear current buffer data, so we are sure nothing from
 3694          * the valid path left there.
 3695          */
 3696         bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
 3697         if (*jpath == '\0') {
 3698                 /* Should never happen. */
 3699                 *sp->f_mntonname = '/';
 3700         } else {
 3701                 strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname));
 3702         }
 3703 }
 3704 
 3705 /*
 3706  * Check whether permission for a specific privilege is granted within jail.  We
 3707  * have a specific list of accepted privileges; the rest are denied.
 3708  */
 3709 int
 3710 prison_priv_check(struct ucred *cred, int priv)
 3711 {
 3712 
 3713         if (!jailed(cred))
 3714                 return (0);
 3715 
 3716 #ifdef VIMAGE
 3717         /*
 3718          * Privileges specific to prisons with a virtual network stack.
 3719          * There might be a duplicate entry here in case the privilege
 3720          * is only granted conditionally in the legacy jail case.
 3721          */
 3722         switch (priv) {
 3723 #ifdef notyet
 3724                 /*
 3725                  * NFS-specific privileges.
 3726                  */
 3727         case PRIV_NFS_DAEMON:
 3728         case PRIV_NFS_LOCKD:
 3729 #endif
 3730                 /*
 3731                  * Network stack privileges.
 3732                  */
 3733         case PRIV_NET_BRIDGE:
 3734         case PRIV_NET_GRE:
 3735         case PRIV_NET_BPF:
 3736         case PRIV_NET_RAW:              /* Dup, cond. in legacy jail case. */
 3737         case PRIV_NET_ROUTE:
 3738         case PRIV_NET_TAP:
 3739         case PRIV_NET_SETIFMTU:
 3740         case PRIV_NET_SETIFFLAGS:
 3741         case PRIV_NET_SETIFCAP:
 3742         case PRIV_NET_SETIFDESCR:
 3743         case PRIV_NET_SETIFNAME :
 3744         case PRIV_NET_SETIFMETRIC:
 3745         case PRIV_NET_SETIFPHYS:
 3746         case PRIV_NET_SETIFMAC:
 3747         case PRIV_NET_ADDMULTI:
 3748         case PRIV_NET_DELMULTI:
 3749         case PRIV_NET_HWIOCTL:
 3750         case PRIV_NET_SETLLADDR:
 3751         case PRIV_NET_ADDIFGROUP:
 3752         case PRIV_NET_DELIFGROUP:
 3753         case PRIV_NET_IFCREATE:
 3754         case PRIV_NET_IFDESTROY:
 3755         case PRIV_NET_ADDIFADDR:
 3756         case PRIV_NET_DELIFADDR:
 3757         case PRIV_NET_LAGG:
 3758         case PRIV_NET_GIF:
 3759         case PRIV_NET_SETIFVNET:
 3760         case PRIV_NET_SETIFFIB:
 3761 
 3762                 /*
 3763                  * 802.11-related privileges.
 3764                  */
 3765         case PRIV_NET80211_GETKEY:
 3766 #ifdef notyet
 3767         case PRIV_NET80211_MANAGE:              /* XXX-BZ discuss with sam@ */
 3768 #endif
 3769 
 3770 #ifdef notyet
 3771                 /*
 3772                  * AppleTalk privileges.
 3773                  */
 3774         case PRIV_NETATALK_RESERVEDPORT:
 3775 
 3776                 /*
 3777                  * ATM privileges.
 3778                  */
 3779         case PRIV_NETATM_CFG:
 3780         case PRIV_NETATM_ADD:
 3781         case PRIV_NETATM_DEL:
 3782         case PRIV_NETATM_SET:
 3783 
 3784                 /*
 3785                  * Bluetooth privileges.
 3786                  */
 3787         case PRIV_NETBLUETOOTH_RAW:
 3788 #endif
 3789 
 3790                 /*
 3791                  * Netgraph and netgraph module privileges.
 3792                  */
 3793         case PRIV_NETGRAPH_CONTROL:
 3794 #ifdef notyet
 3795         case PRIV_NETGRAPH_TTY:
 3796 #endif
 3797 
 3798                 /*
 3799                  * IPv4 and IPv6 privileges.
 3800                  */
 3801         case PRIV_NETINET_IPFW:
 3802         case PRIV_NETINET_DIVERT:
 3803         case PRIV_NETINET_PF:
 3804         case PRIV_NETINET_DUMMYNET:
 3805         case PRIV_NETINET_CARP:
 3806         case PRIV_NETINET_MROUTE:
 3807         case PRIV_NETINET_RAW:
 3808         case PRIV_NETINET_ADDRCTRL6:
 3809         case PRIV_NETINET_ND6:
 3810         case PRIV_NETINET_SCOPE6:
 3811         case PRIV_NETINET_ALIFETIME6:
 3812         case PRIV_NETINET_IPSEC:
 3813         case PRIV_NETINET_BINDANY:
 3814 
 3815 #ifdef notyet
 3816                 /*
 3817                  * IPX/SPX privileges.
 3818                  */
 3819         case PRIV_NETIPX_RESERVEDPORT:
 3820         case PRIV_NETIPX_RAW:
 3821 
 3822                 /*
 3823                  * NCP privileges.
 3824                  */
 3825         case PRIV_NETNCP:
 3826 
 3827                 /*
 3828                  * SMB privileges.
 3829                  */
 3830         case PRIV_NETSMB:
 3831 #endif
 3832 
 3833         /*
 3834          * No default: or deny here.
 3835          * In case of no permit fall through to next switch().
 3836          */
 3837                 if (cred->cr_prison->pr_flags & PR_VNET)
 3838                         return (0);
 3839         }
 3840 #endif /* VIMAGE */
 3841 
 3842         switch (priv) {
 3843 
 3844                 /*
 3845                  * Allow ktrace privileges for root in jail.
 3846                  */
 3847         case PRIV_KTRACE:
 3848 
 3849 #if 0
 3850                 /*
 3851                  * Allow jailed processes to configure audit identity and
 3852                  * submit audit records (login, etc).  In the future we may
 3853                  * want to further refine the relationship between audit and
 3854                  * jail.
 3855                  */
 3856         case PRIV_AUDIT_GETAUDIT:
 3857         case PRIV_AUDIT_SETAUDIT:
 3858         case PRIV_AUDIT_SUBMIT:
 3859 #endif
 3860 
 3861                 /*
 3862                  * Allow jailed processes to manipulate process UNIX
 3863                  * credentials in any way they see fit.
 3864                  */
 3865         case PRIV_CRED_SETUID:
 3866         case PRIV_CRED_SETEUID:
 3867         case PRIV_CRED_SETGID:
 3868         case PRIV_CRED_SETEGID:
 3869         case PRIV_CRED_SETGROUPS:
 3870         case PRIV_CRED_SETREUID:
 3871         case PRIV_CRED_SETREGID:
 3872         case PRIV_CRED_SETRESUID:
 3873         case PRIV_CRED_SETRESGID:
 3874 
 3875                 /*
 3876                  * Jail implements visibility constraints already, so allow
 3877                  * jailed root to override uid/gid-based constraints.
 3878                  */
 3879         case PRIV_SEEOTHERGIDS:
 3880         case PRIV_SEEOTHERUIDS:
 3881 
 3882                 /*
 3883                  * Jail implements inter-process debugging limits already, so
 3884                  * allow jailed root various debugging privileges.
 3885                  */
 3886         case PRIV_DEBUG_DIFFCRED:
 3887         case PRIV_DEBUG_SUGID:
 3888         case PRIV_DEBUG_UNPRIV:
 3889 
 3890                 /*
 3891                  * Allow jail to set various resource limits and login
 3892                  * properties, and for now, exceed process resource limits.
 3893                  */
 3894         case PRIV_PROC_LIMIT:
 3895         case PRIV_PROC_SETLOGIN:
 3896         case PRIV_PROC_SETRLIMIT:
 3897 
 3898                 /*
 3899                  * System V and POSIX IPC privileges are granted in jail.
 3900                  */
 3901         case PRIV_IPC_READ:
 3902         case PRIV_IPC_WRITE:
 3903         case PRIV_IPC_ADMIN:
 3904         case PRIV_IPC_MSGSIZE:
 3905         case PRIV_MQ_ADMIN:
 3906 
 3907                 /*
 3908                  * Jail operations within a jail work on child jails.
 3909                  */
 3910         case PRIV_JAIL_ATTACH:
 3911         case PRIV_JAIL_SET:
 3912         case PRIV_JAIL_REMOVE:
 3913 
 3914                 /*
 3915                  * Jail implements its own inter-process limits, so allow
 3916                  * root processes in jail to change scheduling on other
 3917                  * processes in the same jail.  Likewise for signalling.
 3918                  */
 3919         case PRIV_SCHED_DIFFCRED:
 3920         case PRIV_SCHED_CPUSET:
 3921         case PRIV_SIGNAL_DIFFCRED:
 3922         case PRIV_SIGNAL_SUGID:
 3923 
 3924                 /*
 3925                  * Allow jailed processes to write to sysctls marked as jail
 3926                  * writable.
 3927                  */
 3928         case PRIV_SYSCTL_WRITEJAIL:
 3929 
 3930                 /*
 3931                  * Allow root in jail to manage a variety of quota
 3932                  * properties.  These should likely be conditional on a
 3933                  * configuration option.
 3934                  */
 3935         case PRIV_VFS_GETQUOTA:
 3936         case PRIV_VFS_SETQUOTA:
 3937 
 3938                 /*
 3939                  * Since Jail relies on chroot() to implement file system
 3940                  * protections, grant many VFS privileges to root in jail.
 3941                  * Be careful to exclude mount-related and NFS-related
 3942                  * privileges.
 3943                  */
 3944         case PRIV_VFS_READ:
 3945         case PRIV_VFS_WRITE:
 3946         case PRIV_VFS_ADMIN:
 3947         case PRIV_VFS_EXEC:
 3948         case PRIV_VFS_LOOKUP:
 3949         case PRIV_VFS_BLOCKRESERVE:     /* XXXRW: Slightly surprising. */
 3950         case PRIV_VFS_CHFLAGS_DEV:
 3951         case PRIV_VFS_CHOWN:
 3952         case PRIV_VFS_CHROOT:
 3953         case PRIV_VFS_RETAINSUGID:
 3954         case PRIV_VFS_FCHROOT:
 3955         case PRIV_VFS_LINK:
 3956         case PRIV_VFS_SETGID:
 3957         case PRIV_VFS_STAT:
 3958         case PRIV_VFS_STICKYFILE:
 3959 
 3960                 /*
 3961                  * As in the non-jail case, non-root users are expected to be
 3962                  * able to read kernel/physical memory (provided /dev/[k]mem
 3963                  * exists in the jail and they have permission to access it).
 3964                  */
 3965         case PRIV_KMEM_READ:
 3966                 return (0);
 3967 
 3968                 /*
 3969                  * Depending on the global setting, allow privilege of
 3970                  * setting system flags.
 3971                  */
 3972         case PRIV_VFS_SYSFLAGS:
 3973                 if (cred->cr_prison->pr_allow & PR_ALLOW_CHFLAGS)
 3974                         return (0);
 3975                 else
 3976                         return (EPERM);
 3977 
 3978                 /*
 3979                  * Depending on the global setting, allow privilege of
 3980                  * mounting/unmounting file systems.
 3981                  */
 3982         case PRIV_VFS_MOUNT:
 3983         case PRIV_VFS_UNMOUNT:
 3984         case PRIV_VFS_MOUNT_NONUSER:
 3985         case PRIV_VFS_MOUNT_OWNER:
 3986                 if (cred->cr_prison->pr_allow & PR_ALLOW_MOUNT &&
 3987                     cred->cr_prison->pr_enforce_statfs < 2)
 3988                         return (0);
 3989                 else
 3990                         return (EPERM);
 3991 
 3992                 /*
 3993                  * Allow jailed root to bind reserved ports and reuse in-use
 3994                  * ports.
 3995                  */
 3996         case PRIV_NETINET_RESERVEDPORT:
 3997         case PRIV_NETINET_REUSEPORT:
 3998                 return (0);
 3999 
 4000                 /*
 4001                  * Allow jailed root to set certain IPv4/6 (option) headers.
 4002                  */
 4003         case PRIV_NETINET_SETHDROPTS:
 4004                 return (0);
 4005 
 4006                 /*
 4007                  * Conditionally allow creating raw sockets in jail.
 4008                  */
 4009         case PRIV_NETINET_RAW:
 4010                 if (cred->cr_prison->pr_allow & PR_ALLOW_RAW_SOCKETS)
 4011                         return (0);
 4012                 else
 4013                         return (EPERM);
 4014 
 4015                 /*
 4016                  * Since jail implements its own visibility limits on netstat
 4017                  * sysctls, allow getcred.  This allows identd to work in
 4018                  * jail.
 4019                  */
 4020         case PRIV_NETINET_GETCRED:
 4021                 return (0);
 4022 
 4023                 /*
 4024                  * Allow jailed root to set loginclass.
 4025                  */
 4026         case PRIV_PROC_SETLOGINCLASS:
 4027                 return (0);
 4028 
 4029         default:
 4030                 /*
 4031                  * In all remaining cases, deny the privilege request.  This
 4032                  * includes almost all network privileges, many system
 4033                  * configuration privileges.
 4034                  */
 4035                 return (EPERM);
 4036         }
 4037 }
 4038 
 4039 /*
 4040  * Return the part of pr2's name that is relative to pr1, or the whole name
 4041  * if it does not directly follow.
 4042  */
 4043 
 4044 char *
 4045 prison_name(struct prison *pr1, struct prison *pr2)
 4046 {
 4047         char *name;
 4048 
 4049         /* Jails see themselves as "" (if they see themselves at all). */
 4050         if (pr1 == pr2)
 4051                 return "";
 4052         name = pr2->pr_name;
 4053         if (prison_ischild(pr1, pr2)) {
 4054                 /*
 4055                  * pr1 isn't locked (and allprison_lock may not be either)
 4056                  * so its length can't be counted on.  But the number of dots
 4057                  * can be counted on - and counted.
 4058                  */
 4059                 for (; pr1 != &prison0; pr1 = pr1->pr_parent)
 4060                         name = strchr(name, '.') + 1;
 4061         }
 4062         return (name);
 4063 }
 4064 
 4065 /*
 4066  * Return the part of pr2's path that is relative to pr1, or the whole path
 4067  * if it does not directly follow.
 4068  */
 4069 static char *
 4070 prison_path(struct prison *pr1, struct prison *pr2)
 4071 {
 4072         char *path1, *path2;
 4073         int len1;
 4074 
 4075         path1 = pr1->pr_path;
 4076         path2 = pr2->pr_path;
 4077         if (!strcmp(path1, "/"))
 4078                 return (path2);
 4079         len1 = strlen(path1);
 4080         if (strncmp(path1, path2, len1))
 4081                 return (path2);
 4082         if (path2[len1] == '\0')
 4083                 return "/";
 4084         if (path2[len1] == '/')
 4085                 return (path2 + len1);
 4086         return (path2);
 4087 }
 4088 
 4089 
/*
 * Jail-related sysctls.
 *
 * This declares the "jail" node under _security; the SYSCTL_* declarations
 * that follow in this file hang off it as _security_jail.
 */
static SYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0,
    "Jails");
 4095 
/*
 * Handler for the security.jail "list" sysctl.
 *
 * Walks the prisons visited by FOREACH_PRISON_DESCENDANT() starting from the
 * caller's prison.  For each one that still has references (pr_ref != 0) it
 * copies out a struct xprison, followed by the prison's IPv4 addresses and
 * then its IPv6 addresses (each only when compiled in and non-empty).  The
 * path and name are reported relative to the caller's prison via
 * prison_path() and prison_name().
 *
 * Returns 0 on success or the first error returned by SYSCTL_OUT().
 */
static int
sysctl_jail_list(SYSCTL_HANDLER_ARGS)
{
        struct xprison *xp;
        struct prison *pr, *cpr;
#ifdef INET
        /* Scratch copy of the current prison's IPv4 list; ip4s is its capacity. */
        struct in_addr *ip4 = NULL;
        int ip4s = 0;
#endif
#ifdef INET6
        /* Scratch copy of the current prison's IPv6 list; ip6s is its capacity. */
        struct in6_addr *ip6 = NULL;
        int ip6s = 0;
#endif
        int descend, error;

        /* One xprison record is allocated up front and reused per prison. */
        xp = malloc(sizeof(*xp), M_TEMP, M_WAITOK);
        pr = req->td->td_ucred->cr_prison;
        error = 0;
        sx_slock(&allprison_lock);
        FOREACH_PRISON_DESCENDANT(pr, cpr, descend) {
#if defined(INET) || defined(INET6)
                /*
                 * Restart point: taken after a scratch buffer was grown with
                 * the prison mutex released, so the prison is locked and
                 * examined again from scratch.
                 */
 again:
#endif
                mtx_lock(&cpr->pr_mtx);
#ifdef INET
                if (cpr->pr_ip4s > 0) {
                        if (ip4s < cpr->pr_ip4s) {
                                /*
                                 * Buffer too small: remember the new size,
                                 * drop the mutex across the M_WAITOK
                                 * realloc() and retry.
                                 */
                                ip4s = cpr->pr_ip4s;
                                mtx_unlock(&cpr->pr_mtx);
                                ip4 = realloc(ip4, ip4s *
                                    sizeof(struct in_addr), M_TEMP, M_WAITOK);
                                goto again;
                        }
                        /* Snapshot the addresses while pr_mtx is held. */
                        bcopy(cpr->pr_ip4, ip4,
                            cpr->pr_ip4s * sizeof(struct in_addr));
                }
#endif
#ifdef INET6
                if (cpr->pr_ip6s > 0) {
                        if (ip6s < cpr->pr_ip6s) {
                                /* Same grow-and-retry scheme as for IPv4. */
                                ip6s = cpr->pr_ip6s;
                                mtx_unlock(&cpr->pr_mtx);
                                ip6 = realloc(ip6, ip6s *
                                    sizeof(struct in6_addr), M_TEMP, M_WAITOK);
                                goto again;
                        }
                        bcopy(cpr->pr_ip6, ip6,
                            cpr->pr_ip6s * sizeof(struct in6_addr));
                }
#endif
                /* Prisons with no remaining references are not reported. */
                if (cpr->pr_ref == 0) {
                        mtx_unlock(&cpr->pr_mtx);
                        continue;
                }
                /* Start from an all-zero record before filling it in. */
                bzero(xp, sizeof(*xp));
                xp->pr_version = XPRISON_VERSION;
                xp->pr_id = cpr->pr_id;
                /* A prison with no user references is reported as dying. */
                xp->pr_state = cpr->pr_uref > 0
                    ? PRISON_STATE_ALIVE : PRISON_STATE_DYING;
                strlcpy(xp->pr_path, prison_path(pr, cpr), sizeof(xp->pr_path));
                strlcpy(xp->pr_host, cpr->pr_hostname, sizeof(xp->pr_host));
                strlcpy(xp->pr_name, prison_name(pr, cpr), sizeof(xp->pr_name));
#ifdef INET
                xp->pr_ip4s = cpr->pr_ip4s;
#endif
#ifdef INET6
                xp->pr_ip6s = cpr->pr_ip6s;
#endif
                mtx_unlock(&cpr->pr_mtx);
                /*
                 * Everything needed was copied under pr_mtx; the copy-out
                 * below works only on the private xp/ip4/ip6 snapshots.
                 */
                error = SYSCTL_OUT(req, xp, sizeof(*xp));
                if (error)
                        break;
#ifdef INET
                if (xp->pr_ip4s > 0) {
                        error = SYSCTL_OUT(req, ip4,
                            xp->pr_ip4s * sizeof(struct in_addr));
                        if (error)
                                break;
                }
#endif
#ifdef INET6
                if (xp->pr_ip6s > 0) {
                        error = SYSCTL_OUT(req, ip6,
                            xp->pr_ip6s * sizeof(struct in6_addr));
                        if (error)
                                break;
                }
#endif
        }
        sx_sunlock(&allprison_lock);
        /* Release the scratch buffers on both the success and error paths. */
        free(xp, M_TEMP);
#ifdef INET
        free(ip4, M_TEMP);
#endif
#ifdef INET6
        free(ip6, M_TEMP);
#endif
        return (error);
}

SYSCTL_OID(_security_jail, OID_AUTO, list,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
    sysctl_jail_list, "S", "List of active jails");
 4199 
 4200 static int
 4201 sysctl_jail_jailed(SYSCTL_HANDLER_ARGS)
 4202 {
 4203         int error, injail;
 4204 
 4205         injail = jailed(req->td->td_ucred);
 4206         error = SYSCTL_OUT(req, &injail, sizeof(injail));
 4207 
 4208         return (error);
 4209 }
 4210 
 4211 SYSCTL_PROC(_security_jail, OID_AUTO, jailed,
 4212     CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
 4213     sysctl_jail_jailed, "I", "Process in jail?");
 4214 
 4215 static int
 4216 sysctl_jail_vnet(SYSCTL_HANDLER_ARGS)
 4217 {
 4218         int error, havevnet;
 4219 #ifdef VIMAGE
 4220         struct ucred *cred = req->td->td_ucred;
 4221 
 4222         havevnet = jailed(cred) && prison_owns_vnet(cred);
 4223 #else
 4224         havevnet = 0;
 4225 #endif
 4226         error = SYSCTL_OUT(req, &havevnet, sizeof(havevnet));
 4227 
 4228         return (error);
 4229 }
 4230 
 4231 SYSCTL_PROC(_security_jail, OID_AUTO, vnet,
 4232     CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
 4233     sysctl_jail_vnet, "I", "Jail owns VNET?");
 4234 
#if defined(INET) || defined(INET6)
/*
 * Read/write knob backed directly by the jail_max_af_ips variable.  It is
 * only declared when at least one of INET or INET6 is compiled in.
 */
SYSCTL_UINT(_security_jail, OID_AUTO, jail_max_af_ips, CTLFLAG_RW,
    &jail_max_af_ips, 0,
    "Number of IP addresses a jail may have at most per address family");
#endif
 4240 
 4241 /*
 4242  * Default parameters for jail(2) compatability.  For historical reasons,
 4243  * the sysctl names have varying similarity to the parameter names.  Prisons
 4244  * just see their own parameters, and can't change them.
 4245  */
 4246 static int
 4247 sysctl_jail_default_allow(SYSCTL_HANDLER_ARGS)
 4248 {
 4249         struct prison *pr;
 4250         int allow, error, i;
 4251 
 4252         pr = req->td->td_ucred->cr_prison;
 4253         allow = (pr == &prison0) ? jail_default_allow : pr->pr_allow;
 4254 
 4255         /* Get the current flag value, and convert it to a boolean. */
 4256         i = (allow & arg2) ? 1 : 0;
 4257         if (arg1 != NULL)
 4258                 i = !i;
 4259         error = sysctl_handle_int(oidp, &i, 0, req);
 4260         if (error || !req->newptr)
 4261                 return (error);
 4262         i = i ? arg2 : 0;
 4263         if (arg1 != NULL)
 4264                 i ^= arg2;
 4265         /*
 4266          * The sysctls don't have CTLFLAGS_PRISON, so assume prison0
 4267          * for writing.
 4268          */
 4269         mtx_lock(&prison0.pr_mtx);
 4270         jail_default_allow = (jail_default_allow & ~arg2) | i;
 4271         mtx_unlock(&prison0.pr_mtx);
 4272         return (0);
 4273 }
 4274 
/*
 * Boolean knobs under _security_jail, all served by
 * sysctl_jail_default_allow().  The second handler argument is the
 * PR_ALLOW_* bit the knob maps to.  The first is NULL for a knob that
 * reports the bit as-is and non-NULL (see socket_unixiproute_only) for a
 * knob that reports the inverse of the bit.
 */
SYSCTL_PROC(_security_jail, OID_AUTO, set_hostname_allowed,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    NULL, PR_ALLOW_SET_HOSTNAME, sysctl_jail_default_allow, "I",
    "Processes in jail can set their hostnames");
/* Inverted knob: 1 here means PR_ALLOW_SOCKET_AF is clear. */
SYSCTL_PROC(_security_jail, OID_AUTO, socket_unixiproute_only,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    (void *)1, PR_ALLOW_SOCKET_AF, sysctl_jail_default_allow, "I",
    "Processes in jail are limited to creating UNIX/IP/route sockets only");
SYSCTL_PROC(_security_jail, OID_AUTO, sysvipc_allowed,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    NULL, PR_ALLOW_SYSVIPC, sysctl_jail_default_allow, "I",
    "Processes in jail can use System V IPC primitives");
SYSCTL_PROC(_security_jail, OID_AUTO, allow_raw_sockets,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    NULL, PR_ALLOW_RAW_SOCKETS, sysctl_jail_default_allow, "I",
    "Prison root can create raw sockets");
SYSCTL_PROC(_security_jail, OID_AUTO, chflags_allowed,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    NULL, PR_ALLOW_CHFLAGS, sysctl_jail_default_allow, "I",
    "Processes in jail can alter system file flags");
/* Mount permissions: one general knob followed by per-filesystem knobs. */
SYSCTL_PROC(_security_jail, OID_AUTO, mount_allowed,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    NULL, PR_ALLOW_MOUNT, sysctl_jail_default_allow, "I",
    "Processes in jail can mount/unmount jail-friendly file systems");
SYSCTL_PROC(_security_jail, OID_AUTO, mount_devfs_allowed,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    NULL, PR_ALLOW_MOUNT_DEVFS, sysctl_jail_default_allow, "I",
    "Processes in jail can mount the devfs file system");
SYSCTL_PROC(_security_jail, OID_AUTO, mount_fdescfs_allowed,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    NULL, PR_ALLOW_MOUNT_FDESCFS, sysctl_jail_default_allow, "I",
    "Processes in jail can mount the fdescfs file system");
SYSCTL_PROC(_security_jail, OID_AUTO, mount_nullfs_allowed,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    NULL, PR_ALLOW_MOUNT_NULLFS, sysctl_jail_default_allow, "I",
    "Processes in jail can mount the nullfs file system");
SYSCTL_PROC(_security_jail, OID_AUTO, mount_procfs_allowed,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    NULL, PR_ALLOW_MOUNT_PROCFS, sysctl_jail_default_allow, "I",
    "Processes in jail can mount the procfs file system");
SYSCTL_PROC(_security_jail, OID_AUTO, mount_tmpfs_allowed,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    NULL, PR_ALLOW_MOUNT_TMPFS, sysctl_jail_default_allow, "I",
    "Processes in jail can mount the tmpfs file system");
SYSCTL_PROC(_security_jail, OID_AUTO, mount_zfs_allowed,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    NULL, PR_ALLOW_MOUNT_ZFS, sysctl_jail_default_allow, "I",
    "Processes in jail can mount the zfs file system");
 4323 
 4324 static int
 4325 sysctl_jail_default_level(SYSCTL_HANDLER_ARGS)
 4326 {
 4327         struct prison *pr;
 4328         int level, error;
 4329 
 4330         pr = req->td->td_ucred->cr_prison;
 4331         level = (pr == &prison0) ? *(int *)arg1 : *(int *)((char *)pr + arg2);
 4332         error = sysctl_handle_int(oidp, &level, 0, req);
 4333         if (error || !req->newptr)
 4334                 return (error);
 4335         *(int *)arg1 = level;
 4336         return (0);
 4337 }
 4338 
/*
 * Integer knobs served by sysctl_jail_default_level().  The first handler
 * argument is the address of the system-wide default and the second is the
 * offset of the matching field in struct prison.
 */
SYSCTL_PROC(_security_jail, OID_AUTO, enforce_statfs,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    &jail_default_enforce_statfs, offsetof(struct prison, pr_enforce_statfs),
    sysctl_jail_default_level, "I",
    "Processes in jail cannot see all mounted file systems");

/* Unlike enforce_statfs, this one is declared read-only (CTLFLAG_RD). */
SYSCTL_PROC(_security_jail, OID_AUTO, devfs_ruleset,
    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
    &jail_default_devfs_rsnum, offsetof(struct prison, pr_devfs_rsnum),
    sysctl_jail_default_level, "I",
    "Ruleset for the devfs filesystem in jail");
 4350 
/*
 * Nodes to describe jail parameters.  Maximum length of string parameters
 * is returned in the string itself, and the other parameters exist merely
 * to make themselves and their types known.
 *
 * This declares the "param" node itself, under _security_jail.
 */
SYSCTL_NODE(_security_jail, OID_AUTO, param, CTLFLAG_RW, 0,
    "Jail parameters");
 4358 
 4359 int
 4360 sysctl_jail_param(SYSCTL_HANDLER_ARGS)
 4361 {
 4362         int i;
 4363         long l;
 4364         size_t s;
 4365         char numbuf[12];
 4366 
 4367         switch (oidp->oid_kind & CTLTYPE)
 4368         {
 4369         case CTLTYPE_LONG:
 4370         case CTLTYPE_ULONG:
 4371                 l = 0;
 4372 #ifdef SCTL_MASK32
 4373                 if (!(req->flags & SCTL_MASK32))
 4374 #endif
 4375                         return (SYSCTL_OUT(req, &l, sizeof(l)));
 4376         case CTLTYPE_INT:
 4377         case CTLTYPE_UINT:
 4378                 i = 0;
 4379                 return (SYSCTL_OUT(req, &i, sizeof(i)));
 4380         case CTLTYPE_STRING:
 4381                 snprintf(numbuf, sizeof(numbuf), "%jd", (intmax_t)arg2);
 4382                 return
 4383                     (sysctl_handle_string(oidp, numbuf, sizeof(numbuf), req));
 4384         case CTLTYPE_STRUCT:
 4385                 s = (size_t)arg2;
 4386                 return (SYSCTL_OUT(req, &s, sizeof(s)));
 4387         }
 4388         return (0);
 4389 }
 4390 
/*
 * CTLFLAG_RDTUN in the following indicates jail parameters that can be set at
 * jail creation time but cannot be changed in an existing jail.
 */

/* Top-level parameters: identity, root path, security and lifetime. */
SYSCTL_JAIL_PARAM(, jid, CTLTYPE_INT | CTLFLAG_RDTUN, "I", "Jail ID");
SYSCTL_JAIL_PARAM(, parent, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail parent ID");
SYSCTL_JAIL_PARAM_STRING(, name, CTLFLAG_RW, MAXHOSTNAMELEN, "Jail name");
SYSCTL_JAIL_PARAM_STRING(, path, CTLFLAG_RDTUN, MAXPATHLEN, "Jail root path");
SYSCTL_JAIL_PARAM(, securelevel, CTLTYPE_INT | CTLFLAG_RW,
    "I", "Jail secure level");
SYSCTL_JAIL_PARAM(, osreldate, CTLTYPE_INT | CTLFLAG_RDTUN, "I",
    "Jail value for kern.osreldate and uname -K");
SYSCTL_JAIL_PARAM_STRING(, osrelease, CTLFLAG_RDTUN, OSRELEASELEN,
    "Jail value for kern.osrelease and uname -r");
SYSCTL_JAIL_PARAM(, enforce_statfs, CTLTYPE_INT | CTLFLAG_RW,
    "I", "Jail cannot see all mounted file systems");
SYSCTL_JAIL_PARAM(, devfs_ruleset, CTLTYPE_INT | CTLFLAG_RW,
    "I", "Ruleset for in-jail devfs mounts");
SYSCTL_JAIL_PARAM(, persist, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Jail persistence");
#ifdef VIMAGE
SYSCTL_JAIL_PARAM(, vnet, CTLTYPE_INT | CTLFLAG_RDTUN,
    "E,jailsys", "Virtual network stack");
#endif
SYSCTL_JAIL_PARAM(, dying, CTLTYPE_INT | CTLFLAG_RD,
    "B", "Jail is in the process of shutting down");

/* Child jail accounting: current count (read-only) and limit. */
SYSCTL_JAIL_PARAM_NODE(children, "Number of child jails");
SYSCTL_JAIL_PARAM(_children, cur, CTLTYPE_INT | CTLFLAG_RD,
    "I", "Current number of child jails");
SYSCTL_JAIL_PARAM(_children, max, CTLTYPE_INT | CTLFLAG_RW,
    "I", "Maximum number of child jails");

/* Host identity parameters. */
SYSCTL_JAIL_PARAM_SYS_NODE(host, CTLFLAG_RW, "Jail host info");
SYSCTL_JAIL_PARAM_STRING(_host, hostname, CTLFLAG_RW, MAXHOSTNAMELEN,
    "Jail hostname");
SYSCTL_JAIL_PARAM_STRING(_host, domainname, CTLFLAG_RW, MAXHOSTNAMELEN,
    "Jail NIS domainname");
SYSCTL_JAIL_PARAM_STRING(_host, hostuuid, CTLFLAG_RW, HOSTUUIDLEN,
    "Jail host UUID");
SYSCTL_JAIL_PARAM(_host, hostid, CTLTYPE_ULONG | CTLFLAG_RW,
    "LU", "Jail host ID");

/* cpuset association (read-only). */
SYSCTL_JAIL_PARAM_NODE(cpuset, "Jail cpuset");
SYSCTL_JAIL_PARAM(_cpuset, id, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail cpuset ID");

/* Per-address-family parameters, present only when the family is built in. */
#ifdef INET
SYSCTL_JAIL_PARAM_SYS_NODE(ip4, CTLFLAG_RDTUN,
    "Jail IPv4 address virtualization");
SYSCTL_JAIL_PARAM_STRUCT(_ip4, addr, CTLFLAG_RW, sizeof(struct in_addr),
    "S,in_addr,a", "Jail IPv4 addresses");
SYSCTL_JAIL_PARAM(_ip4, saddrsel, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Do (not) use IPv4 source address selection rather than the "
    "primary jail IPv4 address.");
#endif
#ifdef INET6
SYSCTL_JAIL_PARAM_SYS_NODE(ip6, CTLFLAG_RDTUN,
    "Jail IPv6 address virtualization");
SYSCTL_JAIL_PARAM_STRUCT(_ip6, addr, CTLFLAG_RW, sizeof(struct in6_addr),
    "S,in6_addr,a", "Jail IPv6 addresses");
SYSCTL_JAIL_PARAM(_ip6, saddrsel, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Do (not) use IPv6 source address selection rather than the "
    "primary jail IPv6 address.");
#endif

/* Boolean permission flags. */
SYSCTL_JAIL_PARAM_NODE(allow, "Jail permission flags");
SYSCTL_JAIL_PARAM(_allow, set_hostname, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Jail may set hostname");
SYSCTL_JAIL_PARAM(_allow, sysvipc, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Jail may use SYSV IPC");
SYSCTL_JAIL_PARAM(_allow, raw_sockets, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Jail may create raw sockets");
SYSCTL_JAIL_PARAM(_allow, chflags, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Jail may alter system file flags");
SYSCTL_JAIL_PARAM(_allow, quotas, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Jail may set file quotas");
SYSCTL_JAIL_PARAM(_allow, socket_af, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Jail may create sockets other than just UNIX/IPv4/IPv6/route");

/*
 * Mount permission flags.  The entry with an empty name is the general
 * mount/unmount permission; the rest are per file system.
 */
SYSCTL_JAIL_PARAM_SUBNODE(allow, mount, "Jail mount/unmount permission flags");
SYSCTL_JAIL_PARAM(_allow_mount, , CTLTYPE_INT | CTLFLAG_RW,
    "B", "Jail may mount/unmount jail-friendly file systems in general");
SYSCTL_JAIL_PARAM(_allow_mount, devfs, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Jail may mount the devfs file system");
SYSCTL_JAIL_PARAM(_allow_mount, fdescfs, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Jail may mount the fdescfs file system");
SYSCTL_JAIL_PARAM(_allow_mount, nullfs, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Jail may mount the nullfs file system");
SYSCTL_JAIL_PARAM(_allow_mount, procfs, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Jail may mount the procfs file system");
SYSCTL_JAIL_PARAM(_allow_mount, tmpfs, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Jail may mount the tmpfs file system");
SYSCTL_JAIL_PARAM(_allow_mount, zfs, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Jail may mount the zfs file system");
 4485 
 4486 #ifdef RACCT
 4487 void
 4488 prison_racct_foreach(void (*callback)(struct racct *racct,
 4489     void *arg2, void *arg3), void *arg2, void *arg3)
 4490 {
 4491         struct prison_racct *prr;
 4492 
 4493         ASSERT_RACCT_ENABLED();
 4494 
 4495         sx_slock(&allprison_lock);
 4496         LIST_FOREACH(prr, &allprison_racct, prr_next)
 4497                 (callback)(prr->prr_racct, arg2, arg3);
 4498         sx_sunlock(&allprison_lock);
 4499 }
 4500 
 4501 static struct prison_racct *
 4502 prison_racct_find_locked(const char *name)
 4503 {
 4504         struct prison_racct *prr;
 4505 
 4506         ASSERT_RACCT_ENABLED();
 4507         sx_assert(&allprison_lock, SA_XLOCKED);
 4508 
 4509         if (name[0] == '\0' || strlen(name) >= MAXHOSTNAMELEN)
 4510                 return (NULL);
 4511 
 4512         LIST_FOREACH(prr, &allprison_racct, prr_next) {
 4513                 if (strcmp(name, prr->prr_name) != 0)
 4514                         continue;
 4515 
 4516                 /* Found prison_racct with a matching name? */
 4517                 prison_racct_hold(prr);
 4518                 return (prr);
 4519         }
 4520 
 4521         /* Add new prison_racct. */
 4522         prr = malloc(sizeof(*prr), M_PRISON_RACCT, M_ZERO | M_WAITOK);
 4523         racct_create(&prr->prr_racct);
 4524 
 4525         strcpy(prr->prr_name, name);
 4526         refcount_init(&prr->prr_refcount, 1);
 4527         LIST_INSERT_HEAD(&allprison_racct, prr, prr_next);
 4528 
 4529         return (prr);
 4530 }
 4531 
 4532 struct prison_racct *
 4533 prison_racct_find(const char *name)
 4534 {
 4535         struct prison_racct *prr;
 4536 
 4537         ASSERT_RACCT_ENABLED();
 4538 
 4539         sx_xlock(&allprison_lock);
 4540         prr = prison_racct_find_locked(name);
 4541         sx_xunlock(&allprison_lock);
 4542         return (prr);
 4543 }
 4544 
 4545 void
 4546 prison_racct_hold(struct prison_racct *prr)
 4547 {
 4548 
 4549         ASSERT_RACCT_ENABLED();
 4550 
 4551         refcount_acquire(&prr->prr_refcount);
 4552 }
 4553 
 4554 static void
 4555 prison_racct_free_locked(struct prison_racct *prr)
 4556 {
 4557 
 4558         ASSERT_RACCT_ENABLED();
 4559         sx_assert(&allprison_lock, SA_XLOCKED);
 4560 
 4561         if (refcount_release(&prr->prr_refcount)) {
 4562                 racct_destroy(&prr->prr_racct);
 4563                 LIST_REMOVE(prr, prr_next);
 4564                 free(prr, M_PRISON_RACCT);
 4565         }
 4566 }
 4567 
 4568 void
 4569 prison_racct_free(struct prison_racct *prr)
 4570 {
 4571         int old;
 4572 
 4573         ASSERT_RACCT_ENABLED();
 4574         sx_assert(&allprison_lock, SA_UNLOCKED);
 4575 
 4576         old = prr->prr_refcount;
 4577         if (old > 1 && atomic_cmpset_int(&prr->prr_refcount, old, old - 1))
 4578                 return;
 4579 
 4580         sx_xlock(&allprison_lock);
 4581         prison_racct_free_locked(prr);
 4582         sx_xunlock(&allprison_lock);
 4583 }
 4584 
 4585 static void
 4586 prison_racct_attach(struct prison *pr)
 4587 {
 4588         struct prison_racct *prr;
 4589 
 4590         ASSERT_RACCT_ENABLED();
 4591         sx_assert(&allprison_lock, SA_XLOCKED);
 4592 
 4593         prr = prison_racct_find_locked(pr->pr_name);
 4594         KASSERT(prr != NULL, ("cannot find prison_racct"));
 4595 
 4596         pr->pr_prison_racct = prr;
 4597 }
 4598 
/*
 * Handle jail renaming.  From the racct point of view, renaming means
 * moving from one prison_racct to another.
 *
 * Nothing is done when pr's current prison_racct already carries pr's name.
 * Otherwise pr is attached to the prison_racct for its new name, usage is
 * transferred with racct_move(), every process in the system is passed to
 * racct_proc_ucred_changed(), and the reference on the old prison_racct is
 * dropped.
 */
static void
prison_racct_modify(struct prison *pr)
{
        struct proc *p;
        struct ucred *cred;
        struct prison_racct *oldprr;

        ASSERT_RACCT_ENABLED();

        /*
         * allproc_lock is taken (shared) before allprison_lock (exclusive);
         * both exit paths below release them accordingly.
         */
        sx_slock(&allproc_lock);
        sx_xlock(&allprison_lock);

        /* Name unchanged as far as racct is concerned: nothing to move. */
        if (strcmp(pr->pr_name, pr->pr_prison_racct->prr_name) == 0) {
                sx_xunlock(&allprison_lock);
                sx_sunlock(&allproc_lock);
                return;
        }

        /* Keep the old entry around until its usage has been moved. */
        oldprr = pr->pr_prison_racct;
        pr->pr_prison_racct = NULL;

        prison_racct_attach(pr);

        /*
         * Move resource utilisation records.
         */
        racct_move(pr->pr_prison_racct->prr_racct, oldprr->prr_racct);

        /*
         * Force rctl to reattach rules to processes.
         */
        FOREACH_PROC_IN_SYSTEM(p) {
                /* Hold the credential so it stays valid once p is unlocked. */
                PROC_LOCK(p);
                cred = crhold(p->p_ucred);
                PROC_UNLOCK(p);
                /* The same credential is passed as both arguments. */
                racct_proc_ucred_changed(p, cred, cred);
                crfree(cred);
        }

        sx_sunlock(&allproc_lock);
        /* Drop the old entry's reference while allprison_lock is still held. */
        prison_racct_free_locked(oldprr);
        sx_xunlock(&allprison_lock);
}
 4646 
 4647 static void
 4648 prison_racct_detach(struct prison *pr)
 4649 {
 4650 
 4651         ASSERT_RACCT_ENABLED();
 4652         sx_assert(&allprison_lock, SA_UNLOCKED);
 4653 
 4654         if (pr->pr_prison_racct == NULL)
 4655                 return;
 4656         prison_racct_free(pr->pr_prison_racct);
 4657         pr->pr_prison_racct = NULL;
 4658 }
 4659 #endif /* RACCT */
 4660 
 4661 #ifdef DDB
 4662 
 4663 static void
 4664 db_show_prison(struct prison *pr)
 4665 {
 4666         int fi;
 4667 #if defined(INET) || defined(INET6)
 4668         int ii;
 4669 #endif
 4670         unsigned jsf;
 4671 #ifdef INET6
 4672         char ip6buf[INET6_ADDRSTRLEN];
 4673 #endif
 4674 
 4675         db_printf("prison %p:\n", pr);
 4676         db_printf(" jid             = %d\n", pr->pr_id);
 4677         db_printf(" name            = %s\n", pr->pr_name);
 4678         db_printf(" parent          = %p\n", pr->pr_parent);
 4679         db_printf(" ref             = %d\n", pr->pr_ref);
 4680         db_printf(" uref            = %d\n", pr->pr_uref);
 4681         db_printf(" path            = %s\n", pr->pr_path);
 4682         db_printf(" cpuset          = %d\n", pr->pr_cpuset
 4683             ? pr->pr_cpuset->cs_id : -1);
 4684 #ifdef VIMAGE
 4685         db_printf(" vnet            = %p\n", pr->pr_vnet);
 4686 #endif
 4687         db_printf(" root            = %p\n", pr->pr_root);
 4688         db_printf(" securelevel     = %d\n", pr->pr_securelevel);
 4689         db_printf(" devfs_rsnum     = %d\n", pr->pr_devfs_rsnum);
 4690         db_printf(" children.max    = %d\n", pr->pr_childmax);
 4691         db_printf(" children.cur    = %d\n", pr->pr_childcount);
 4692         db_printf(" child           = %p\n", LIST_FIRST(&pr->pr_children));
 4693         db_printf(" sibling         = %p\n", LIST_NEXT(pr, pr_sibling));
 4694         db_printf(" flags           = 0x%x", pr->pr_flags);
 4695         for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]);
 4696             fi++)
 4697                 if (pr_flag_names[fi] != NULL && (pr->pr_flags & (1 << fi)))
 4698                         db_printf(" %s", pr_flag_names[fi]);
 4699         for (fi = 0; fi < sizeof(pr_flag_jailsys) / sizeof(pr_flag_jailsys[0]);
 4700             fi++) {
 4701                 jsf = pr->pr_flags &
 4702                     (pr_flag_jailsys[fi].disable | pr_flag_jailsys[fi].new);
 4703                 db_printf(" %-16s= %s\n", pr_flag_jailsys[fi].name,
 4704                     pr_flag_jailsys[fi].disable && 
 4705                       (jsf == pr_flag_jailsys[fi].disable) ? "disable"
 4706                     : (jsf == pr_flag_jailsys[fi].new) ? "new"
 4707                     : "inherit");
 4708         }
 4709         db_printf(" allow           = 0x%x", pr->pr_allow);
 4710         for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]);
 4711             fi++)
 4712                 if (pr_allow_names[fi] != NULL && (pr->pr_allow & (1 << fi)))
 4713                         db_printf(" %s", pr_allow_names[fi]);
 4714         db_printf("\n");
 4715         db_printf(" enforce_statfs  = %d\n", pr->pr_enforce_statfs);
 4716         db_printf(" host.hostname   = %s\n", pr->pr_hostname);
 4717         db_printf(" host.domainname = %s\n", pr->pr_domainname);
 4718         db_printf(" host.hostuuid   = %s\n", pr->pr_hostuuid);
 4719         db_printf(" host.hostid     = %lu\n", pr->pr_hostid);
 4720 #ifdef INET
 4721         db_printf(" ip4s            = %d\n", pr->pr_ip4s);
 4722         for (ii = 0; ii < pr->pr_ip4s; ii++)
 4723                 db_printf(" %s %s\n",
 4724                     ii == 0 ? "ip4.addr        =" : "                 ",
 4725                     inet_ntoa(pr->pr_ip4[ii]));
 4726 #endif
 4727 #ifdef INET6
 4728         db_printf(" ip6s            = %d\n", pr->pr_ip6s);
 4729         for (ii = 0; ii < pr->pr_ip6s; ii++)
 4730                 db_printf(" %s %s\n",
 4731                     ii == 0 ? "ip6.addr        =" : "                 ",
 4732                     ip6_sprintf(ip6buf, &pr->pr_ip6[ii]));
 4733 #endif
 4734 }
 4735 
 4736 DB_SHOW_COMMAND(prison, db_show_prison_command)
 4737 {
 4738         struct prison *pr;
 4739 
 4740         if (!have_addr) {
 4741                 /*
 4742                  * Show all prisons in the list, and prison0 which is not
 4743                  * listed.
 4744                  */
 4745                 db_show_prison(&prison0);
 4746                 if (!db_pager_quit) {
 4747                         TAILQ_FOREACH(pr, &allprison, pr_list) {
 4748                                 db_show_prison(pr);
 4749                                 if (db_pager_quit)
 4750                                         break;
 4751                         }
 4752                 }
 4753                 return;
 4754         }
 4755 
 4756         if (addr == 0)
 4757                 pr = &prison0;
 4758         else {
 4759                 /* Look for a prison with the ID and with references. */
 4760                 TAILQ_FOREACH(pr, &allprison, pr_list)
 4761                         if (pr->pr_id == addr && pr->pr_ref > 0)
 4762                                 break;
 4763                 if (pr == NULL)
 4764                         /* Look again, without requiring a reference. */
 4765                         TAILQ_FOREACH(pr, &allprison, pr_list)
 4766                                 if (pr->pr_id == addr)
 4767                                         break;
 4768                 if (pr == NULL)
 4769                         /* Assume address points to a valid prison. */
 4770                         pr = (struct prison *)addr;
 4771         }
 4772         db_show_prison(pr);
 4773 }
 4774 
 4775 #endif /* DDB */

Cache object: 6e0acbd07a3e3659193bcff832f734fd


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.