The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_jail.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1999 Poul-Henning Kamp.
    3  * Copyright (c) 2008 Bjoern A. Zeeb.
    4  * Copyright (c) 2009 James Gritton.
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   26  * SUCH DAMAGE.
   27  */
   28 
   29 #include <sys/cdefs.h>
   30 __FBSDID("$FreeBSD: releng/8.1/sys/kern/kern_jail.c 206336 2010-04-07 02:24:41Z nwhitehorn $");
   31 
   32 #include "opt_compat.h"
   33 #include "opt_ddb.h"
   34 #include "opt_inet.h"
   35 #include "opt_inet6.h"
   36 
   37 #include <sys/param.h>
   38 #include <sys/types.h>
   39 #include <sys/kernel.h>
   40 #include <sys/systm.h>
   41 #include <sys/errno.h>
   42 #include <sys/sysproto.h>
   43 #include <sys/malloc.h>
   44 #include <sys/osd.h>
   45 #include <sys/priv.h>
   46 #include <sys/proc.h>
   47 #include <sys/taskqueue.h>
   48 #include <sys/fcntl.h>
   49 #include <sys/jail.h>
   50 #include <sys/lock.h>
   51 #include <sys/mutex.h>
   52 #include <sys/sx.h>
   53 #include <sys/sysent.h>
   54 #include <sys/namei.h>
   55 #include <sys/mount.h>
   56 #include <sys/queue.h>
   57 #include <sys/socket.h>
   58 #include <sys/syscallsubr.h>
   59 #include <sys/sysctl.h>
   60 #include <sys/vnode.h>
   61 
   62 #include <net/if.h>
   63 #include <net/vnet.h>
   64 
   65 #include <netinet/in.h>
   66 
   67 #ifdef DDB
   68 #include <ddb/ddb.h>
   69 #ifdef INET6
   70 #include <netinet6/in6_var.h>
   71 #endif /* INET6 */
   72 #endif /* DDB */
   73 
   74 #include <security/mac/mac_framework.h>
   75 
      /* All-zero UUID string; used below as the initial prison0.pr_hostuuid. */
   76 #define DEFAULT_HOSTUUID        "00000000-0000-0000-0000-000000000000"
   77 
      /* malloc(9) type used for jail-related allocations made in this file. */
   78 MALLOC_DEFINE(M_PRISON, "prison", "Prison structures");
   79 
   /*
    * Keep struct prison prison0 and some code in kern_jail_set() readable.
    *
    * _PR_IP_SADDRSEL is the set of PR_IP*_SADDRSEL flags for the address
    * families compiled into this kernel (0 when neither INET nor INET6 is
    * configured).  The two-flag form is parenthesised so that the macro
    * behaves as a single operand wherever it is used (e.g. next to '&' or '~').
    */
   #ifdef INET
   #ifdef INET6
   #define _PR_IP_SADDRSEL (PR_IP4_SADDRSEL | PR_IP6_SADDRSEL)
   #else
   #define _PR_IP_SADDRSEL PR_IP4_SADDRSEL
   #endif
   #else /* !INET */
   #ifdef INET6
   #define _PR_IP_SADDRSEL PR_IP6_SADDRSEL
   #else
   #define _PR_IP_SADDRSEL 0
   #endif
   #endif
   94 
   95 /*
       * prison0 describes what is "real" about the system.
       *
       * It is initialised statically here; its mutex (pr_mtx) is set up by the
       * MTX_SYSINIT() that follows the initialiser.  PR_VNET is part of its
       * flags only when the kernel is built with VIMAGE.
       */
   96 struct prison prison0 = {
   97         .pr_id          = 0,
   98         .pr_name        = "",
   99         .pr_ref         = 1,
  100         .pr_uref        = 1,
  101         .pr_path        = "/",
  102         .pr_securelevel = -1,
  103         .pr_childmax    = JAIL_MAX,
  104         .pr_hostuuid    = DEFAULT_HOSTUUID,
  105         .pr_children    = LIST_HEAD_INITIALIZER(prison0.pr_children),
  106 #ifdef VIMAGE
  107         .pr_flags       = PR_HOST|PR_VNET|_PR_IP_SADDRSEL,
  108 #else
  109         .pr_flags       = PR_HOST|_PR_IP_SADDRSEL,
  110 #endif
  111         .pr_allow       = PR_ALLOW_ALL,
  112 };
  113 MTX_SYSINIT(prison0, &prison0.pr_mtx, "jail mutex", MTX_DEF);
  114 
  115 /*
       * allprison and lastprid are protected by allprison_lock.
       *
       * allprison_lock is an sx(9) lock initialised by SX_SYSINIT(); allprison
       * is a statically initialised tail queue head and lastprid starts at 0.
       */
  116 struct  sx allprison_lock;
  117 SX_SYSINIT(allprison_lock, &allprison_lock, "allprison");
  118 struct  prisonlist allprison = TAILQ_HEAD_INITIALIZER(allprison);
  119 int     lastprid = 0;
  120 
      /*
       * Prototypes for this file's static helpers.  The IPv4 and IPv6 helpers
       * are only declared when INET / INET6 respectively are configured.
       */
  121 static int do_jail_attach(struct thread *td, struct prison *pr);
  122 static void prison_complete(void *context, int pending);
  123 static void prison_deref(struct prison *pr, int flags);
  124 static char *prison_path(struct prison *pr1, struct prison *pr2);
  125 static void prison_remove_one(struct prison *pr);
  126 #ifdef INET
  127 static int _prison_check_ip4(struct prison *pr, struct in_addr *ia);
  128 static int prison_restrict_ip4(struct prison *pr, struct in_addr *newip4);
  129 #endif
  130 #ifdef INET6
  131 static int _prison_check_ip6(struct prison *pr, struct in6_addr *ia6);
  132 static int prison_restrict_ip6(struct prison *pr, struct in6_addr *newip6);
  133 #endif
  134 
  135 /* Flags for prison_deref (its "flags" argument); each value is a distinct bit. */
  136 #define PD_DEREF        0x01
  137 #define PD_DEUREF       0x02
  138 #define PD_LOCKED       0x04
  139 #define PD_LIST_SLOCKED 0x08
  140 #define PD_LIST_XLOCKED 0x10
  141 
  142 /*
  143  * Parameter names corresponding to PR_* flag values
       *
       * The array index is the bit position of the flag: kern_jail_set() maps
       * entry fi to the flag bit (1 << fi) and skips slots whose pr_flag_names
       * entry is NULL.  pr_flag_nonames holds the negated spelling ("no...") of
       * the parameter at the same index.
  144  */
  145 static char *pr_flag_names[] = {
  146         [0] = "persist",
  147 #ifdef INET
  148         [7] = "ip4.saddrsel",
  149 #endif
  150 #ifdef INET6
  151         [8] = "ip6.saddrsel",
  152 #endif
  153 };
  154 
  155 static char *pr_flag_nonames[] = {
  156         [0] = "nopersist",
  157 #ifdef INET
  158         [7] = "ip4.nosaddrsel",
  159 #endif
  160 #ifdef INET6
  161         [8] = "ip6.nosaddrsel",
  162 #endif
  163 };
  164 
      /*
       * Parameters that take a JAIL_SYS_* integer value.  kern_jail_set() reads
       * each "name" as an int option and translates it into PR_* flag bits:
       * JAIL_SYS_DISABLE sets the "disable" bits (and is rejected with EINVAL
       * when "disable" is 0), JAIL_SYS_NEW sets the "new" bits, and
       * JAIL_SYS_INHERIT sets neither.  In every accepted case both bit sets
       * are recorded as changed.
       */
  165 struct jailsys_flags {
  166         const char      *name;          /* parameter name looked up */
  167         unsigned         disable;       /* bits set for JAIL_SYS_DISABLE */
  168         unsigned         new;           /* bits set for JAIL_SYS_NEW */
  169 } pr_flag_jailsys[] = {
  170         { "host", 0, PR_HOST },
  171 #ifdef VIMAGE
  172         { "vnet", 0, PR_VNET },
  173 #endif
  174 #ifdef INET
  175         { "ip4", PR_IP4_USER | PR_IP4_DISABLE, PR_IP4_USER },
  176 #endif
  177 #ifdef INET6
  178         { "ip6", PR_IP6_USER | PR_IP6_DISABLE, PR_IP6_USER },
  179 #endif
  180 };
  181 
      /*
       * Names of the allow.* permission parameters.  Entry fi corresponds to
       * bit (1 << fi) of an allow mask (see the loops in kern_jail() and
       * kern_jail_set()); pr_allow_nonames holds the "no" spelling of the
       * parameter at the same index, so the two arrays must stay in step.
       */
  182 static char *pr_allow_names[] = {
  183         "allow.set_hostname",
  184         "allow.sysvipc",
  185         "allow.raw_sockets",
  186         "allow.chflags",
  187         "allow.mount",
  188         "allow.quotas",
  189         "allow.socket_af",
  190 };
  191 
  192 static char *pr_allow_nonames[] = {
  193         "allow.noset_hostname",
  194         "allow.nosysvipc",
  195         "allow.noraw_sockets",
  196         "allow.nochflags",
  197         "allow.nomount",
  198         "allow.noquotas",
  199         "allow.nosocket_af",
  200 };
  201 
      /*
       * Defaults that kern_jail() passes along when the calling credential is
       * not itself jailed: the allow.* mask and the enforce_statfs value.
       * jail_max_af_ips is the per-address-family limit on the number of
       * addresses checked by kern_jail() and kern_jail_set().
       */
  202 #define JAIL_DEFAULT_ALLOW              PR_ALLOW_SET_HOSTNAME
  203 #define JAIL_DEFAULT_ENFORCE_STATFS     2
  204 static unsigned jail_default_allow = JAIL_DEFAULT_ALLOW;
  205 static int jail_default_enforce_statfs = JAIL_DEFAULT_ENFORCE_STATFS;
  206 #if defined(INET) || defined(INET6)
  207 static unsigned jail_max_af_ips = 255;
  208 #endif
  209 
  210 #ifdef INET
  /*
   * qsort(3) comparator for two struct in_addr values.
   *
   * The addresses are stored in network byte order; they are converted to
   * host byte order before comparing so that the resulting order is the
   * numeric order of the addresses.  Returns -1, 0 or 1.
   */
  static int
  qcmp_v4(const void *ip1, const void *ip2)
  {
          const struct in_addr *lhs, *rhs;
          in_addr_t a, b;

          lhs = (const struct in_addr *)ip1;
          rhs = (const struct in_addr *)ip2;
          a = ntohl(lhs->s_addr);
          b = ntohl(rhs->s_addr);

          /*
           * Build the result from the two relational tests rather than from
           * a subtraction: the difference of two 32-bit unsigned values does
           * not fit in an int.
           */
          return ((a > b) - (a < b));
  }
  235 #endif
  236 
  237 #ifdef INET6
  /*
   * qsort(3) comparator for two struct in6_addr values.
   *
   * Compares the addresses byte by byte from the first byte on and reports
   * the relation of the first differing pair.  Returns -1, 0 or 1.
   */
  static int
  qcmp_v6(const void *ip1, const void *ip2)
  {
          const uint8_t *lhs, *rhs;
          size_t idx;

          lhs = ((const struct in6_addr *)ip1)->s6_addr;
          rhs = ((const struct in6_addr *)ip2)->s6_addr;

          for (idx = 0; idx < sizeof(struct in6_addr); idx++) {
                  if (lhs[idx] != rhs[idx])
                          return (lhs[idx] > rhs[idx] ? 1 : -1);
          }
          return (0);
  }
  256 #endif
  257 
  258 /*
  259  * struct jail_args {
  260  *      struct jail *jail;
  261  * };
  262  */
  263 int
  264 jail(struct thread *td, struct jail_args *uap)
  265 {
  266         uint32_t version;
  267         int error;
  268         struct jail j;
  269 
  270         error = copyin(uap->jail, &version, sizeof(uint32_t));
  271         if (error)
  272                 return (error);
  273 
  274         switch (version) {
  275         case 0:
  276         {
  277                 struct jail_v0 j0;
  278 
  279                 /* FreeBSD single IPv4 jails. */
  280                 bzero(&j, sizeof(struct jail));
  281                 error = copyin(uap->jail, &j0, sizeof(struct jail_v0));
  282                 if (error)
  283                         return (error);
  284                 j.version = j0.version;
  285                 j.path = j0.path;
  286                 j.hostname = j0.hostname;
  287                 j.ip4s = j0.ip_number;
  288                 break;
  289         }
  290 
  291         case 1:
  292                 /*
  293                  * Version 1 was used by multi-IPv4 jail implementations
  294                  * that never made it into the official kernel.
  295                  */
  296                 return (EINVAL);
  297 
  298         case 2: /* JAIL_API_VERSION */
  299                 /* FreeBSD multi-IPv4/IPv6,noIP jails. */
  300                 error = copyin(uap->jail, &j, sizeof(struct jail));
  301                 if (error)
  302                         return (error);
  303                 break;
  304 
  305         default:
  306                 /* Sci-Fi jails are not supported, sorry. */
  307                 return (EINVAL);
  308         }
  309         return (kern_jail(td, &j));
  310 }
  311 
      /*
       * Create and attach to a jail described by a struct jail.
       *
       * The request is translated into the name/value iovec list consumed by
       * kern_jail_set(), which is then called with JAIL_CREATE | JAIL_ATTACH.
       * Parameters occupy consecutive pairs of optiov slots (name, then value).
       * Everything copied in from userland (path, hostname, jail name and the
       * address arrays) lives in one M_TEMP buffer whose base is u_path; it is
       * freed on every exit path taken after the allocation.
       *
       * Returns 0 on success or an errno value: EINVAL when an address count
       * exceeds jail_max_af_ips or names a family not compiled in, a
       * copyin()/copyinstr() error, or whatever kern_jail_set() returns.
       */
  312 int
  313 kern_jail(struct thread *td, struct jail *j)
  314 {
              /*
               * Two iovecs per parameter: one per allow.* name, plus
               * enforce_statfs, path, host.hostname and name (the "4"), plus
               * one address list per configured family.
               */
  315         struct iovec optiov[2 * (4
  316                             + sizeof(pr_allow_names) / sizeof(pr_allow_names[0])
  317 #ifdef INET
  318                             + 1
  319 #endif
  320 #ifdef INET6
  321                             + 1
  322 #endif
  323                             )];
  324         struct uio opt;
  325         char *u_path, *u_hostname, *u_name;
  326 #ifdef INET
  327         uint32_t ip4s;
  328         struct in_addr *u_ip4;
  329 #endif
  330 #ifdef INET6
  331         struct in6_addr *u_ip6;
  332 #endif
  333         size_t tmplen;
  334         int error, enforce_statfs, fi;
  335 
  336         bzero(&optiov, sizeof(optiov));
  337         opt.uio_iov = optiov;
  338         opt.uio_iovcnt = 0;
  339         opt.uio_offset = -1;
  340         opt.uio_resid = -1;
  341         opt.uio_segflg = UIO_SYSSPACE;
  342         opt.uio_rw = UIO_READ;
  343         opt.uio_td = td;
  344 
  345         /* Set permissions for top-level jails from sysctls. */
  346         if (!jailed(td->td_ucred)) {
                      /*
                       * Each allow.* parameter is passed by name only (the
                       * positive or the "no" spelling, chosen by the matching
                       * bit of jail_default_allow); its value slot keeps the
                       * zeroed iovec left by the bzero() above.
                       */
  347                 for (fi = 0; fi < sizeof(pr_allow_names) /
  348                      sizeof(pr_allow_names[0]); fi++) {
  349                         optiov[opt.uio_iovcnt].iov_base =
  350                             (jail_default_allow & (1 << fi))
  351                             ? pr_allow_names[fi] : pr_allow_nonames[fi];
  352                         optiov[opt.uio_iovcnt].iov_len =
  353                             strlen(optiov[opt.uio_iovcnt].iov_base) + 1;
  354                         opt.uio_iovcnt += 2;
  355                 }
  356                 optiov[opt.uio_iovcnt].iov_base = "enforce_statfs";
  357                 optiov[opt.uio_iovcnt].iov_len = sizeof("enforce_statfs");
  358                 opt.uio_iovcnt++;
  359                 enforce_statfs = jail_default_enforce_statfs;
  360                 optiov[opt.uio_iovcnt].iov_base = &enforce_statfs;
  361                 optiov[opt.uio_iovcnt].iov_len = sizeof(enforce_statfs);
  362                 opt.uio_iovcnt++;
  363         }
  364 
              /*
               * Size the single scratch buffer: path, hostname and jail name,
               * followed by the IPv4 and then the IPv6 address arrays.  The
               * address counts are validated before anything is allocated.
               */
  365         tmplen = MAXPATHLEN + MAXHOSTNAMELEN + MAXHOSTNAMELEN;
  366 #ifdef INET
              /* A version 0 request has exactly one address, held in j->ip4s. */
  367         ip4s = (j->version == 0) ? 1 : j->ip4s;
  368         if (ip4s > jail_max_af_ips)
  369                 return (EINVAL);
  370         tmplen += ip4s * sizeof(struct in_addr);
  371 #else
  372         if (j->ip4s > 0)
  373                 return (EINVAL);
  374 #endif
  375 #ifdef INET6
  376         if (j->ip6s > jail_max_af_ips)
  377                 return (EINVAL);
  378         tmplen += j->ip6s * sizeof(struct in6_addr);
  379 #else
  380         if (j->ip6s > 0)
  381                 return (EINVAL);
  382 #endif
  383         u_path = malloc(tmplen, M_TEMP, M_WAITOK);
  384         u_hostname = u_path + MAXPATHLEN;
  385         u_name = u_hostname + MAXHOSTNAMELEN;
  386 #ifdef INET
  387         u_ip4 = (struct in_addr *)(u_name + MAXHOSTNAMELEN);
  388 #endif
  389 #ifdef INET6
  390 #ifdef INET
  391         u_ip6 = (struct in6_addr *)(u_ip4 + ip4s);
  392 #else
  393         u_ip6 = (struct in6_addr *)(u_name + MAXHOSTNAMELEN);
  394 #endif
  395 #endif
              /* "path": copyinstr() stores the copied length in the value slot. */
  396         optiov[opt.uio_iovcnt].iov_base = "path";
  397         optiov[opt.uio_iovcnt].iov_len = sizeof("path");
  398         opt.uio_iovcnt++;
  399         optiov[opt.uio_iovcnt].iov_base = u_path;
  400         error = copyinstr(j->path, u_path, MAXPATHLEN,
  401             &optiov[opt.uio_iovcnt].iov_len);
  402         if (error) {
  403                 free(u_path, M_TEMP);
  404                 return (error);
  405         }
  406         opt.uio_iovcnt++;
  407         optiov[opt.uio_iovcnt].iov_base = "host.hostname";
  408         optiov[opt.uio_iovcnt].iov_len = sizeof("host.hostname");
  409         opt.uio_iovcnt++;
  410         optiov[opt.uio_iovcnt].iov_base = u_hostname;
  411         error = copyinstr(j->hostname, u_hostname, MAXHOSTNAMELEN,
  412             &optiov[opt.uio_iovcnt].iov_len);
  413         if (error) {
  414                 free(u_path, M_TEMP);
  415                 return (error);
  416         }
  417         opt.uio_iovcnt++;
              /* The jail name is optional; it is passed only when supplied. */
  418         if (j->jailname != NULL) {
  419                 optiov[opt.uio_iovcnt].iov_base = "name";
  420                 optiov[opt.uio_iovcnt].iov_len = sizeof("name");
  421                 opt.uio_iovcnt++;
  422                 optiov[opt.uio_iovcnt].iov_base = u_name;
  423                 error = copyinstr(j->jailname, u_name, MAXHOSTNAMELEN,
  424                     &optiov[opt.uio_iovcnt].iov_len);
  425                 if (error) {
  426                         free(u_path, M_TEMP);
  427                         return (error);
  428                 }
  429                 opt.uio_iovcnt++;
  430         }
  431 #ifdef INET
  432         optiov[opt.uio_iovcnt].iov_base = "ip4.addr";
  433         optiov[opt.uio_iovcnt].iov_len = sizeof("ip4.addr");
  434         opt.uio_iovcnt++;
  435         optiov[opt.uio_iovcnt].iov_base = u_ip4;
  436         optiov[opt.uio_iovcnt].iov_len = ip4s * sizeof(struct in_addr);
              /*
               * Version 0: j->ip4s is the address itself and is stored as is.
               * Later versions: j->ip4 points at a user array of ip4s entries.
               */
  437         if (j->version == 0)
  438                 u_ip4->s_addr = j->ip4s;
  439         else {
  440                 error = copyin(j->ip4, u_ip4, optiov[opt.uio_iovcnt].iov_len);
  441                 if (error) {
  442                         free(u_path, M_TEMP);
  443                         return (error);
  444                 }
  445         }
  446         opt.uio_iovcnt++;
  447 #endif
  448 #ifdef INET6
  449         optiov[opt.uio_iovcnt].iov_base = "ip6.addr";
  450         optiov[opt.uio_iovcnt].iov_len = sizeof("ip6.addr");
  451         opt.uio_iovcnt++;
  452         optiov[opt.uio_iovcnt].iov_base = u_ip6;
  453         optiov[opt.uio_iovcnt].iov_len = j->ip6s * sizeof(struct in6_addr);
  454         error = copyin(j->ip6, u_ip6, optiov[opt.uio_iovcnt].iov_len);
  455         if (error) {
  456                 free(u_path, M_TEMP);
  457                 return (error);
  458         }
  459         opt.uio_iovcnt++;
  460 #endif
              /* Guard against the optiov[] sizing above falling out of date. */
  461         KASSERT(opt.uio_iovcnt <= sizeof(optiov) / sizeof(optiov[0]),
  462             ("kern_jail: too many iovecs (%d)", opt.uio_iovcnt));
  463         error = kern_jail_set(td, &opt, JAIL_CREATE | JAIL_ATTACH);
  464         free(u_path, M_TEMP);
  465         return (error);
  466 }
  467 
  468 
  469 /*
  470  * struct jail_set_args {
  471  *      struct iovec *iovp;
  472  *      unsigned int iovcnt;
  473  *      int flags;
  474  * };
  475  */
  476 int
  477 jail_set(struct thread *td, struct jail_set_args *uap)
  478 {
  479         struct uio *auio;
  480         int error;
  481 
  482         /* Check that we have an even number of iovecs. */
  483         if (uap->iovcnt & 1)
  484                 return (EINVAL);
  485 
  486         error = copyinuio(uap->iovp, uap->iovcnt, &auio);
  487         if (error)
  488                 return (error);
  489         error = kern_jail_set(td, auio, uap->flags);
  490         free(auio, M_IOV);
  491         return (error);
  492 }
  493 
  494 int
  495 kern_jail_set(struct thread *td, struct uio *optuio, int flags)
  496 {
  497         struct nameidata nd;
  498 #ifdef INET
  499         struct in_addr *ip4;
  500 #endif
  501 #ifdef INET6
  502         struct in6_addr *ip6;
  503 #endif
  504         struct vfsopt *opt;
  505         struct vfsoptlist *opts;
  506         struct prison *pr, *deadpr, *mypr, *ppr, *tpr;
  507         struct vnode *root;
  508         char *domain, *errmsg, *host, *name, *namelc, *p, *path, *uuid;
  509 #if defined(INET) || defined(INET6)
  510         struct prison *tppr;
  511         void *op;
  512 #endif
  513         unsigned long hid;
  514         size_t namelen, onamelen;
  515         int created, cuflags, descend, enforce, error, errmsg_len, errmsg_pos;
  516         int gotchildmax, gotenforce, gothid, gotslevel;
  517         int fi, jid, jsys, len, level;
  518         int childmax, slevel, vfslocked;
  519 #if defined(INET) || defined(INET6)
  520         int ii, ij;
  521 #endif
  522 #ifdef INET
  523         int ip4s, redo_ip4;
  524 #endif
  525 #ifdef INET6
  526         int ip6s, redo_ip6;
  527 #endif
  528         unsigned pr_flags, ch_flags;
  529         unsigned pr_allow, ch_allow, tallow;
  530         char numbuf[12];
  531 
  532         error = priv_check(td, PRIV_JAIL_SET);
  533         if (!error && (flags & JAIL_ATTACH))
  534                 error = priv_check(td, PRIV_JAIL_ATTACH);
  535         if (error)
  536                 return (error);
  537         mypr = ppr = td->td_ucred->cr_prison;
  538         if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0)
  539                 return (EPERM);
  540         if (flags & ~JAIL_SET_MASK)
  541                 return (EINVAL);
  542 
  543         /*
  544          * Check all the parameters before committing to anything.  Not all
  545          * errors can be caught early, but we may as well try.  Also, this
  546          * takes care of some expensive stuff (path lookup) before getting
  547          * the allprison lock.
  548          *
  549          * XXX Jails are not filesystems, and jail parameters are not mount
  550          *     options.  But it makes more sense to re-use the vfsopt code
  551          *     than duplicate it under a different name.
  552          */
  553         error = vfs_buildopts(optuio, &opts);
  554         if (error)
  555                 return (error);
  556 #ifdef INET
  557         ip4 = NULL;
  558 #endif
  559 #ifdef INET6
  560         ip6 = NULL;
  561 #endif
  562 
  563         error = vfs_copyopt(opts, "jid", &jid, sizeof(jid));
  564         if (error == ENOENT)
  565                 jid = 0;
  566         else if (error != 0)
  567                 goto done_free;
  568 
  569         error = vfs_copyopt(opts, "securelevel", &slevel, sizeof(slevel));
  570         if (error == ENOENT)
  571                 gotslevel = 0;
  572         else if (error != 0)
  573                 goto done_free;
  574         else
  575                 gotslevel = 1;
  576 
  577         error =
  578             vfs_copyopt(opts, "children.max", &childmax, sizeof(childmax));
  579         if (error == ENOENT)
  580                 gotchildmax = 0;
  581         else if (error != 0)
  582                 goto done_free;
  583         else
  584                 gotchildmax = 1;
  585 
  586         error = vfs_copyopt(opts, "enforce_statfs", &enforce, sizeof(enforce));
  587         gotenforce = (error == 0);
  588         if (gotenforce) {
  589                 if (enforce < 0 || enforce > 2)
  590                         return (EINVAL);
  591         } else if (error != ENOENT)
  592                 goto done_free;
  593 
  594         pr_flags = ch_flags = 0;
  595         for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]);
  596             fi++) {
  597                 if (pr_flag_names[fi] == NULL)
  598                         continue;
  599                 vfs_flagopt(opts, pr_flag_names[fi], &pr_flags, 1 << fi);
  600                 vfs_flagopt(opts, pr_flag_nonames[fi], &ch_flags, 1 << fi);
  601         }
  602         ch_flags |= pr_flags;
  603         for (fi = 0; fi < sizeof(pr_flag_jailsys) / sizeof(pr_flag_jailsys[0]);
  604             fi++) {
  605                 error = vfs_copyopt(opts, pr_flag_jailsys[fi].name, &jsys,
  606                     sizeof(jsys));
  607                 if (error == ENOENT)
  608                         continue;
  609                 if (error != 0)
  610                         goto done_free;
  611                 switch (jsys) {
  612                 case JAIL_SYS_DISABLE:
  613                         if (!pr_flag_jailsys[fi].disable) {
  614                                 error = EINVAL;
  615                                 goto done_free;
  616                         }
  617                         pr_flags |= pr_flag_jailsys[fi].disable;
  618                         break;
  619                 case JAIL_SYS_NEW:
  620                         pr_flags |= pr_flag_jailsys[fi].new;
  621                         break;
  622                 case JAIL_SYS_INHERIT:
  623                         break;
  624                 default:
  625                         error = EINVAL;
  626                         goto done_free;
  627                 }
  628                 ch_flags |=
  629                     pr_flag_jailsys[fi].new | pr_flag_jailsys[fi].disable;
  630         }
  631         if ((flags & (JAIL_CREATE | JAIL_UPDATE | JAIL_ATTACH)) == JAIL_CREATE
  632             && !(pr_flags & PR_PERSIST)) {
  633                 error = EINVAL;
  634                 vfs_opterror(opts, "new jail must persist or attach");
  635                 goto done_errmsg;
  636         }
  637 #ifdef VIMAGE
  638         if ((flags & JAIL_UPDATE) && (ch_flags & PR_VNET)) {
  639                 error = EINVAL;
  640                 vfs_opterror(opts, "vnet cannot be changed after creation");
  641                 goto done_errmsg;
  642         }
  643 #endif
  644 #ifdef INET
  645         if ((flags & JAIL_UPDATE) && (ch_flags & PR_IP4_USER)) {
  646                 error = EINVAL;
  647                 vfs_opterror(opts, "ip4 cannot be changed after creation");
  648                 goto done_errmsg;
  649         }
  650 #endif
  651 #ifdef INET6
  652         if ((flags & JAIL_UPDATE) && (ch_flags & PR_IP6_USER)) {
  653                 error = EINVAL;
  654                 vfs_opterror(opts, "ip6 cannot be changed after creation");
  655                 goto done_errmsg;
  656         }
  657 #endif
  658 
  659         pr_allow = ch_allow = 0;
  660         for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]);
  661             fi++) {
  662                 vfs_flagopt(opts, pr_allow_names[fi], &pr_allow, 1 << fi);
  663                 vfs_flagopt(opts, pr_allow_nonames[fi], &ch_allow, 1 << fi);
  664         }
  665         ch_allow |= pr_allow;
  666 
  667         error = vfs_getopt(opts, "name", (void **)&name, &len);
  668         if (error == ENOENT)
  669                 name = NULL;
  670         else if (error != 0)
  671                 goto done_free;
  672         else {
  673                 if (len == 0 || name[len - 1] != '\0') {
  674                         error = EINVAL;
  675                         goto done_free;
  676                 }
  677                 if (len > MAXHOSTNAMELEN) {
  678                         error = ENAMETOOLONG;
  679                         goto done_free;
  680                 }
  681         }
  682 
  683         error = vfs_getopt(opts, "host.hostname", (void **)&host, &len);
  684         if (error == ENOENT)
  685                 host = NULL;
  686         else if (error != 0)
  687                 goto done_free;
  688         else {
  689                 ch_flags |= PR_HOST;
  690                 pr_flags |= PR_HOST;
  691                 if (len == 0 || host[len - 1] != '\0') {
  692                         error = EINVAL;
  693                         goto done_free;
  694                 }
  695                 if (len > MAXHOSTNAMELEN) {
  696                         error = ENAMETOOLONG;
  697                         goto done_free;
  698                 }
  699         }
  700 
  701         error = vfs_getopt(opts, "host.domainname", (void **)&domain, &len);
  702         if (error == ENOENT)
  703                 domain = NULL;
  704         else if (error != 0)
  705                 goto done_free;
  706         else {
  707                 ch_flags |= PR_HOST;
  708                 pr_flags |= PR_HOST;
  709                 if (len == 0 || domain[len - 1] != '\0') {
  710                         error = EINVAL;
  711                         goto done_free;
  712                 }
  713                 if (len > MAXHOSTNAMELEN) {
  714                         error = ENAMETOOLONG;
  715                         goto done_free;
  716                 }
  717         }
  718 
  719         error = vfs_getopt(opts, "host.hostuuid", (void **)&uuid, &len);
  720         if (error == ENOENT)
  721                 uuid = NULL;
  722         else if (error != 0)
  723                 goto done_free;
  724         else {
  725                 ch_flags |= PR_HOST;
  726                 pr_flags |= PR_HOST;
  727                 if (len == 0 || uuid[len - 1] != '\0') {
  728                         error = EINVAL;
  729                         goto done_free;
  730                 }
  731                 if (len > HOSTUUIDLEN) {
  732                         error = ENAMETOOLONG;
  733                         goto done_free;
  734                 }
  735         }
  736 
  737 #ifdef COMPAT_FREEBSD32
  738         if (td->td_proc->p_sysent->sv_flags & SV_ILP32) {
  739                 uint32_t hid32;
  740 
  741                 error = vfs_copyopt(opts, "host.hostid", &hid32, sizeof(hid32));
  742                 hid = hid32;
  743         } else
  744 #endif
  745                 error = vfs_copyopt(opts, "host.hostid", &hid, sizeof(hid));
  746         if (error == ENOENT)
  747                 gothid = 0;
  748         else if (error != 0)
  749                 goto done_free;
  750         else {
  751                 gothid = 1;
  752                 ch_flags |= PR_HOST;
  753                 pr_flags |= PR_HOST;
  754         }
  755 
  756 #ifdef INET
  757         error = vfs_getopt(opts, "ip4.addr", &op, &ip4s);
  758         if (error == ENOENT)
  759                 ip4s = (pr_flags & PR_IP4_DISABLE) ? 0 : -1;
  760         else if (error != 0)
  761                 goto done_free;
  762         else if (ip4s & (sizeof(*ip4) - 1)) {
  763                 error = EINVAL;
  764                 goto done_free;
  765         } else {
  766                 ch_flags |= PR_IP4_USER | PR_IP4_DISABLE;
  767                 if (ip4s == 0)
  768                         pr_flags |= PR_IP4_USER | PR_IP4_DISABLE;
  769                 else {
  770                         pr_flags = (pr_flags & ~PR_IP4_DISABLE) | PR_IP4_USER;
  771                         ip4s /= sizeof(*ip4);
  772                         if (ip4s > jail_max_af_ips) {
  773                                 error = EINVAL;
  774                                 vfs_opterror(opts, "too many IPv4 addresses");
  775                                 goto done_errmsg;
  776                         }
  777                         ip4 = malloc(ip4s * sizeof(*ip4), M_PRISON, M_WAITOK);
  778                         bcopy(op, ip4, ip4s * sizeof(*ip4));
  779                         /*
  780                          * IP addresses are all sorted but ip[0] to preserve
  781                          * the primary IP address as given from userland.
  782                          * This special IP is used for unbound outgoing
  783                          * connections as well as for "loopback" traffic in case
  784                          * source address selection cannot find any more fitting
  785                          * address to connect from.
  786                          */
  787                         if (ip4s > 1)
  788                                 qsort(ip4 + 1, ip4s - 1, sizeof(*ip4), qcmp_v4);
  789                         /*
  790                          * Check for duplicate addresses and do some simple
  791                          * zero and broadcast checks. If users give other bogus
  792                          * addresses it is their problem.
  793                          *
  794                          * We do not have to care about byte order for these
  795                          * checks so we will do them in NBO.
  796                          */
  797                         for (ii = 0; ii < ip4s; ii++) {
  798                                 if (ip4[ii].s_addr == INADDR_ANY ||
  799                                     ip4[ii].s_addr == INADDR_BROADCAST) {
  800                                         error = EINVAL;
  801                                         goto done_free;
  802                                 }
  803                                 if ((ii+1) < ip4s &&
  804                                     (ip4[0].s_addr == ip4[ii+1].s_addr ||
  805                                      ip4[ii].s_addr == ip4[ii+1].s_addr)) {
  806                                         error = EINVAL;
  807                                         goto done_free;
  808                                 }
  809                         }
  810                 }
  811         }
  812 #endif
  813 
  814 #ifdef INET6
  815         error = vfs_getopt(opts, "ip6.addr", &op, &ip6s);
  816         if (error == ENOENT)
  817                 ip6s = (pr_flags & PR_IP6_DISABLE) ? 0 : -1;
  818         else if (error != 0)
  819                 goto done_free;
  820         else if (ip6s & (sizeof(*ip6) - 1)) {
  821                 error = EINVAL;
  822                 goto done_free;
  823         } else {
  824                 ch_flags |= PR_IP6_USER | PR_IP6_DISABLE;
  825                 if (ip6s == 0)
  826                         pr_flags |= PR_IP6_USER | PR_IP6_DISABLE;
  827                 else {
  828                         pr_flags = (pr_flags & ~PR_IP6_DISABLE) | PR_IP6_USER;
  829                         ip6s /= sizeof(*ip6);
  830                         if (ip6s > jail_max_af_ips) {
  831                                 error = EINVAL;
  832                                 vfs_opterror(opts, "too many IPv6 addresses");
  833                                 goto done_errmsg;
  834                         }
  835                         ip6 = malloc(ip6s * sizeof(*ip6), M_PRISON, M_WAITOK);
  836                         bcopy(op, ip6, ip6s * sizeof(*ip6));
  837                         if (ip6s > 1)
  838                                 qsort(ip6 + 1, ip6s - 1, sizeof(*ip6), qcmp_v6);
  839                         for (ii = 0; ii < ip6s; ii++) {
  840                                 if (IN6_IS_ADDR_UNSPECIFIED(&ip6[ii])) {
  841                                         error = EINVAL;
  842                                         goto done_free;
  843                                 }
  844                                 if ((ii+1) < ip6s &&
  845                                     (IN6_ARE_ADDR_EQUAL(&ip6[0], &ip6[ii+1]) ||
  846                                      IN6_ARE_ADDR_EQUAL(&ip6[ii], &ip6[ii+1])))
  847                                 {
  848                                         error = EINVAL;
  849                                         goto done_free;
  850                                 }
  851                         }
  852                 }
  853         }
  854 #endif
  855 
  856 #if defined(VIMAGE) && (defined(INET) || defined(INET6))
  857         if ((ch_flags & PR_VNET) && (ch_flags & (PR_IP4_USER | PR_IP6_USER))) {
  858                 error = EINVAL;
  859                 vfs_opterror(opts,
  860                     "vnet jails cannot have IP address restrictions");
  861                 goto done_errmsg;
  862         }
  863 #endif
  864 
  865         root = NULL;
  866         error = vfs_getopt(opts, "path", (void **)&path, &len);
  867         if (error == ENOENT)
  868                 path = NULL;
  869         else if (error != 0)
  870                 goto done_free;
  871         else {
  872                 if (flags & JAIL_UPDATE) {
  873                         error = EINVAL;
  874                         vfs_opterror(opts,
  875                             "path cannot be changed after creation");
  876                         goto done_errmsg;
  877                 }
  878                 if (len == 0 || path[len - 1] != '\0') {
  879                         error = EINVAL;
  880                         goto done_free;
  881                 }
  882                 if (len < 2 || (len == 2 && path[0] == '/'))
  883                         path = NULL;
  884                 else {
  885                         /* Leave room for a real-root full pathname. */
  886                         if (len + (path[0] == '/' && strcmp(mypr->pr_path, "/")
  887                             ? strlen(mypr->pr_path) : 0) > MAXPATHLEN) {
  888                                 error = ENAMETOOLONG;
  889                                 goto done_free;
  890                         }
  891                         NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW, UIO_SYSSPACE,
  892                             path, td);
  893                         error = namei(&nd);
  894                         if (error)
  895                                 goto done_free;
  896                         vfslocked = NDHASGIANT(&nd);
  897                         root = nd.ni_vp;
  898                         NDFREE(&nd, NDF_ONLY_PNBUF);
  899                         if (root->v_type != VDIR) {
  900                                 error = ENOTDIR;
  901                                 vrele(root);
  902                                 VFS_UNLOCK_GIANT(vfslocked);
  903                                 goto done_free;
  904                         }
  905                         VFS_UNLOCK_GIANT(vfslocked);
  906                 }
  907         }
  908 
  909         /*
  910          * Grab the allprison lock before letting modules check their
  911          * parameters.  Once we have it, do not let go so we'll have a
  912          * consistent view of the OSD list.
  913          */
  914         sx_xlock(&allprison_lock);
  915         error = osd_jail_call(NULL, PR_METHOD_CHECK, opts);
  916         if (error)
  917                 goto done_unlock_list;
  918 
  919         /* By now, all parameters should have been noted. */
  920         TAILQ_FOREACH(opt, opts, link) {
  921                 if (!opt->seen && strcmp(opt->name, "errmsg")) {
  922                         error = EINVAL;
  923                         vfs_opterror(opts, "unknown parameter: %s", opt->name);
  924                         goto done_unlock_list;
  925                 }
  926         }
  927 
  928         /*
  929          * See if we are creating a new record or updating an existing one.
  930          * This abuses the file error codes ENOENT and EEXIST.
  931          */
  932         cuflags = flags & (JAIL_CREATE | JAIL_UPDATE);
  933         if (!cuflags) {
  934                 error = EINVAL;
  935                 vfs_opterror(opts, "no valid operation (create or update)");
  936                 goto done_unlock_list;
  937         }
  938         pr = NULL;
  939         namelc = NULL;
  940         if (cuflags == JAIL_CREATE && jid == 0 && name != NULL) {
  941                 namelc = strrchr(name, '.');
  942                 jid = strtoul(namelc != NULL ? namelc + 1 : name, &p, 10);
  943                 if (*p != '\0')
  944                         jid = 0;
  945         }
  946         if (jid != 0) {
  947                 /*
  948                  * See if a requested jid already exists.  There is an
  949                  * information leak here if the jid exists but is not within
  950                  * the caller's jail hierarchy.  Jail creators will get EEXIST
  951                  * even though they cannot see the jail, and CREATE | UPDATE
  952                  * will return ENOENT which is not normally a valid error.
  953                  */
  954                 if (jid < 0) {
  955                         error = EINVAL;
  956                         vfs_opterror(opts, "negative jid");
  957                         goto done_unlock_list;
  958                 }
  959                 pr = prison_find(jid);
  960                 if (pr != NULL) {
  961                         ppr = pr->pr_parent;
  962                         /* Create: jid must not exist. */
  963                         if (cuflags == JAIL_CREATE) {
  964                                 mtx_unlock(&pr->pr_mtx);
  965                                 error = EEXIST;
  966                                 vfs_opterror(opts, "jail %d already exists",
  967                                     jid);
  968                                 goto done_unlock_list;
  969                         }
  970                         if (!prison_ischild(mypr, pr)) {
  971                                 mtx_unlock(&pr->pr_mtx);
  972                                 pr = NULL;
  973                         } else if (pr->pr_uref == 0) {
  974                                 if (!(flags & JAIL_DYING)) {
  975                                         mtx_unlock(&pr->pr_mtx);
  976                                         error = ENOENT;
  977                                         vfs_opterror(opts, "jail %d is dying",
  978                                             jid);
  979                                         goto done_unlock_list;
  980                                 } else if ((flags & JAIL_ATTACH) ||
  981                                     (pr_flags & PR_PERSIST)) {
  982                                         /*
  983                                          * A dying jail might be resurrected
  984                                          * (via attach or persist), but first
  985                                          * it must determine if another jail
  986                                          * has claimed its name.  Accomplish
  987                                          * this by implicitly re-setting the
  988                                          * name.
  989                                          */
  990                                         if (name == NULL)
  991                                                 name = prison_name(mypr, pr);
  992                                 }
  993                         }
  994                 }
  995                 if (pr == NULL) {
  996                         /* Update: jid must exist. */
  997                         if (cuflags == JAIL_UPDATE) {
  998                                 error = ENOENT;
  999                                 vfs_opterror(opts, "jail %d not found", jid);
 1000                                 goto done_unlock_list;
 1001                         }
 1002                 }
 1003         }
 1004         /*
 1005          * If the caller provided a name, look for a jail by that name.
 1006          * This has different semantics for creates and updates keyed by jid
 1007          * (where the name must not already exist in a different jail),
 1008          * and updates keyed by the name itself (where the name must exist
 1009          * because that is the jail being updated).
 1010          */
 1011         if (name != NULL) {
 1012                 namelc = strrchr(name, '.');
 1013                 if (namelc == NULL)
 1014                         namelc = name;
 1015                 else {
 1016                         /*
 1017                          * This is a hierarchical name.  Split it into the
 1018                          * parent and child names, and make sure the parent
 1019                          * exists or matches an already found jail.
 1020                          */
 1021                         *namelc = '\0';
 1022                         if (pr != NULL) {
 1023                                 if (strncmp(name, ppr->pr_name, namelc - name)
 1024                                     || ppr->pr_name[namelc - name] != '\0') {
 1025                                         mtx_unlock(&pr->pr_mtx);
 1026                                         error = EINVAL;
 1027                                         vfs_opterror(opts,
 1028                                             "cannot change jail's parent");
 1029                                         goto done_unlock_list;
 1030                                 }
 1031                         } else {
 1032                                 ppr = prison_find_name(mypr, name);
 1033                                 if (ppr == NULL) {
 1034                                         error = ENOENT;
 1035                                         vfs_opterror(opts,
 1036                                             "jail \"%s\" not found", name);
 1037                                         goto done_unlock_list;
 1038                                 }
 1039                                 mtx_unlock(&ppr->pr_mtx);
 1040                         }
 1041                         name = ++namelc;
 1042                 }
 1043                 if (name[0] != '\0') {
 1044                         namelen =
 1045                             (ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1;
 1046  name_again:
 1047                         deadpr = NULL;
 1048                         FOREACH_PRISON_CHILD(ppr, tpr) {
 1049                                 if (tpr != pr && tpr->pr_ref > 0 &&
 1050                                     !strcmp(tpr->pr_name + namelen, name)) {
 1051                                         if (pr == NULL &&
 1052                                             cuflags != JAIL_CREATE) {
 1053                                                 mtx_lock(&tpr->pr_mtx);
 1054                                                 if (tpr->pr_ref > 0) {
 1055                                                         /*
 1056                                                          * Use this jail
 1057                                                          * for updates.
 1058                                                          */
 1059                                                         if (tpr->pr_uref > 0) {
 1060                                                                 pr = tpr;
 1061                                                                 break;
 1062                                                         }
 1063                                                         deadpr = tpr;
 1064                                                 }
 1065                                                 mtx_unlock(&tpr->pr_mtx);
 1066                                         } else if (tpr->pr_uref > 0) {
 1067                                                 /*
 1068                                                  * Create, or update(jid):
 1069                                                  * name must not exist in an
 1070                                                  * active sibling jail.
 1071                                                  */
 1072                                                 error = EEXIST;
 1073                                                 if (pr != NULL)
 1074                                                         mtx_unlock(&pr->pr_mtx);
 1075                                                 vfs_opterror(opts,
 1076                                                    "jail \"%s\" already exists",
 1077                                                    name);
 1078                                                 goto done_unlock_list;
 1079                                         }
 1080                                 }
 1081                         }
 1082                         /* If no active jail is found, use a dying one. */
 1083                         if (deadpr != NULL && pr == NULL) {
 1084                                 if (flags & JAIL_DYING) {
 1085                                         mtx_lock(&deadpr->pr_mtx);
 1086                                         if (deadpr->pr_ref == 0) {
 1087                                                 mtx_unlock(&deadpr->pr_mtx);
 1088                                                 goto name_again;
 1089                                         }
 1090                                         pr = deadpr;
 1091                                 } else if (cuflags == JAIL_UPDATE) {
 1092                                         error = ENOENT;
 1093                                         vfs_opterror(opts,
 1094                                             "jail \"%s\" is dying", name);
 1095                                         goto done_unlock_list;
 1096                                 }
 1097                         }
 1098                         /* Update: name must exist if no jid. */
 1099                         else if (cuflags == JAIL_UPDATE && pr == NULL) {
 1100                                 error = ENOENT;
 1101                                 vfs_opterror(opts, "jail \"%s\" not found",
 1102                                     name);
 1103                                 goto done_unlock_list;
 1104                         }
 1105                 }
 1106         }
 1107         /* Update: must provide a jid or name. */
 1108         else if (cuflags == JAIL_UPDATE && pr == NULL) {
 1109                 error = ENOENT;
 1110                 vfs_opterror(opts, "update specified no jail");
 1111                 goto done_unlock_list;
 1112         }
 1113 
 1114         /* If there's no prison to update, create a new one and link it in. */
 1115         if (pr == NULL) {
 1116                 for (tpr = mypr; tpr != NULL; tpr = tpr->pr_parent)
 1117                         if (tpr->pr_childcount >= tpr->pr_childmax) {
 1118                                 error = EPERM;
 1119                                 vfs_opterror(opts, "prison limit exceeded");
 1120                                 goto done_unlock_list;
 1121                         }
 1122                 created = 1;
 1123                 mtx_lock(&ppr->pr_mtx);
 1124                 if (ppr->pr_ref == 0 || (ppr->pr_flags & PR_REMOVE)) {
 1125                         mtx_unlock(&ppr->pr_mtx);
 1126                         error = ENOENT;
 1127                         vfs_opterror(opts, "parent jail went away!");
 1128                         goto done_unlock_list;
 1129                 }
 1130                 ppr->pr_ref++;
 1131                 ppr->pr_uref++;
 1132                 mtx_unlock(&ppr->pr_mtx);
 1133                 pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
 1134                 if (jid == 0) {
 1135                         /* Find the next free jid. */
 1136                         jid = lastprid + 1;
 1137  findnext:
 1138                         if (jid == JAIL_MAX)
 1139                                 jid = 1;
 1140                         TAILQ_FOREACH(tpr, &allprison, pr_list) {
 1141                                 if (tpr->pr_id < jid)
 1142                                         continue;
 1143                                 if (tpr->pr_id > jid || tpr->pr_ref == 0) {
 1144                                         TAILQ_INSERT_BEFORE(tpr, pr, pr_list);
 1145                                         break;
 1146                                 }
 1147                                 if (jid == lastprid) {
 1148                                         error = EAGAIN;
 1149                                         vfs_opterror(opts,
 1150                                             "no available jail IDs");
 1151                                         free(pr, M_PRISON);
 1152                                         prison_deref(ppr, PD_DEREF |
 1153                                             PD_DEUREF | PD_LIST_XLOCKED);
 1154                                         goto done_releroot;
 1155                                 }
 1156                                 jid++;
 1157                                 goto findnext;
 1158                         }
 1159                         lastprid = jid;
 1160                 } else {
 1161                         /*
 1162                          * The jail already has a jid (that did not yet exist),
 1163                          * so just find where to insert it.
 1164                          */
 1165                         TAILQ_FOREACH(tpr, &allprison, pr_list)
 1166                                 if (tpr->pr_id >= jid) {
 1167                                         TAILQ_INSERT_BEFORE(tpr, pr, pr_list);
 1168                                         break;
 1169                                 }
 1170                 }
 1171                 if (tpr == NULL)
 1172                         TAILQ_INSERT_TAIL(&allprison, pr, pr_list);
 1173                 LIST_INSERT_HEAD(&ppr->pr_children, pr, pr_sibling);
 1174                 for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent)
 1175                         tpr->pr_childcount++;
 1176 
 1177                 pr->pr_parent = ppr;
 1178                 pr->pr_id = jid;
 1179 
 1180                 /* Set some default values, and inherit some from the parent. */
 1181                 if (name == NULL)
 1182                         name = "";
 1183                 if (path == NULL) {
 1184                         path = "/";
 1185                         root = mypr->pr_root;
 1186                         vref(root);
 1187                 }
 1188                 strlcpy(pr->pr_hostuuid, DEFAULT_HOSTUUID, HOSTUUIDLEN);
 1189                 pr->pr_flags |= PR_HOST;
 1190 #if defined(INET) || defined(INET6)
 1191 #ifdef VIMAGE
 1192                 if (!(pr_flags & PR_VNET))
 1193 #endif
 1194                 {
 1195 #ifdef INET
 1196                         if (!(ch_flags & PR_IP4_USER))
 1197                                 pr->pr_flags |=
 1198                                     PR_IP4 | PR_IP4_USER | PR_IP4_DISABLE;
 1199                         else if (!(pr_flags & PR_IP4_USER)) {
 1200                                 pr->pr_flags |= ppr->pr_flags & PR_IP4;
 1201                                 if (ppr->pr_ip4 != NULL) {
 1202                                         pr->pr_ip4s = ppr->pr_ip4s;
 1203                                         pr->pr_ip4 = malloc(pr->pr_ip4s *
 1204                                             sizeof(struct in_addr), M_PRISON,
 1205                                             M_WAITOK);
 1206                                         bcopy(ppr->pr_ip4, pr->pr_ip4,
 1207                                             pr->pr_ip4s * sizeof(*pr->pr_ip4));
 1208                                 }
 1209                         }
 1210 #endif
 1211 #ifdef INET6
 1212                         if (!(ch_flags & PR_IP6_USER))
 1213                                 pr->pr_flags |=
 1214                                     PR_IP6 | PR_IP6_USER | PR_IP6_DISABLE;
 1215                         else if (!(pr_flags & PR_IP6_USER)) {
 1216                                 pr->pr_flags |= ppr->pr_flags & PR_IP6;
 1217                                 if (ppr->pr_ip6 != NULL) {
 1218                                         pr->pr_ip6s = ppr->pr_ip6s;
 1219                                         pr->pr_ip6 = malloc(pr->pr_ip6s *
 1220                                             sizeof(struct in6_addr), M_PRISON,
 1221                                             M_WAITOK);
 1222                                         bcopy(ppr->pr_ip6, pr->pr_ip6,
 1223                                             pr->pr_ip6s * sizeof(*pr->pr_ip6));
 1224                                 }
 1225                         }
 1226 #endif
 1227                 }
 1228 #endif
 1229                 /* Source address selection is always on by default. */
 1230                 pr->pr_flags |= _PR_IP_SADDRSEL;
 1231 
 1232                 pr->pr_securelevel = ppr->pr_securelevel;
 1233                 pr->pr_allow = JAIL_DEFAULT_ALLOW & ppr->pr_allow;
 1234                 pr->pr_enforce_statfs = JAIL_DEFAULT_ENFORCE_STATFS;
 1235 
 1236                 LIST_INIT(&pr->pr_children);
 1237                 mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK);
 1238 
 1239 #ifdef VIMAGE
 1240                 /* Allocate a new vnet if specified. */
 1241                 pr->pr_vnet = (pr_flags & PR_VNET)
 1242                     ? vnet_alloc() : ppr->pr_vnet;
 1243 #endif
 1244                 /*
 1245                  * Allocate a dedicated cpuset for each jail.
 1246                  * Unlike other initial settings, this may return an error.
 1247                  */
 1248                 error = cpuset_create_root(ppr, &pr->pr_cpuset);
 1249                 if (error) {
 1250                         prison_deref(pr, PD_LIST_XLOCKED);
 1251                         goto done_releroot;
 1252                 }
 1253 
 1254                 mtx_lock(&pr->pr_mtx);
 1255                 /*
 1256                  * New prisons do not yet have a reference, because we do not
 1257                  * want others to see the incomplete prison once the
 1258                  * allprison_lock is downgraded.
 1259                  */
 1260         } else {
 1261                 created = 0;
 1262                 /*
 1263                  * Grab a reference for existing prisons, to ensure they
 1264                  * continue to exist for the duration of the call.
 1265                  */
 1266                 pr->pr_ref++;
 1267 #if defined(VIMAGE) && (defined(INET) || defined(INET6))
 1268                 if ((pr->pr_flags & PR_VNET) &&
 1269                     (ch_flags & (PR_IP4_USER | PR_IP6_USER))) {
 1270                         error = EINVAL;
 1271                         vfs_opterror(opts,
 1272                             "vnet jails cannot have IP address restrictions");
 1273                         goto done_deref_locked;
 1274                 }
 1275 #endif
 1276 #ifdef INET
 1277                 if (PR_IP4_USER & ch_flags & (pr_flags ^ pr->pr_flags)) {
 1278                         error = EINVAL;
 1279                         vfs_opterror(opts,
 1280                             "ip4 cannot be changed after creation");
 1281                         goto done_deref_locked;
 1282                 }
 1283 #endif
 1284 #ifdef INET6
 1285                 if (PR_IP6_USER & ch_flags & (pr_flags ^ pr->pr_flags)) {
 1286                         error = EINVAL;
 1287                         vfs_opterror(opts,
 1288                             "ip6 cannot be changed after creation");
 1289                         goto done_deref_locked;
 1290                 }
 1291 #endif
 1292         }
 1293 
 1294         /* Do final error checking before setting anything. */
 1295         if (gotslevel) {
 1296                 if (slevel < ppr->pr_securelevel) {
 1297                         error = EPERM;
 1298                         goto done_deref_locked;
 1299                 }
 1300         }
 1301         if (gotchildmax) {
 1302                 if (childmax >= ppr->pr_childmax) {
 1303                         error = EPERM;
 1304                         goto done_deref_locked;
 1305                 }
 1306         }
 1307         if (gotenforce) {
 1308                 if (enforce < ppr->pr_enforce_statfs) {
 1309                         error = EPERM;
 1310                         goto done_deref_locked;
 1311                 }
 1312         }
 1313 #ifdef INET
 1314         if (ip4s > 0) {
 1315                 if (ppr->pr_flags & PR_IP4) {
 1316                         /*
 1317                          * Make sure the new set of IP addresses is a
 1318                          * subset of the parent's list.  Don't worry
 1319                          * about the parent being unlocked, as any
 1320                          * setting is done with allprison_lock held.
 1321                          */
 1322                         for (ij = 0; ij < ppr->pr_ip4s; ij++)
 1323                                 if (ip4[0].s_addr == ppr->pr_ip4[ij].s_addr)
 1324                                         break;
 1325                         if (ij == ppr->pr_ip4s) {
 1326                                 error = EPERM;
 1327                                 goto done_deref_locked;
 1328                         }
 1329                         if (ip4s > 1) {
 1330                                 for (ii = ij = 1; ii < ip4s; ii++) {
 1331                                         if (ip4[ii].s_addr ==
 1332                                             ppr->pr_ip4[0].s_addr)
 1333                                                 continue;
 1334                                         for (; ij < ppr->pr_ip4s; ij++)
 1335                                                 if (ip4[ii].s_addr ==
 1336                                                     ppr->pr_ip4[ij].s_addr)
 1337                                                         break;
 1338                                         if (ij == ppr->pr_ip4s)
 1339                                                 break;
 1340                                 }
 1341                                 if (ij == ppr->pr_ip4s) {
 1342                                         error = EPERM;
 1343                                         goto done_deref_locked;
 1344                                 }
 1345                         }
 1346                 }
 1347                 /*
 1348                  * Check for conflicting IP addresses.  We permit them
 1349                  * if there is no more than one IP on each jail.  If
 1350                  * there is a duplicate on a jail with more than one
 1351                  * IP stop checking and return error.
 1352                  */
 1353                 tppr = ppr;
 1354 #ifdef VIMAGE
 1355                 for (; tppr != &prison0; tppr = tppr->pr_parent)
 1356                         if (tppr->pr_flags & PR_VNET)
 1357                                 break;
 1358 #endif
 1359                 FOREACH_PRISON_DESCENDANT(tppr, tpr, descend) {
 1360                         if (tpr == pr ||
 1361 #ifdef VIMAGE
 1362                             (tpr != tppr && (tpr->pr_flags & PR_VNET)) ||
 1363 #endif
 1364                             tpr->pr_uref == 0) {
 1365                                 descend = 0;
 1366                                 continue;
 1367                         }
 1368                         if (!(tpr->pr_flags & PR_IP4_USER))
 1369                                 continue;
 1370                         descend = 0;
 1371                         if (tpr->pr_ip4 == NULL ||
 1372                             (ip4s == 1 && tpr->pr_ip4s == 1))
 1373                                 continue;
 1374                         for (ii = 0; ii < ip4s; ii++) {
 1375                                 if (_prison_check_ip4(tpr, &ip4[ii]) == 0) {
 1376                                         error = EADDRINUSE;
 1377                                         vfs_opterror(opts,
 1378                                             "IPv4 addresses clash");
 1379                                         goto done_deref_locked;
 1380                                 }
 1381                         }
 1382                 }
 1383         }
 1384 #endif
 1385 #ifdef INET6
 1386         if (ip6s > 0) {
 1387                 if (ppr->pr_flags & PR_IP6) {
 1388                         /*
 1389                          * Make sure the new set of IP addresses is a
 1390                          * subset of the parent's list.
 1391                          */
 1392                         for (ij = 0; ij < ppr->pr_ip6s; ij++)
 1393                                 if (IN6_ARE_ADDR_EQUAL(&ip6[0],
 1394                                     &ppr->pr_ip6[ij]))
 1395                                         break;
 1396                         if (ij == ppr->pr_ip6s) {
 1397                                 error = EPERM;
 1398                                 goto done_deref_locked;
 1399                         }
 1400                         if (ip6s > 1) {
 1401                                 for (ii = ij = 1; ii < ip6s; ii++) {
 1402                                         if (IN6_ARE_ADDR_EQUAL(&ip6[ii],
 1403                                              &ppr->pr_ip6[0]))
 1404                                                 continue;
 1405                                         for (; ij < ppr->pr_ip6s; ij++)
 1406                                                 if (IN6_ARE_ADDR_EQUAL(
 1407                                                     &ip6[ii], &ppr->pr_ip6[ij]))
 1408                                                         break;
 1409                                         if (ij == ppr->pr_ip6s)
 1410                                                 break;
 1411                                 }
 1412                                 if (ij == ppr->pr_ip6s) {
 1413                                         error = EPERM;
 1414                                         goto done_deref_locked;
 1415                                 }
 1416                         }
 1417                 }
 1418                 /* Check for conflicting IP addresses. */
 1419                 tppr = ppr;
 1420 #ifdef VIMAGE
 1421                 for (; tppr != &prison0; tppr = tppr->pr_parent)
 1422                         if (tppr->pr_flags & PR_VNET)
 1423                                 break;
 1424 #endif
 1425                 FOREACH_PRISON_DESCENDANT(tppr, tpr, descend) {
 1426                         if (tpr == pr ||
 1427 #ifdef VIMAGE
 1428                             (tpr != tppr && (tpr->pr_flags & PR_VNET)) ||
 1429 #endif
 1430                             tpr->pr_uref == 0) {
 1431                                 descend = 0;
 1432                                 continue;
 1433                         }
 1434                         if (!(tpr->pr_flags & PR_IP6_USER))
 1435                                 continue;
 1436                         descend = 0;
 1437                         if (tpr->pr_ip6 == NULL ||
 1438                             (ip6s == 1 && tpr->pr_ip6s == 1))
 1439                                 continue;
 1440                         for (ii = 0; ii < ip6s; ii++) {
 1441                                 if (_prison_check_ip6(tpr, &ip6[ii]) == 0) {
 1442                                         error = EADDRINUSE;
 1443                                         vfs_opterror(opts,
 1444                                             "IPv6 addresses clash");
 1445                                         goto done_deref_locked;
 1446                                 }
 1447                         }
 1448                 }
 1449         }
 1450 #endif
 1451         onamelen = namelen = 0;
 1452         if (name != NULL) {
 1453                 /* Give a default name of the jid. */
 1454                 if (name[0] == '\0')
 1455                         snprintf(name = numbuf, sizeof(numbuf), "%d", jid);
 1456                 else if (*namelc == '' || (strtoul(namelc, &p, 10) != jid &&
 1457                     *p == '\0')) {
 1458                         error = EINVAL;
 1459                         vfs_opterror(opts,
 1460                             "name cannot be numeric (unless it is the jid)");
 1461                         goto done_deref_locked;
 1462                 }
 1463                 /*
 1464                  * Make sure the name isn't too long for the prison or its
 1465                  * children.
 1466                  */
 1467                 onamelen = strlen(pr->pr_name);
 1468                 namelen = strlen(name);
 1469                 if (strlen(ppr->pr_name) + namelen + 2 > sizeof(pr->pr_name)) {
 1470                         error = ENAMETOOLONG;
 1471                         goto done_deref_locked;
 1472                 }
 1473                 FOREACH_PRISON_DESCENDANT(pr, tpr, descend) {
 1474                         if (strlen(tpr->pr_name) + (namelen - onamelen) >=
 1475                             sizeof(pr->pr_name)) {
 1476                                 error = ENAMETOOLONG;
 1477                                 goto done_deref_locked;
 1478                         }
 1479                 }
 1480         }
 1481         if (pr_allow & ~ppr->pr_allow) {
 1482                 error = EPERM;
 1483                 goto done_deref_locked;
 1484         }
 1485 
 1486         /* Set the parameters of the prison. */
 1487 #ifdef INET
 1488         redo_ip4 = 0;
 1489         if (pr_flags & PR_IP4_USER) {
 1490                 pr->pr_flags |= PR_IP4;
 1491                 free(pr->pr_ip4, M_PRISON);
 1492                 pr->pr_ip4s = ip4s;
 1493                 pr->pr_ip4 = ip4;
 1494                 ip4 = NULL;
 1495                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
 1496 #ifdef VIMAGE
 1497                         if (tpr->pr_flags & PR_VNET) {
 1498                                 descend = 0;
 1499                                 continue;
 1500                         }
 1501 #endif
 1502                         if (prison_restrict_ip4(tpr, NULL)) {
 1503                                 redo_ip4 = 1;
 1504                                 descend = 0;
 1505                         }
 1506                 }
 1507         }
 1508 #endif
 1509 #ifdef INET6
 1510         redo_ip6 = 0;
 1511         if (pr_flags & PR_IP6_USER) {
 1512                 pr->pr_flags |= PR_IP6;
 1513                 free(pr->pr_ip6, M_PRISON);
 1514                 pr->pr_ip6s = ip6s;
 1515                 pr->pr_ip6 = ip6;
 1516                 ip6 = NULL;
 1517                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
 1518 #ifdef VIMAGE
 1519                         if (tpr->pr_flags & PR_VNET) {
 1520                                 descend = 0;
 1521                                 continue;
 1522                         }
 1523 #endif
 1524                         if (prison_restrict_ip6(tpr, NULL)) {
 1525                                 redo_ip6 = 1;
 1526                                 descend = 0;
 1527                         }
 1528                 }
 1529         }
 1530 #endif
 1531         if (gotslevel) {
 1532                 pr->pr_securelevel = slevel;
 1533                 /* Set all child jails to be at least this level. */
 1534                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend)
 1535                         if (tpr->pr_securelevel < slevel)
 1536                                 tpr->pr_securelevel = slevel;
 1537         }
 1538         if (gotchildmax) {
 1539                 pr->pr_childmax = childmax;
 1540                 /* Set all child jails to under this limit. */
 1541                 FOREACH_PRISON_DESCENDANT_LOCKED_LEVEL(pr, tpr, descend, level)
 1542                         if (tpr->pr_childmax > childmax - level)
 1543                                 tpr->pr_childmax = childmax > level
 1544                                     ? childmax - level : 0;
 1545         }
 1546         if (gotenforce) {
 1547                 pr->pr_enforce_statfs = enforce;
 1548                 /* Pass this restriction on to the children. */
 1549                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend)
 1550                         if (tpr->pr_enforce_statfs < enforce)
 1551                                 tpr->pr_enforce_statfs = enforce;
 1552         }
 1553         if (name != NULL) {
 1554                 if (ppr == &prison0)
 1555                         strlcpy(pr->pr_name, name, sizeof(pr->pr_name));
 1556                 else
 1557                         snprintf(pr->pr_name, sizeof(pr->pr_name), "%s.%s",
 1558                             ppr->pr_name, name);
 1559                 /* Change this component of child names. */
 1560                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
 1561                         bcopy(tpr->pr_name + onamelen, tpr->pr_name + namelen,
 1562                             strlen(tpr->pr_name + onamelen) + 1);
 1563                         bcopy(pr->pr_name, tpr->pr_name, namelen);
 1564                 }
 1565         }
 1566         if (path != NULL) {
 1567                 /* Try to keep a real-rooted full pathname. */
 1568                 if (path[0] == '/' && strcmp(mypr->pr_path, "/"))
 1569                         snprintf(pr->pr_path, sizeof(pr->pr_path), "%s%s",
 1570                             mypr->pr_path, path);
 1571                 else
 1572                         strlcpy(pr->pr_path, path, sizeof(pr->pr_path));
 1573                 pr->pr_root = root;
 1574         }
 1575         if (PR_HOST & ch_flags & ~pr_flags) {
 1576                 if (pr->pr_flags & PR_HOST) {
 1577                         /*
 1578                          * Copy the parent's host info.  As with pr_ip4 above,
 1579                          * the lack of a lock on the parent is not a problem;
 1580                          * it is always set with allprison_lock at least
 1581                          * shared, and is held exclusively here.
 1582                          */
 1583                         strlcpy(pr->pr_hostname, pr->pr_parent->pr_hostname,
 1584                             sizeof(pr->pr_hostname));
 1585                         strlcpy(pr->pr_domainname, pr->pr_parent->pr_domainname,
 1586                             sizeof(pr->pr_domainname));
 1587                         strlcpy(pr->pr_hostuuid, pr->pr_parent->pr_hostuuid,
 1588                             sizeof(pr->pr_hostuuid));
 1589                         pr->pr_hostid = pr->pr_parent->pr_hostid;
 1590                 }
 1591         } else if (host != NULL || domain != NULL || uuid != NULL || gothid) {
 1592                 /* Set this prison, and any descendants without PR_HOST. */
 1593                 if (host != NULL)
 1594                         strlcpy(pr->pr_hostname, host, sizeof(pr->pr_hostname));
 1595                 if (domain != NULL)
 1596                         strlcpy(pr->pr_domainname, domain, 
 1597                             sizeof(pr->pr_domainname));
 1598                 if (uuid != NULL)
 1599                         strlcpy(pr->pr_hostuuid, uuid, sizeof(pr->pr_hostuuid));
 1600                 if (gothid)
 1601                         pr->pr_hostid = hid;
 1602                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
 1603                         if (tpr->pr_flags & PR_HOST)
 1604                                 descend = 0;
 1605                         else {
 1606                                 if (host != NULL)
 1607                                         strlcpy(tpr->pr_hostname,
 1608                                             pr->pr_hostname,
 1609                                             sizeof(tpr->pr_hostname));
 1610                                 if (domain != NULL)
 1611                                         strlcpy(tpr->pr_domainname, 
 1612                                             pr->pr_domainname,
 1613                                             sizeof(tpr->pr_domainname));
 1614                                 if (uuid != NULL)
 1615                                         strlcpy(tpr->pr_hostuuid,
 1616                                             pr->pr_hostuuid,
 1617                                             sizeof(tpr->pr_hostuuid));
 1618                                 if (gothid)
 1619                                         tpr->pr_hostid = hid;
 1620                         }
 1621                 }
 1622         }
 1623         if ((tallow = ch_allow & ~pr_allow)) {
 1624                 /* Clear allow bits in all children. */
 1625                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend)
 1626                         tpr->pr_allow &= ~tallow;
 1627         }
 1628         pr->pr_allow = (pr->pr_allow & ~ch_allow) | pr_allow;
 1629         /*
 1630          * Persistent prisons get an extra reference, and prisons losing their
 1631          * persist flag lose that reference.  Only do this for existing prisons
 1632          * for now, so new ones will remain unseen until after the module
 1633          * handlers have completed.
 1634          */
 1635         if (!created && (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags))) {
 1636                 if (pr_flags & PR_PERSIST) {
 1637                         pr->pr_ref++;
 1638                         pr->pr_uref++;
 1639                 } else {
 1640                         pr->pr_ref--;
 1641                         pr->pr_uref--;
 1642                 }
 1643         }
 1644         pr->pr_flags = (pr->pr_flags & ~ch_flags) | pr_flags;
 1645         mtx_unlock(&pr->pr_mtx);
 1646 
 1647         /* Locks may have prevented a complete restriction of child IP
 1648          * addresses.  If so, allocate some more memory and try again.
 1649          */
 1650 #ifdef INET
 1651         while (redo_ip4) {
 1652                 ip4s = pr->pr_ip4s;
 1653                 ip4 = malloc(ip4s * sizeof(*ip4), M_PRISON, M_WAITOK);
 1654                 mtx_lock(&pr->pr_mtx);
 1655                 redo_ip4 = 0;
 1656                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
 1657 #ifdef VIMAGE
 1658                         if (tpr->pr_flags & PR_VNET) {
 1659                                 descend = 0;
 1660                                 continue;
 1661                         }
 1662 #endif
 1663                         if (prison_restrict_ip4(tpr, ip4)) {
 1664                                 if (ip4 != NULL)
 1665                                         ip4 = NULL;
 1666                                 else
 1667                                         redo_ip4 = 1;
 1668                         }
 1669                 }
 1670                 mtx_unlock(&pr->pr_mtx);
 1671         }
 1672 #endif
 1673 #ifdef INET6
 1674         while (redo_ip6) {
 1675                 ip6s = pr->pr_ip6s;
 1676                 ip6 = malloc(ip6s * sizeof(*ip6), M_PRISON, M_WAITOK);
 1677                 mtx_lock(&pr->pr_mtx);
 1678                 redo_ip6 = 0;
 1679                 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
 1680 #ifdef VIMAGE
 1681                         if (tpr->pr_flags & PR_VNET) {
 1682                                 descend = 0;
 1683                                 continue;
 1684                         }
 1685 #endif
 1686                         if (prison_restrict_ip6(tpr, ip6)) {
 1687                                 if (ip6 != NULL)
 1688                                         ip6 = NULL;
 1689                                 else
 1690                                         redo_ip6 = 1;
 1691                         }
 1692                 }
 1693                 mtx_unlock(&pr->pr_mtx);
 1694         }
 1695 #endif
 1696 
 1697         /* Let the modules do their work. */
 1698         sx_downgrade(&allprison_lock);
 1699         if (created) {
 1700                 error = osd_jail_call(pr, PR_METHOD_CREATE, opts);
 1701                 if (error) {
 1702                         prison_deref(pr, PD_LIST_SLOCKED);
 1703                         goto done_errmsg;
 1704                 }
 1705         }
 1706         error = osd_jail_call(pr, PR_METHOD_SET, opts);
 1707         if (error) {
 1708                 prison_deref(pr, created
 1709                     ? PD_LIST_SLOCKED
 1710                     : PD_DEREF | PD_LIST_SLOCKED);
 1711                 goto done_errmsg;
 1712         }
 1713 
 1714         /* Attach this process to the prison if requested. */
 1715         if (flags & JAIL_ATTACH) {
 1716                 mtx_lock(&pr->pr_mtx);
 1717                 error = do_jail_attach(td, pr);
 1718                 if (error) {
 1719                         vfs_opterror(opts, "attach failed");
 1720                         if (!created)
 1721                                 prison_deref(pr, PD_DEREF);
 1722                         goto done_errmsg;
 1723                 }
 1724         }
 1725 
 1726         /*
 1727          * Now that it is all there, drop the temporary reference from existing
 1728          * prisons.  Or add a reference to newly created persistent prisons
 1729          * (which was not done earlier so that the prison would not be publicly
 1730          * visible).
 1731          */
 1732         if (!created) {
 1733                 prison_deref(pr, (flags & JAIL_ATTACH)
 1734                     ? PD_DEREF
 1735                     : PD_DEREF | PD_LIST_SLOCKED);
 1736         } else {
 1737                 if (pr_flags & PR_PERSIST) {
 1738                         mtx_lock(&pr->pr_mtx);
 1739                         pr->pr_ref++;
 1740                         pr->pr_uref++;
 1741                         mtx_unlock(&pr->pr_mtx);
 1742                 }
 1743                 if (!(flags & JAIL_ATTACH))
 1744                         sx_sunlock(&allprison_lock);
 1745         }
 1746         td->td_retval[0] = pr->pr_id;
 1747         goto done_errmsg;
 1748 
 1749  done_deref_locked:
 1750         prison_deref(pr, created
 1751             ? PD_LOCKED | PD_LIST_XLOCKED
 1752             : PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED);
 1753         goto done_releroot;
 1754  done_unlock_list:
 1755         sx_xunlock(&allprison_lock);
 1756  done_releroot:
 1757         if (root != NULL) {
 1758                 vfslocked = VFS_LOCK_GIANT(root->v_mount);
 1759                 vrele(root);
 1760                 VFS_UNLOCK_GIANT(vfslocked);
 1761         }
 1762  done_errmsg:
 1763         if (error) {
 1764                 vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len);
 1765                 if (errmsg_len > 0) {
 1766                         errmsg_pos = 2 * vfs_getopt_pos(opts, "errmsg") + 1;
 1767                         if (errmsg_pos > 0) {
 1768                                 if (optuio->uio_segflg == UIO_SYSSPACE)
 1769                                         bcopy(errmsg,
 1770                                            optuio->uio_iov[errmsg_pos].iov_base,
 1771                                            errmsg_len);
 1772                                 else
 1773                                         copyout(errmsg,
 1774                                            optuio->uio_iov[errmsg_pos].iov_base,
 1775                                            errmsg_len);
 1776                         }
 1777                 }
 1778         }
 1779  done_free:
 1780 #ifdef INET
 1781         free(ip4, M_PRISON);
 1782 #endif
 1783 #ifdef INET6
 1784         free(ip6, M_PRISON);
 1785 #endif
 1786         vfs_freeopts(opts);
 1787         return (error);
 1788 }
 1789 
 1790 
 1791 /*
 1792  * struct jail_get_args {
 1793  *      struct iovec *iovp;
 1794  *      unsigned int iovcnt;
 1795  *      int flags;
 1796  * };
 1797  */
 1798 int
 1799 jail_get(struct thread *td, struct jail_get_args *uap)
 1800 {
 1801         struct uio *auio;
 1802         int error;
 1803 
 1804         /* Check that we have an even number of iovecs. */
 1805         if (uap->iovcnt & 1)
 1806                 return (EINVAL);
 1807 
 1808         error = copyinuio(uap->iovp, uap->iovcnt, &auio);
 1809         if (error)
 1810                 return (error);
 1811         error = kern_jail_get(td, auio, uap->flags);
 1812         if (error == 0)
 1813                 error = copyout(auio->uio_iov, uap->iovp,
 1814                     uap->iovcnt * sizeof (struct iovec));
 1815         free(auio, M_IOV);
 1816         return (error);
 1817 }
 1818 
 1819 int
 1820 kern_jail_get(struct thread *td, struct uio *optuio, int flags)
 1821 {
              /*
               * Select one jail and report its parameters through the
               * name/value option pairs carried in optuio.
               *
               * td     - calling thread.  The prison of its credential (mypr)
               *          is the starting point for every lookup, and
               *          td_retval[0] receives the jid as soon as a jail has
               *          been selected.
               * optuio - uio parsed by vfs_buildopts(); the value of the option
               *          at position "pos" lives in iovec 2 * pos + 1.
               * flags  - must lie within JAIL_GET_MASK.  JAIL_DYING also admits
               *          jails whose pr_uref is zero.
               *
               * Returns 0 on success or an errno value.  On failure, any error
               * text recorded in the "errmsg" option is copied into the
               * caller's buffer for that option, if one was supplied.
               */
 1822         struct prison *pr, *mypr;
 1823         struct vfsopt *opt;
 1824         struct vfsoptlist *opts;
 1825         char *errmsg, *name;
 1826         int error, errmsg_len, errmsg_pos, fi, i, jid, len, locked, pos;
 1827 
              /* Refuse any flag bit outside JAIL_GET_MASK. */
 1828         if (flags & ~JAIL_GET_MASK)
 1829                 return (EINVAL);
 1830 
 1831         /* Get the parameter list. */
 1832         error = vfs_buildopts(optuio, &opts);
 1833         if (error)
 1834                 return (error);
              /*
               * Remember where "errmsg" sits; done_errmsg treats a negative
               * position as "no errmsg option supplied".
               */
 1835         errmsg_pos = vfs_getopt_pos(opts, "errmsg");
 1836         mypr = td->td_ucred->cr_prison;
 1837 
 1838         /*
 1839          * Find the prison specified by one of: lastjid, jid, name.
 1840          */
 1841         sx_slock(&allprison_lock);
              /*
               * "lastjid": take the first entry of allprison whose id is above
               * jid, that prison_ischild() accepts relative to mypr, and that
               * is still referenced (and in use, unless JAIL_DYING is set).
               * The loop is left with that prison's pr_mtx still held.
               */
 1842         error = vfs_copyopt(opts, "lastjid", &jid, sizeof(jid));
 1843         if (error == 0) {
 1844                 TAILQ_FOREACH(pr, &allprison, pr_list) {
 1845                         if (pr->pr_id > jid && prison_ischild(mypr, pr)) {
 1846                                 mtx_lock(&pr->pr_mtx);
 1847                                 if (pr->pr_ref > 0 &&
 1848                                     (pr->pr_uref > 0 || (flags & JAIL_DYING)))
 1849                                         break;
 1850                                 mtx_unlock(&pr->pr_mtx);
 1851                         }
 1852                 }
 1853                 if (pr != NULL)
 1854                         goto found_prison;
 1855                 error = ENOENT;
 1856                 vfs_opterror(opts, "no jail after %d", jid);
 1857                 goto done_unlock_list;
 1858         } else if (error != ENOENT)
 1859                 goto done_unlock_list;
 1860 
              /*
               * "jid": only a non-zero jid is looked up; zero falls through to
               * the "name" selector.  prison_find_child() evidently returns
               * with pr_mtx held, since the dying path below unlocks it.
               * NOTE(review): ENOENT from vfs_copyopt() presumably means the
               * option was not supplied - confirm against vfs_copyopt().
               */
 1861         error = vfs_copyopt(opts, "jid", &jid, sizeof(jid));
 1862         if (error == 0) {
 1863                 if (jid != 0) {
 1864                         pr = prison_find_child(mypr, jid);
 1865                         if (pr != NULL) {
 1866                                 if (pr->pr_uref == 0 && !(flags & JAIL_DYING)) {
 1867                                         mtx_unlock(&pr->pr_mtx);
 1868                                         error = ENOENT;
 1869                                         vfs_opterror(opts, "jail %d is dying",
 1870                                             jid);
 1871                                         goto done_unlock_list;
 1872                                 }
 1873                                 goto found_prison;
 1874                         }
 1875                         error = ENOENT;
 1876                         vfs_opterror(opts, "jail %d not found", jid);
 1877                         goto done_unlock_list;
 1878                 }
 1879         } else if (error != ENOENT)
 1880                 goto done_unlock_list;
 1881 
              /*
               * "name": the value must be non-empty and NUL-terminated before
               * it is handed to prison_find_name().  As with the jid lookup, a
               * found prison comes back with pr_mtx held.
               */
 1882         error = vfs_getopt(opts, "name", (void **)&name, &len);
 1883         if (error == 0) {
 1884                 if (len == 0 || name[len - 1] != '\0') {
 1885                         error = EINVAL;
 1886                         goto done_unlock_list;
 1887                 }
 1888                 pr = prison_find_name(mypr, name);
 1889                 if (pr != NULL) {
 1890                         if (pr->pr_uref == 0 && !(flags & JAIL_DYING)) {
 1891                                 mtx_unlock(&pr->pr_mtx);
 1892                                 error = ENOENT;
 1893                                 vfs_opterror(opts, "jail \"%s\" is dying",
 1894                                     name);
 1895                                 goto done_unlock_list;
 1896                         }
 1897                         goto found_prison;
 1898                 }
 1899                 error = ENOENT;
 1900                 vfs_opterror(opts, "jail \"%s\" not found", name);
 1901                 goto done_unlock_list;
 1902         } else if (error != ENOENT)
 1903                 goto done_unlock_list;
 1904 
              /* None of the three selectors was usable. */
 1905         vfs_opterror(opts, "no jail specified");
 1906         error = ENOENT;
 1907         goto done_unlock_list;
 1908 
 1909  found_prison:
 1910         /* Get the parameters of the prison. */
              /*
               * Every path to this label arrives with pr_mtx held.  Take a
               * temporary reference and record the held mutex in "locked" so
               * that done_deref can pass it on to prison_deref().
               *
               * In the vfs_setopt*() calls that follow, ENOENT is tolerated and
               * any other error aborts.  NOTE(review): ENOENT presumably means
               * the caller did not ask for that parameter - confirm.
               */
 1911         pr->pr_ref++;
 1912         locked = PD_LOCKED;
 1913         td->td_retval[0] = pr->pr_id;
 1914         error = vfs_setopt(opts, "jid", &pr->pr_id, sizeof(pr->pr_id));
 1915         if (error != 0 && error != ENOENT)
 1916                 goto done_deref;
              /* The parent is reported as 0 when it is the caller's own prison. */
 1917         i = (pr->pr_parent == mypr) ? 0 : pr->pr_parent->pr_id;
 1918         error = vfs_setopt(opts, "parent", &i, sizeof(i));
 1919         if (error != 0 && error != ENOENT)
 1920                 goto done_deref;
 1921         error = vfs_setopts(opts, "name", prison_name(mypr, pr));
 1922         if (error != 0 && error != ENOENT)
 1923                 goto done_deref;
 1924         error = vfs_setopt(opts, "cpuset.id", &pr->pr_cpuset->cs_id,
 1925             sizeof(pr->pr_cpuset->cs_id));
 1926         if (error != 0 && error != ENOENT)
 1927                 goto done_deref;
 1928         error = vfs_setopts(opts, "path", prison_path(mypr, pr));
 1929         if (error != 0 && error != ENOENT)
 1930                 goto done_deref;
 1931 #ifdef INET
 1932         error = vfs_setopt_part(opts, "ip4.addr", pr->pr_ip4,
 1933             pr->pr_ip4s * sizeof(*pr->pr_ip4));
 1934         if (error != 0 && error != ENOENT)
 1935                 goto done_deref;
 1936 #endif
 1937 #ifdef INET6
 1938         error = vfs_setopt_part(opts, "ip6.addr", pr->pr_ip6,
 1939             pr->pr_ip6s * sizeof(*pr->pr_ip6));
 1940         if (error != 0 && error != ENOENT)
 1941                 goto done_deref;
 1942 #endif
 1943         error = vfs_setopt(opts, "securelevel", &pr->pr_securelevel,
 1944             sizeof(pr->pr_securelevel));
 1945         if (error != 0 && error != ENOENT)
 1946                 goto done_deref;
 1947         error = vfs_setopt(opts, "children.cur", &pr->pr_childcount,
 1948             sizeof(pr->pr_childcount));
 1949         if (error != 0 && error != ENOENT)
 1950                 goto done_deref;
 1951         error = vfs_setopt(opts, "children.max", &pr->pr_childmax,
 1952             sizeof(pr->pr_childmax));
 1953         if (error != 0 && error != ENOENT)
 1954                 goto done_deref;
 1955         error = vfs_setopts(opts, "host.hostname", pr->pr_hostname);
 1956         if (error != 0 && error != ENOENT)
 1957                 goto done_deref;
 1958         error = vfs_setopts(opts, "host.domainname", pr->pr_domainname);
 1959         if (error != 0 && error != ENOENT)
 1960                 goto done_deref;
 1961         error = vfs_setopts(opts, "host.hostuuid", pr->pr_hostuuid);
 1962         if (error != 0 && error != ENOENT)
 1963                 goto done_deref;
              /*
               * With COMPAT_FREEBSD32, an SV_ILP32 process gets host.hostid
               * narrowed to a uint32_t.  The "else" inside the #ifdef binds to
               * the unconditional vfs_setopt() after the #endif, which is the
               * only statement compiled when the option is off.
               */
 1964 #ifdef COMPAT_FREEBSD32
 1965         if (td->td_proc->p_sysent->sv_flags & SV_ILP32) {
 1966                 uint32_t hid32 = pr->pr_hostid;
 1967 
 1968                 error = vfs_setopt(opts, "host.hostid", &hid32, sizeof(hid32));
 1969         } else
 1970 #endif
 1971         error = vfs_setopt(opts, "host.hostid", &pr->pr_hostid,
 1972             sizeof(pr->pr_hostid));
 1973         if (error != 0 && error != ENOENT)
 1974                 goto done_deref;
 1975         error = vfs_setopt(opts, "enforce_statfs", &pr->pr_enforce_statfs,
 1976             sizeof(pr->pr_enforce_statfs));
 1977         if (error != 0 && error != ENOENT)
 1978                 goto done_deref;
              /*
               * Boolean flags: bit fi of pr_flags is reported as 0/1 under
               * pr_flag_names[fi] and, inverted, under pr_flag_nonames[fi].
               * Slots with a NULL name are skipped.
               */
 1979         for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]);
 1980             fi++) {
 1981                 if (pr_flag_names[fi] == NULL)
 1982                         continue;
 1983                 i = (pr->pr_flags & (1 << fi)) ? 1 : 0;
 1984                 error = vfs_setopt(opts, pr_flag_names[fi], &i, sizeof(i));
 1985                 if (error != 0 && error != ENOENT)
 1986                         goto done_deref;
 1987                 i = !i;
 1988                 error = vfs_setopt(opts, pr_flag_nonames[fi], &i, sizeof(i));
 1989                 if (error != 0 && error != ENOENT)
 1990                         goto done_deref;
 1991         }
              /*
               * Tri-state "jailsys" parameters, derived from a disable/new bit
               * pair: JAIL_SYS_DISABLE when a disable bit is defined and is the
               * only bit of the pair that is set, JAIL_SYS_NEW when only the
               * new bit is set, and JAIL_SYS_INHERIT in every other case.
               */
 1992         for (fi = 0; fi < sizeof(pr_flag_jailsys) / sizeof(pr_flag_jailsys[0]);
 1993             fi++) {
 1994                 i = pr->pr_flags &
 1995                     (pr_flag_jailsys[fi].disable | pr_flag_jailsys[fi].new);
 1996                 i = pr_flag_jailsys[fi].disable &&
 1997                       (i == pr_flag_jailsys[fi].disable) ? JAIL_SYS_DISABLE
 1998                     : (i == pr_flag_jailsys[fi].new) ? JAIL_SYS_NEW
 1999                     : JAIL_SYS_INHERIT;
 2000                 error =
 2001                     vfs_setopt(opts, pr_flag_jailsys[fi].name, &i, sizeof(i));
 2002                 if (error != 0 && error != ENOENT)
 2003                         goto done_deref;
 2004         }
              /* Same name/noname reporting for the bits of pr_allow. */
 2005         for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]);
 2006             fi++) {
 2007                 if (pr_allow_names[fi] == NULL)
 2008                         continue;
 2009                 i = (pr->pr_allow & (1 << fi)) ? 1 : 0;
 2010                 error = vfs_setopt(opts, pr_allow_names[fi], &i, sizeof(i));
 2011                 if (error != 0 && error != ENOENT)
 2012                         goto done_deref;
 2013                 i = !i;
 2014                 error = vfs_setopt(opts, pr_allow_nonames[fi], &i, sizeof(i));
 2015                 if (error != 0 && error != ENOENT)
 2016                         goto done_deref;
 2017         }
              /* "dying" is true exactly when pr_uref has dropped to zero. */
 2018         i = (pr->pr_uref == 0);
 2019         error = vfs_setopt(opts, "dying", &i, sizeof(i));
 2020         if (error != 0 && error != ENOENT)
 2021                 goto done_deref;
 2022         i = !i;
 2023         error = vfs_setopt(opts, "nodying", &i, sizeof(i));
 2024         if (error != 0 && error != ENOENT)
 2025                 goto done_deref;
 2026 
 2027         /* Get the module parameters. */
              /*
               * pr_mtx is dropped before the module handlers run; clearing
               * "locked" keeps done_deref from claiming the mutex is held.
               * NOTE(review): prison_deref() with PD_DEREF | PD_LIST_SLOCKED
               * presumably drops the temporary reference and releases the
               * shared allprison_lock (no sx_sunlock() follows on this path) -
               * confirm against prison_deref().
               */
 2028         mtx_unlock(&pr->pr_mtx);
 2029         locked = 0;
 2030         error = osd_jail_call(pr, PR_METHOD_GET, opts);
 2031         if (error)
 2032                 goto done_deref;
 2033         prison_deref(pr, PD_DEREF | PD_LIST_SLOCKED);
 2034 
 2035         /* By now, all parameters should have been noted. */
              /* Any option other than "errmsg" left unseen is rejected. */
 2036         TAILQ_FOREACH(opt, opts, link) {
 2037                 if (!opt->seen && strcmp(opt->name, "errmsg")) {
 2038                         error = EINVAL;
 2039                         vfs_opterror(opts, "unknown parameter: %s", opt->name);
 2040                         goto done_errmsg;
 2041                 }
 2042         }
 2043 
 2044         /* Write the fetched parameters back to userspace. */
              /*
               * For each option that has a position (other than "errmsg"), the
               * value iovec's iov_len is set to the stored length and the value
               * is copied out: bcopy() for a UIO_SYSSPACE uio, copyout()
               * otherwise.  A copyout() failure stops the loop and is returned.
               */
 2045         error = 0;
 2046         TAILQ_FOREACH(opt, opts, link) {
 2047                 if (opt->pos >= 0 && opt->pos != errmsg_pos) {
 2048                         pos = 2 * opt->pos + 1;
 2049                         optuio->uio_iov[pos].iov_len = opt->len;
 2050                         if (opt->value != NULL) {
 2051                                 if (optuio->uio_segflg == UIO_SYSSPACE) {
 2052                                         bcopy(opt->value,
 2053                                             optuio->uio_iov[pos].iov_base,
 2054                                             opt->len);
 2055                                 } else {
 2056                                         error = copyout(opt->value,
 2057                                             optuio->uio_iov[pos].iov_base,
 2058                                             opt->len);
 2059                                         if (error)
 2060                                                 break;
 2061                                 }
 2062                         }
 2063                 }
 2064         }
 2065         goto done_errmsg;
 2066 
              /*
               * Error exit while the temporary reference is still held;
               * "locked" says whether pr_mtx is held as well.
               */
 2067  done_deref:
 2068         prison_deref(pr, locked | PD_DEREF | PD_LIST_SLOCKED);
 2069         goto done_errmsg;
 2070 
              /* Error exit before any reference was taken: drop the list lock. */
 2071  done_unlock_list:
 2072         sx_sunlock(&allprison_lock);
 2073  done_errmsg:
              /*
               * On error, copy the recorded message into the caller's "errmsg"
               * value buffer (iovec 2 * errmsg_pos + 1).  The copyout() result
               * is not checked; the original error is what gets returned.
               */
 2074         if (error && errmsg_pos >= 0) {
 2075                 vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len);
 2076                 errmsg_pos = 2 * errmsg_pos + 1;
 2077                 if (errmsg_len > 0) {
 2078                         if (optuio->uio_segflg == UIO_SYSSPACE)
 2079                                 bcopy(errmsg,
 2080                                     optuio->uio_iov[errmsg_pos].iov_base,
 2081                                     errmsg_len);
 2082                         else
 2083                                 copyout(errmsg,
 2084                                     optuio->uio_iov[errmsg_pos].iov_base,
 2085                                     errmsg_len);
 2086                 }
 2087         }
 2088         vfs_freeopts(opts);
 2089         return (error);
 2090 }
 2091 
 2092 
/*
 * Remove the jail identified by uap->jid, which must be a descendant of the
 * caller's prison, together with all of that jail's own descendants.
 * Returns 0 on success, the priv_check() error if the caller lacks
 * PRIV_JAIL_REMOVE, or EINVAL if no such jail is found.
 *
 * struct jail_remove_args {
 *      int jid;
 * };
 */
int
jail_remove(struct thread *td, struct jail_remove_args *uap)
{
        struct prison *pr, *cpr, *lpr, *tpr;
        int descend, error;

        error = priv_check(td, PRIV_JAIL_REMOVE);
        if (error)
                return (error);

        sx_xlock(&allprison_lock);
        /* On success the prison comes back with its mutex held. */
        pr = prison_find_child(td->td_ucred->cr_prison, uap->jid);
        if (pr == NULL) {
                sx_xunlock(&allprison_lock);
                return (EINVAL);
        }

        /* Remove all descendants of this prison, then remove this prison. */
        pr->pr_ref++;
        pr->pr_flags |= PR_REMOVE;
        if (!LIST_EMPTY(&pr->pr_children)) {
                mtx_unlock(&pr->pr_mtx);
                /*
                 * Removal lags one step behind the walk: lpr is the
                 * previously visited descendant that is still waiting to be
                 * removed, and it is only removed once the iterator has
                 * moved on to the next prison (presumably so the prison the
                 * iterator is standing on is never torn down under it).
                 */
                lpr = NULL;
                FOREACH_PRISON_DESCENDANT(pr, cpr, descend) {
                        mtx_lock(&cpr->pr_mtx);
                        if (cpr->pr_ref > 0) {
                                /* Take the temporary reference that
                                 * prison_remove_one() expects. */
                                tpr = cpr;
                                cpr->pr_ref++;
                                cpr->pr_flags |= PR_REMOVE;
                        } else {
                                /* Already removed - do not do it again. */
                                tpr = NULL;
                        }
                        mtx_unlock(&cpr->pr_mtx);
                        if (lpr != NULL) {
                                mtx_lock(&lpr->pr_mtx);
                                prison_remove_one(lpr);
                                /* prison_remove_one() returns with
                                 * allprison_lock released; retake it. */
                                sx_xlock(&allprison_lock);
                        }
                        lpr = tpr;
                }
                /* Remove the last descendant left pending by the walk. */
                if (lpr != NULL) {
                        mtx_lock(&lpr->pr_mtx);
                        prison_remove_one(lpr);
                        sx_xlock(&allprison_lock);
                }
                mtx_lock(&pr->pr_mtx);
        }
        /* Entered with pr's mutex and allprison_lock held; both are
         * released by the callee. */
        prison_remove_one(pr);
        return (0);
}
 2149 
/*
 * Remove a single prison on behalf of jail_remove().
 *
 * Called with pr's mutex held, allprison_lock held exclusively, and one
 * extra reference on pr taken by the caller.  Both locks are released
 * before returning.  Any persistence flag is cleared, every process whose
 * credential points at pr is sent SIGKILL, and the caller's temporary
 * reference is dropped.
 */
static void
prison_remove_one(struct prison *pr)
{
        struct proc *p;
        int deuref;

        /* If the prison was persistent, it is not anymore. */
        deuref = 0;
        if (pr->pr_flags & PR_PERSIST) {
                /* Drop the reference here; the matching user reference is
                 * dropped later through PD_DEUREF in prison_deref(). */
                pr->pr_ref--;
                deuref = PD_DEUREF;
                pr->pr_flags &= ~PR_PERSIST;
        }

        /*
         * jail_remove added a reference.  If that's the only one, remove
         * the prison now.
         */
        KASSERT(pr->pr_ref > 0,
            ("prison_remove_one removing a dead prison (jid=%d)", pr->pr_id));
        if (pr->pr_ref == 1) {
                /* prison_deref() releases both the mutex and the list lock. */
                prison_deref(pr,
                    deuref | PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED);
                return;
        }

        /* Drop both locks before walking the process list. */
        mtx_unlock(&pr->pr_mtx);
        sx_xunlock(&allprison_lock);
        /*
         * Kill all processes unfortunate enough to be attached to this prison.
         */
        sx_slock(&allproc_lock);
        LIST_FOREACH(p, &allproc, p_list) {
                PROC_LOCK(p);
                /* Skip processes still in PRS_NEW or without a credential. */
                if (p->p_state != PRS_NEW && p->p_ucred &&
                    p->p_ucred->cr_prison == pr)
                        psignal(p, SIGKILL);
                PROC_UNLOCK(p);
        }
        sx_sunlock(&allproc_lock);
        /* Remove the temporary reference added by jail_remove. */
        prison_deref(pr, deuref | PD_DEREF);
}
 2193 
 2194 
 2195 /*
 2196  * struct jail_attach_args {
 2197  *      int jid;
 2198  * };
 2199  */
 2200 int
 2201 jail_attach(struct thread *td, struct jail_attach_args *uap)
 2202 {
 2203         struct prison *pr;
 2204         int error;
 2205 
 2206         error = priv_check(td, PRIV_JAIL_ATTACH);
 2207         if (error)
 2208                 return (error);
 2209 
 2210         sx_slock(&allprison_lock);
 2211         pr = prison_find_child(td->td_ucred->cr_prison, uap->jid);
 2212         if (pr == NULL) {
 2213                 sx_sunlock(&allprison_lock);
 2214                 return (EINVAL);
 2215         }
 2216 
 2217         /*
 2218          * Do not allow a process to attach to a prison that is not
 2219          * considered to be "alive".
 2220          */
 2221         if (pr->pr_uref == 0) {
 2222                 mtx_unlock(&pr->pr_mtx);
 2223                 sx_sunlock(&allprison_lock);
 2224                 return (EINVAL);
 2225         }
 2226 
 2227         return (do_jail_attach(td, pr));
 2228 }
 2229 
/*
 * Attach the process of thread td to prison pr.
 *
 * Entered with pr's mutex held and allprison_lock held shared; both are
 * released before returning.  On success the process's root and current
 * directory are switched to pr->pr_root, its credential is replaced by a
 * copy pointing at pr, and 0 is returned.  On failure the references taken
 * on pr are dropped again and the error is returned.
 */
static int
do_jail_attach(struct thread *td, struct prison *pr)
{
        struct prison *ppr;
        struct proc *p;
        struct ucred *newcred, *oldcred;
        int vfslocked, error;

        /*
         * XXX: Note that there is a slight race here if two threads
         * in the same privileged process attempt to attach to two
         * different jails at the same time.  It is important for
         * user processes not to do this, or they might end up with
         * a process root from one prison, but attached to the jail
         * of another.
         */
        /* Take a reference and a user reference for the new credential. */
        pr->pr_ref++;
        pr->pr_uref++;
        mtx_unlock(&pr->pr_mtx);

        /* Let modules do whatever they need to prepare for attaching. */
        error = osd_jail_call(pr, PR_METHOD_ATTACH, td);
        if (error) {
                /* prison_deref() also drops the shared list lock here. */
                prison_deref(pr, PD_DEREF | PD_DEUREF | PD_LIST_SLOCKED);
                return (error);
        }
        sx_sunlock(&allprison_lock);

        /*
         * Reparent the newly attached process to this jail.
         */
        ppr = td->td_ucred->cr_prison;
        p = td->td_proc;
        error = cpuset_setproc_update_set(p, pr->pr_cpuset);
        if (error)
                goto e_revert_osd;

        /* Move the current directory, then the root, to the jail's root. */
        vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
        vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY);
        if ((error = change_dir(pr->pr_root, td)) != 0)
                goto e_unlock;
#ifdef MAC
        if ((error = mac_vnode_check_chroot(td->td_ucred, pr->pr_root)))
                goto e_unlock;
#endif
        VOP_UNLOCK(pr->pr_root, 0);
        if ((error = change_root(pr->pr_root, td)))
                goto e_unlock_giant;
        VFS_UNLOCK_GIANT(vfslocked);

        /* Swap in a copy of the credential that points at the new prison. */
        newcred = crget();
        PROC_LOCK(p);
        oldcred = p->p_ucred;
        setsugid(p);
        crcopy(newcred, oldcred);
        newcred->cr_prison = pr;
        p->p_ucred = newcred;
        PROC_UNLOCK(p);
        crfree(oldcred);
        /* Release the old prison's reference and user reference
         * (presumably the ones the process held via its old credential). */
        prison_deref(ppr, PD_DEREF | PD_DEUREF);
        return (0);
 e_unlock:
        VOP_UNLOCK(pr->pr_root, 0);
 e_unlock_giant:
        VFS_UNLOCK_GIANT(vfslocked);
 e_revert_osd:
        /* Tell modules this thread is still in its old jail after all. */
        (void)osd_jail_call(ppr, PR_METHOD_ATTACH, td);
        /* Give back the references taken at the top of the function. */
        prison_deref(pr, PD_DEREF | PD_DEUREF);
        return (error);
}
 2301 
 2302 
 2303 /*
 2304  * Returns a locked prison instance, or NULL on failure.
 2305  */
 2306 struct prison *
 2307 prison_find(int prid)
 2308 {
 2309         struct prison *pr;
 2310 
 2311         sx_assert(&allprison_lock, SX_LOCKED);
 2312         TAILQ_FOREACH(pr, &allprison, pr_list) {
 2313                 if (pr->pr_id == prid) {
 2314                         mtx_lock(&pr->pr_mtx);
 2315                         if (pr->pr_ref > 0)
 2316                                 return (pr);
 2317                         mtx_unlock(&pr->pr_mtx);
 2318                 }
 2319         }
 2320         return (NULL);
 2321 }
 2322 
 2323 /*
 2324  * Find a prison that is a descendant of mypr.  Returns a locked prison or NULL.
 2325  */
 2326 struct prison *
 2327 prison_find_child(struct prison *mypr, int prid)
 2328 {
 2329         struct prison *pr;
 2330         int descend;
 2331 
 2332         sx_assert(&allprison_lock, SX_LOCKED);
 2333         FOREACH_PRISON_DESCENDANT(mypr, pr, descend) {
 2334                 if (pr->pr_id == prid) {
 2335                         mtx_lock(&pr->pr_mtx);
 2336                         if (pr->pr_ref > 0)
 2337                                 return (pr);
 2338                         mtx_unlock(&pr->pr_mtx);
 2339                 }
 2340         }
 2341         return (NULL);
 2342 }
 2343 
 2344 /*
 2345  * Look for the name relative to mypr.  Returns a locked prison or NULL.
 2346  */
 2347 struct prison *
 2348 prison_find_name(struct prison *mypr, const char *name)
 2349 {
 2350         struct prison *pr, *deadpr;
 2351         size_t mylen;
 2352         int descend;
 2353 
 2354         sx_assert(&allprison_lock, SX_LOCKED);
 2355         mylen = (mypr == &prison0) ? 0 : strlen(mypr->pr_name) + 1;
 2356  again:
 2357         deadpr = NULL;
 2358         FOREACH_PRISON_DESCENDANT(mypr, pr, descend) {
 2359                 if (!strcmp(pr->pr_name + mylen, name)) {
 2360                         mtx_lock(&pr->pr_mtx);
 2361                         if (pr->pr_ref > 0) {
 2362                                 if (pr->pr_uref > 0)
 2363                                         return (pr);
 2364                                 deadpr = pr;
 2365                         }
 2366                         mtx_unlock(&pr->pr_mtx);
 2367                 }
 2368         }
 2369         /* There was no valid prison - perhaps there was a dying one. */
 2370         if (deadpr != NULL) {
 2371                 mtx_lock(&deadpr->pr_mtx);
 2372                 if (deadpr->pr_ref == 0) {
 2373                         mtx_unlock(&deadpr->pr_mtx);
 2374                         goto again;
 2375                 }
 2376         }
 2377         return (deadpr);
 2378 }
 2379 
 2380 /*
 2381  * See if a prison has the specific flag set.
 2382  */
 2383 int
 2384 prison_flag(struct ucred *cred, unsigned flag)
 2385 {
 2386 
 2387         /* This is an atomic read, so no locking is necessary. */
 2388         return (cred->cr_prison->pr_flags & flag);
 2389 }
 2390 
 2391 int
 2392 prison_allow(struct ucred *cred, unsigned flag)
 2393 {
 2394 
 2395         /* This is an atomic read, so no locking is necessary. */
 2396         return (cred->cr_prison->pr_allow & flag);
 2397 }
 2398 
 2399 /*
 2400  * Remove a prison reference.  If that was the last reference, remove the
 2401  * prison itself - but not in this context in case there are locks held.
 2402  */
 2403 void
 2404 prison_free_locked(struct prison *pr)
 2405 {
 2406 
 2407         mtx_assert(&pr->pr_mtx, MA_OWNED);
 2408         pr->pr_ref--;
 2409         if (pr->pr_ref == 0) {
 2410                 mtx_unlock(&pr->pr_mtx);
 2411                 TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
 2412                 taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
 2413                 return;
 2414         }
 2415         mtx_unlock(&pr->pr_mtx);
 2416 }
 2417 
/*
 * Drop one reference on a prison.  Takes the prison mutex and lets
 * prison_free_locked() do the work, which also releases the mutex.
 */
void
prison_free(struct prison *pr)
{

        mtx_lock(&pr->pr_mtx);
        prison_free_locked(pr);
}
 2425 
/*
 * Task handler queued by prison_free_locked() once a prison's reference
 * count has reached zero.  context is the prison; pending is unused.
 */
static void
prison_complete(void *context, int pending)
{

        /* No flags: nothing is decremented and no locks are held on entry. */
        prison_deref((struct prison *)context, 0);
}
 2432 
/*
 * Remove a prison reference (usually).  This internal version assumes no
 * mutexes are held, except perhaps the prison itself.  If there are no more
 * references, release and delist the prison.  On completion, the prison lock
 * and the allprison lock are both unlocked.
 *
 * flags is a combination of:
 *   PD_DEREF        - decrement pr_ref
 *   PD_DEUREF       - decrement pr_uref
 *   PD_LOCKED       - the caller already holds pr's mutex
 *   PD_LIST_SLOCKED - the caller holds allprison_lock shared
 *   PD_LIST_XLOCKED - the caller holds allprison_lock exclusively
 */
static void
prison_deref(struct prison *pr, int flags)
{
        struct prison *ppr, *tpr;
        int vfslocked;

        if (!(flags & PD_LOCKED))
                mtx_lock(&pr->pr_mtx);
        /* Decrement the user references in a separate loop. */
        if (flags & PD_DEUREF) {
                /*
                 * Walk up the ancestry: each prison whose user count drops
                 * to zero in turn drops a user reference on its parent.
                 * The loop exits holding the mutex of the first prison
                 * whose count stays positive.
                 */
                for (tpr = pr;; tpr = tpr->pr_parent) {
                        if (tpr != pr)
                                mtx_lock(&tpr->pr_mtx);
                        if (--tpr->pr_uref > 0)
                                break;
                        KASSERT(tpr != &prison0, ("prison0 pr_uref=0"));
                        mtx_unlock(&tpr->pr_mtx);
                }
                /* Done if there were only user references to remove. */
                if (!(flags & PD_DEREF)) {
                        mtx_unlock(&tpr->pr_mtx);
                        if (flags & PD_LIST_SLOCKED)
                                sx_sunlock(&allprison_lock);
                        else if (flags & PD_LIST_XLOCKED)
                                sx_xunlock(&allprison_lock);
                        return;
                }
                /* Swap back to pr's own mutex for the reference loop. */
                if (tpr != pr) {
                        mtx_unlock(&tpr->pr_mtx);
                        mtx_lock(&pr->pr_mtx);
                }
        }

        /*
         * Each pass handles one prison.  When that prison is freed, the
         * loop moves on to its parent to drop the reference the child held.
         */
        for (;;) {
                if (flags & PD_DEREF)
                        pr->pr_ref--;
                /* If the prison still has references, nothing else to do. */
                if (pr->pr_ref > 0) {
                        mtx_unlock(&pr->pr_mtx);
                        if (flags & PD_LIST_SLOCKED)
                                sx_sunlock(&allprison_lock);
                        else if (flags & PD_LIST_XLOCKED)
                                sx_xunlock(&allprison_lock);
                        return;
                }

                /* Delisting needs the list lock held exclusively. */
                mtx_unlock(&pr->pr_mtx);
                if (flags & PD_LIST_SLOCKED) {
                        if (!sx_try_upgrade(&allprison_lock)) {
                                sx_sunlock(&allprison_lock);
                                sx_xlock(&allprison_lock);
                        }
                } else if (!(flags & PD_LIST_XLOCKED))
                        sx_xlock(&allprison_lock);

                TAILQ_REMOVE(&allprison, pr, pr_list);
                LIST_REMOVE(pr, pr_sibling);
                ppr = pr->pr_parent;
                /* Every ancestor has one descendant fewer. */
                for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent)
                        tpr->pr_childcount--;
                sx_xunlock(&allprison_lock);

                /* Release the resources owned by the delisted prison. */
#ifdef VIMAGE
                /* Only destroy a vnet that is not shared with the parent. */
                if (pr->pr_vnet != ppr->pr_vnet)
                        vnet_destroy(pr->pr_vnet);
#endif
                if (pr->pr_root != NULL) {
                        vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
                        vrele(pr->pr_root);
                        VFS_UNLOCK_GIANT(vfslocked);
                }
                mtx_destroy(&pr->pr_mtx);
#ifdef INET
                free(pr->pr_ip4, M_PRISON);
#endif
#ifdef INET6
                free(pr->pr_ip6, M_PRISON);
#endif
                if (pr->pr_cpuset != NULL)
                        cpuset_rel(pr->pr_cpuset);
                osd_jail_exit(pr);
                free(pr, M_PRISON);

                /* Removing a prison frees a reference on its parent. */
                pr = ppr;
                mtx_lock(&pr->pr_mtx);
                /* The list lock was released above, so drop the lock flags. */
                flags = PD_DEREF;
        }
}
 2528 
/*
 * Add a reference to a prison whose mutex the caller already holds.  The
 * prison must still have at least one reference.
 */
void
prison_hold_locked(struct prison *pr)
{

        mtx_assert(&pr->pr_mtx, MA_OWNED);
        KASSERT(pr->pr_ref > 0,
            ("Trying to hold dead prison (jid=%d).", pr->pr_id));
        pr->pr_ref++;
}
 2538 
/*
 * Add a reference to a prison, taking and releasing the prison mutex
 * around the update.
 */
void
prison_hold(struct prison *pr)
{

        mtx_lock(&pr->pr_mtx);
        prison_hold_locked(pr);
        mtx_unlock(&pr->pr_mtx);
}
 2547 
/*
 * Add a user reference (pr_uref) to a prison.  The prison must already
 * have at least one user reference.
 */
void
prison_proc_hold(struct prison *pr)
{

        mtx_lock(&pr->pr_mtx);
        KASSERT(pr->pr_uref > 0,
            ("Cannot add a process to a non-alive prison (jid=%d)", pr->pr_id));
        pr->pr_uref++;
        mtx_unlock(&pr->pr_mtx);
}
 2558 
/*
 * Drop a user reference (pr_uref) from a prison.  The prison must have at
 * least one user reference.
 */
void
prison_proc_free(struct prison *pr)
{

        mtx_lock(&pr->pr_mtx);
        KASSERT(pr->pr_uref > 0,
            ("Trying to kill a process in a dead prison (jid=%d)", pr->pr_id));
        /* prison_deref() releases the mutex taken above. */
        prison_deref(pr, PD_DEUREF | PD_LOCKED);
}
 2568 
 2569 
 2570 #ifdef INET
/*
 * Restrict a prison's IP address list with its parent's, possibly replacing
 * it.  Return true if the replacement buffer was used (or would have been).
 *
 * newip4 is an optional caller-supplied buffer that is used when the
 * prison's current array is too small to hold the parent's list.
 */
static int
prison_restrict_ip4(struct prison *pr, struct in_addr *newip4)
{
        int ii, ij, used;
        struct prison *ppr;

        ppr = pr->pr_parent;
        if (!(pr->pr_flags & PR_IP4_USER)) {
                /* This has no user settings, so just copy the parent's list. */
                if (pr->pr_ip4s < ppr->pr_ip4s) {
                        /*
                         * There's no room for the parent's list.  Use the
                         * new list buffer, which is assumed to be big enough
                         * (if it was passed).  If there's no buffer, try to
                         * allocate one.
                         */
                        used = 1;
                        if (newip4 == NULL) {
                                newip4 = malloc(ppr->pr_ip4s * sizeof(*newip4),
                                    M_PRISON, M_NOWAIT);
                                /* A locally allocated buffer does not count
                                 * as using the caller's buffer. */
                                if (newip4 != NULL)
                                        used = 0;
                        }
                        /* If the allocation failed, the list is left as is
                         * and 1 is returned. */
                        if (newip4 != NULL) {
                                bcopy(ppr->pr_ip4, newip4,
                                    ppr->pr_ip4s * sizeof(*newip4));
                                free(pr->pr_ip4, M_PRISON);
                                pr->pr_ip4 = newip4;
                                pr->pr_ip4s = ppr->pr_ip4s;
                        }
                        return (used);
                }
                /* The existing array is large enough; copy in place. */
                pr->pr_ip4s = ppr->pr_ip4s;
                if (pr->pr_ip4s > 0)
                        bcopy(ppr->pr_ip4, pr->pr_ip4,
                            pr->pr_ip4s * sizeof(*newip4));
                else if (pr->pr_ip4 != NULL) {
                        free(pr->pr_ip4, M_PRISON);
                        pr->pr_ip4 = NULL;
                }
        } else if (pr->pr_ip4s > 0) {
                /* Remove addresses that aren't in the parent. */
                /*
                 * Entry 0 is handled on its own: look for it anywhere in
                 * the parent's list, and drop it if it is not there.
                 */
                for (ij = 0; ij < ppr->pr_ip4s; ij++)
                        if (pr->pr_ip4[0].s_addr == ppr->pr_ip4[ij].s_addr)
                                break;
                if (ij < ppr->pr_ip4s)
                        ii = 1;
                else {
                        bcopy(pr->pr_ip4 + 1, pr->pr_ip4,
                            --pr->pr_ip4s * sizeof(*pr->pr_ip4));
                        ii = 0;
                }
                /*
                 * Walk the remaining entries (index ii) against the
                 * parent's entries from index 1 on (index ij), deleting
                 * those with no match.  NOTE(review): this relies on both
                 * tails being sorted by qcmp_v4() and on qcmp_v4()
                 * returning exactly -1, 0 or 1 - confirm.
                 */
                for (ij = 1; ii < pr->pr_ip4s; ) {
                        /* The parent's entry 0 is outside the compared
                         * range, so check for it explicitly. */
                        if (pr->pr_ip4[ii].s_addr == ppr->pr_ip4[0].s_addr) {
                                ii++;
                                continue;
                        }
                        switch (ij >= ppr->pr_ip4s ? -1 :
                                qcmp_v4(&pr->pr_ip4[ii], &ppr->pr_ip4[ij])) {
                        case -1:
                                /* Not in the parent (or the parent's list
                                 * is exhausted): delete this entry. */
                                bcopy(pr->pr_ip4 + ii + 1, pr->pr_ip4 + ii,
                                    (--pr->pr_ip4s - ii) * sizeof(*pr->pr_ip4));
                                break;
                        case 0:
                                /* Present in both lists: keep it. */
                                ii++;
                                ij++;
                                break;
                        case 1:
                                /* Try the parent's next entry. */
                                ij++;
                                break;
                        }
                }
                /* Nothing survived: disable IPv4 and release the array. */
                if (pr->pr_ip4s == 0) {
                        pr->pr_flags |= PR_IP4_DISABLE;
                        free(pr->pr_ip4, M_PRISON);
                        pr->pr_ip4 = NULL;
                }
        }
        return (0);
}
 2655 
 2656 /*
 2657  * Pass back primary IPv4 address of this jail.
 2658  *
 2659  * If not restricted return success but do not alter the address.  Caller has
 2660  * to make sure to initialize it correctly (e.g. INADDR_ANY).
 2661  *
 2662  * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4.
 2663  * Address returned in NBO.
 2664  */
 2665 int
 2666 prison_get_ip4(struct ucred *cred, struct in_addr *ia)
 2667 {
 2668         struct prison *pr;
 2669 
 2670         KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
 2671         KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
 2672 
 2673         pr = cred->cr_prison;
 2674         if (!(pr->pr_flags & PR_IP4))
 2675                 return (0);
 2676         mtx_lock(&pr->pr_mtx);
 2677         if (!(pr->pr_flags & PR_IP4)) {
 2678                 mtx_unlock(&pr->pr_mtx);
 2679                 return (0);
 2680         }
 2681         if (pr->pr_ip4 == NULL) {
 2682                 mtx_unlock(&pr->pr_mtx);
 2683                 return (EAFNOSUPPORT);
 2684         }
 2685 
 2686         ia->s_addr = pr->pr_ip4[0].s_addr;
 2687         mtx_unlock(&pr->pr_mtx);
 2688         return (0);
 2689 }
 2690 
 2691 /*
 2692  * Return 1 if we should do proper source address selection or are not jailed.
 2693  * We will return 0 if we should bypass source address selection in favour
 2694  * of the primary jail IPv4 address. Only in this case *ia will be updated and
 2695  * returned in NBO.
 2696  * Return EAFNOSUPPORT, in case this jail does not allow IPv4.
 2697  */
 2698 int
 2699 prison_saddrsel_ip4(struct ucred *cred, struct in_addr *ia)
 2700 {
 2701         struct prison *pr;
 2702         struct in_addr lia;
 2703         int error;
 2704 
 2705         KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
 2706         KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
 2707 
 2708         if (!jailed(cred))
 2709                 return (1);
 2710 
 2711         pr = cred->cr_prison;
 2712         if (pr->pr_flags & PR_IP4_SADDRSEL)
 2713                 return (1);
 2714 
 2715         lia.s_addr = INADDR_ANY;
 2716         error = prison_get_ip4(cred, &lia);
 2717         if (error)
 2718                 return (error);
 2719         if (lia.s_addr == INADDR_ANY)
 2720                 return (1);
 2721 
 2722         ia->s_addr = lia.s_addr;
 2723         return (0);
 2724 }
 2725 
 2726 /*
 2727  * Return true if pr1 and pr2 have the same IPv4 address restrictions.
 2728  */
 2729 int
 2730 prison_equal_ip4(struct prison *pr1, struct prison *pr2)
 2731 {
 2732 
 2733         if (pr1 == pr2)
 2734                 return (1);
 2735 
 2736         /*
 2737          * No need to lock since the PR_IP4_USER flag can't be altered for
 2738          * existing prisons.
 2739          */
 2740         while (pr1 != &prison0 &&
 2741 #ifdef VIMAGE
 2742                !(pr1->pr_flags & PR_VNET) &&
 2743 #endif
 2744                !(pr1->pr_flags & PR_IP4_USER))
 2745                 pr1 = pr1->pr_parent;
 2746         while (pr2 != &prison0 &&
 2747 #ifdef VIMAGE
 2748                !(pr2->pr_flags & PR_VNET) &&
 2749 #endif
 2750                !(pr2->pr_flags & PR_IP4_USER))
 2751                 pr2 = pr2->pr_parent;
 2752         return (pr1 == pr2);
 2753 }
 2754 
 2755 /*
 2756  * Make sure our (source) address is set to something meaningful to this
 2757  * jail.
 2758  *
 2759  * Returns 0 if jail doesn't restrict IPv4 or if address belongs to jail,
 2760  * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail
 2761  * doesn't allow IPv4.  Address passed in in NBO and returned in NBO.
 2762  */
 2763 int
 2764 prison_local_ip4(struct ucred *cred, struct in_addr *ia)
 2765 {
 2766         struct prison *pr;
 2767         struct in_addr ia0;
 2768         int error;
 2769 
 2770         KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
 2771         KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
 2772 
 2773         pr = cred->cr_prison;
 2774         if (!(pr->pr_flags & PR_IP4))
 2775                 return (0);
 2776         mtx_lock(&pr->pr_mtx);
 2777         if (!(pr->pr_flags & PR_IP4)) {
 2778                 mtx_unlock(&pr->pr_mtx);
 2779                 return (0);
 2780         }
 2781         if (pr->pr_ip4 == NULL) {
 2782                 mtx_unlock(&pr->pr_mtx);
 2783                 return (EAFNOSUPPORT);
 2784         }
 2785 
 2786         ia0.s_addr = ntohl(ia->s_addr);
 2787         if (ia0.s_addr == INADDR_LOOPBACK) {
 2788                 ia->s_addr = pr->pr_ip4[0].s_addr;
 2789                 mtx_unlock(&pr->pr_mtx);
 2790                 return (0);
 2791         }
 2792 
 2793         if (ia0.s_addr == INADDR_ANY) {
 2794                 /*
 2795                  * In case there is only 1 IPv4 address, bind directly.
 2796                  */
 2797                 if (pr->pr_ip4s == 1)
 2798                         ia->s_addr = pr->pr_ip4[0].s_addr;
 2799                 mtx_unlock(&pr->pr_mtx);
 2800                 return (0);
 2801         }
 2802 
 2803         error = _prison_check_ip4(pr, ia);
 2804         mtx_unlock(&pr->pr_mtx);
 2805         return (error);
 2806 }
 2807 
 2808 /*
 2809  * Rewrite destination address in case we will connect to loopback address.
 2810  *
 2811  * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4.
 2812  * Address passed in in NBO and returned in NBO.
 2813  */
 2814 int
 2815 prison_remote_ip4(struct ucred *cred, struct in_addr *ia)
 2816 {
 2817         struct prison *pr;
 2818 
 2819         KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
 2820         KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
 2821 
 2822         pr = cred->cr_prison;
 2823         if (!(pr->pr_flags & PR_IP4))
 2824                 return (0);
 2825         mtx_lock(&pr->pr_mtx);
 2826         if (!(pr->pr_flags & PR_IP4)) {
 2827                 mtx_unlock(&pr->pr_mtx);
 2828                 return (0);
 2829         }
 2830         if (pr->pr_ip4 == NULL) {
 2831                 mtx_unlock(&pr->pr_mtx);
 2832                 return (EAFNOSUPPORT);
 2833         }
 2834 
 2835         if (ntohl(ia->s_addr) == INADDR_LOOPBACK) {
 2836                 ia->s_addr = pr->pr_ip4[0].s_addr;
 2837                 mtx_unlock(&pr->pr_mtx);
 2838                 return (0);
 2839         }
 2840 
 2841         /*
 2842          * Return success because nothing had to be changed.
 2843          */
 2844         mtx_unlock(&pr->pr_mtx);
 2845         return (0);
 2846 }
 2847 
 2848 /*
 2849  * Check if given address belongs to the jail referenced by cred/prison.
 2850  *
 2851  * Returns 0 if jail doesn't restrict IPv4 or if address belongs to jail,
 2852  * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail
 2853  * doesn't allow IPv4.  Address passed in in NBO.
 2854  */
 2855 static int
 2856 _prison_check_ip4(struct prison *pr, struct in_addr *ia)
 2857 {
 2858         int i, a, z, d;
 2859 
 2860         /*
 2861          * Check the primary IP.
 2862          */
 2863         if (pr->pr_ip4[0].s_addr == ia->s_addr)
 2864                 return (0);
 2865 
 2866         /*
 2867          * All the other IPs are sorted so we can do a binary search.
 2868          */
 2869         a = 0;
 2870         z = pr->pr_ip4s - 2;
 2871         while (a <= z) {
 2872                 i = (a + z) / 2;
 2873                 d = qcmp_v4(&pr->pr_ip4[i+1], ia);
 2874                 if (d > 0)
 2875                         z = i - 1;
 2876                 else if (d < 0)
 2877                         a = i + 1;
 2878                 else
 2879                         return (0);
 2880         }
 2881 
 2882         return (EADDRNOTAVAIL);
 2883 }
 2884 
 2885 int
 2886 prison_check_ip4(struct ucred *cred, struct in_addr *ia)
 2887 {
 2888         struct prison *pr;
 2889         int error;
 2890 
 2891         KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
 2892         KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
 2893 
 2894         pr = cred->cr_prison;
 2895         if (!(pr->pr_flags & PR_IP4))
 2896                 return (0);
 2897         mtx_lock(&pr->pr_mtx);
 2898         if (!(pr->pr_flags & PR_IP4)) {
 2899                 mtx_unlock(&pr->pr_mtx);
 2900                 return (0);
 2901         }
 2902         if (pr->pr_ip4 == NULL) {
 2903                 mtx_unlock(&pr->pr_mtx);
 2904                 return (EAFNOSUPPORT);
 2905         }
 2906 
 2907         error = _prison_check_ip4(pr, ia);
 2908         mtx_unlock(&pr->pr_mtx);
 2909         return (error);
 2910 }
 2911 #endif
 2912 
 2913 #ifdef INET6
 2914 static int
 2915 prison_restrict_ip6(struct prison *pr, struct in6_addr *newip6)
 2916 {
 2917         int ii, ij, used;
 2918         struct prison *ppr;
 2919 
 2920         ppr = pr->pr_parent;
 2921         if (!(pr->pr_flags & PR_IP6_USER)) {
 2922                 /* This has no user settings, so just copy the parent's list. */
 2923                 if (pr->pr_ip6s < ppr->pr_ip6s) {
 2924                         /*
 2925                          * There's no room for the parent's list.  Use the
 2926                          * new list buffer, which is assumed to be big enough
 2927                          * (if it was passed).  If there's no buffer, try to
 2928                          * allocate one.
 2929                          */
 2930                         used = 1;
 2931                         if (newip6 == NULL) {
 2932                                 newip6 = malloc(ppr->pr_ip6s * sizeof(*newip6),
 2933                                     M_PRISON, M_NOWAIT);
 2934                                 if (newip6 != NULL)
 2935                                         used = 0;
 2936                         }
 2937                         if (newip6 != NULL) {
 2938                                 bcopy(ppr->pr_ip6, newip6,
 2939                                     ppr->pr_ip6s * sizeof(*newip6));
 2940                                 free(pr->pr_ip6, M_PRISON);
 2941                                 pr->pr_ip6 = newip6;
 2942                                 pr->pr_ip6s = ppr->pr_ip6s;
 2943                         }
 2944                         return (used);
 2945                 }
 2946                 pr->pr_ip6s = ppr->pr_ip6s;
 2947                 if (pr->pr_ip6s > 0)
 2948                         bcopy(ppr->pr_ip6, pr->pr_ip6,
 2949                             pr->pr_ip6s * sizeof(*newip6));
 2950                 else if (pr->pr_ip6 != NULL) {
 2951                         free(pr->pr_ip6, M_PRISON);
 2952                         pr->pr_ip6 = NULL;
 2953                 }
 2954         } else if (pr->pr_ip6s > 0) {
 2955                 /* Remove addresses that aren't in the parent. */
 2956                 for (ij = 0; ij < ppr->pr_ip6s; ij++)
 2957                         if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[0],
 2958                             &ppr->pr_ip6[ij]))
 2959                                 break;
 2960                 if (ij < ppr->pr_ip6s)
 2961                         ii = 1;
 2962                 else {
 2963                         bcopy(pr->pr_ip6 + 1, pr->pr_ip6,
 2964                             --pr->pr_ip6s * sizeof(*pr->pr_ip6));
 2965                         ii = 0;
 2966                 }
 2967                 for (ij = 1; ii < pr->pr_ip6s; ) {
 2968                         if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[ii],
 2969                             &ppr->pr_ip6[0])) {
 2970                                 ii++;
 2971                                 continue;
 2972                         }
 2973                         switch (ij >= ppr->pr_ip4s ? -1 :
 2974                                 qcmp_v6(&pr->pr_ip6[ii], &ppr->pr_ip6[ij])) {
 2975                         case -1:
 2976                                 bcopy(pr->pr_ip6 + ii + 1, pr->pr_ip6 + ii,
 2977                                     (--pr->pr_ip6s - ii) * sizeof(*pr->pr_ip6));
 2978                                 break;
 2979                         case 0:
 2980                                 ii++;
 2981                                 ij++;
 2982                                 break;
 2983                         case 1:
 2984                                 ij++;
 2985                                 break;
 2986                         }
 2987                 }
 2988                 if (pr->pr_ip6s == 0) {
 2989                         pr->pr_flags |= PR_IP6_DISABLE;
 2990                         free(pr->pr_ip6, M_PRISON);
 2991                         pr->pr_ip6 = NULL;
 2992                 }
 2993         }
 2994         return 0;
 2995 }
 2996 
 2997 /*
 2998  * Pass back primary IPv6 address for this jail.
 2999  *
 3000  * If not restricted return success but do not alter the address.  Caller has
 3001  * to make sure to initialize it correctly (e.g. IN6ADDR_ANY_INIT).
 3002  *
 3003  * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv6.
 3004  */
 3005 int
 3006 prison_get_ip6(struct ucred *cred, struct in6_addr *ia6)
 3007 {
 3008         struct prison *pr;
 3009 
 3010         KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
 3011         KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
 3012 
 3013         pr = cred->cr_prison;
 3014         if (!(pr->pr_flags & PR_IP6))
 3015                 return (0);
 3016         mtx_lock(&pr->pr_mtx);
 3017         if (!(pr->pr_flags & PR_IP6)) {
 3018                 mtx_unlock(&pr->pr_mtx);
 3019                 return (0);
 3020         }
 3021         if (pr->pr_ip6 == NULL) {
 3022                 mtx_unlock(&pr->pr_mtx);
 3023                 return (EAFNOSUPPORT);
 3024         }
 3025 
 3026         bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr));
 3027         mtx_unlock(&pr->pr_mtx);
 3028         return (0);
 3029 }
 3030 
 3031 /*
 3032  * Return 1 if we should do proper source address selection or are not jailed.
 3033  * We will return 0 if we should bypass source address selection in favour
 3034  * of the primary jail IPv6 address. Only in this case *ia will be updated and
 3035  * returned in NBO.
 3036  * Return EAFNOSUPPORT, in case this jail does not allow IPv6.
 3037  */
 3038 int
 3039 prison_saddrsel_ip6(struct ucred *cred, struct in6_addr *ia6)
 3040 {
 3041         struct prison *pr;
 3042         struct in6_addr lia6;
 3043         int error;
 3044 
 3045         KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
 3046         KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
 3047 
 3048         if (!jailed(cred))
 3049                 return (1);
 3050 
 3051         pr = cred->cr_prison;
 3052         if (pr->pr_flags & PR_IP6_SADDRSEL)
 3053                 return (1);
 3054 
 3055         lia6 = in6addr_any;
 3056         error = prison_get_ip6(cred, &lia6);
 3057         if (error)
 3058                 return (error);
 3059         if (IN6_IS_ADDR_UNSPECIFIED(&lia6))
 3060                 return (1);
 3061 
 3062         bcopy(&lia6, ia6, sizeof(struct in6_addr));
 3063         return (0);
 3064 }
 3065 
 3066 /*
 3067  * Return true if pr1 and pr2 have the same IPv6 address restrictions.
 3068  */
 3069 int
 3070 prison_equal_ip6(struct prison *pr1, struct prison *pr2)
 3071 {
 3072 
 3073         if (pr1 == pr2)
 3074                 return (1);
 3075 
 3076         while (pr1 != &prison0 &&
 3077 #ifdef VIMAGE
 3078                !(pr1->pr_flags & PR_VNET) &&
 3079 #endif
 3080                !(pr1->pr_flags & PR_IP6_USER))
 3081                 pr1 = pr1->pr_parent;
 3082         while (pr2 != &prison0 &&
 3083 #ifdef VIMAGE
 3084                !(pr2->pr_flags & PR_VNET) &&
 3085 #endif
 3086                !(pr2->pr_flags & PR_IP6_USER))
 3087                 pr2 = pr2->pr_parent;
 3088         return (pr1 == pr2);
 3089 }
 3090 
 3091 /*
 3092  * Make sure our (source) address is set to something meaningful to this jail.
 3093  *
 3094  * v6only should be set based on (inp->inp_flags & IN6P_IPV6_V6ONLY != 0)
 3095  * when needed while binding.
 3096  *
 3097  * Returns 0 if jail doesn't restrict IPv6 or if address belongs to jail,
 3098  * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail
 3099  * doesn't allow IPv6.
 3100  */
 3101 int
 3102 prison_local_ip6(struct ucred *cred, struct in6_addr *ia6, int v6only)
 3103 {
 3104         struct prison *pr;
 3105         int error;
 3106 
 3107         KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
 3108         KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
 3109 
 3110         pr = cred->cr_prison;
 3111         if (!(pr->pr_flags & PR_IP6))
 3112                 return (0);
 3113         mtx_lock(&pr->pr_mtx);
 3114         if (!(pr->pr_flags & PR_IP6)) {
 3115                 mtx_unlock(&pr->pr_mtx);
 3116                 return (0);
 3117         }
 3118         if (pr->pr_ip6 == NULL) {
 3119                 mtx_unlock(&pr->pr_mtx);
 3120                 return (EAFNOSUPPORT);
 3121         }
 3122 
 3123         if (IN6_IS_ADDR_LOOPBACK(ia6)) {
 3124                 bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr));
 3125                 mtx_unlock(&pr->pr_mtx);
 3126                 return (0);
 3127         }
 3128 
 3129         if (IN6_IS_ADDR_UNSPECIFIED(ia6)) {
 3130                 /*
 3131                  * In case there is only 1 IPv6 address, and v6only is true,
 3132                  * then bind directly.
 3133                  */
 3134                 if (v6only != 0 && pr->pr_ip6s == 1)
 3135                         bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr));
 3136                 mtx_unlock(&pr->pr_mtx);
 3137                 return (0);
 3138         }
 3139 
 3140         error = _prison_check_ip6(pr, ia6);
 3141         mtx_unlock(&pr->pr_mtx);
 3142         return (error);
 3143 }
 3144 
 3145 /*
 3146  * Rewrite destination address in case we will connect to loopback address.
 3147  *
 3148  * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv6.
 3149  */
 3150 int
 3151 prison_remote_ip6(struct ucred *cred, struct in6_addr *ia6)
 3152 {
 3153         struct prison *pr;
 3154 
 3155         KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
 3156         KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
 3157 
 3158         pr = cred->cr_prison;
 3159         if (!(pr->pr_flags & PR_IP6))
 3160                 return (0);
 3161         mtx_lock(&pr->pr_mtx);
 3162         if (!(pr->pr_flags & PR_IP6)) {
 3163                 mtx_unlock(&pr->pr_mtx);
 3164                 return (0);
 3165         }
 3166         if (pr->pr_ip6 == NULL) {
 3167                 mtx_unlock(&pr->pr_mtx);
 3168                 return (EAFNOSUPPORT);
 3169         }
 3170 
 3171         if (IN6_IS_ADDR_LOOPBACK(ia6)) {
 3172                 bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr));
 3173                 mtx_unlock(&pr->pr_mtx);
 3174                 return (0);
 3175         }
 3176 
 3177         /*
 3178          * Return success because nothing had to be changed.
 3179          */
 3180         mtx_unlock(&pr->pr_mtx);
 3181         return (0);
 3182 }
 3183 
 3184 /*
 3185  * Check if given address belongs to the jail referenced by cred/prison.
 3186  *
 3187  * Returns 0 if jail doesn't restrict IPv6 or if address belongs to jail,
 3188  * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail
 3189  * doesn't allow IPv6.
 3190  */
 3191 static int
 3192 _prison_check_ip6(struct prison *pr, struct in6_addr *ia6)
 3193 {
 3194         int i, a, z, d;
 3195 
 3196         /*
 3197          * Check the primary IP.
 3198          */
 3199         if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[0], ia6))
 3200                 return (0);
 3201 
 3202         /*
 3203          * All the other IPs are sorted so we can do a binary search.
 3204          */
 3205         a = 0;
 3206         z = pr->pr_ip6s - 2;
 3207         while (a <= z) {
 3208                 i = (a + z) / 2;
 3209                 d = qcmp_v6(&pr->pr_ip6[i+1], ia6);
 3210                 if (d > 0)
 3211                         z = i - 1;
 3212                 else if (d < 0)
 3213                         a = i + 1;
 3214                 else
 3215                         return (0);
 3216         }
 3217 
 3218         return (EADDRNOTAVAIL);
 3219 }
 3220 
 3221 int
 3222 prison_check_ip6(struct ucred *cred, struct in6_addr *ia6)
 3223 {
 3224         struct prison *pr;
 3225         int error;
 3226 
 3227         KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
 3228         KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
 3229 
 3230         pr = cred->cr_prison;
 3231         if (!(pr->pr_flags & PR_IP6))
 3232                 return (0);
 3233         mtx_lock(&pr->pr_mtx);
 3234         if (!(pr->pr_flags & PR_IP6)) {
 3235                 mtx_unlock(&pr->pr_mtx);
 3236                 return (0);
 3237         }
 3238         if (pr->pr_ip6 == NULL) {
 3239                 mtx_unlock(&pr->pr_mtx);
 3240                 return (EAFNOSUPPORT);
 3241         }
 3242 
 3243         error = _prison_check_ip6(pr, ia6);
 3244         mtx_unlock(&pr->pr_mtx);
 3245         return (error);
 3246 }
 3247 #endif
 3248 
 3249 /*
 3250  * Check if a jail supports the given address family.
 3251  *
 3252  * Returns 0 if not jailed or the address family is supported, EAFNOSUPPORT
 3253  * if not.
 3254  */
 3255 int
 3256 prison_check_af(struct ucred *cred, int af)
 3257 {
 3258         struct prison *pr;
 3259         int error;
 3260 
 3261         KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
 3262 
 3263         pr = cred->cr_prison;
 3264 #ifdef VIMAGE
 3265         /* Prisons with their own network stack are not limited. */
 3266         if (prison_owns_vnet(cred))
 3267                 return (0);
 3268 #endif
 3269 
 3270         error = 0;
 3271         switch (af)
 3272         {
 3273 #ifdef INET
 3274         case AF_INET:
 3275                 if (pr->pr_flags & PR_IP4)
 3276                 {
 3277                         mtx_lock(&pr->pr_mtx);
 3278                         if ((pr->pr_flags & PR_IP4) && pr->pr_ip4 == NULL)
 3279                                 error = EAFNOSUPPORT;
 3280                         mtx_unlock(&pr->pr_mtx);
 3281                 }
 3282                 break;
 3283 #endif
 3284 #ifdef INET6
 3285         case AF_INET6:
 3286                 if (pr->pr_flags & PR_IP6)
 3287                 {
 3288                         mtx_lock(&pr->pr_mtx);
 3289                         if ((pr->pr_flags & PR_IP6) && pr->pr_ip6 == NULL)
 3290                                 error = EAFNOSUPPORT;
 3291                         mtx_unlock(&pr->pr_mtx);
 3292                 }
 3293                 break;
 3294 #endif
 3295         case AF_LOCAL:
 3296         case AF_ROUTE:
 3297                 break;
 3298         default:
 3299                 if (!(pr->pr_allow & PR_ALLOW_SOCKET_AF))
 3300                         error = EAFNOSUPPORT;
 3301         }
 3302         return (error);
 3303 }
 3304 
 3305 /*
 3306  * Check if given address belongs to the jail referenced by cred (wrapper to
 3307  * prison_check_ip[46]).
 3308  *
 3309  * Returns 0 if jail doesn't restrict the address family or if address belongs
 3310  * to jail, EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if
 3311  * the jail doesn't allow the address family.  IPv4 Address passed in in NBO.
 3312  */
 3313 int
 3314 prison_if(struct ucred *cred, struct sockaddr *sa)
 3315 {
 3316 #ifdef INET
 3317         struct sockaddr_in *sai;
 3318 #endif
 3319 #ifdef INET6
 3320         struct sockaddr_in6 *sai6;
 3321 #endif
 3322         int error;
 3323 
 3324         KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
 3325         KASSERT(sa != NULL, ("%s: sa is NULL", __func__));
 3326 
 3327 #ifdef VIMAGE
 3328         if (prison_owns_vnet(cred))
 3329                 return (0);
 3330 #endif
 3331 
 3332         error = 0;
 3333         switch (sa->sa_family)
 3334         {
 3335 #ifdef INET
 3336         case AF_INET:
 3337                 sai = (struct sockaddr_in *)sa;
 3338                 error = prison_check_ip4(cred, &sai->sin_addr);
 3339                 break;
 3340 #endif
 3341 #ifdef INET6
 3342         case AF_INET6:
 3343                 sai6 = (struct sockaddr_in6 *)sa;
 3344                 error = prison_check_ip6(cred, &sai6->sin6_addr);
 3345                 break;
 3346 #endif
 3347         default:
 3348                 if (!(cred->cr_prison->pr_allow & PR_ALLOW_SOCKET_AF))
 3349                         error = EAFNOSUPPORT;
 3350         }
 3351         return (error);
 3352 }
 3353 
 3354 /*
 3355  * Return 0 if jails permit p1 to frob p2, otherwise ESRCH.
 3356  */
 3357 int
 3358 prison_check(struct ucred *cred1, struct ucred *cred2)
 3359 {
 3360 
 3361         return ((cred1->cr_prison == cred2->cr_prison ||
 3362             prison_ischild(cred1->cr_prison, cred2->cr_prison)) ? 0 : ESRCH);
 3363 }
 3364 
 3365 /*
 3366  * Return 1 if p2 is a child of p1, otherwise 0.
 3367  */
 3368 int
 3369 prison_ischild(struct prison *pr1, struct prison *pr2)
 3370 {
 3371 
 3372         for (pr2 = pr2->pr_parent; pr2 != NULL; pr2 = pr2->pr_parent)
 3373                 if (pr1 == pr2)
 3374                         return (1);
 3375         return (0);
 3376 }
 3377 
 3378 /*
 3379  * Return 1 if the passed credential is in a jail, otherwise 0.
 3380  */
 3381 int
 3382 jailed(struct ucred *cred)
 3383 {
 3384 
 3385         return (cred->cr_prison != &prison0);
 3386 }
 3387 
 3388 /*
 3389  * Return 1 if the passed credential is in a jail and that jail does not
 3390  * have its own virtual network stack, otherwise 0.
 3391  */
 3392 int
 3393 jailed_without_vnet(struct ucred *cred)
 3394 {
 3395 
 3396         if (!jailed(cred))
 3397                 return (0);
 3398 #ifdef VIMAGE
 3399         if (prison_owns_vnet(cred))
 3400                 return (0);
 3401 #endif
 3402 
 3403         return (1);
 3404 }
 3405 
 3406 /*
 3407  * Return the correct hostname (domainname, et al) for the passed credential.
 3408  */
 3409 void
 3410 getcredhostname(struct ucred *cred, char *buf, size_t size)
 3411 {
 3412         struct prison *pr;
 3413 
 3414         /*
 3415          * A NULL credential can be used to shortcut to the physical
 3416          * system's hostname.
 3417          */
 3418         pr = (cred != NULL) ? cred->cr_prison : &prison0;
 3419         mtx_lock(&pr->pr_mtx);
 3420         strlcpy(buf, pr->pr_hostname, size);
 3421         mtx_unlock(&pr->pr_mtx);
 3422 }
 3423 
 3424 void
 3425 getcreddomainname(struct ucred *cred, char *buf, size_t size)
 3426 {
 3427 
 3428         mtx_lock(&cred->cr_prison->pr_mtx);
 3429         strlcpy(buf, cred->cr_prison->pr_domainname, size);
 3430         mtx_unlock(&cred->cr_prison->pr_mtx);
 3431 }
 3432 
 3433 void
 3434 getcredhostuuid(struct ucred *cred, char *buf, size_t size)
 3435 {
 3436 
 3437         mtx_lock(&cred->cr_prison->pr_mtx);
 3438         strlcpy(buf, cred->cr_prison->pr_hostuuid, size);
 3439         mtx_unlock(&cred->cr_prison->pr_mtx);
 3440 }
 3441 
 3442 void
 3443 getcredhostid(struct ucred *cred, unsigned long *hostid)
 3444 {
 3445 
 3446         mtx_lock(&cred->cr_prison->pr_mtx);
 3447         *hostid = cred->cr_prison->pr_hostid;
 3448         mtx_unlock(&cred->cr_prison->pr_mtx);
 3449 }
 3450 
 3451 #ifdef VIMAGE
 3452 /*
 3453  * Determine whether the prison represented by cred owns
 3454  * its vnet rather than having it inherited.
 3455  *
 3456  * Returns 1 in case the prison owns the vnet, 0 otherwise.
 3457  */
 3458 int
 3459 prison_owns_vnet(struct ucred *cred)
 3460 {
 3461 
 3462         /*
 3463          * vnets cannot be added/removed after jail creation,
 3464          * so no need to lock here.
 3465          */
 3466         return (cred->cr_prison->pr_flags & PR_VNET ? 1 : 0);
 3467 }
 3468 #endif
 3469 
 3470 /*
 3471  * Determine whether the subject represented by cred can "see"
 3472  * status of a mount point.
 3473  * Returns: 0 for permitted, ENOENT otherwise.
 3474  * XXX: This function should be called cr_canseemount() and should be
 3475  *      placed in kern_prot.c.
 3476  */
 3477 int
 3478 prison_canseemount(struct ucred *cred, struct mount *mp)
 3479 {
 3480         struct prison *pr;
 3481         struct statfs *sp;
 3482         size_t len;
 3483 
 3484         pr = cred->cr_prison;
 3485         if (pr->pr_enforce_statfs == 0)
 3486                 return (0);
 3487         if (pr->pr_root->v_mount == mp)
 3488                 return (0);
 3489         if (pr->pr_enforce_statfs == 2)
 3490                 return (ENOENT);
 3491         /*
 3492          * If jail's chroot directory is set to "/" we should be able to see
 3493          * all mount-points from inside a jail.
 3494          * This is ugly check, but this is the only situation when jail's
 3495          * directory ends with '/'.
 3496          */
 3497         if (strcmp(pr->pr_path, "/") == 0)
 3498                 return (0);
 3499         len = strlen(pr->pr_path);
 3500         sp = &mp->mnt_stat;
 3501         if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0)
 3502                 return (ENOENT);
 3503         /*
 3504          * Be sure that we don't have situation where jail's root directory
 3505          * is "/some/path" and mount point is "/some/pathpath".
 3506          */
 3507         if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/')
 3508                 return (ENOENT);
 3509         return (0);
 3510 }
 3511 
 3512 void
 3513 prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp)
 3514 {
 3515         char jpath[MAXPATHLEN];
 3516         struct prison *pr;
 3517         size_t len;
 3518 
 3519         pr = cred->cr_prison;
 3520         if (pr->pr_enforce_statfs == 0)
 3521                 return;
 3522         if (prison_canseemount(cred, mp) != 0) {
 3523                 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
 3524                 strlcpy(sp->f_mntonname, "[restricted]",
 3525                     sizeof(sp->f_mntonname));
 3526                 return;
 3527         }
 3528         if (pr->pr_root->v_mount == mp) {
 3529                 /*
 3530                  * Clear current buffer data, so we are sure nothing from
 3531                  * the valid path left there.
 3532                  */
 3533                 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
 3534                 *sp->f_mntonname = '/';
 3535                 return;
 3536         }
 3537         /*
 3538          * If jail's chroot directory is set to "/" we should be able to see
 3539          * all mount-points from inside a jail.
 3540          */
 3541         if (strcmp(pr->pr_path, "/") == 0)
 3542                 return;
 3543         len = strlen(pr->pr_path);
 3544         strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath));
 3545         /*
 3546          * Clear current buffer data, so we are sure nothing from
 3547          * the valid path left there.
 3548          */
 3549         bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
 3550         if (*jpath == '\0') {
 3551                 /* Should never happen. */
 3552                 *sp->f_mntonname = '/';
 3553         } else {
 3554                 strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname));
 3555         }
 3556 }
 3557 
 3558 /*
 3559  * Check with permission for a specific privilege is granted within jail.  We
 3560  * have a specific list of accepted privileges; the rest are denied.
 3561  */
 3562 int
 3563 prison_priv_check(struct ucred *cred, int priv)
 3564 {
 3565 
 3566         if (!jailed(cred))
 3567                 return (0);
 3568 
 3569 #ifdef VIMAGE
 3570         /*
 3571          * Privileges specific to prisons with a virtual network stack.
 3572          * There might be a duplicate entry here in case the privilege
 3573          * is only granted conditionally in the legacy jail case.
 3574          */
 3575         switch (priv) {
 3576 #ifdef notyet
 3577                 /*
 3578                  * NFS-specific privileges.
 3579                  */
 3580         case PRIV_NFS_DAEMON:
 3581         case PRIV_NFS_LOCKD:
 3582 #endif
 3583                 /*
 3584                  * Network stack privileges.
 3585                  */
 3586         case PRIV_NET_BRIDGE:
 3587         case PRIV_NET_GRE:
 3588         case PRIV_NET_BPF:
 3589         case PRIV_NET_RAW:              /* Dup, cond. in legacy jail case. */
 3590         case PRIV_NET_ROUTE:
 3591         case PRIV_NET_TAP:
 3592         case PRIV_NET_SETIFMTU:
 3593         case PRIV_NET_SETIFFLAGS:
 3594         case PRIV_NET_SETIFCAP:
 3595         case PRIV_NET_SETIFDESCR:
 3596         case PRIV_NET_SETIFNAME :
 3597         case PRIV_NET_SETIFMETRIC:
 3598         case PRIV_NET_SETIFPHYS:
 3599         case PRIV_NET_SETIFMAC:
 3600         case PRIV_NET_ADDMULTI:
 3601         case PRIV_NET_DELMULTI:
 3602         case PRIV_NET_HWIOCTL:
 3603         case PRIV_NET_SETLLADDR:
 3604         case PRIV_NET_ADDIFGROUP:
 3605         case PRIV_NET_DELIFGROUP:
 3606         case PRIV_NET_IFCREATE:
 3607         case PRIV_NET_IFDESTROY:
 3608         case PRIV_NET_ADDIFADDR:
 3609         case PRIV_NET_DELIFADDR:
 3610         case PRIV_NET_LAGG:
 3611         case PRIV_NET_GIF:
 3612         case PRIV_NET_SETIFVNET:
 3613 
 3614                 /*
 3615                  * 802.11-related privileges.
 3616                  */
 3617         case PRIV_NET80211_GETKEY:
 3618 #ifdef notyet
 3619         case PRIV_NET80211_MANAGE:              /* XXX-BZ discuss with sam@ */
 3620 #endif
 3621 
 3622 #ifdef notyet
 3623                 /*
 3624                  * AppleTalk privileges.
 3625                  */
 3626         case PRIV_NETATALK_RESERVEDPORT:
 3627 
 3628                 /*
 3629                  * ATM privileges.
 3630                  */
 3631         case PRIV_NETATM_CFG:
 3632         case PRIV_NETATM_ADD:
 3633         case PRIV_NETATM_DEL:
 3634         case PRIV_NETATM_SET:
 3635 
 3636                 /*
 3637                  * Bluetooth privileges.
 3638                  */
 3639         case PRIV_NETBLUETOOTH_RAW:
 3640 #endif
 3641 
 3642                 /*
 3643                  * Netgraph and netgraph module privileges.
 3644                  */
 3645         case PRIV_NETGRAPH_CONTROL:
 3646 #ifdef notyet
 3647         case PRIV_NETGRAPH_TTY:
 3648 #endif
 3649 
 3650                 /*
 3651                  * IPv4 and IPv6 privileges.
 3652                  */
 3653         case PRIV_NETINET_IPFW:
 3654         case PRIV_NETINET_DIVERT:
 3655         case PRIV_NETINET_PF:
 3656         case PRIV_NETINET_DUMMYNET:
 3657         case PRIV_NETINET_CARP:
 3658         case PRIV_NETINET_MROUTE:
 3659         case PRIV_NETINET_RAW:
 3660         case PRIV_NETINET_ADDRCTRL6:
 3661         case PRIV_NETINET_ND6:
 3662         case PRIV_NETINET_SCOPE6:
 3663         case PRIV_NETINET_ALIFETIME6:
 3664         case PRIV_NETINET_IPSEC:
 3665         case PRIV_NETINET_BINDANY:
 3666 
 3667 #ifdef notyet
 3668                 /*
 3669                  * IPX/SPX privileges.
 3670                  */
 3671         case PRIV_NETIPX_RESERVEDPORT:
 3672         case PRIV_NETIPX_RAW:
 3673 
 3674                 /*
 3675                  * NCP privileges.
 3676                  */
 3677         case PRIV_NETNCP:
 3678 
 3679                 /*
 3680                  * SMB privileges.
 3681                  */
 3682         case PRIV_NETSMB:
 3683 #endif
 3684 
 3685         /*
 3686          * No default: or deny here.
 3687          * In case of no permit fall through to next switch().
 3688          */
 3689                 if (cred->cr_prison->pr_flags & PR_VNET)
 3690                         return (0);
 3691         }
 3692 #endif /* VIMAGE */
 3693 
 3694         switch (priv) {
 3695 
 3696                 /*
 3697                  * Allow ktrace privileges for root in jail.
 3698                  */
 3699         case PRIV_KTRACE:
 3700 
 3701 #if 0
 3702                 /*
 3703                  * Allow jailed processes to configure audit identity and
 3704                  * submit audit records (login, etc).  In the future we may
 3705                  * want to further refine the relationship between audit and
 3706                  * jail.
 3707                  */
 3708         case PRIV_AUDIT_GETAUDIT:
 3709         case PRIV_AUDIT_SETAUDIT:
 3710         case PRIV_AUDIT_SUBMIT:
 3711 #endif
 3712 
 3713                 /*
 3714                  * Allow jailed processes to manipulate process UNIX
 3715                  * credentials in any way they see fit.
 3716                  */
 3717         case PRIV_CRED_SETUID:
 3718         case PRIV_CRED_SETEUID:
 3719         case PRIV_CRED_SETGID:
 3720         case PRIV_CRED_SETEGID:
 3721         case PRIV_CRED_SETGROUPS:
 3722         case PRIV_CRED_SETREUID:
 3723         case PRIV_CRED_SETREGID:
 3724         case PRIV_CRED_SETRESUID:
 3725         case PRIV_CRED_SETRESGID:
 3726 
 3727                 /*
 3728                  * Jail implements visibility constraints already, so allow
 3729                  * jailed root to override uid/gid-based constraints.
 3730                  */
 3731         case PRIV_SEEOTHERGIDS:
 3732         case PRIV_SEEOTHERUIDS:
 3733 
 3734                 /*
 3735                  * Jail implements inter-process debugging limits already, so
 3736                  * allow jailed root various debugging privileges.
 3737                  */
 3738         case PRIV_DEBUG_DIFFCRED:
 3739         case PRIV_DEBUG_SUGID:
 3740         case PRIV_DEBUG_UNPRIV:
 3741 
 3742                 /*
 3743                  * Allow jail to set various resource limits and login
 3744                  * properties, and for now, exceed process resource limits.
 3745                  */
 3746         case PRIV_PROC_LIMIT:
 3747         case PRIV_PROC_SETLOGIN:
 3748         case PRIV_PROC_SETRLIMIT:
 3749 
 3750                 /*
 3751                  * System V and POSIX IPC privileges are granted in jail.
 3752                  */
 3753         case PRIV_IPC_READ:
 3754         case PRIV_IPC_WRITE:
 3755         case PRIV_IPC_ADMIN:
 3756         case PRIV_IPC_MSGSIZE:
 3757         case PRIV_MQ_ADMIN:
 3758 
 3759                 /*
 3760                  * Jail operations within a jail work on child jails.
 3761                  */
 3762         case PRIV_JAIL_ATTACH:
 3763         case PRIV_JAIL_SET:
 3764         case PRIV_JAIL_REMOVE:
 3765 
 3766                 /*
 3767                  * Jail implements its own inter-process limits, so allow
 3768                  * root processes in jail to change scheduling on other
 3769                  * processes in the same jail.  Likewise for signalling.
 3770                  */
 3771         case PRIV_SCHED_DIFFCRED:
 3772         case PRIV_SCHED_CPUSET:
 3773         case PRIV_SIGNAL_DIFFCRED:
 3774         case PRIV_SIGNAL_SUGID:
 3775 
 3776                 /*
 3777                  * Allow jailed processes to write to sysctls marked as jail
 3778                  * writable.
 3779                  */
 3780         case PRIV_SYSCTL_WRITEJAIL:
 3781 
 3782                 /*
 3783                  * Allow root in jail to manage a variety of quota
 3784                  * properties.  These should likely be conditional on a
 3785                  * configuration option.
 3786                  */
 3787         case PRIV_VFS_GETQUOTA:
 3788         case PRIV_VFS_SETQUOTA:
 3789 
 3790                 /*
 3791                  * Since Jail relies on chroot() to implement file system
 3792                  * protections, grant many VFS privileges to root in jail.
 3793                  * Be careful to exclude mount-related and NFS-related
 3794                  * privileges.
 3795                  */
 3796         case PRIV_VFS_READ:
 3797         case PRIV_VFS_WRITE:
 3798         case PRIV_VFS_ADMIN:
 3799         case PRIV_VFS_EXEC:
 3800         case PRIV_VFS_LOOKUP:
 3801         case PRIV_VFS_BLOCKRESERVE:     /* XXXRW: Slightly surprising. */
 3802         case PRIV_VFS_CHFLAGS_DEV:
 3803         case PRIV_VFS_CHOWN:
 3804         case PRIV_VFS_CHROOT:
 3805         case PRIV_VFS_RETAINSUGID:
 3806         case PRIV_VFS_FCHROOT:
 3807         case PRIV_VFS_LINK:
 3808         case PRIV_VFS_SETGID:
 3809         case PRIV_VFS_STAT:
 3810         case PRIV_VFS_STICKYFILE:
 3811                 return (0);
 3812 
 3813                 /*
 3814                  * Depending on the global setting, allow privilege of
 3815                  * setting system flags.
 3816                  */
 3817         case PRIV_VFS_SYSFLAGS:
 3818                 if (cred->cr_prison->pr_allow & PR_ALLOW_CHFLAGS)
 3819                         return (0);
 3820                 else
 3821                         return (EPERM);
 3822 
 3823                 /*
 3824                  * Depending on the global setting, allow privilege of
 3825                  * mounting/unmounting file systems.
 3826                  */
 3827         case PRIV_VFS_MOUNT:
 3828         case PRIV_VFS_UNMOUNT:
 3829         case PRIV_VFS_MOUNT_NONUSER:
 3830         case PRIV_VFS_MOUNT_OWNER:
 3831                 if (cred->cr_prison->pr_allow & PR_ALLOW_MOUNT)
 3832                         return (0);
 3833                 else
 3834                         return (EPERM);
 3835 
 3836                 /*
 3837                  * Allow jailed root to bind reserved ports and reuse in-use
 3838                  * ports.
 3839                  */
 3840         case PRIV_NETINET_RESERVEDPORT:
 3841         case PRIV_NETINET_REUSEPORT:
 3842                 return (0);
 3843 
 3844                 /*
 3845                  * Allow jailed root to set certian IPv4/6 (option) headers.
 3846                  */
 3847         case PRIV_NETINET_SETHDROPTS:
 3848                 return (0);
 3849 
 3850                 /*
 3851                  * Conditionally allow creating raw sockets in jail.
 3852                  */
 3853         case PRIV_NETINET_RAW:
 3854                 if (cred->cr_prison->pr_allow & PR_ALLOW_RAW_SOCKETS)
 3855                         return (0);
 3856                 else
 3857                         return (EPERM);
 3858 
 3859                 /*
 3860                  * Since jail implements its own visibility limits on netstat
 3861                  * sysctls, allow getcred.  This allows identd to work in
 3862                  * jail.
 3863                  */
 3864         case PRIV_NETINET_GETCRED:
 3865                 return (0);
 3866 
 3867         default:
 3868                 /*
 3869                  * In all remaining cases, deny the privilege request.  This
 3870                  * includes almost all network privileges, many system
 3871                  * configuration privileges.
 3872                  */
 3873                 return (EPERM);
 3874         }
 3875 }
 3876 
 3877 /*
 3878  * Return the part of pr2's name that is relative to pr1, or the whole name
 3879  * if it does not directly follow.
 3880  */
 3881 
 3882 char *
 3883 prison_name(struct prison *pr1, struct prison *pr2)
 3884 {
 3885         char *name;
 3886 
 3887         /* Jails see themselves as "" (if they see themselves at all). */
 3888         if (pr1 == pr2)
 3889                 return "";
 3890         name = pr2->pr_name;
 3891         if (prison_ischild(pr1, pr2)) {
 3892                 /*
 3893                  * pr1 isn't locked (and allprison_lock may not be either)
 3894                  * so its length can't be counted on.  But the number of dots
 3895                  * can be counted on - and counted.
 3896                  */
 3897                 for (; pr1 != &prison0; pr1 = pr1->pr_parent)
 3898                         name = strchr(name, '.') + 1;
 3899         }
 3900         return (name);
 3901 }
 3902 
 3903 /*
 3904  * Return the part of pr2's path that is relative to pr1, or the whole path
 3905  * if it does not directly follow.
 3906  */
 3907 static char *
 3908 prison_path(struct prison *pr1, struct prison *pr2)
 3909 {
 3910         char *path1, *path2;
 3911         int len1;
 3912 
 3913         path1 = pr1->pr_path;
 3914         path2 = pr2->pr_path;
 3915         if (!strcmp(path1, "/"))
 3916                 return (path2);
 3917         len1 = strlen(path1);
 3918         if (strncmp(path1, path2, len1))
 3919                 return (path2);
 3920         if (path2[len1] == '\0')
 3921                 return "/";
 3922         if (path2[len1] == '/')
 3923                 return (path2 + len1);
 3924         return (path2);
 3925 }
 3926 
 3927 
/*
 * Jail-related sysctls.
 *
 * This declares the "jail" node beneath _security; the handlers and
 * variables registered further down with a _security_jail parent hang
 * off this node.
 */
SYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0,
    "Jails");
 3933 
 3934 static int
 3935 sysctl_jail_list(SYSCTL_HANDLER_ARGS)
 3936 {
 3937         struct xprison *xp;
 3938         struct prison *pr, *cpr;
 3939 #ifdef INET
 3940         struct in_addr *ip4 = NULL;
 3941         int ip4s = 0;
 3942 #endif
 3943 #ifdef INET6
 3944         struct in_addr *ip6 = NULL;
 3945         int ip6s = 0;
 3946 #endif
 3947         int descend, error;
 3948 
 3949         xp = malloc(sizeof(*xp), M_TEMP, M_WAITOK);
 3950         pr = req->td->td_ucred->cr_prison;
 3951         error = 0;
 3952         sx_slock(&allprison_lock);
 3953         FOREACH_PRISON_DESCENDANT(pr, cpr, descend) {
 3954 #if defined(INET) || defined(INET6)
 3955  again:
 3956 #endif
 3957                 mtx_lock(&cpr->pr_mtx);
 3958 #ifdef INET
 3959                 if (cpr->pr_ip4s > 0) {
 3960                         if (ip4s < cpr->pr_ip4s) {
 3961                                 ip4s = cpr->pr_ip4s;
 3962                                 mtx_unlock(&cpr->pr_mtx);
 3963                                 ip4 = realloc(ip4, ip4s *
 3964                                     sizeof(struct in_addr), M_TEMP, M_WAITOK);
 3965                                 goto again;
 3966                         }
 3967                         bcopy(cpr->pr_ip4, ip4,
 3968                             cpr->pr_ip4s * sizeof(struct in_addr));
 3969                 }
 3970 #endif
 3971 #ifdef INET6
 3972                 if (cpr->pr_ip6s > 0) {
 3973                         if (ip6s < cpr->pr_ip6s) {
 3974                                 ip6s = cpr->pr_ip6s;
 3975                                 mtx_unlock(&cpr->pr_mtx);
 3976                                 ip6 = realloc(ip6, ip6s *
 3977                                     sizeof(struct in6_addr), M_TEMP, M_WAITOK);
 3978                                 goto again;
 3979                         }
 3980                         bcopy(cpr->pr_ip6, ip6,
 3981                             cpr->pr_ip6s * sizeof(struct in6_addr));
 3982                 }
 3983 #endif
 3984                 if (cpr->pr_ref == 0) {
 3985                         mtx_unlock(&cpr->pr_mtx);
 3986                         continue;
 3987                 }
 3988                 bzero(xp, sizeof(*xp));
 3989                 xp->pr_version = XPRISON_VERSION;
 3990                 xp->pr_id = cpr->pr_id;
 3991                 xp->pr_state = cpr->pr_uref > 0
 3992                     ? PRISON_STATE_ALIVE : PRISON_STATE_DYING;
 3993                 strlcpy(xp->pr_path, prison_path(pr, cpr), sizeof(xp->pr_path));
 3994                 strlcpy(xp->pr_host, cpr->pr_hostname, sizeof(xp->pr_host));
 3995                 strlcpy(xp->pr_name, prison_name(pr, cpr), sizeof(xp->pr_name));
 3996 #ifdef INET
 3997                 xp->pr_ip4s = cpr->pr_ip4s;
 3998 #endif
 3999 #ifdef INET6
 4000                 xp->pr_ip6s = cpr->pr_ip6s;
 4001 #endif
 4002                 mtx_unlock(&cpr->pr_mtx);
 4003                 error = SYSCTL_OUT(req, xp, sizeof(*xp));
 4004                 if (error)
 4005                         break;
 4006 #ifdef INET
 4007                 if (xp->pr_ip4s > 0) {
 4008                         error = SYSCTL_OUT(req, ip4,
 4009                             xp->pr_ip4s * sizeof(struct in_addr));
 4010                         if (error)
 4011                                 break;
 4012                 }
 4013 #endif
 4014 #ifdef INET6
 4015                 if (xp->pr_ip6s > 0) {
 4016                         error = SYSCTL_OUT(req, ip6,
 4017                             xp->pr_ip6s * sizeof(struct in6_addr));
 4018                         if (error)
 4019                                 break;
 4020                 }
 4021 #endif
 4022         }
 4023         sx_sunlock(&allprison_lock);
 4024         free(xp, M_TEMP);
 4025 #ifdef INET
 4026         free(ip4, M_TEMP);
 4027 #endif
 4028 #ifdef INET6
 4029         free(ip6, M_TEMP);
 4030 #endif
 4031         return (error);
 4032 }
 4033 
 4034 SYSCTL_OID(_security_jail, OID_AUTO, list,
 4035     CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
 4036     sysctl_jail_list, "S", "List of active jails");
 4037 
 4038 static int
 4039 sysctl_jail_jailed(SYSCTL_HANDLER_ARGS)
 4040 {
 4041         int error, injail;
 4042 
 4043         injail = jailed(req->td->td_ucred);
 4044         error = SYSCTL_OUT(req, &injail, sizeof(injail));
 4045 
 4046         return (error);
 4047 }
 4048 
 4049 SYSCTL_PROC(_security_jail, OID_AUTO, jailed,
 4050     CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
 4051     sysctl_jail_jailed, "I", "Process in jail?");
 4052 
/*
 * Read/write knob exposing jail_max_af_ips.  Only present when at least
 * one of INET and INET6 is configured.
 */
#if defined(INET) || defined(INET6)
SYSCTL_UINT(_security_jail, OID_AUTO, jail_max_af_ips, CTLFLAG_RW,
    &jail_max_af_ips, 0,
    "Number of IP addresses a jail may have at most per address family");
#endif
 4058 
 4059 /*
 4060  * Default parameters for jail(2) compatability.  For historical reasons,
 4061  * the sysctl names have varying similarity to the parameter names.  Prisons
 4062  * just see their own parameters, and can't change them.
 4063  */
 4064 static int
 4065 sysctl_jail_default_allow(SYSCTL_HANDLER_ARGS)
 4066 {
 4067         struct prison *pr;
 4068         int allow, error, i;
 4069 
 4070         pr = req->td->td_ucred->cr_prison;
 4071         allow = (pr == &prison0) ? jail_default_allow : pr->pr_allow;
 4072 
 4073         /* Get the current flag value, and convert it to a boolean. */
 4074         i = (allow & arg2) ? 1 : 0;
 4075         if (arg1 != NULL)
 4076                 i = !i;
 4077         error = sysctl_handle_int(oidp, &i, 0, req);
 4078         if (error || !req->newptr)
 4079                 return (error);
 4080         i = i ? arg2 : 0;
 4081         if (arg1 != NULL)
 4082                 i ^= arg2;
 4083         /*
 4084          * The sysctls don't have CTLFLAGS_PRISON, so assume prison0
 4085          * for writing.
 4086          */
 4087         mtx_lock(&prison0.pr_mtx);
 4088         jail_default_allow = (jail_default_allow & ~arg2) | i;
 4089         mtx_unlock(&prison0.pr_mtx);
 4090         return (0);
 4091 }
 4092 
 4093 SYSCTL_PROC(_security_jail, OID_AUTO, set_hostname_allowed,
 4094     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
 4095     NULL, PR_ALLOW_SET_HOSTNAME, sysctl_jail_default_allow, "I",
 4096     "Processes in jail can set their hostnames");
 4097 SYSCTL_PROC(_security_jail, OID_AUTO, socket_unixiproute_only,
 4098     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
 4099     (void *)1, PR_ALLOW_SOCKET_AF, sysctl_jail_default_allow, "I",
 4100     "Processes in jail are limited to creating UNIX/IP/route sockets only");
 4101 SYSCTL_PROC(_security_jail, OID_AUTO, sysvipc_allowed,
 4102     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
 4103     NULL, PR_ALLOW_SYSVIPC, sysctl_jail_default_allow, "I",
 4104     "Processes in jail can use System V IPC primitives");
 4105 SYSCTL_PROC(_security_jail, OID_AUTO, allow_raw_sockets,
 4106     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
 4107     NULL, PR_ALLOW_RAW_SOCKETS, sysctl_jail_default_allow, "I",
 4108     "Prison root can create raw sockets");
 4109 SYSCTL_PROC(_security_jail, OID_AUTO, chflags_allowed,
 4110     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
 4111     NULL, PR_ALLOW_CHFLAGS, sysctl_jail_default_allow, "I",
 4112     "Processes in jail can alter system file flags");
 4113 SYSCTL_PROC(_security_jail, OID_AUTO, mount_allowed,
 4114     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
 4115     NULL, PR_ALLOW_MOUNT, sysctl_jail_default_allow, "I",
 4116     "Processes in jail can mount/unmount jail-friendly file systems");
 4117 
 4118 static int
 4119 sysctl_jail_default_level(SYSCTL_HANDLER_ARGS)
 4120 {
 4121         struct prison *pr;
 4122         int level, error;
 4123 
 4124         pr = req->td->td_ucred->cr_prison;
 4125         level = (pr == &prison0) ? *(int *)arg1 : *(int *)((char *)pr + arg2);
 4126         error = sysctl_handle_int(oidp, &level, 0, req);
 4127         if (error || !req->newptr)
 4128                 return (error);
 4129         *(int *)arg1 = level;
 4130         return (0);
 4131 }
 4132 
 4133 SYSCTL_PROC(_security_jail, OID_AUTO, enforce_statfs,
 4134     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
 4135     &jail_default_enforce_statfs, offsetof(struct prison, pr_enforce_statfs),
 4136     sysctl_jail_default_level, "I",
 4137     "Processes in jail cannot see all mounted file systems");
 4138 
 4139 /*
 4140  * Nodes to describe jail parameters.  Maximum length of string parameters
 4141  * is returned in the string itself, and the other parameters exist merely
 4142  * to make themselves and their types known.
 4143  */
 4144 SYSCTL_NODE(_security_jail, OID_AUTO, param, CTLFLAG_RW, 0,
 4145     "Jail parameters");
 4146 
 4147 int
 4148 sysctl_jail_param(SYSCTL_HANDLER_ARGS)
 4149 {
 4150         int i;
 4151         long l;
 4152         size_t s;
 4153         char numbuf[12];
 4154 
 4155         switch (oidp->oid_kind & CTLTYPE)
 4156         {
 4157         case CTLTYPE_LONG:
 4158         case CTLTYPE_ULONG:
 4159                 l = 0;
 4160 #ifdef SCTL_MASK32
 4161                 if (!(req->flags & SCTL_MASK32))
 4162 #endif
 4163                         return (SYSCTL_OUT(req, &l, sizeof(l)));
 4164         case CTLTYPE_INT:
 4165         case CTLTYPE_UINT:
 4166                 i = 0;
 4167                 return (SYSCTL_OUT(req, &i, sizeof(i)));
 4168         case CTLTYPE_STRING:
 4169                 snprintf(numbuf, sizeof(numbuf), "%d", arg2);
 4170                 return
 4171                     (sysctl_handle_string(oidp, numbuf, sizeof(numbuf), req));
 4172         case CTLTYPE_STRUCT:
 4173                 s = (size_t)arg2;
 4174                 return (SYSCTL_OUT(req, &s, sizeof(s)));
 4175         }
 4176         return (0);
 4177 }
 4178 
/*
 * Declarations of the individual jail parameters.  Each entry gives the
 * parameter's name, its sysctl type and access flags, a format string and
 * a description; string and struct entries also give a size.
 */

/* Top-level parameters. */
SYSCTL_JAIL_PARAM(, jid, CTLTYPE_INT | CTLFLAG_RDTUN, "I", "Jail ID");
SYSCTL_JAIL_PARAM(, parent, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail parent ID");
SYSCTL_JAIL_PARAM_STRING(, name, CTLFLAG_RW, MAXHOSTNAMELEN, "Jail name");
SYSCTL_JAIL_PARAM_STRING(, path, CTLFLAG_RDTUN, MAXPATHLEN, "Jail root path");
SYSCTL_JAIL_PARAM(, securelevel, CTLTYPE_INT | CTLFLAG_RW,
    "I", "Jail secure level");
SYSCTL_JAIL_PARAM(, enforce_statfs, CTLTYPE_INT | CTLFLAG_RW,
    "I", "Jail cannot see all mounted file systems");
SYSCTL_JAIL_PARAM(, persist, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Jail persistence");
#ifdef VIMAGE
SYSCTL_JAIL_PARAM(, vnet, CTLTYPE_INT | CTLFLAG_RDTUN,
    "E,jailsys", "Virtual network stack");
#endif
SYSCTL_JAIL_PARAM(, dying, CTLTYPE_INT | CTLFLAG_RD,
    "B", "Jail is in the process of shutting down");

/* Parameters under the "children" node: child jail counts. */
SYSCTL_JAIL_PARAM_NODE(children, "Number of child jails");
SYSCTL_JAIL_PARAM(_children, cur, CTLTYPE_INT | CTLFLAG_RD,
    "I", "Current number of child jails");
SYSCTL_JAIL_PARAM(_children, max, CTLTYPE_INT | CTLFLAG_RW,
    "I", "Maximum number of child jails");

/* Parameters under the "host" node: host identity. */
SYSCTL_JAIL_PARAM_SYS_NODE(host, CTLFLAG_RW, "Jail host info");
SYSCTL_JAIL_PARAM_STRING(_host, hostname, CTLFLAG_RW, MAXHOSTNAMELEN,
    "Jail hostname");
SYSCTL_JAIL_PARAM_STRING(_host, domainname, CTLFLAG_RW, MAXHOSTNAMELEN,
    "Jail NIS domainname");
SYSCTL_JAIL_PARAM_STRING(_host, hostuuid, CTLFLAG_RW, HOSTUUIDLEN,
    "Jail host UUID");
SYSCTL_JAIL_PARAM(_host, hostid, CTLTYPE_ULONG | CTLFLAG_RW,
    "LU", "Jail host ID");

/* Parameters under the "cpuset" node. */
SYSCTL_JAIL_PARAM_NODE(cpuset, "Jail cpuset");
SYSCTL_JAIL_PARAM(_cpuset, id, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail cpuset ID");

/* Per-address-family parameters, present only when the family is built. */
#ifdef INET
SYSCTL_JAIL_PARAM_SYS_NODE(ip4, CTLFLAG_RDTUN,
    "Jail IPv4 address virtualization");
SYSCTL_JAIL_PARAM_STRUCT(_ip4, addr, CTLFLAG_RW, sizeof(struct in_addr),
    "S,in_addr,a", "Jail IPv4 addresses");
SYSCTL_JAIL_PARAM(_ip4, saddrsel, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Do (not) use IPv4 source address selection rather than the "
    "primary jail IPv4 address.");
#endif
#ifdef INET6
SYSCTL_JAIL_PARAM_SYS_NODE(ip6, CTLFLAG_RDTUN,
    "Jail IPv6 address virtualization");
SYSCTL_JAIL_PARAM_STRUCT(_ip6, addr, CTLFLAG_RW, sizeof(struct in6_addr),
    "S,in6_addr,a", "Jail IPv6 addresses");
SYSCTL_JAIL_PARAM(_ip6, saddrsel, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Do (not) use IPv6 source address selection rather than the "
    "primary jail IPv6 address.");
#endif

/* Parameters under the "allow" node: boolean permission flags. */
SYSCTL_JAIL_PARAM_NODE(allow, "Jail permission flags");
SYSCTL_JAIL_PARAM(_allow, set_hostname, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Jail may set hostname");
SYSCTL_JAIL_PARAM(_allow, sysvipc, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Jail may use SYSV IPC");
SYSCTL_JAIL_PARAM(_allow, raw_sockets, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Jail may create raw sockets");
SYSCTL_JAIL_PARAM(_allow, chflags, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Jail may alter system file flags");
SYSCTL_JAIL_PARAM(_allow, mount, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Jail may mount/unmount jail-friendly file systems");
SYSCTL_JAIL_PARAM(_allow, quotas, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Jail may set file quotas");
SYSCTL_JAIL_PARAM(_allow, socket_af, CTLTYPE_INT | CTLFLAG_RW,
    "B", "Jail may create sockets other than just UNIX/IPv4/IPv6/route");
 4249 
 4250 
 4251 #ifdef DDB
 4252 
 4253 static void
 4254 db_show_prison(struct prison *pr)
 4255 {
 4256         int fi;
 4257 #if defined(INET) || defined(INET6)
 4258         int ii;
 4259 #endif
 4260         unsigned jsf;
 4261 #ifdef INET6
 4262         char ip6buf[INET6_ADDRSTRLEN];
 4263 #endif
 4264 
 4265         db_printf("prison %p:\n", pr);
 4266         db_printf(" jid             = %d\n", pr->pr_id);
 4267         db_printf(" name            = %s\n", pr->pr_name);
 4268         db_printf(" parent          = %p\n", pr->pr_parent);
 4269         db_printf(" ref             = %d\n", pr->pr_ref);
 4270         db_printf(" uref            = %d\n", pr->pr_uref);
 4271         db_printf(" path            = %s\n", pr->pr_path);
 4272         db_printf(" cpuset          = %d\n", pr->pr_cpuset
 4273             ? pr->pr_cpuset->cs_id : -1);
 4274 #ifdef VIMAGE
 4275         db_printf(" vnet            = %p\n", pr->pr_vnet);
 4276 #endif
 4277         db_printf(" root            = %p\n", pr->pr_root);
 4278         db_printf(" securelevel     = %d\n", pr->pr_securelevel);
 4279         db_printf(" children.max    = %d\n", pr->pr_childmax);
 4280         db_printf(" children.cur    = %d\n", pr->pr_childcount);
 4281         db_printf(" child           = %p\n", LIST_FIRST(&pr->pr_children));
 4282         db_printf(" sibling         = %p\n", LIST_NEXT(pr, pr_sibling));
 4283         db_printf(" flags           = 0x%x", pr->pr_flags);
 4284         for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]);
 4285             fi++)
 4286                 if (pr_flag_names[fi] != NULL && (pr->pr_flags & (1 << fi)))
 4287                         db_printf(" %s", pr_flag_names[fi]);
 4288         for (fi = 0; fi < sizeof(pr_flag_jailsys) / sizeof(pr_flag_jailsys[0]);
 4289             fi++) {
 4290                 jsf = pr->pr_flags &
 4291                     (pr_flag_jailsys[fi].disable | pr_flag_jailsys[fi].new);
 4292                 db_printf(" %-16s= %s\n", pr_flag_jailsys[fi].name,
 4293                     pr_flag_jailsys[fi].disable && 
 4294                       (jsf == pr_flag_jailsys[fi].disable) ? "disable"
 4295                     : (jsf == pr_flag_jailsys[fi].new) ? "new"
 4296                     : "inherit");
 4297         }
 4298         db_printf(" allow           = 0x%x", pr->pr_allow);
 4299         for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]);
 4300             fi++)
 4301                 if (pr_allow_names[fi] != NULL && (pr->pr_allow & (1 << fi)))
 4302                         db_printf(" %s", pr_allow_names[fi]);
 4303         db_printf("\n");
 4304         db_printf(" enforce_statfs  = %d\n", pr->pr_enforce_statfs);
 4305         db_printf(" host.hostname   = %s\n", pr->pr_hostname);
 4306         db_printf(" host.domainname = %s\n", pr->pr_domainname);
 4307         db_printf(" host.hostuuid   = %s\n", pr->pr_hostuuid);
 4308         db_printf(" host.hostid     = %lu\n", pr->pr_hostid);
 4309 #ifdef INET
 4310         db_printf(" ip4s            = %d\n", pr->pr_ip4s);
 4311         for (ii = 0; ii < pr->pr_ip4s; ii++)
 4312                 db_printf(" %s %s\n",
 4313                     ii == 0 ? "ip4.addr        =" : "                 ",
 4314                     inet_ntoa(pr->pr_ip4[ii]));
 4315 #endif
 4316 #ifdef INET6
 4317         db_printf(" ip6s            = %d\n", pr->pr_ip6s);
 4318         for (ii = 0; ii < pr->pr_ip6s; ii++)
 4319                 db_printf(" %s %s\n",
 4320                     ii == 0 ? "ip6.addr        =" : "                 ",
 4321                     ip6_sprintf(ip6buf, &pr->pr_ip6[ii]));
 4322 #endif
 4323 }
 4324 
 4325 DB_SHOW_COMMAND(prison, db_show_prison_command)
 4326 {
 4327         struct prison *pr;
 4328 
 4329         if (!have_addr) {
 4330                 /*
 4331                  * Show all prisons in the list, and prison0 which is not
 4332                  * listed.
 4333                  */
 4334                 db_show_prison(&prison0);
 4335                 if (!db_pager_quit) {
 4336                         TAILQ_FOREACH(pr, &allprison, pr_list) {
 4337                                 db_show_prison(pr);
 4338                                 if (db_pager_quit)
 4339                                         break;
 4340                         }
 4341                 }
 4342                 return;
 4343         }
 4344 
 4345         if (addr == 0)
 4346                 pr = &prison0;
 4347         else {
 4348                 /* Look for a prison with the ID and with references. */
 4349                 TAILQ_FOREACH(pr, &allprison, pr_list)
 4350                         if (pr->pr_id == addr && pr->pr_ref > 0)
 4351                                 break;
 4352                 if (pr == NULL)
 4353                         /* Look again, without requiring a reference. */
 4354                         TAILQ_FOREACH(pr, &allprison, pr_list)
 4355                                 if (pr->pr_id == addr)
 4356                                         break;
 4357                 if (pr == NULL)
 4358                         /* Assume address points to a valid prison. */
 4359                         pr = (struct prison *)addr;
 4360         }
 4361         db_show_prison(pr);
 4362 }
 4363 
 4364 #endif /* DDB */

Cache object: c548667be4c6b8c752c027217890ce63


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.