The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_jail.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1999 Poul-Henning Kamp.
    3  * Copyright (c) 2008 Bjoern A. Zeeb.
    4  * All rights reserved.
    5  * 
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  *
   15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   25  * SUCH DAMAGE.
   26  */
   27 
   28 #include <sys/cdefs.h>
   29 __FBSDID("$FreeBSD$");
   30 
   31 #include "opt_ddb.h"
   32 #include "opt_inet.h"
   33 #include "opt_inet6.h"
   34 #include "opt_mac.h"
   35 
   36 #include <sys/param.h>
   37 #include <sys/types.h>
   38 #include <sys/kernel.h>
   39 #include <sys/systm.h>
   40 #include <sys/errno.h>
   41 #include <sys/sysproto.h>
   42 #include <sys/malloc.h>
   43 #include <sys/priv.h>
   44 #include <sys/proc.h>
   45 #include <sys/taskqueue.h>
   46 #include <sys/jail.h>
   47 #include <sys/lock.h>
   48 #include <sys/mutex.h>
   49 #include <sys/sx.h>
   50 #include <sys/namei.h>
   51 #include <sys/mount.h>
   52 #include <sys/queue.h>
   53 #include <sys/socket.h>
   54 #include <sys/syscallsubr.h>
   55 #include <sys/sysctl.h>
   56 #include <sys/vnode.h>
   57 #include <net/if.h>
   58 #include <netinet/in.h>
   59 #ifdef DDB
   60 #include <ddb/ddb.h>
   61 #ifdef INET6
   62 #include <netinet6/in6_var.h>
   63 #endif /* INET6 */
   64 #endif /* DDB */
   65 
   66 #include <security/mac/mac_framework.h>
   67 
   68 MALLOC_DEFINE(M_PRISON, "prison", "Prison structures");
   69 
   70 SYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0,
   71     "Jail rules");
   72 
   73 int     jail_set_hostname_allowed = 1;
   74 SYSCTL_INT(_security_jail, OID_AUTO, set_hostname_allowed, CTLFLAG_RW,
   75     &jail_set_hostname_allowed, 0,
   76     "Processes in jail can set their hostnames");
   77 
   78 int     jail_socket_unixiproute_only = 1;
   79 SYSCTL_INT(_security_jail, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW,
   80     &jail_socket_unixiproute_only, 0,
   81     "Processes in jail are limited to creating UNIX/IP/route sockets only");
   82 
   83 int     jail_sysvipc_allowed = 0;
   84 SYSCTL_INT(_security_jail, OID_AUTO, sysvipc_allowed, CTLFLAG_RW,
   85     &jail_sysvipc_allowed, 0,
   86     "Processes in jail can use System V IPC primitives");
   87 
   88 static int jail_enforce_statfs = 2;
   89 SYSCTL_INT(_security_jail, OID_AUTO, enforce_statfs, CTLFLAG_RW,
   90     &jail_enforce_statfs, 0,
   91     "Processes in jail cannot see all mounted file systems");
   92 
   93 int     jail_allow_raw_sockets = 0;
   94 SYSCTL_INT(_security_jail, OID_AUTO, allow_raw_sockets, CTLFLAG_RW,
   95     &jail_allow_raw_sockets, 0,
   96     "Prison root can create raw sockets");
   97 
   98 int     jail_chflags_allowed = 0;
   99 SYSCTL_INT(_security_jail, OID_AUTO, chflags_allowed, CTLFLAG_RW,
  100     &jail_chflags_allowed, 0,
  101     "Processes in jail can alter system file flags");
  102 
  103 int     jail_mount_allowed = 0;
  104 SYSCTL_INT(_security_jail, OID_AUTO, mount_allowed, CTLFLAG_RW,
  105     &jail_mount_allowed, 0,
  106     "Processes in jail can mount/unmount jail-friendly file systems");
  107 
  108 int     jail_max_af_ips = 255;
  109 SYSCTL_INT(_security_jail, OID_AUTO, jail_max_af_ips, CTLFLAG_RW,
  110     &jail_max_af_ips, 0,
  111     "Number of IP addresses a jail may have at most per address family");
  112 
  113 /* allprison, lastprid, and prisoncount are protected by allprison_lock. */
  114 struct  prisonlist allprison;
  115 struct  sx allprison_lock;
  116 int     lastprid = 0;
  117 int     prisoncount = 0;
  118 
  119 /*
  120  * List of jail services. Protected by allprison_lock.
  121  */
  122 TAILQ_HEAD(prison_services_head, prison_service);
  123 static struct prison_services_head prison_services =
  124     TAILQ_HEAD_INITIALIZER(prison_services);
  125 static int prison_service_slots = 0;
  126 
  127 struct prison_service {
  128         prison_create_t ps_create;
  129         prison_destroy_t ps_destroy;
  130         int             ps_slotno;
  131         TAILQ_ENTRY(prison_service) ps_next;
  132         char    ps_name[0];
  133 };
  134 
  135 static void              init_prison(void *);
  136 static void              prison_complete(void *context, int pending);
  137 static int               sysctl_jail_list(SYSCTL_HANDLER_ARGS);
  138 #ifdef INET
  139 static int              _prison_check_ip4(struct prison *, struct in_addr *);
  140 #endif
  141 #ifdef INET6
  142 static int              _prison_check_ip6(struct prison *, struct in6_addr *);
  143 #endif
  144 
  145 static void
  146 init_prison(void *data __unused)
  147 {
  148 
  149         sx_init(&allprison_lock, "allprison");
  150         LIST_INIT(&allprison);
  151 }
  152 
  153 SYSINIT(prison, SI_SUB_INTRINSIC, SI_ORDER_ANY, init_prison, NULL);
  154 
  155 #ifdef INET
  156 static int
  157 qcmp_v4(const void *ip1, const void *ip2)
  158 {
  159         in_addr_t iaa, iab;
  160 
  161         /*
  162          * We need to compare in HBO here to get the list sorted as expected
  163          * by the result of the code.  Sorting NBO addresses gives you
  164          * interesting results.  If you do not understand, do not try.
  165          */
  166         iaa = ntohl(((const struct in_addr *)ip1)->s_addr);
  167         iab = ntohl(((const struct in_addr *)ip2)->s_addr);
  168 
  169         /*
  170          * Do not simply return the difference of the two numbers, the int is
  171          * not wide enough.
  172          */
  173         if (iaa > iab)
  174                 return (1);
  175         else if (iaa < iab)
  176                 return (-1);
  177         else
  178                 return (0);
  179 }
  180 #endif
  181 
  182 #ifdef INET6
  183 static int
  184 qcmp_v6(const void *ip1, const void *ip2)
  185 {
  186         const struct in6_addr *ia6a, *ia6b;
  187         int i, rc;
  188 
  189         ia6a = (const struct in6_addr *)ip1;
  190         ia6b = (const struct in6_addr *)ip2;
  191 
  192         rc = 0;
  193         for (i=0; rc == 0 && i < sizeof(struct in6_addr); i++) {
  194                 if (ia6a->s6_addr[i] > ia6b->s6_addr[i])
  195                         rc = 1;
  196                 else if (ia6a->s6_addr[i] < ia6b->s6_addr[i])
  197                         rc = -1;
  198         }
  199         return (rc);
  200 }
  201 #endif
  202 
  203 #if defined(INET) || defined(INET6)
  204 static int
  205 prison_check_conflicting_ips(struct prison *p)
  206 {
  207         struct prison *pr;
  208         int i;
  209 
  210         sx_assert(&allprison_lock, SX_LOCKED);
  211 
  212         if (p->pr_ip4s == 0 && p->pr_ip6s == 0)
  213                 return (0);
  214 
  215         LIST_FOREACH(pr, &allprison, pr_list) {
  216                 /*
  217                  * Skip 'dying' prisons to avoid problems when
  218                  * restarting multi-IP jails.
  219                  */
  220                 if (pr->pr_state == PRISON_STATE_DYING)
  221                         continue;
  222 
  223                 /*
  224                  * We permit conflicting IPs if there is no
  225                  * more than 1 IP on eeach jail.
  226                  * In case there is one duplicate on a jail with
  227                  * more than one IP stop checking and return error.
  228                  */
  229 #ifdef INET
  230                 if ((p->pr_ip4s >= 1 && pr->pr_ip4s > 1) ||
  231                     (p->pr_ip4s > 1 && pr->pr_ip4s >= 1)) {
  232                         for (i = 0; i < p->pr_ip4s; i++) {
  233                                 if (_prison_check_ip4(pr, &p->pr_ip4[i]) == 0)
  234                                         return (EINVAL);
  235                         }
  236                 }
  237 #endif
  238 #ifdef INET6
  239                 if ((p->pr_ip6s >= 1 && pr->pr_ip6s > 1) ||
  240                     (p->pr_ip6s > 1 && pr->pr_ip6s >= 1)) {
  241                         for (i = 0; i < p->pr_ip6s; i++) {
  242                                 if (_prison_check_ip6(pr, &p->pr_ip6[i]) == 0)
  243                                         return (EINVAL);
  244                         }
  245                 }
  246 #endif
  247         }
  248 
  249         return (0);
  250 }
  251 
  252 static int
  253 jail_copyin_ips(struct jail *j)
  254 {
  255 #ifdef INET
  256         struct in_addr  *ip4;
  257 #endif
  258 #ifdef INET6
  259         struct in6_addr *ip6;
  260 #endif
  261         int error, i;
  262 
  263         /*
  264          * Copy in addresses, check for duplicate addresses and do some
  265          * simple 0 and broadcast checks. If users give other bogus addresses
  266          * it is their problem.
  267          *
  268          * IP addresses are all sorted but ip[0] to preserve the primary IP
  269          * address as given from userland.  This special IP is used for
  270          * unbound outgoing connections as well for "loopback" traffic.
  271          */
  272 #ifdef INET
  273         ip4 = NULL;
  274 #endif
  275 #ifdef INET6
  276         ip6 = NULL;
  277 #endif
  278 #ifdef INET
  279         if (j->ip4s > 0) {
  280                 ip4 = (struct in_addr *)malloc(j->ip4s * sizeof(struct in_addr),
  281                     M_PRISON, M_WAITOK | M_ZERO);
  282                 error = copyin(j->ip4, ip4, j->ip4s * sizeof(struct in_addr));
  283                 if (error)
  284                         goto e_free_ip;
  285                 /* Sort all but the first IPv4 address. */
  286                 if (j->ip4s > 1)
  287                         qsort((ip4 + 1), j->ip4s - 1,
  288                             sizeof(struct in_addr), qcmp_v4);
  289 
  290                 /*
  291                  * We do not have to care about byte order for these checks
  292                  * so we will do them in NBO.
  293                  */
  294                 for (i=0; i<j->ip4s; i++) {
  295                         if (ip4[i].s_addr == htonl(INADDR_ANY) ||
  296                             ip4[i].s_addr == htonl(INADDR_BROADCAST)) {
  297                                 error = EINVAL;
  298                                 goto e_free_ip;
  299                         }
  300                         if ((i+1) < j->ip4s &&
  301                             (ip4[0].s_addr == ip4[i+1].s_addr ||
  302                             ip4[i].s_addr == ip4[i+1].s_addr)) {
  303                                 error = EINVAL;
  304                                 goto e_free_ip;
  305                         }
  306                 }
  307 
  308                 j->ip4 = ip4;
  309         } else
  310                 j->ip4 = NULL;
  311 #endif
  312 #ifdef INET6
  313         if (j->ip6s > 0) {
  314                 ip6 = (struct in6_addr *)malloc(j->ip6s * sizeof(struct in6_addr),
  315                     M_PRISON, M_WAITOK | M_ZERO);
  316                 error = copyin(j->ip6, ip6, j->ip6s * sizeof(struct in6_addr));
  317                 if (error)
  318                         goto e_free_ip;
  319                 /* Sort all but the first IPv6 address. */
  320                 if (j->ip6s > 1)
  321                         qsort((ip6 + 1), j->ip6s - 1,
  322                             sizeof(struct in6_addr), qcmp_v6);
  323                 for (i=0; i<j->ip6s; i++) {
  324                         if (IN6_IS_ADDR_UNSPECIFIED(&ip6[i])) {
  325                                 error = EINVAL;
  326                                 goto e_free_ip;
  327                         }
  328                         if ((i+1) < j->ip6s &&
  329                             (IN6_ARE_ADDR_EQUAL(&ip6[0], &ip6[i+1]) ||
  330                             IN6_ARE_ADDR_EQUAL(&ip6[i], &ip6[i+1]))) {
  331                                 error = EINVAL;
  332                                 goto e_free_ip;
  333                         }
  334                 }
  335 
  336                 j->ip6 = ip6;
  337         } else
  338                 j->ip6 = NULL;
  339 #endif
  340         return (0);
  341 
  342 e_free_ip:
  343 #ifdef INET6
  344         free(ip6, M_PRISON);
  345 #endif
  346 #ifdef INET
  347         free(ip4, M_PRISON);
  348 #endif
  349         return (error);
  350 }
  351 #endif /* INET || INET6 */
  352 
  353 static int
  354 jail_handle_ips(struct jail *j)
  355 {
  356 #if defined(INET) || defined(INET6)
  357         int error;
  358 #endif
  359 
  360         /*
  361          * Finish conversion for older versions, copyin and setup IPs.
  362          */
  363         switch (j->version) {
  364         case 0: 
  365         {
  366 #ifdef INET
  367                 /* FreeBSD single IPv4 jails. */
  368                 struct in_addr *ip4;
  369 
  370                 if (j->ip4s == INADDR_ANY || j->ip4s == INADDR_BROADCAST)
  371                         return (EINVAL);
  372                 ip4 = (struct in_addr *)malloc(sizeof(struct in_addr),
  373                     M_PRISON, M_WAITOK | M_ZERO);
  374 
  375                 /*
  376                  * Jail version 0 still used HBO for the IPv4 address.
  377                  */
  378                 ip4->s_addr = htonl(j->ip4s);
  379                 j->ip4s = 1;
  380                 j->ip4 = ip4;
  381                 break;
  382 #else
  383                 return (EINVAL);
  384 #endif
  385         }
  386 
  387         case 1:
  388                 /*
  389                  * Version 1 was used by multi-IPv4 jail implementations
  390                  * that never made it into the official kernel.
  391                  * We should never hit this here; jail() should catch it.
  392                  */
  393                 return (EINVAL);
  394 
  395         case 2: /* JAIL_API_VERSION */
  396                 /* FreeBSD multi-IPv4/IPv6,noIP jails. */
  397 #if defined(INET) || defined(INET6)
  398 #ifdef INET
  399                 if (j->ip4s > jail_max_af_ips)
  400                         return (EINVAL);
  401 #else
  402                 if (j->ip4s != 0)
  403                         return (EINVAL);
  404 #endif
  405 #ifdef INET6
  406                 if (j->ip6s > jail_max_af_ips)
  407                         return (EINVAL);
  408 #else
  409                 if (j->ip6s != 0)
  410                         return (EINVAL);
  411 #endif
  412                 error = jail_copyin_ips(j);
  413                 if (error)
  414                         return (error);
  415 #endif
  416                 break;
  417 
  418         default:
  419                 /* Sci-Fi jails are not supported, sorry. */
  420                 return (EINVAL);
  421         }
  422 
  423         return (0);
  424 }
  425 
  426 
  427 /*
  428  * struct jail_args {
  429  *      struct jail *jail;
  430  * };
  431  */
  432 int
  433 jail(struct thread *td, struct jail_args *uap)
  434 {
  435         uint32_t version;
  436         int error;
  437         struct jail j;
  438 
  439         error = copyin(uap->jail, &version, sizeof(uint32_t));
  440         if (error)
  441                 return (error);
  442 
  443         switch (version) {
  444         case 0: 
  445                 /* FreeBSD single IPv4 jails. */
  446         {
  447                 struct jail_v0 j0;
  448 
  449                 bzero(&j, sizeof(struct jail));
  450                 error = copyin(uap->jail, &j0, sizeof(struct jail_v0));
  451                 if (error)
  452                         return (error);
  453                 j.version = j0.version;
  454                 j.path = j0.path;
  455                 j.hostname = j0.hostname;
  456                 j.ip4s = j0.ip_number;
  457                 break;
  458         }
  459 
  460         case 1:
  461                 /*
  462                  * Version 1 was used by multi-IPv4 jail implementations
  463                  * that never made it into the official kernel.
  464                  */
  465                 return (EINVAL);
  466 
  467         case 2: /* JAIL_API_VERSION */
  468                 /* FreeBSD multi-IPv4/IPv6,noIP jails. */
  469                 error = copyin(uap->jail, &j, sizeof(struct jail));
  470                 if (error)
  471                         return (error);
  472                 break;
  473 
  474         default:
  475                 /* Sci-Fi jails are not supported, sorry. */
  476                 return (EINVAL);
  477         }
  478         return (kern_jail(td, &j));
  479 }
  480 
  481 int
  482 kern_jail(struct thread *td, struct jail *j)
  483 {
  484         struct nameidata nd;
  485         struct prison *pr, *tpr;
  486         struct prison_service *psrv;
  487         struct jail_attach_args jaa;
  488         int vfslocked, error, tryprid;
  489 
  490         KASSERT(j != NULL, ("%s: j is NULL", __func__));
  491 
  492         /* Handle addresses - convert old structs, copyin, check IPs. */
  493         error = jail_handle_ips(j);
  494         if (error)
  495                 return (error);
  496 
  497         /* Allocate struct prison and fill it with life. */
  498         pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
  499         mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF);
  500         pr->pr_ref = 1;
  501         error = copyinstr(j->path, &pr->pr_path, sizeof(pr->pr_path), NULL);
  502         if (error)
  503                 goto e_killmtx;
  504         NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF, UIO_SYSSPACE,
  505             pr->pr_path, td);
  506         error = namei(&nd);
  507         if (error)
  508                 goto e_killmtx;
  509         vfslocked = NDHASGIANT(&nd);
  510         pr->pr_root = nd.ni_vp;
  511         VOP_UNLOCK(nd.ni_vp, 0, td);
  512         NDFREE(&nd, NDF_ONLY_PNBUF);
  513         VFS_UNLOCK_GIANT(vfslocked);
  514         error = copyinstr(j->hostname, &pr->pr_host, sizeof(pr->pr_host), NULL);
  515         if (error)
  516                 goto e_dropvnref;
  517         if (j->jailname != NULL) {
  518                 error = copyinstr(j->jailname, &pr->pr_name,
  519                     sizeof(pr->pr_name), NULL);
  520                 if (error)
  521                         goto e_dropvnref;
  522         }
  523         if (j->ip4s > 0) {
  524                 pr->pr_ip4 = j->ip4;
  525                 pr->pr_ip4s = j->ip4s;
  526         }
  527 #ifdef INET6
  528         if (j->ip6s > 0) {
  529                 pr->pr_ip6 = j->ip6;
  530                 pr->pr_ip6s = j->ip6s;
  531         }
  532 #endif
  533         pr->pr_linux = NULL;
  534         pr->pr_securelevel = securelevel;
  535         if (prison_service_slots == 0)
  536                 pr->pr_slots = NULL;
  537         else {
  538                 pr->pr_slots = malloc(sizeof(*pr->pr_slots) * prison_service_slots,
  539                     M_PRISON, M_ZERO | M_WAITOK);
  540         }
  541 
  542         /*
  543          * Pre-set prison state to ALIVE upon cration.  This is needed so we
  544          * can later attach the process to it, etc (avoiding another extra
  545          * state for ther process of creation, complicating things).
  546          */
  547         pr->pr_state = PRISON_STATE_ALIVE;
  548 
  549         /* Allocate a dedicated cpuset for each jail. */
  550         error = cpuset_create_root(td, &pr->pr_cpuset);
  551         if (error)
  552                 goto e_dropvnref;
  553 
  554         sx_xlock(&allprison_lock);
  555         /* Make sure we cannot run into problems with ambiguous bind()ings. */
  556 #if defined(INET) || defined(INET6)
  557         error = prison_check_conflicting_ips(pr);
  558         if (error) {
  559                 sx_xunlock(&allprison_lock);
  560                 goto e_dropcpuset;
  561         }
  562 #endif
  563 
  564         /* Determine next pr_id and add prison to allprison list. */
  565         tryprid = lastprid + 1;
  566         if (tryprid == JAIL_MAX)
  567                 tryprid = 1;
  568 next:
  569         LIST_FOREACH(tpr, &allprison, pr_list) {
  570                 if (tpr->pr_id == tryprid) {
  571                         tryprid++;
  572                         if (tryprid == JAIL_MAX) {
  573                                 sx_xunlock(&allprison_lock);
  574                                 error = EAGAIN;
  575                                 goto e_dropcpuset;
  576                         }
  577                         goto next;
  578                 }
  579         }
  580         pr->pr_id = jaa.jid = lastprid = tryprid;
  581         LIST_INSERT_HEAD(&allprison, pr, pr_list);
  582         prisoncount++;
  583         sx_downgrade(&allprison_lock);
  584         TAILQ_FOREACH(psrv, &prison_services, ps_next) {
  585                 psrv->ps_create(psrv, pr);
  586         }
  587         sx_sunlock(&allprison_lock);
  588 
  589         error = jail_attach(td, &jaa);
  590         if (error)
  591                 goto e_dropprref;
  592         mtx_lock(&pr->pr_mtx);
  593         pr->pr_ref--;
  594         mtx_unlock(&pr->pr_mtx);
  595         td->td_retval[0] = jaa.jid;
  596         return (0);
  597 e_dropprref:
  598         sx_xlock(&allprison_lock);
  599         LIST_REMOVE(pr, pr_list);
  600         prisoncount--;
  601         sx_downgrade(&allprison_lock);
  602         TAILQ_FOREACH(psrv, &prison_services, ps_next) {
  603                 psrv->ps_destroy(psrv, pr);
  604         }
  605         sx_sunlock(&allprison_lock);
  606 e_dropcpuset:
  607         cpuset_rel(pr->pr_cpuset);
  608 e_dropvnref:
  609         if (pr->pr_slots != NULL)
  610                 free(pr->pr_slots, M_PRISON);
  611         vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
  612         vrele(pr->pr_root);
  613         VFS_UNLOCK_GIANT(vfslocked);
  614 e_killmtx:
  615         mtx_destroy(&pr->pr_mtx);
  616         free(pr, M_PRISON);
  617 #ifdef INET6
  618         free(j->ip6, M_PRISON);
  619 #endif
  620 #ifdef INET
  621         free(j->ip4, M_PRISON);
  622 #endif
  623         return (error);
  624 }
  625 
  626 /*
  627  * struct jail_attach_args {
  628  *      int jid;
  629  * };
  630  */
  631 int
  632 jail_attach(struct thread *td, struct jail_attach_args *uap)
  633 {
  634         struct proc *p;
  635         struct ucred *newcred, *oldcred;
  636         struct prison *pr;
  637         int vfslocked, error;
  638 
  639         /*
  640          * XXX: Note that there is a slight race here if two threads
  641          * in the same privileged process attempt to attach to two
  642          * different jails at the same time.  It is important for
  643          * user processes not to do this, or they might end up with
  644          * a process root from one prison, but attached to the jail
  645          * of another.
  646          */
  647         error = priv_check(td, PRIV_JAIL_ATTACH);
  648         if (error)
  649                 return (error);
  650 
  651         p = td->td_proc;
  652         sx_slock(&allprison_lock);
  653         pr = prison_find(uap->jid);
  654         if (pr == NULL) {
  655                 sx_sunlock(&allprison_lock);
  656                 return (EINVAL);
  657         }
  658 
  659         /*
  660          * Do not allow a process to attach to a prison that is not
  661          * considered to be "ALIVE".
  662          */
  663         if (pr->pr_state != PRISON_STATE_ALIVE) {
  664                 mtx_unlock(&pr->pr_mtx);
  665                 sx_sunlock(&allprison_lock);
  666                 return (EINVAL);
  667         }
  668         pr->pr_ref++;
  669         mtx_unlock(&pr->pr_mtx);
  670         sx_sunlock(&allprison_lock);
  671 
  672         /*
  673          * Reparent the newly attached process to this jail.
  674          */
  675         error = cpuset_setproc_update_set(p, pr->pr_cpuset);
  676         if (error)
  677                 goto e_unref;
  678 
  679         vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
  680         vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY, td);
  681         if ((error = change_dir(pr->pr_root, td)) != 0)
  682                 goto e_unlock;
  683 #ifdef MAC
  684         if ((error = mac_check_vnode_chroot(td->td_ucred, pr->pr_root)))
  685                 goto e_unlock;
  686 #endif
  687         VOP_UNLOCK(pr->pr_root, 0, td);
  688         change_root(pr->pr_root, td);
  689         VFS_UNLOCK_GIANT(vfslocked);
  690 
  691         newcred = crget();
  692         PROC_LOCK(p);
  693         oldcred = p->p_ucred;
  694         setsugid(p);
  695         crcopy(newcred, oldcred);
  696         newcred->cr_prison = pr;
  697         p->p_ucred = newcred;
  698         prison_proc_hold(pr);
  699         PROC_UNLOCK(p);
  700         crfree(oldcred);
  701         return (0);
  702 e_unlock:
  703         VOP_UNLOCK(pr->pr_root, 0, td);
  704         VFS_UNLOCK_GIANT(vfslocked);
  705 e_unref:
  706         mtx_lock(&pr->pr_mtx);
  707         pr->pr_ref--;
  708         mtx_unlock(&pr->pr_mtx);
  709         return (error);
  710 }
  711 
  712 /*
  713  * Returns a locked prison instance, or NULL on failure.
  714  */
  715 struct prison *
  716 prison_find(int prid)
  717 {
  718         struct prison *pr;
  719 
  720         sx_assert(&allprison_lock, SX_LOCKED);
  721         LIST_FOREACH(pr, &allprison, pr_list) {
  722                 if (pr->pr_id == prid) {
  723                         mtx_lock(&pr->pr_mtx);
  724                         if (pr->pr_ref == 0) {
  725                                 mtx_unlock(&pr->pr_mtx);
  726                                 break;
  727                         }
  728                         return (pr);
  729                 }
  730         }
  731         return (NULL);
  732 }
  733 
  734 void
  735 prison_free_locked(struct prison *pr)
  736 {
  737 
  738         mtx_assert(&pr->pr_mtx, MA_OWNED);
  739         pr->pr_ref--;
  740         if (pr->pr_ref == 0) {
  741                 mtx_unlock(&pr->pr_mtx);
  742                 TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
  743                 taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
  744                 return;
  745         }
  746         mtx_unlock(&pr->pr_mtx);
  747 }
  748 
  749 void
  750 prison_free(struct prison *pr)
  751 {
  752 
  753         mtx_lock(&pr->pr_mtx);
  754         prison_free_locked(pr);
  755 }
  756 
  757 static void
  758 prison_complete(void *context, int pending)
  759 {
  760         struct prison_service *psrv;
  761         struct prison *pr;
  762         int vfslocked;
  763 
  764         pr = (struct prison *)context;
  765 
  766         sx_xlock(&allprison_lock);
  767         LIST_REMOVE(pr, pr_list);
  768         prisoncount--;
  769         sx_downgrade(&allprison_lock);
  770         TAILQ_FOREACH(psrv, &prison_services, ps_next) {
  771                 psrv->ps_destroy(psrv, pr);
  772         }
  773         sx_sunlock(&allprison_lock);
  774 
  775         cpuset_rel(pr->pr_cpuset);
  776 
  777         if (pr->pr_slots != NULL)
  778                 free(pr->pr_slots, M_PRISON);
  779 
  780         vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
  781         vrele(pr->pr_root);
  782         VFS_UNLOCK_GIANT(vfslocked);
  783 
  784         mtx_destroy(&pr->pr_mtx);
  785         free(pr->pr_linux, M_PRISON);
  786 #ifdef INET6
  787         free(pr->pr_ip6, M_PRISON);
  788 #endif
  789 #ifdef INET
  790         free(pr->pr_ip4, M_PRISON);
  791 #endif
  792         free(pr, M_PRISON);
  793 }
  794 
  795 void
  796 prison_hold_locked(struct prison *pr)
  797 {
  798 
  799         mtx_assert(&pr->pr_mtx, MA_OWNED);
  800         KASSERT(pr->pr_ref > 0,
  801             ("Trying to hold dead prison (id=%d).", pr->pr_id));
  802         pr->pr_ref++;
  803 }
  804 
  805 void
  806 prison_hold(struct prison *pr)
  807 {
  808 
  809         mtx_lock(&pr->pr_mtx);
  810         prison_hold_locked(pr);
  811         mtx_unlock(&pr->pr_mtx);
  812 }
  813 
  814 void
  815 prison_proc_hold(struct prison *pr)
  816 {
  817 
  818         mtx_lock(&pr->pr_mtx);
  819         KASSERT(pr->pr_state == PRISON_STATE_ALIVE,
  820             ("Cannot add a process to a non-alive prison (id=%d).", pr->pr_id));
  821         pr->pr_nprocs++;
  822         mtx_unlock(&pr->pr_mtx);
  823 }
  824 
  825 void
  826 prison_proc_free(struct prison *pr)
  827 {
  828 
  829         mtx_lock(&pr->pr_mtx);
  830         KASSERT(pr->pr_state == PRISON_STATE_ALIVE && pr->pr_nprocs > 0,
  831             ("Trying to kill a process in a dead prison (id=%d).", pr->pr_id));
  832         pr->pr_nprocs--;
  833         if (pr->pr_nprocs == 0)
  834                 pr->pr_state = PRISON_STATE_DYING;
  835         mtx_unlock(&pr->pr_mtx);
  836 }
  837 
  838 
  839 #ifdef INET
  840 /*
  841  * Pass back primary IPv4 address of this jail.
  842  *
  843  * If not jailed return success but do not alter the address.  Caller has to
  844  * make sure to intialize it correctly (e.g. INADDR_ANY).
  845  *
  846  * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4.
  847  * Address returned in NBO.
  848  */
  849 int
  850 prison_get_ip4(struct ucred *cred, struct in_addr *ia)
  851 {
  852 
  853         KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
  854         KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
  855 
  856         if (!jailed(cred))
  857                 /* Do not change address passed in. */
  858                 return (0);
  859 
  860         if (cred->cr_prison->pr_ip4 == NULL)
  861                 return (EAFNOSUPPORT);
  862 
  863         ia->s_addr = cred->cr_prison->pr_ip4[0].s_addr;
  864         return (0);
  865 }
  866 
  867 /*
  868  * Make sure our (source) address is set to something meaningful to this
  869  * jail.
  870  *
  871  * Returns 0 if not jailed or if address belongs to jail, EADDRNOTAVAIL if
  872  * the address doesn't belong, or EAFNOSUPPORT if the jail doesn't allow IPv4.
  873  * Address passed in in NBO and returned in NBO.
  874  */
  875 int
  876 prison_local_ip4(struct ucred *cred, struct in_addr *ia)
  877 {
  878         struct in_addr ia0;
  879 
  880         KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
  881         KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
  882 
  883         if (!jailed(cred))
  884                 return (0);
  885         if (cred->cr_prison->pr_ip4 == NULL)
  886                 return (EAFNOSUPPORT);
  887 
  888         ia0.s_addr = ntohl(ia->s_addr);
  889         if (ia0.s_addr == INADDR_LOOPBACK) {
  890                 ia->s_addr = cred->cr_prison->pr_ip4[0].s_addr;
  891                 return (0);
  892         }
  893 
  894         if (ia0.s_addr == INADDR_ANY) {
  895                 /*
  896                  * In case there is only 1 IPv4 address, bind directly.
  897                  */
  898                 if (cred->cr_prison->pr_ip4s == 1)
  899                         ia->s_addr = cred->cr_prison->pr_ip4[0].s_addr;
  900                 return (0);
  901         }
  902 
  903         return (_prison_check_ip4(cred->cr_prison, ia));
  904 }
  905 
  906 /*
  907  * Rewrite destination address in case we will connect to loopback address.
  908  *
  909  * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4.
  910  * Address passed in in NBO and returned in NBO.
  911  */
  912 int
  913 prison_remote_ip4(struct ucred *cred, struct in_addr *ia)
  914 {
  915 
  916         KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
  917         KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
  918 
  919         if (!jailed(cred))
  920                 return (0);
  921         if (cred->cr_prison->pr_ip4 == NULL)
  922                 return (EAFNOSUPPORT);
  923 
  924         if (ntohl(ia->s_addr) == INADDR_LOOPBACK) {
  925                 ia->s_addr = cred->cr_prison->pr_ip4[0].s_addr;
  926                 return (0);
  927         }
  928 
  929         /*
  930          * Return success because nothing had to be changed.
  931          */
  932         return (0);
  933 }
  934 
  935 /*
  936  * Check if given address belongs to the jail referenced by cred/prison.
  937  *
  938  * Returns 0 if not jailed or if address belongs to jail, EADDRNOTAVAIL if
  939  * the address doesn't belong, or EAFNOSUPPORT if the jail doesn't allow IPv4.
  940  * Address passed in in NBO.
  941  */
  942 static int
  943 _prison_check_ip4(struct prison *pr, struct in_addr *ia)
  944 {
  945         int i, a, z, d;
  946 
  947         /*
  948          * Check the primary IP.
  949          */
  950         if (pr->pr_ip4[0].s_addr == ia->s_addr)
  951                 return (0);
  952 
  953         /*
  954          * All the other IPs are sorted so we can do a binary search.
  955          */
  956         a = 0;
  957         z = pr->pr_ip4s - 2;
  958         while (a <= z) {
  959                 i = (a + z) / 2;
  960                 d = qcmp_v4(&pr->pr_ip4[i+1], ia);
  961                 if (d > 0)
  962                         z = i - 1;
  963                 else if (d < 0)
  964                         a = i + 1;
  965                 else
  966                         return (0);
  967         }
  968 
  969         return (EADDRNOTAVAIL);
  970 }
  971 
  972 int
  973 prison_check_ip4(struct ucred *cred, struct in_addr *ia)
  974 {
  975 
  976         KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
  977         KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
  978 
  979         if (!jailed(cred))
  980                 return (0);
  981         if (cred->cr_prison->pr_ip4 == NULL)
  982                 return (EAFNOSUPPORT);
  983 
  984         return (_prison_check_ip4(cred->cr_prison, ia));
  985 }
  986 #endif
  987 
  988 #ifdef INET6
  989 /*
  990  * Pass back primary IPv6 address for this jail.
  991  *
  992  * If not jailed return success but do not alter the address.  Caller has to
  993  * make sure to intialize it correctly (e.g. IN6ADDR_ANY_INIT).
  994  *
  995  * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv6.
  996  */
  997 int
  998 prison_get_ip6(struct ucred *cred, struct in6_addr *ia6)
  999 {
 1000 
 1001         KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
 1002         KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
 1003 
 1004         if (!jailed(cred))
 1005                 return (0);
 1006         if (cred->cr_prison->pr_ip6 == NULL)
 1007                 return (EAFNOSUPPORT);
 1008 
 1009         bcopy(&cred->cr_prison->pr_ip6[0], ia6, sizeof(struct in6_addr));
 1010         return (0);
 1011 }
 1012 
 1013 /*
 1014  * Make sure our (source) address is set to something meaningful to this jail.
 1015  *
 1016  * v6only should be set based on (inp->inp_flags & IN6P_IPV6_V6ONLY != 0)
 1017  * when needed while binding.
 1018  *
 1019  * Returns 0 if not jailed or if address belongs to jail, EADDRNOTAVAIL if
 1020  * the address doesn't belong, or EAFNOSUPPORT if the jail doesn't allow IPv6.
 1021  */
 1022 int
 1023 prison_local_ip6(struct ucred *cred, struct in6_addr *ia6, int v6only)
 1024 {
 1025 
 1026         KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
 1027         KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
 1028 
 1029         if (!jailed(cred))
 1030                 return (0);
 1031         if (cred->cr_prison->pr_ip6 == NULL)
 1032                 return (EAFNOSUPPORT);
 1033 
 1034         if (IN6_IS_ADDR_LOOPBACK(ia6)) {
 1035                 bcopy(&cred->cr_prison->pr_ip6[0], ia6,
 1036                     sizeof(struct in6_addr));
 1037                 return (0);
 1038         }
 1039 
 1040         if (IN6_IS_ADDR_UNSPECIFIED(ia6)) {
 1041                 /*
 1042                  * In case there is only 1 IPv6 address, and v6only is true,
 1043                  * then bind directly.
 1044                  */
 1045                 if (v6only != 0 && cred->cr_prison->pr_ip6s == 1)
 1046                         bcopy(&cred->cr_prison->pr_ip6[0], ia6,
 1047                             sizeof(struct in6_addr));
 1048                 return (0);
 1049         }
 1050 
 1051         return (_prison_check_ip6(cred->cr_prison, ia6));
 1052 }
 1053 
 1054 /*
 1055  * Rewrite destination address in case we will connect to loopback address.
 1056  *
 1057  * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv6.
 1058  */
 1059 int
 1060 prison_remote_ip6(struct ucred *cred, struct in6_addr *ia6)
 1061 {
 1062 
 1063         KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
 1064         KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
 1065 
 1066         if (!jailed(cred))
 1067                 return (0);
 1068         if (cred->cr_prison->pr_ip6 == NULL)
 1069                 return (EAFNOSUPPORT);
 1070 
 1071         if (IN6_IS_ADDR_LOOPBACK(ia6)) {
 1072                 bcopy(&cred->cr_prison->pr_ip6[0], ia6,
 1073                     sizeof(struct in6_addr));
 1074                 return (0);
 1075         }
 1076 
 1077         /*
 1078          * Return success because nothing had to be changed.
 1079          */
 1080         return (0);
 1081 }
 1082 
 1083 /*
 1084  * Check if given address belongs to the jail referenced by cred/prison.
 1085  *
 1086  * Returns 0 if not jailed or if address belongs to jail, EADDRNOTAVAIL if
 1087  * the address doesn't belong, or EAFNOSUPPORT if the jail doesn't allow IPv6.
 1088  */
 1089 static int
 1090 _prison_check_ip6(struct prison *pr, struct in6_addr *ia6)
 1091 {
 1092         int i, a, z, d;
 1093 
 1094         /*
 1095          * Check the primary IP.
 1096          */
 1097         if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[0], ia6))
 1098                 return (0);
 1099 
 1100         /*
 1101          * All the other IPs are sorted so we can do a binary search.
 1102          */
 1103         a = 0;
 1104         z = pr->pr_ip6s - 2;
 1105         while (a <= z) {
 1106                 i = (a + z) / 2;
 1107                 d = qcmp_v6(&pr->pr_ip6[i+1], ia6);
 1108                 if (d > 0)
 1109                         z = i - 1;
 1110                 else if (d < 0)
 1111                         a = i + 1;
 1112                 else
 1113                         return (0);
 1114         }
 1115 
 1116         return (EADDRNOTAVAIL);
 1117 }
 1118 
 1119 int
 1120 prison_check_ip6(struct ucred *cred, struct in6_addr *ia6)
 1121 {
 1122 
 1123         KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
 1124         KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
 1125 
 1126         if (!jailed(cred))
 1127                 return (0);
 1128         if (cred->cr_prison->pr_ip6 == NULL)
 1129                 return (EAFNOSUPPORT);
 1130 
 1131         return (_prison_check_ip6(cred->cr_prison, ia6));
 1132 }
 1133 #endif
 1134 
 1135 /*
 1136  * Check if a jail supports the given address family.
 1137  *
 1138  * Returns 0 if not jailed or the address family is supported, EAFNOSUPPORT
 1139  * if not.
 1140  */
 1141 int
 1142 prison_check_af(struct ucred *cred, int af)
 1143 {
 1144         int error;
 1145 
 1146         KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
 1147 
 1148 
 1149         if (!jailed(cred))
 1150                 return (0);
 1151 
 1152         error = 0;
 1153         switch (af)
 1154         {
 1155 #ifdef INET
 1156         case AF_INET:
 1157                 if (cred->cr_prison->pr_ip4 == NULL)
 1158                         error = EAFNOSUPPORT;
 1159                 break;
 1160 #endif
 1161 #ifdef INET6
 1162         case AF_INET6:
 1163                 if (cred->cr_prison->pr_ip6 == NULL)
 1164                         error = EAFNOSUPPORT;
 1165                 break;
 1166 #endif
 1167         case AF_LOCAL:
 1168         case AF_ROUTE:
 1169                 break;
 1170         default:
 1171                 if (jail_socket_unixiproute_only)
 1172                         error = EAFNOSUPPORT;
 1173         }
 1174         return (error);
 1175 }
 1176 
 1177 /*
 1178  * Check if given address belongs to the jail referenced by cred (wrapper to
 1179  * prison_check_ip[46]).
 1180  *
 1181  * Returns 0 if not jailed or if address belongs to jail, EADDRNOTAVAIL if
 1182  * the address doesn't belong, or EAFNOSUPPORT if the jail doesn't allow
 1183  * the address family.  IPv4 Address passed in in NBO.
 1184  */
 1185 int
 1186 prison_if(struct ucred *cred, struct sockaddr *sa)
 1187 {
 1188 #ifdef INET
 1189         struct sockaddr_in *sai;
 1190 #endif
 1191 #ifdef INET6
 1192         struct sockaddr_in6 *sai6;
 1193 #endif
 1194         int error;
 1195 
 1196         KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
 1197         KASSERT(sa != NULL, ("%s: sa is NULL", __func__));
 1198 
 1199         error = 0;
 1200         switch (sa->sa_family)
 1201         {
 1202 #ifdef INET
 1203         case AF_INET:
 1204                 sai = (struct sockaddr_in *)sa;
 1205                 error = prison_check_ip4(cred, &sai->sin_addr);
 1206                 break;
 1207 #endif
 1208 #ifdef INET6
 1209         case AF_INET6:
 1210                 sai6 = (struct sockaddr_in6 *)sa;
 1211                 error = prison_check_ip6(cred, &sai6->sin6_addr);
 1212                 break;
 1213 #endif
 1214         default:
 1215                 if (jailed(cred) && jail_socket_unixiproute_only)
 1216                         error = EAFNOSUPPORT;
 1217         }
 1218         return (error);
 1219 }
 1220 
 1221 /*
 1222  * Return 0 if jails permit p1 to frob p2, otherwise ESRCH.
 1223  */
 1224 int
 1225 prison_check(struct ucred *cred1, struct ucred *cred2)
 1226 {
 1227 
 1228         if (jailed(cred1)) {
 1229                 if (!jailed(cred2))
 1230                         return (ESRCH);
 1231                 if (cred2->cr_prison != cred1->cr_prison)
 1232                         return (ESRCH);
 1233         }
 1234 
 1235         return (0);
 1236 }
 1237 
 1238 /*
 1239  * Return 1 if the passed credential is in a jail, otherwise 0.
 1240  */
 1241 int
 1242 jailed(struct ucred *cred)
 1243 {
 1244 
 1245         return (cred->cr_prison != NULL);
 1246 }
 1247 
 1248 /*
 1249  * Return the correct hostname for the passed credential.
 1250  */
 1251 void
 1252 getcredhostname(struct ucred *cred, char *buf, size_t size)
 1253 {
 1254 
 1255         if (jailed(cred)) {
 1256                 mtx_lock(&cred->cr_prison->pr_mtx);
 1257                 strlcpy(buf, cred->cr_prison->pr_host, size);
 1258                 mtx_unlock(&cred->cr_prison->pr_mtx);
 1259         } else
 1260                 strlcpy(buf, hostname, size);
 1261 }
 1262 
 1263 /*
 1264  * Determine whether the subject represented by cred can "see"
 1265  * status of a mount point.
 1266  * Returns: 0 for permitted, ENOENT otherwise.
 1267  * XXX: This function should be called cr_canseemount() and should be
 1268  *      placed in kern_prot.c.
 1269  */
 1270 int
 1271 prison_canseemount(struct ucred *cred, struct mount *mp)
 1272 {
 1273         struct prison *pr;
 1274         struct statfs *sp;
 1275         size_t len;
 1276 
 1277         if (!jailed(cred) || jail_enforce_statfs == 0)
 1278                 return (0);
 1279         pr = cred->cr_prison;
 1280         if (pr->pr_root->v_mount == mp)
 1281                 return (0);
 1282         if (jail_enforce_statfs == 2)
 1283                 return (ENOENT);
 1284         /*
 1285          * If jail's chroot directory is set to "/" we should be able to see
 1286          * all mount-points from inside a jail.
 1287          * This is ugly check, but this is the only situation when jail's
 1288          * directory ends with '/'.
 1289          */
 1290         if (strcmp(pr->pr_path, "/") == 0)
 1291                 return (0);
 1292         len = strlen(pr->pr_path);
 1293         sp = &mp->mnt_stat;
 1294         if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0)
 1295                 return (ENOENT);
 1296         /*
 1297          * Be sure that we don't have situation where jail's root directory
 1298          * is "/some/path" and mount point is "/some/pathpath".
 1299          */
 1300         if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/')
 1301                 return (ENOENT);
 1302         return (0);
 1303 }
 1304 
 1305 void
 1306 prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp)
 1307 {
 1308         char jpath[MAXPATHLEN];
 1309         struct prison *pr;
 1310         size_t len;
 1311 
 1312         if (!jailed(cred) || jail_enforce_statfs == 0)
 1313                 return;
 1314         pr = cred->cr_prison;
 1315         if (prison_canseemount(cred, mp) != 0) {
 1316                 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
 1317                 strlcpy(sp->f_mntonname, "[restricted]",
 1318                     sizeof(sp->f_mntonname));
 1319                 return;
 1320         }
 1321         if (pr->pr_root->v_mount == mp) {
 1322                 /*
 1323                  * Clear current buffer data, so we are sure nothing from
 1324                  * the valid path left there.
 1325                  */
 1326                 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
 1327                 *sp->f_mntonname = '/';
 1328                 return;
 1329         }
 1330         /*
 1331          * If jail's chroot directory is set to "/" we should be able to see
 1332          * all mount-points from inside a jail.
 1333          */
 1334         if (strcmp(pr->pr_path, "/") == 0)
 1335                 return;
 1336         len = strlen(pr->pr_path);
 1337         strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath));
 1338         /*
 1339          * Clear current buffer data, so we are sure nothing from
 1340          * the valid path left there.
 1341          */
 1342         bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
 1343         if (*jpath == '\0') {
 1344                 /* Should never happen. */
 1345                 *sp->f_mntonname = '/';
 1346         } else {
 1347                 strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname));
 1348         }
 1349 }
 1350 
 1351 /*
 1352  * Check with permission for a specific privilege is granted within jail.  We
 1353  * have a specific list of accepted privileges; the rest are denied.
 1354  */
 1355 int
 1356 prison_priv_check(struct ucred *cred, int priv)
 1357 {
 1358 
 1359         if (!jailed(cred))
 1360                 return (0);
 1361 
 1362         switch (priv) {
 1363 
 1364                 /*
 1365                  * Allow ktrace privileges for root in jail.
 1366                  */
 1367         case PRIV_KTRACE:
 1368 
 1369 #if 0
 1370                 /*
 1371                  * Allow jailed processes to configure audit identity and
 1372                  * submit audit records (login, etc).  In the future we may
 1373                  * want to further refine the relationship between audit and
 1374                  * jail.
 1375                  */
 1376         case PRIV_AUDIT_GETAUDIT:
 1377         case PRIV_AUDIT_SETAUDIT:
 1378         case PRIV_AUDIT_SUBMIT:
 1379 #endif
 1380 
 1381                 /*
 1382                  * Allow jailed processes to manipulate process UNIX
 1383                  * credentials in any way they see fit.
 1384                  */
 1385         case PRIV_CRED_SETUID:
 1386         case PRIV_CRED_SETEUID:
 1387         case PRIV_CRED_SETGID:
 1388         case PRIV_CRED_SETEGID:
 1389         case PRIV_CRED_SETGROUPS:
 1390         case PRIV_CRED_SETREUID:
 1391         case PRIV_CRED_SETREGID:
 1392         case PRIV_CRED_SETRESUID:
 1393         case PRIV_CRED_SETRESGID:
 1394 
 1395                 /*
 1396                  * Jail implements visibility constraints already, so allow
 1397                  * jailed root to override uid/gid-based constraints.
 1398                  */
 1399         case PRIV_SEEOTHERGIDS:
 1400         case PRIV_SEEOTHERUIDS:
 1401 
 1402                 /*
 1403                  * Jail implements inter-process debugging limits already, so
 1404                  * allow jailed root various debugging privileges.
 1405                  */
 1406         case PRIV_DEBUG_DIFFCRED:
 1407         case PRIV_DEBUG_SUGID:
 1408         case PRIV_DEBUG_UNPRIV:
 1409 
 1410                 /*
 1411                  * Allow jail to set various resource limits and login
 1412                  * properties, and for now, exceed process resource limits.
 1413                  */
 1414         case PRIV_PROC_LIMIT:
 1415         case PRIV_PROC_SETLOGIN:
 1416         case PRIV_PROC_SETRLIMIT:
 1417 
 1418                 /*
 1419                  * System V and POSIX IPC privileges are granted in jail.
 1420                  */
 1421         case PRIV_IPC_READ:
 1422         case PRIV_IPC_WRITE:
 1423         case PRIV_IPC_ADMIN:
 1424         case PRIV_IPC_MSGSIZE:
 1425         case PRIV_MQ_ADMIN:
 1426 
 1427                 /*
 1428                  * Jail implements its own inter-process limits, so allow
 1429                  * root processes in jail to change scheduling on other
 1430                  * processes in the same jail.  Likewise for signalling.
 1431                  */
 1432         case PRIV_SCHED_DIFFCRED:
 1433         case PRIV_SCHED_CPUSET:
 1434         case PRIV_SIGNAL_DIFFCRED:
 1435         case PRIV_SIGNAL_SUGID:
 1436 
 1437                 /*
 1438                  * Allow jailed processes to write to sysctls marked as jail
 1439                  * writable.
 1440                  */
 1441         case PRIV_SYSCTL_WRITEJAIL:
 1442 
 1443                 /*
 1444                  * Allow root in jail to manage a variety of quota
 1445                  * properties.  These should likely be conditional on a
 1446                  * configuration option.
 1447                  */
 1448         case PRIV_VFS_GETQUOTA:
 1449         case PRIV_VFS_SETQUOTA:
 1450 
 1451                 /*
 1452                  * Since Jail relies on chroot() to implement file system
 1453                  * protections, grant many VFS privileges to root in jail.
 1454                  * Be careful to exclude mount-related and NFS-related
 1455                  * privileges.
 1456                  */
 1457         case PRIV_VFS_READ:
 1458         case PRIV_VFS_WRITE:
 1459         case PRIV_VFS_ADMIN:
 1460         case PRIV_VFS_EXEC:
 1461         case PRIV_VFS_LOOKUP:
 1462         case PRIV_VFS_BLOCKRESERVE:     /* XXXRW: Slightly surprising. */
 1463         case PRIV_VFS_CHFLAGS_DEV:
 1464         case PRIV_VFS_CHOWN:
 1465         case PRIV_VFS_CHROOT:
 1466         case PRIV_VFS_RETAINSUGID:
 1467         case PRIV_VFS_FCHROOT:
 1468         case PRIV_VFS_LINK:
 1469         case PRIV_VFS_SETGID:
 1470         case PRIV_VFS_STICKYFILE:
 1471                 return (0);
 1472 
 1473                 /*
 1474                  * Depending on the global setting, allow privilege of
 1475                  * setting system flags.
 1476                  */
 1477         case PRIV_VFS_SYSFLAGS:
 1478                 if (jail_chflags_allowed)
 1479                         return (0);
 1480                 else
 1481                         return (EPERM);
 1482 
 1483                 /*
 1484                  * Depending on the global setting, allow privilege of
 1485                  * mounting/unmounting file systems.
 1486                  */
 1487         case PRIV_VFS_MOUNT:
 1488         case PRIV_VFS_UNMOUNT:
 1489         case PRIV_VFS_MOUNT_NONUSER:
 1490         case PRIV_VFS_MOUNT_OWNER:
 1491                 if (jail_mount_allowed)
 1492                         return (0);
 1493                 else
 1494                         return (EPERM);
 1495 
 1496                 /*
 1497                  * Allow jailed root to bind reserved ports and reuse in-use
 1498                  * ports.
 1499                  */
 1500         case PRIV_NETINET_RESERVEDPORT:
 1501         case PRIV_NETINET_REUSEPORT:
 1502                 return (0);
 1503 
 1504                 /*
 1505                  * Allow jailed root to set certian IPv4/6 (option) headers.
 1506                  */
 1507         case PRIV_NETINET_SETHDROPTS:
 1508                 return (0);
 1509 
 1510                 /*
 1511                  * Conditionally allow creating raw sockets in jail.
 1512                  */
 1513         case PRIV_NETINET_RAW:
 1514                 if (jail_allow_raw_sockets)
 1515                         return (0);
 1516                 else
 1517                         return (EPERM);
 1518 
 1519                 /*
 1520                  * Since jail implements its own visibility limits on netstat
 1521                  * sysctls, allow getcred.  This allows identd to work in
 1522                  * jail.
 1523                  */
 1524         case PRIV_NETINET_GETCRED:
 1525                 return (0);
 1526 
 1527         default:
 1528                 /*
 1529                  * In all remaining cases, deny the privilege request.  This
 1530                  * includes almost all network privileges, many system
 1531                  * configuration privileges.
 1532                  */
 1533                 return (EPERM);
 1534         }
 1535 }
 1536 
 1537 /*
 1538  * Register jail service. Provides 'create' and 'destroy' methods.
 1539  * 'create' method will be called for every existing jail and all
 1540  * jails in the future as they beeing created.
 1541  * 'destroy' method will be called for every jail going away and
 1542  * for all existing jails at the time of service deregistration.
 1543  */
 1544 struct prison_service *
 1545 prison_service_register(const char *name, prison_create_t create,
 1546     prison_destroy_t destroy)
 1547 {
 1548         struct prison_service *psrv, *psrv2;
 1549         struct prison *pr;
 1550         int reallocate = 1, slotno = 0;
 1551         void **slots, **oldslots;
 1552 
 1553         psrv = malloc(sizeof(*psrv) + strlen(name) + 1, M_PRISON,
 1554             M_WAITOK | M_ZERO);
 1555         psrv->ps_create = create;
 1556         psrv->ps_destroy = destroy;
 1557         strcpy(psrv->ps_name, name);
 1558         /*
 1559          * Grab the allprison_lock here, so we won't miss any jail
 1560          * creation/destruction.
 1561          */
 1562         sx_xlock(&allprison_lock);
 1563 #ifdef INVARIANTS
 1564         /*
 1565          * Verify if service is not already registered.
 1566          */
 1567         TAILQ_FOREACH(psrv2, &prison_services, ps_next) {
 1568                 KASSERT(strcmp(psrv2->ps_name, name) != 0,
 1569                     ("jail service %s already registered", name));
 1570         }
 1571 #endif
 1572         /*
 1573          * Find free slot. When there is no existing free slot available,
 1574          * allocate one at the end.
 1575          */
 1576         TAILQ_FOREACH(psrv2, &prison_services, ps_next) {
 1577                 if (psrv2->ps_slotno != slotno) {
 1578                         KASSERT(slotno < psrv2->ps_slotno,
 1579                             ("Invalid slotno (slotno=%d >= ps_slotno=%d",
 1580                             slotno, psrv2->ps_slotno));
 1581                         /* We found free slot. */
 1582                         reallocate = 0;
 1583                         break;
 1584                 }
 1585                 slotno++;
 1586         }
 1587         psrv->ps_slotno = slotno;
 1588         /*
 1589          * Keep the list sorted by slot number.
 1590          */
 1591         if (psrv2 != NULL) {
 1592                 KASSERT(reallocate == 0, ("psrv2 != NULL && reallocate != 0"));
 1593                 TAILQ_INSERT_BEFORE(psrv2, psrv, ps_next);
 1594         } else {
 1595                 KASSERT(reallocate == 1, ("psrv2 == NULL && reallocate == 0"));
 1596                 TAILQ_INSERT_TAIL(&prison_services, psrv, ps_next);
 1597         }
 1598         prison_service_slots++;
 1599         sx_downgrade(&allprison_lock);
 1600         /*
 1601          * Allocate memory for new slot if we didn't found empty one.
 1602          * Do not use realloc(9), because pr_slots is protected with a mutex,
 1603          * so we can't sleep.
 1604          */
 1605         LIST_FOREACH(pr, &allprison, pr_list) {
 1606                 if (reallocate) {
 1607                         /* First allocate memory with M_WAITOK. */
 1608                         slots = malloc(sizeof(*slots) * prison_service_slots,
 1609                             M_PRISON, M_WAITOK);
 1610                         /* Now grab the mutex and replace pr_slots. */
 1611                         mtx_lock(&pr->pr_mtx);
 1612                         oldslots = pr->pr_slots;
 1613                         if (psrv->ps_slotno > 0) {
 1614                                 bcopy(oldslots, slots,
 1615                                     sizeof(*slots) * (prison_service_slots - 1));
 1616                         }
 1617                         slots[psrv->ps_slotno] = NULL;
 1618                         pr->pr_slots = slots;
 1619                         mtx_unlock(&pr->pr_mtx);
 1620                         if (oldslots != NULL)
 1621                                 free(oldslots, M_PRISON);
 1622                 }
 1623                 /*
 1624                  * Call 'create' method for each existing jail.
 1625                  */
 1626                 psrv->ps_create(psrv, pr);
 1627         }
 1628         sx_sunlock(&allprison_lock);
 1629 
 1630         return (psrv);
 1631 }
 1632 
 1633 void
 1634 prison_service_deregister(struct prison_service *psrv)
 1635 {
 1636         struct prison *pr;
 1637         void **slots, **oldslots;
 1638         int last = 0;
 1639 
 1640         sx_xlock(&allprison_lock);
 1641         if (TAILQ_LAST(&prison_services, prison_services_head) == psrv)
 1642                 last = 1;
 1643         TAILQ_REMOVE(&prison_services, psrv, ps_next);
 1644         prison_service_slots--;
 1645         sx_downgrade(&allprison_lock);
 1646         LIST_FOREACH(pr, &allprison, pr_list) {
 1647                 /*
 1648                  * Call 'destroy' method for every currently existing jail.
 1649                  */
 1650                 psrv->ps_destroy(psrv, pr);
 1651                 /*
 1652                  * If this is the last slot, free the memory allocated for it.
 1653                  */
 1654                 if (last) {
 1655                         if (prison_service_slots == 0)
 1656                                 slots = NULL;
 1657                         else {
 1658                                 slots = malloc(sizeof(*slots) * prison_service_slots,
 1659                                     M_PRISON, M_WAITOK);
 1660                         }
 1661                         mtx_lock(&pr->pr_mtx);
 1662                         oldslots = pr->pr_slots;
 1663                         /*
 1664                          * We require setting slot to NULL after freeing it,
 1665                          * this way we can check for memory leaks here.
 1666                          */
 1667                         KASSERT(oldslots[psrv->ps_slotno] == NULL,
 1668                             ("Slot %d (service %s, jailid=%d) still contains data?",
 1669                              psrv->ps_slotno, psrv->ps_name, pr->pr_id));
 1670                         if (psrv->ps_slotno > 0) {
 1671                                 bcopy(oldslots, slots,
 1672                                     sizeof(*slots) * prison_service_slots);
 1673                         }
 1674                         pr->pr_slots = slots;
 1675                         mtx_unlock(&pr->pr_mtx);
 1676                         KASSERT(oldslots != NULL, ("oldslots == NULL"));
 1677                         free(oldslots, M_PRISON);
 1678                 }
 1679         }
 1680         sx_sunlock(&allprison_lock);
 1681         free(psrv, M_PRISON);
 1682 }
 1683 
 1684 /*
 1685  * Function sets data for the given jail in slot assigned for the given
 1686  * jail service.
 1687  */
 1688 void
 1689 prison_service_data_set(struct prison_service *psrv, struct prison *pr,
 1690     void *data)
 1691 {
 1692 
 1693         mtx_assert(&pr->pr_mtx, MA_OWNED);
 1694         pr->pr_slots[psrv->ps_slotno] = data;
 1695 }
 1696 
 1697 /*
 1698  * Function clears slots assigned for the given jail service in the given
 1699  * prison structure and returns current slot data.
 1700  */
 1701 void *
 1702 prison_service_data_del(struct prison_service *psrv, struct prison *pr)
 1703 {
 1704         void *data;
 1705 
 1706         mtx_assert(&pr->pr_mtx, MA_OWNED);
 1707         data = pr->pr_slots[psrv->ps_slotno];
 1708         pr->pr_slots[psrv->ps_slotno] = NULL;
 1709         return (data);
 1710 }
 1711 
 1712 /*
 1713  * Function returns current data from the slot assigned to the given jail
 1714  * service for the given jail.
 1715  */
 1716 void *
 1717 prison_service_data_get(struct prison_service *psrv, struct prison *pr)
 1718 {
 1719 
 1720         mtx_assert(&pr->pr_mtx, MA_OWNED);
 1721         return (pr->pr_slots[psrv->ps_slotno]);
 1722 }
 1723 
 1724 static int
 1725 sysctl_jail_list(SYSCTL_HANDLER_ARGS)
 1726 {
 1727         struct xprison *xp, *sxp;
 1728         struct prison *pr;
 1729         char *p;
 1730         size_t len;
 1731         int count, error;
 1732 
 1733         if (jailed(req->td->td_ucred))
 1734                 return (0);
 1735 
 1736         sx_slock(&allprison_lock);
 1737         if ((count = prisoncount) == 0) {
 1738                 sx_sunlock(&allprison_lock);
 1739                 return (0);
 1740         }
 1741 
 1742         len = sizeof(*xp) * count;
 1743         LIST_FOREACH(pr, &allprison, pr_list) {
 1744 #ifdef INET
 1745                 len += pr->pr_ip4s * sizeof(struct in_addr);
 1746 #endif
 1747 #ifdef INET6
 1748                 len += pr->pr_ip6s * sizeof(struct in6_addr);
 1749 #endif
 1750         }
 1751 
 1752         sxp = xp = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
 1753 
 1754         LIST_FOREACH(pr, &allprison, pr_list) {
 1755                 xp->pr_version = XPRISON_VERSION;
 1756                 xp->pr_id = pr->pr_id;
 1757                 xp->pr_state = pr->pr_state;
 1758                 xp->pr_cpusetid = pr->pr_cpuset->cs_id;
 1759                 strlcpy(xp->pr_path, pr->pr_path, sizeof(xp->pr_path));
 1760                 mtx_lock(&pr->pr_mtx);
 1761                 strlcpy(xp->pr_host, pr->pr_host, sizeof(xp->pr_host));
 1762                 strlcpy(xp->pr_name, pr->pr_name, sizeof(xp->pr_name));
 1763                 mtx_unlock(&pr->pr_mtx);
 1764 #ifdef INET
 1765                 xp->pr_ip4s = pr->pr_ip4s;
 1766 #endif
 1767 #ifdef INET6
 1768                 xp->pr_ip6s = pr->pr_ip6s;
 1769 #endif
 1770                 p = (char *)(xp + 1);
 1771 #ifdef INET
 1772                 if (pr->pr_ip4s > 0) {
 1773                         bcopy(pr->pr_ip4, (struct in_addr *)p,
 1774                             pr->pr_ip4s * sizeof(struct in_addr));
 1775                         p += (pr->pr_ip4s * sizeof(struct in_addr));
 1776                 }
 1777 #endif
 1778 #ifdef INET6
 1779                 if (pr->pr_ip6s > 0) {
 1780                         bcopy(pr->pr_ip6, (struct in6_addr *)p,
 1781                             pr->pr_ip6s * sizeof(struct in6_addr));
 1782                         p += (pr->pr_ip6s * sizeof(struct in6_addr));
 1783                 }
 1784 #endif
 1785                 xp = (struct xprison *)p;
 1786         }
 1787         sx_sunlock(&allprison_lock);
 1788 
 1789         error = SYSCTL_OUT(req, sxp, len);
 1790         free(sxp, M_TEMP);
 1791         return (error);
 1792 }
 1793 
/*
 * Attach sysctl_jail_list() as the read-only, MPSAFE, struct-typed node
 * "list" below the _security_jail parent.
 */
SYSCTL_OID(_security_jail, OID_AUTO, list,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
    sysctl_jail_list, "S", "List of active jails");
 1797 
 1798 static int
 1799 sysctl_jail_jailed(SYSCTL_HANDLER_ARGS)
 1800 {
 1801         int error, injail;
 1802 
 1803         injail = jailed(req->td->td_ucred);
 1804         error = SYSCTL_OUT(req, &injail, sizeof(injail));
 1805 
 1806         return (error);
 1807 }
/*
 * Attach sysctl_jail_jailed() as the read-only, MPSAFE, int-typed node
 * "jailed" below the _security_jail parent.
 */
SYSCTL_PROC(_security_jail, OID_AUTO, jailed,
    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
    sysctl_jail_jailed, "I", "Process in jail?");
 1811 
 1812 #ifdef DDB
 1813 DB_SHOW_COMMAND(jails, db_show_jails)
 1814 {
 1815         struct prison *pr;
 1816 #ifdef INET
 1817         struct in_addr ia;
 1818 #endif
 1819 #ifdef INET6
 1820         char ip6buf[INET6_ADDRSTRLEN];
 1821 #endif
 1822         const char *state;
 1823 #if defined(INET) || defined(INET6)
 1824         int i;
 1825 #endif
 1826 
 1827         db_printf(
 1828             "   JID  pr_ref  pr_nprocs  pr_ip4s  pr_ip6s\n");
 1829         db_printf(
 1830             "        Hostname                      Path\n");
 1831         db_printf(
 1832             "        Name                          State\n");
 1833         db_printf(
 1834             "        Cpusetid\n");
 1835         db_printf(
 1836             "        IP Address(es)\n");
 1837         LIST_FOREACH(pr, &allprison, pr_list) {
 1838                 db_printf("%6d  %6d  %9d  %7d  %7d\n",
 1839                     pr->pr_id, pr->pr_ref, pr->pr_nprocs,
 1840                     pr->pr_ip4s, pr->pr_ip6s);
 1841                 db_printf("%6s  %-29.29s %.74s\n",
 1842                     "", pr->pr_host, pr->pr_path);
 1843                 if (pr->pr_state < 0 || pr->pr_state >= (int)((sizeof(
 1844                     prison_states) / sizeof(struct prison_state))))
 1845                         state = "(bogus)";
 1846                 else
 1847                         state = prison_states[pr->pr_state].state_name;
 1848                 db_printf("%6s  %-29.29s %.74s\n",
 1849                     "", (pr->pr_name[0] != '\0') ? pr->pr_name : "", state);
 1850                 db_printf("%6s  %-6d\n",
 1851                     "", pr->pr_cpuset->cs_id);
 1852 #ifdef INET
 1853                 for (i=0; i < pr->pr_ip4s; i++) {
 1854                         ia.s_addr = pr->pr_ip4[i].s_addr;
 1855                         db_printf("%6s  %s\n", "", inet_ntoa(ia));
 1856                 }
 1857 #endif
 1858 #ifdef INET6
 1859                 for (i=0; i < pr->pr_ip6s; i++)
 1860                         db_printf("%6s  %s\n",
 1861                             "", ip6_sprintf(ip6buf, &pr->pr_ip6[i]));
 1862 #endif /* INET6 */
 1863                 if (db_pager_quit)
 1864                         break;
 1865         }
 1866 }
 1867 #endif /* DDB */

Cache object: 4efe60db20413aa62fc513b42895ab0a


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.