The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_jail.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * ----------------------------------------------------------------------------
    3  * "THE BEER-WARE LICENSE" (Revision 42):
    4  * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
    5  * can do whatever you want with this stuff. If we meet some day, and you think
    6  * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
    7  * ----------------------------------------------------------------------------
    8  */
    9 
   10 #include <sys/cdefs.h>
   11 __FBSDID("$FreeBSD$");
   12 
   13 #include "opt_mac.h"
   14 
   15 #include <sys/param.h>
   16 #include <sys/types.h>
   17 #include <sys/kernel.h>
   18 #include <sys/systm.h>
   19 #include <sys/errno.h>
   20 #include <sys/sysproto.h>
   21 #include <sys/malloc.h>
   22 #include <sys/priv.h>
   23 #include <sys/proc.h>
   24 #include <sys/taskqueue.h>
   25 #include <sys/jail.h>
   26 #include <sys/lock.h>
   27 #include <sys/mutex.h>
   28 #include <sys/sx.h>
   29 #include <sys/namei.h>
   30 #include <sys/mount.h>
   31 #include <sys/queue.h>
   32 #include <sys/socket.h>
   33 #include <sys/syscallsubr.h>
   34 #include <sys/sysctl.h>
   35 #include <sys/vnode.h>
   36 #include <net/if.h>
   37 #include <netinet/in.h>
   38 
   39 #include <security/mac/mac_framework.h>
   40 
   41 MALLOC_DEFINE(M_PRISON, "prison", "Prison structures");
   42 
   43 SYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0,
   44     "Jail rules");
   45 
   46 int     jail_set_hostname_allowed = 1;
   47 SYSCTL_INT(_security_jail, OID_AUTO, set_hostname_allowed, CTLFLAG_RW,
   48     &jail_set_hostname_allowed, 0,
   49     "Processes in jail can set their hostnames");
   50 
   51 int     jail_socket_unixiproute_only = 1;
   52 SYSCTL_INT(_security_jail, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW,
   53     &jail_socket_unixiproute_only, 0,
   54     "Processes in jail are limited to creating UNIX/IPv4/route sockets only");
   55 
   56 int     jail_sysvipc_allowed = 0;
   57 SYSCTL_INT(_security_jail, OID_AUTO, sysvipc_allowed, CTLFLAG_RW,
   58     &jail_sysvipc_allowed, 0,
   59     "Processes in jail can use System V IPC primitives");
   60 
   61 static int jail_enforce_statfs = 2;
   62 SYSCTL_INT(_security_jail, OID_AUTO, enforce_statfs, CTLFLAG_RW,
   63     &jail_enforce_statfs, 0,
   64     "Processes in jail cannot see all mounted file systems");
   65 
   66 int     jail_allow_raw_sockets = 0;
   67 SYSCTL_INT(_security_jail, OID_AUTO, allow_raw_sockets, CTLFLAG_RW,
   68     &jail_allow_raw_sockets, 0,
   69     "Prison root can create raw sockets");
   70 
   71 int     jail_chflags_allowed = 0;
   72 SYSCTL_INT(_security_jail, OID_AUTO, chflags_allowed, CTLFLAG_RW,
   73     &jail_chflags_allowed, 0,
   74     "Processes in jail can alter system file flags");
   75 
   76 int     jail_mount_allowed = 0;
   77 SYSCTL_INT(_security_jail, OID_AUTO, mount_allowed, CTLFLAG_RW,
   78     &jail_mount_allowed, 0,
   79     "Processes in jail can mount/unmount jail-friendly file systems");
   80 
   81 /* allprison, lastprid, and prisoncount are protected by allprison_lock. */
   82 struct  prisonlist allprison;
   83 struct  sx allprison_lock;
   84 int     lastprid = 0;
   85 int     prisoncount = 0;
   86 
   87 /*
   88  * List of jail services. Protected by allprison_lock.
   89  */
   90 TAILQ_HEAD(prison_services_head, prison_service);
   91 static struct prison_services_head prison_services =
   92     TAILQ_HEAD_INITIALIZER(prison_services);
   93 static int prison_service_slots = 0;
   94 
   95 struct prison_service {
   96         prison_create_t ps_create;
   97         prison_destroy_t ps_destroy;
   98         int             ps_slotno;
   99         TAILQ_ENTRY(prison_service) ps_next;
  100         char    ps_name[0];
  101 };
  102 
  103 static void              init_prison(void *);
  104 static void              prison_complete(void *context, int pending);
  105 static int               sysctl_jail_list(SYSCTL_HANDLER_ARGS);
  106 
  107 static void
  108 init_prison(void *data __unused)
  109 {
  110 
  111         sx_init(&allprison_lock, "allprison");
  112         LIST_INIT(&allprison);
  113 }
  114 
  115 SYSINIT(prison, SI_SUB_INTRINSIC, SI_ORDER_ANY, init_prison, NULL);
  116 
  117 /*
  118  * struct jail_args {
  119  *      struct jail *jail;
  120  * };
  121  */
  122 int
  123 jail(struct thread *td, struct jail_args *uap)
  124 {
  125         struct nameidata nd;
  126         struct prison *pr, *tpr;
  127         struct prison_service *psrv;
  128         struct jail j;
  129         struct jail_attach_args jaa;
  130         int vfslocked, error, tryprid;
  131 
  132         error = copyin(uap->jail, &j, sizeof(j));
  133         if (error)
  134                 return (error);
  135         if (j.version != 0)
  136                 return (EINVAL);
  137 
  138         MALLOC(pr, struct prison *, sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
  139         mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF);
  140         pr->pr_ref = 1;
  141         error = copyinstr(j.path, &pr->pr_path, sizeof(pr->pr_path), 0);
  142         if (error)
  143                 goto e_killmtx;
  144         NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF, UIO_SYSSPACE,
  145             pr->pr_path, td);
  146         error = namei(&nd);
  147         if (error)
  148                 goto e_killmtx;
  149         vfslocked = NDHASGIANT(&nd);
  150         pr->pr_root = nd.ni_vp;
  151         VOP_UNLOCK(nd.ni_vp, 0, td);
  152         NDFREE(&nd, NDF_ONLY_PNBUF);
  153         VFS_UNLOCK_GIANT(vfslocked);
  154         error = copyinstr(j.hostname, &pr->pr_host, sizeof(pr->pr_host), 0);
  155         if (error)
  156                 goto e_dropvnref;
  157         pr->pr_ip = j.ip_number;
  158         pr->pr_linux = NULL;
  159         pr->pr_securelevel = securelevel;
  160         if (prison_service_slots == 0)
  161                 pr->pr_slots = NULL;
  162         else {
  163                 pr->pr_slots = malloc(sizeof(*pr->pr_slots) * prison_service_slots,
  164                     M_PRISON, M_ZERO | M_WAITOK);
  165         }
  166 
  167         /* Determine next pr_id and add prison to allprison list. */
  168         sx_xlock(&allprison_lock);
  169         tryprid = lastprid + 1;
  170         if (tryprid == JAIL_MAX)
  171                 tryprid = 1;
  172 next:
  173         LIST_FOREACH(tpr, &allprison, pr_list) {
  174                 if (tpr->pr_id == tryprid) {
  175                         tryprid++;
  176                         if (tryprid == JAIL_MAX) {
  177                                 sx_xunlock(&allprison_lock);
  178                                 error = EAGAIN;
  179                                 goto e_dropvnref;
  180                         }
  181                         goto next;
  182                 }
  183         }
  184         pr->pr_id = jaa.jid = lastprid = tryprid;
  185         LIST_INSERT_HEAD(&allprison, pr, pr_list);
  186         prisoncount++;
  187         sx_downgrade(&allprison_lock);
  188         TAILQ_FOREACH(psrv, &prison_services, ps_next) {
  189                 psrv->ps_create(psrv, pr);
  190         }
  191         sx_sunlock(&allprison_lock);
  192 
  193         error = jail_attach(td, &jaa);
  194         if (error)
  195                 goto e_dropprref;
  196         mtx_lock(&pr->pr_mtx);
  197         pr->pr_ref--;
  198         mtx_unlock(&pr->pr_mtx);
  199         td->td_retval[0] = jaa.jid;
  200         return (0);
  201 e_dropprref:
  202         sx_xlock(&allprison_lock);
  203         LIST_REMOVE(pr, pr_list);
  204         prisoncount--;
  205         sx_downgrade(&allprison_lock);
  206         TAILQ_FOREACH(psrv, &prison_services, ps_next) {
  207                 psrv->ps_destroy(psrv, pr);
  208         }
  209         sx_sunlock(&allprison_lock);
  210 e_dropvnref:
  211         if (pr->pr_slots != NULL)
  212                 FREE(pr->pr_slots, M_PRISON);
  213         vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
  214         vrele(pr->pr_root);
  215         VFS_UNLOCK_GIANT(vfslocked);
  216 e_killmtx:
  217         mtx_destroy(&pr->pr_mtx);
  218         FREE(pr, M_PRISON);
  219         return (error);
  220 }
  221 
  222 /*
  223  * struct jail_attach_args {
  224  *      int jid;
  225  * };
  226  */
  227 int
  228 jail_attach(struct thread *td, struct jail_attach_args *uap)
  229 {
  230         struct proc *p;
  231         struct ucred *newcred, *oldcred;
  232         struct prison *pr;
  233         int vfslocked, error;
  234 
  235         /*
  236          * XXX: Note that there is a slight race here if two threads
  237          * in the same privileged process attempt to attach to two
  238          * different jails at the same time.  It is important for
  239          * user processes not to do this, or they might end up with
  240          * a process root from one prison, but attached to the jail
  241          * of another.
  242          */
  243         error = priv_check(td, PRIV_JAIL_ATTACH);
  244         if (error)
  245                 return (error);
  246 
  247         p = td->td_proc;
  248         sx_slock(&allprison_lock);
  249         pr = prison_find(uap->jid);
  250         if (pr == NULL) {
  251                 sx_sunlock(&allprison_lock);
  252                 return (EINVAL);
  253         }
  254         pr->pr_ref++;
  255         mtx_unlock(&pr->pr_mtx);
  256         sx_sunlock(&allprison_lock);
  257 
  258         vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
  259         vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY, td);
  260         if ((error = change_dir(pr->pr_root, td)) != 0)
  261                 goto e_unlock;
  262 #ifdef MAC
  263         if ((error = mac_check_vnode_chroot(td->td_ucred, pr->pr_root)))
  264                 goto e_unlock;
  265 #endif
  266         VOP_UNLOCK(pr->pr_root, 0, td);
  267         change_root(pr->pr_root, td);
  268         VFS_UNLOCK_GIANT(vfslocked);
  269 
  270         newcred = crget();
  271         PROC_LOCK(p);
  272         oldcred = p->p_ucred;
  273         setsugid(p);
  274         crcopy(newcred, oldcred);
  275         newcred->cr_prison = pr;
  276         p->p_ucred = newcred;
  277         PROC_UNLOCK(p);
  278         crfree(oldcred);
  279         return (0);
  280 e_unlock:
  281         VOP_UNLOCK(pr->pr_root, 0, td);
  282         VFS_UNLOCK_GIANT(vfslocked);
  283         mtx_lock(&pr->pr_mtx);
  284         pr->pr_ref--;
  285         mtx_unlock(&pr->pr_mtx);
  286         return (error);
  287 }
  288 
  289 /*
  290  * Returns a locked prison instance, or NULL on failure.
  291  */
  292 struct prison *
  293 prison_find(int prid)
  294 {
  295         struct prison *pr;
  296 
  297         sx_assert(&allprison_lock, SX_LOCKED);
  298         LIST_FOREACH(pr, &allprison, pr_list) {
  299                 if (pr->pr_id == prid) {
  300                         mtx_lock(&pr->pr_mtx);
  301                         if (pr->pr_ref == 0) {
  302                                 mtx_unlock(&pr->pr_mtx);
  303                                 break;
  304                         }
  305                         return (pr);
  306                 }
  307         }
  308         return (NULL);
  309 }
  310 
  311 void
  312 prison_free(struct prison *pr)
  313 {
  314 
  315         mtx_lock(&pr->pr_mtx);
  316         pr->pr_ref--;
  317         if (pr->pr_ref == 0) {
  318                 mtx_unlock(&pr->pr_mtx);
  319                 TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
  320                 taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
  321                 return;
  322         }
  323         mtx_unlock(&pr->pr_mtx);
  324 }
  325 
  326 static void
  327 prison_complete(void *context, int pending)
  328 {
  329         struct prison_service *psrv;
  330         struct prison *pr;
  331         int vfslocked;
  332 
  333         pr = (struct prison *)context;
  334 
  335         sx_xlock(&allprison_lock);
  336         LIST_REMOVE(pr, pr_list);
  337         prisoncount--;
  338         sx_downgrade(&allprison_lock);
  339         TAILQ_FOREACH(psrv, &prison_services, ps_next) {
  340                 psrv->ps_destroy(psrv, pr);
  341         }
  342         sx_sunlock(&allprison_lock);
  343         if (pr->pr_slots != NULL)
  344                 FREE(pr->pr_slots, M_PRISON);
  345 
  346         vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
  347         vrele(pr->pr_root);
  348         VFS_UNLOCK_GIANT(vfslocked);
  349 
  350         mtx_destroy(&pr->pr_mtx);
  351         if (pr->pr_linux != NULL)
  352                 FREE(pr->pr_linux, M_PRISON);
  353         FREE(pr, M_PRISON);
  354 }
  355 
  356 void
  357 prison_hold(struct prison *pr)
  358 {
  359 
  360         mtx_lock(&pr->pr_mtx);
  361         KASSERT(pr->pr_ref > 0,
  362             ("Trying to hold dead prison (id=%d).", pr->pr_id));
  363         pr->pr_ref++;
  364         mtx_unlock(&pr->pr_mtx);
  365 }
  366 
  367 u_int32_t
  368 prison_getip(struct ucred *cred)
  369 {
  370 
  371         return (cred->cr_prison->pr_ip);
  372 }
  373 
  374 int
  375 prison_ip(struct ucred *cred, int flag, u_int32_t *ip)
  376 {
  377         u_int32_t tmp;
  378 
  379         if (!jailed(cred))
  380                 return (0);
  381         if (flag)
  382                 tmp = *ip;
  383         else
  384                 tmp = ntohl(*ip);
  385         if (tmp == INADDR_ANY) {
  386                 if (flag)
  387                         *ip = cred->cr_prison->pr_ip;
  388                 else
  389                         *ip = htonl(cred->cr_prison->pr_ip);
  390                 return (0);
  391         }
  392         if (tmp == INADDR_LOOPBACK) {
  393                 if (flag)
  394                         *ip = cred->cr_prison->pr_ip;
  395                 else
  396                         *ip = htonl(cred->cr_prison->pr_ip);
  397                 return (0);
  398         }
  399         if (cred->cr_prison->pr_ip != tmp)
  400                 return (1);
  401         return (0);
  402 }
  403 
  404 void
  405 prison_remote_ip(struct ucred *cred, int flag, u_int32_t *ip)
  406 {
  407         u_int32_t tmp;
  408 
  409         if (!jailed(cred))
  410                 return;
  411         if (flag)
  412                 tmp = *ip;
  413         else
  414                 tmp = ntohl(*ip);
  415         if (tmp == INADDR_LOOPBACK) {
  416                 if (flag)
  417                         *ip = cred->cr_prison->pr_ip;
  418                 else
  419                         *ip = htonl(cred->cr_prison->pr_ip);
  420                 return;
  421         }
  422         return;
  423 }
  424 
  425 int
  426 prison_if(struct ucred *cred, struct sockaddr *sa)
  427 {
  428         struct sockaddr_in *sai;
  429         int ok;
  430 
  431         sai = (struct sockaddr_in *)sa;
  432         if ((sai->sin_family != AF_INET) && jail_socket_unixiproute_only)
  433                 ok = 1;
  434         else if (sai->sin_family != AF_INET)
  435                 ok = 0;
  436         else if (cred->cr_prison->pr_ip != ntohl(sai->sin_addr.s_addr))
  437                 ok = 1;
  438         else
  439                 ok = 0;
  440         return (ok);
  441 }
  442 
  443 /*
  444  * Return 0 if jails permit p1 to frob p2, otherwise ESRCH.
  445  */
  446 int
  447 prison_check(struct ucred *cred1, struct ucred *cred2)
  448 {
  449 
  450         if (jailed(cred1)) {
  451                 if (!jailed(cred2))
  452                         return (ESRCH);
  453                 if (cred2->cr_prison != cred1->cr_prison)
  454                         return (ESRCH);
  455         }
  456 
  457         return (0);
  458 }
  459 
  460 /*
  461  * Return 1 if the passed credential is in a jail, otherwise 0.
  462  */
  463 int
  464 jailed(struct ucred *cred)
  465 {
  466 
  467         return (cred->cr_prison != NULL);
  468 }
  469 
  470 /*
  471  * Return the correct hostname for the passed credential.
  472  */
  473 void
  474 getcredhostname(struct ucred *cred, char *buf, size_t size)
  475 {
  476 
  477         if (jailed(cred)) {
  478                 mtx_lock(&cred->cr_prison->pr_mtx);
  479                 strlcpy(buf, cred->cr_prison->pr_host, size);
  480                 mtx_unlock(&cred->cr_prison->pr_mtx);
  481         } else
  482                 strlcpy(buf, hostname, size);
  483 }
  484 
  485 /*
  486  * Determine whether the subject represented by cred can "see"
  487  * status of a mount point.
  488  * Returns: 0 for permitted, ENOENT otherwise.
  489  * XXX: This function should be called cr_canseemount() and should be
  490  *      placed in kern_prot.c.
  491  */
  492 int
  493 prison_canseemount(struct ucred *cred, struct mount *mp)
  494 {
  495         struct prison *pr;
  496         struct statfs *sp;
  497         size_t len;
  498 
  499         if (!jailed(cred) || jail_enforce_statfs == 0)
  500                 return (0);
  501         pr = cred->cr_prison;
  502         if (pr->pr_root->v_mount == mp)
  503                 return (0);
  504         if (jail_enforce_statfs == 2)
  505                 return (ENOENT);
  506         /*
  507          * If jail's chroot directory is set to "/" we should be able to see
  508          * all mount-points from inside a jail.
  509          * This is ugly check, but this is the only situation when jail's
  510          * directory ends with '/'.
  511          */
  512         if (strcmp(pr->pr_path, "/") == 0)
  513                 return (0);
  514         len = strlen(pr->pr_path);
  515         sp = &mp->mnt_stat;
  516         if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0)
  517                 return (ENOENT);
  518         /*
  519          * Be sure that we don't have situation where jail's root directory
  520          * is "/some/path" and mount point is "/some/pathpath".
  521          */
  522         if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/')
  523                 return (ENOENT);
  524         return (0);
  525 }
  526 
  527 void
  528 prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp)
  529 {
  530         char jpath[MAXPATHLEN];
  531         struct prison *pr;
  532         size_t len;
  533 
  534         if (!jailed(cred) || jail_enforce_statfs == 0)
  535                 return;
  536         pr = cred->cr_prison;
  537         if (prison_canseemount(cred, mp) != 0) {
  538                 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
  539                 strlcpy(sp->f_mntonname, "[restricted]",
  540                     sizeof(sp->f_mntonname));
  541                 return;
  542         }
  543         if (pr->pr_root->v_mount == mp) {
  544                 /*
  545                  * Clear current buffer data, so we are sure nothing from
  546                  * the valid path left there.
  547                  */
  548                 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
  549                 *sp->f_mntonname = '/';
  550                 return;
  551         }
  552         /*
  553          * If jail's chroot directory is set to "/" we should be able to see
  554          * all mount-points from inside a jail.
  555          */
  556         if (strcmp(pr->pr_path, "/") == 0)
  557                 return;
  558         len = strlen(pr->pr_path);
  559         strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath));
  560         /*
  561          * Clear current buffer data, so we are sure nothing from
  562          * the valid path left there.
  563          */
  564         bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
  565         if (*jpath == '\0') {
  566                 /* Should never happen. */
  567                 *sp->f_mntonname = '/';
  568         } else {
  569                 strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname));
  570         }
  571 }
  572 
  573 /*
  574  * Check with permission for a specific privilege is granted within jail.  We
  575  * have a specific list of accepted privileges; the rest are denied.
  576  */
  577 int
  578 prison_priv_check(struct ucred *cred, int priv)
  579 {
  580 
  581         if (!jailed(cred))
  582                 return (0);
  583 
  584         switch (priv) {
  585 
  586                 /*
  587                  * Allow ktrace privileges for root in jail.
  588                  */
  589         case PRIV_KTRACE:
  590 
  591 #if 0
  592                 /*
  593                  * Allow jailed processes to configure audit identity and
  594                  * submit audit records (login, etc).  In the future we may
  595                  * want to further refine the relationship between audit and
  596                  * jail.
  597                  */
  598         case PRIV_AUDIT_GETAUDIT:
  599         case PRIV_AUDIT_SETAUDIT:
  600         case PRIV_AUDIT_SUBMIT:
  601 #endif
  602 
  603                 /*
  604                  * Allow jailed processes to manipulate process UNIX
  605                  * credentials in any way they see fit.
  606                  */
  607         case PRIV_CRED_SETUID:
  608         case PRIV_CRED_SETEUID:
  609         case PRIV_CRED_SETGID:
  610         case PRIV_CRED_SETEGID:
  611         case PRIV_CRED_SETGROUPS:
  612         case PRIV_CRED_SETREUID:
  613         case PRIV_CRED_SETREGID:
  614         case PRIV_CRED_SETRESUID:
  615         case PRIV_CRED_SETRESGID:
  616 
  617                 /*
  618                  * Jail implements visibility constraints already, so allow
  619                  * jailed root to override uid/gid-based constraints.
  620                  */
  621         case PRIV_SEEOTHERGIDS:
  622         case PRIV_SEEOTHERUIDS:
  623 
  624                 /*
  625                  * Jail implements inter-process debugging limits already, so
  626                  * allow jailed root various debugging privileges.
  627                  */
  628         case PRIV_DEBUG_DIFFCRED:
  629         case PRIV_DEBUG_SUGID:
  630         case PRIV_DEBUG_UNPRIV:
  631 
  632                 /*
  633                  * Allow jail to set various resource limits and login
  634                  * properties, and for now, exceed process resource limits.
  635                  */
  636         case PRIV_PROC_LIMIT:
  637         case PRIV_PROC_SETLOGIN:
  638         case PRIV_PROC_SETRLIMIT:
  639 
  640                 /*
  641                  * System V and POSIX IPC privileges are granted in jail.
  642                  */
  643         case PRIV_IPC_READ:
  644         case PRIV_IPC_WRITE:
  645         case PRIV_IPC_ADMIN:
  646         case PRIV_IPC_MSGSIZE:
  647         case PRIV_MQ_ADMIN:
  648 
  649                 /*
  650                  * Jail implements its own inter-process limits, so allow
  651                  * root processes in jail to change scheduling on other
  652                  * processes in the same jail.  Likewise for signalling.
  653                  */
  654         case PRIV_SCHED_DIFFCRED:
  655         case PRIV_SIGNAL_DIFFCRED:
  656         case PRIV_SIGNAL_SUGID:
  657 
  658                 /*
  659                  * Allow jailed processes to write to sysctls marked as jail
  660                  * writable.
  661                  */
  662         case PRIV_SYSCTL_WRITEJAIL:
  663 
  664                 /*
  665                  * Allow root in jail to manage a variety of quota
  666                  * properties.  These should likely be conditional on a
  667                  * configuration option.
  668                  */
  669         case PRIV_VFS_GETQUOTA:
  670         case PRIV_VFS_SETQUOTA:
  671 
  672                 /*
  673                  * Since Jail relies on chroot() to implement file system
  674                  * protections, grant many VFS privileges to root in jail.
  675                  * Be careful to exclude mount-related and NFS-related
  676                  * privileges.
  677                  */
  678         case PRIV_VFS_READ:
  679         case PRIV_VFS_WRITE:
  680         case PRIV_VFS_ADMIN:
  681         case PRIV_VFS_EXEC:
  682         case PRIV_VFS_LOOKUP:
  683         case PRIV_VFS_BLOCKRESERVE:     /* XXXRW: Slightly surprising. */
  684         case PRIV_VFS_CHFLAGS_DEV:
  685         case PRIV_VFS_CHOWN:
  686         case PRIV_VFS_CHROOT:
  687         case PRIV_VFS_RETAINSUGID:
  688         case PRIV_VFS_FCHROOT:
  689         case PRIV_VFS_LINK:
  690         case PRIV_VFS_SETGID:
  691         case PRIV_VFS_STICKYFILE:
  692                 return (0);
  693 
  694                 /*
  695                  * Depending on the global setting, allow privilege of
  696                  * setting system flags.
  697                  */
  698         case PRIV_VFS_SYSFLAGS:
  699                 if (jail_chflags_allowed)
  700                         return (0);
  701                 else
  702                         return (EPERM);
  703 
  704                 /*
  705                  * Depending on the global setting, allow privilege of
  706                  * mounting/unmounting file systems.
  707                  */
  708         case PRIV_VFS_MOUNT:
  709         case PRIV_VFS_UNMOUNT:
  710         case PRIV_VFS_MOUNT_NONUSER:
  711         case PRIV_VFS_MOUNT_OWNER:
  712                 if (jail_mount_allowed)
  713                         return (0);
  714                 else
  715                         return (EPERM);
  716 
  717                 /*
  718                  * Allow jailed root to bind reserved ports and reuse in-use
  719                  * ports.
  720                  */
  721         case PRIV_NETINET_RESERVEDPORT:
  722         case PRIV_NETINET_REUSEPORT:
  723                 return (0);
  724 
  725                 /*
  726                  * Allow jailed root to set certian IPv4/6 (option) headers.
  727                  */
  728         case PRIV_NETINET_SETHDROPTS:
  729                 return (0);
  730 
  731                 /*
  732                  * Conditionally allow creating raw sockets in jail.
  733                  */
  734         case PRIV_NETINET_RAW:
  735                 if (jail_allow_raw_sockets)
  736                         return (0);
  737                 else
  738                         return (EPERM);
  739 
  740                 /*
  741                  * Since jail implements its own visibility limits on netstat
  742                  * sysctls, allow getcred.  This allows identd to work in
  743                  * jail.
  744                  */
  745         case PRIV_NETINET_GETCRED:
  746                 return (0);
  747 
  748         default:
  749                 /*
  750                  * In all remaining cases, deny the privilege request.  This
  751                  * includes almost all network privileges, many system
  752                  * configuration privileges.
  753                  */
  754                 return (EPERM);
  755         }
  756 }
  757 
  758 /*
  759  * Register jail service. Provides 'create' and 'destroy' methods.
  760  * 'create' method will be called for every existing jail and all
  761  * jails in the future as they beeing created.
  762  * 'destroy' method will be called for every jail going away and
  763  * for all existing jails at the time of service deregistration.
  764  */
  765 struct prison_service *
  766 prison_service_register(const char *name, prison_create_t create,
  767     prison_destroy_t destroy)
  768 {
  769         struct prison_service *psrv, *psrv2;
  770         struct prison *pr;
  771         int reallocate = 1, slotno = 0;
  772         void **slots, **oldslots;
  773 
  774         psrv = malloc(sizeof(*psrv) + strlen(name) + 1, M_PRISON,
  775             M_WAITOK | M_ZERO);
  776         psrv->ps_create = create;
  777         psrv->ps_destroy = destroy;
  778         strcpy(psrv->ps_name, name);
  779         /*
  780          * Grab the allprison_lock here, so we won't miss any jail
  781          * creation/destruction.
  782          */
  783         sx_xlock(&allprison_lock);
  784 #ifdef INVARIANTS
  785         /*
  786          * Verify if service is not already registered.
  787          */
  788         TAILQ_FOREACH(psrv2, &prison_services, ps_next) {
  789                 KASSERT(strcmp(psrv2->ps_name, name) != 0,
  790                     ("jail service %s already registered", name));
  791         }
  792 #endif
  793         /*
  794          * Find free slot. When there is no existing free slot available,
  795          * allocate one at the end.
  796          */
  797         TAILQ_FOREACH(psrv2, &prison_services, ps_next) {
  798                 if (psrv2->ps_slotno != slotno) {
  799                         KASSERT(slotno < psrv2->ps_slotno,
  800                             ("Invalid slotno (slotno=%d >= ps_slotno=%d",
  801                             slotno, psrv2->ps_slotno));
  802                         /* We found free slot. */
  803                         reallocate = 0;
  804                         break;
  805                 }
  806                 slotno++;
  807         }
  808         psrv->ps_slotno = slotno;
  809         /*
  810          * Keep the list sorted by slot number.
  811          */
  812         if (psrv2 != NULL) {
  813                 KASSERT(reallocate == 0, ("psrv2 != NULL && reallocate != 0"));
  814                 TAILQ_INSERT_BEFORE(psrv2, psrv, ps_next);
  815         } else {
  816                 KASSERT(reallocate == 1, ("psrv2 == NULL && reallocate == 0"));
  817                 TAILQ_INSERT_TAIL(&prison_services, psrv, ps_next);
  818         }
  819         prison_service_slots++;
  820         sx_downgrade(&allprison_lock);
  821         /*
  822          * Allocate memory for new slot if we didn't found empty one.
  823          * Do not use realloc(9), because pr_slots is protected with a mutex,
  824          * so we can't sleep.
  825          */
  826         LIST_FOREACH(pr, &allprison, pr_list) {
  827                 if (reallocate) {
  828                         /* First allocate memory with M_WAITOK. */
  829                         slots = malloc(sizeof(*slots) * prison_service_slots,
  830                             M_PRISON, M_WAITOK);
  831                         /* Now grab the mutex and replace pr_slots. */
  832                         mtx_lock(&pr->pr_mtx);
  833                         oldslots = pr->pr_slots;
  834                         if (psrv->ps_slotno > 0) {
  835                                 bcopy(oldslots, slots,
  836                                     sizeof(*slots) * (prison_service_slots - 1));
  837                         }
  838                         slots[psrv->ps_slotno] = NULL;
  839                         pr->pr_slots = slots;
  840                         mtx_unlock(&pr->pr_mtx);
  841                         if (oldslots != NULL)
  842                                 free(oldslots, M_PRISON);
  843                 }
  844                 /*
  845                  * Call 'create' method for each existing jail.
  846                  */
  847                 psrv->ps_create(psrv, pr);
  848         }
  849         sx_sunlock(&allprison_lock);
  850 
  851         return (psrv);
  852 }
  853 
  854 void
  855 prison_service_deregister(struct prison_service *psrv)
  856 {
  857         struct prison *pr;
  858         void **slots, **oldslots;
  859         int last = 0;
  860 
  861         sx_xlock(&allprison_lock);
  862         if (TAILQ_LAST(&prison_services, prison_services_head) == psrv)
  863                 last = 1;
  864         TAILQ_REMOVE(&prison_services, psrv, ps_next);
  865         prison_service_slots--;
  866         sx_downgrade(&allprison_lock);
  867         LIST_FOREACH(pr, &allprison, pr_list) {
  868                 /*
  869                  * Call 'destroy' method for every currently existing jail.
  870                  */
  871                 psrv->ps_destroy(psrv, pr);
  872                 /*
  873                  * If this is the last slot, free the memory allocated for it.
  874                  */
  875                 if (last) {
  876                         if (prison_service_slots == 0)
  877                                 slots = NULL;
  878                         else {
  879                                 slots = malloc(sizeof(*slots) * prison_service_slots,
  880                                     M_PRISON, M_WAITOK);
  881                         }
  882                         mtx_lock(&pr->pr_mtx);
  883                         oldslots = pr->pr_slots;
  884                         /*
  885                          * We require setting slot to NULL after freeing it,
  886                          * this way we can check for memory leaks here.
  887                          */
  888                         KASSERT(oldslots[psrv->ps_slotno] == NULL,
  889                             ("Slot %d (service %s, jailid=%d) still contains data?",
  890                              psrv->ps_slotno, psrv->ps_name, pr->pr_id));
  891                         if (psrv->ps_slotno > 0) {
  892                                 bcopy(oldslots, slots,
  893                                     sizeof(*slots) * prison_service_slots);
  894                         }
  895                         pr->pr_slots = slots;
  896                         mtx_unlock(&pr->pr_mtx);
  897                         KASSERT(oldslots != NULL, ("oldslots == NULL"));
  898                         free(oldslots, M_PRISON);
  899                 }
  900         }
  901         sx_sunlock(&allprison_lock);
  902         free(psrv, M_PRISON);
  903 }
  904 
  905 /*
  906  * Function sets data for the given jail in slot assigned for the given
  907  * jail service.
  908  */
  909 void
  910 prison_service_data_set(struct prison_service *psrv, struct prison *pr,
  911     void *data)
  912 {
  913 
  914         mtx_assert(&pr->pr_mtx, MA_OWNED);
  915         pr->pr_slots[psrv->ps_slotno] = data;
  916 }
  917 
  918 /*
  919  * Function clears slots assigned for the given jail service in the given
  920  * prison structure and returns current slot data.
  921  */
  922 void *
  923 prison_service_data_del(struct prison_service *psrv, struct prison *pr)
  924 {
  925         void *data;
  926 
  927         mtx_assert(&pr->pr_mtx, MA_OWNED);
  928         data = pr->pr_slots[psrv->ps_slotno];
  929         pr->pr_slots[psrv->ps_slotno] = NULL;
  930         return (data);
  931 }
  932 
  933 /*
  934  * Function returns current data from the slot assigned to the given jail
  935  * service for the given jail.
  936  */
  937 void *
  938 prison_service_data_get(struct prison_service *psrv, struct prison *pr)
  939 {
  940 
  941         mtx_assert(&pr->pr_mtx, MA_OWNED);
  942         return (pr->pr_slots[psrv->ps_slotno]);
  943 }
  944 
  945 static int
  946 sysctl_jail_list(SYSCTL_HANDLER_ARGS)
  947 {
  948         struct xprison *xp, *sxp;
  949         struct prison *pr;
  950         int count, error;
  951 
  952         if (jailed(req->td->td_ucred))
  953                 return (0);
  954 
  955         sx_slock(&allprison_lock);
  956         if ((count = prisoncount) == 0) {
  957                 sx_sunlock(&allprison_lock);
  958                 return (0);
  959         }
  960 
  961         sxp = xp = malloc(sizeof(*xp) * count, M_TEMP, M_WAITOK | M_ZERO);
  962 
  963         LIST_FOREACH(pr, &allprison, pr_list) {
  964                 xp->pr_version = XPRISON_VERSION;
  965                 xp->pr_id = pr->pr_id;
  966                 xp->pr_ip = pr->pr_ip;
  967                 strlcpy(xp->pr_path, pr->pr_path, sizeof(xp->pr_path));
  968                 mtx_lock(&pr->pr_mtx);
  969                 strlcpy(xp->pr_host, pr->pr_host, sizeof(xp->pr_host));
  970                 mtx_unlock(&pr->pr_mtx);
  971                 xp++;
  972         }
  973         sx_sunlock(&allprison_lock);
  974 
  975         error = SYSCTL_OUT(req, sxp, sizeof(*sxp) * count);
  976         free(sxp, M_TEMP);
  977         return (error);
  978 }
  979 
  980 SYSCTL_OID(_security_jail, OID_AUTO, list, CTLTYPE_STRUCT | CTLFLAG_RD,
  981     NULL, 0, sysctl_jail_list, "S", "List of active jails");
  982 
  983 static int
  984 sysctl_jail_jailed(SYSCTL_HANDLER_ARGS)
  985 {
  986         int error, injail;
  987 
  988         injail = jailed(req->td->td_ucred);
  989         error = SYSCTL_OUT(req, &injail, sizeof(injail));
  990 
  991         return (error);
  992 }
  993 SYSCTL_PROC(_security_jail, OID_AUTO, jailed, CTLTYPE_INT | CTLFLAG_RD,
  994     NULL, 0, sysctl_jail_jailed, "I", "Process in jail?");

Cache object: b30a02fd4d27ece1ac3eef14631c3e83


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.