FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_jail.c
1 /*-
2 * Copyright (c) 1999 Poul-Henning Kamp.
3 * Copyright (c) 2008 Bjoern A. Zeeb.
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30
31 #include "opt_ddb.h"
32 #include "opt_inet.h"
33 #include "opt_inet6.h"
34 #include "opt_mac.h"
35
36 #include <sys/param.h>
37 #include <sys/types.h>
38 #include <sys/kernel.h>
39 #include <sys/systm.h>
40 #include <sys/errno.h>
41 #include <sys/sysproto.h>
42 #include <sys/malloc.h>
43 #include <sys/priv.h>
44 #include <sys/proc.h>
45 #include <sys/taskqueue.h>
46 #include <sys/jail.h>
47 #include <sys/lock.h>
48 #include <sys/mutex.h>
49 #include <sys/sx.h>
50 #include <sys/namei.h>
51 #include <sys/mount.h>
52 #include <sys/queue.h>
53 #include <sys/socket.h>
54 #include <sys/syscallsubr.h>
55 #include <sys/sysctl.h>
56 #include <sys/vnode.h>
57 #include <net/if.h>
58 #include <netinet/in.h>
59 #ifdef DDB
60 #include <ddb/ddb.h>
61 #ifdef INET6
62 #include <netinet6/in6_var.h>
63 #endif /* INET6 */
64 #endif /* DDB */
65
66 #include <security/mac/mac_framework.h>
67
68 MALLOC_DEFINE(M_PRISON, "prison", "Prison structures");
69
70 SYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0,
71 "Jail rules");
72
73 int jail_set_hostname_allowed = 1;
74 SYSCTL_INT(_security_jail, OID_AUTO, set_hostname_allowed, CTLFLAG_RW,
75 &jail_set_hostname_allowed, 0,
76 "Processes in jail can set their hostnames");
77
78 int jail_socket_unixiproute_only = 1;
79 SYSCTL_INT(_security_jail, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW,
80 &jail_socket_unixiproute_only, 0,
81 "Processes in jail are limited to creating UNIX/IP/route sockets only");
82
83 int jail_sysvipc_allowed = 0;
84 SYSCTL_INT(_security_jail, OID_AUTO, sysvipc_allowed, CTLFLAG_RW,
85 &jail_sysvipc_allowed, 0,
86 "Processes in jail can use System V IPC primitives");
87
88 static int jail_enforce_statfs = 2;
89 SYSCTL_INT(_security_jail, OID_AUTO, enforce_statfs, CTLFLAG_RW,
90 &jail_enforce_statfs, 0,
91 "Processes in jail cannot see all mounted file systems");
92
93 int jail_allow_raw_sockets = 0;
94 SYSCTL_INT(_security_jail, OID_AUTO, allow_raw_sockets, CTLFLAG_RW,
95 &jail_allow_raw_sockets, 0,
96 "Prison root can create raw sockets");
97
98 int jail_chflags_allowed = 0;
99 SYSCTL_INT(_security_jail, OID_AUTO, chflags_allowed, CTLFLAG_RW,
100 &jail_chflags_allowed, 0,
101 "Processes in jail can alter system file flags");
102
103 int jail_mount_allowed = 0;
104 SYSCTL_INT(_security_jail, OID_AUTO, mount_allowed, CTLFLAG_RW,
105 &jail_mount_allowed, 0,
106 "Processes in jail can mount/unmount jail-friendly file systems");
107
108 int jail_max_af_ips = 255;
109 SYSCTL_INT(_security_jail, OID_AUTO, jail_max_af_ips, CTLFLAG_RW,
110 &jail_max_af_ips, 0,
111 "Number of IP addresses a jail may have at most per address family");
112
113 /* allprison, lastprid, and prisoncount are protected by allprison_lock. */
114 struct prisonlist allprison;
115 struct sx allprison_lock;
116 int lastprid = 0;
117 int prisoncount = 0;
118
119 /*
120 * List of jail services. Protected by allprison_lock.
121 */
122 TAILQ_HEAD(prison_services_head, prison_service);
123 static struct prison_services_head prison_services =
124 TAILQ_HEAD_INITIALIZER(prison_services);
125 static int prison_service_slots = 0;
126
127 struct prison_service {
128 prison_create_t ps_create;
129 prison_destroy_t ps_destroy;
130 int ps_slotno;
131 TAILQ_ENTRY(prison_service) ps_next;
132 char ps_name[0];
133 };
134
/* Forward declarations of file-local helpers. */
static int	sysctl_jail_list(SYSCTL_HANDLER_ARGS);
static void	prison_complete(void *context, int pending);
static void	init_prison(void *);
#ifdef INET
static int	_prison_check_ip4(struct prison *, struct in_addr *);
#endif
#ifdef INET6
static int	_prison_check_ip6(struct prison *, struct in6_addr *);
#endif
144
145 static void
146 init_prison(void *data __unused)
147 {
148
149 sx_init(&allprison_lock, "allprison");
150 LIST_INIT(&allprison);
151 }
152
153 SYSINIT(prison, SI_SUB_INTRINSIC, SI_ORDER_ANY, init_prison, NULL);
154
155 #ifdef INET
/*
 * qsort()-style comparator for two struct in_addr objects.
 *
 * The addresses are stored in network byte order but are compared in host
 * byte order so that the resulting order is the numeric one; the binary
 * search in _prison_check_ip4() relies on exactly this ordering.
 *
 * Returns -1, 0 or 1.
 */
static int
qcmp_v4(const void *ip1, const void *ip2)
{
	const struct in_addr *lhs = ip1;
	const struct in_addr *rhs = ip2;
	in_addr_t lval, rval;

	lval = ntohl(lhs->s_addr);
	rval = ntohl(rhs->s_addr);

	/*
	 * Two comparisons instead of a subtraction: the difference of two
	 * 32 bit unsigned values does not fit into an int.
	 */
	return ((lval > rval) - (lval < rval));
}
180 #endif
181
182 #ifdef INET6
/*
 * qsort()-style comparator for two struct in6_addr objects.
 *
 * The addresses are compared byte by byte, starting with s6_addr[0]; the
 * first differing byte decides.  Returns -1, 0 or 1.
 */
static int
qcmp_v6(const void *ip1, const void *ip2)
{
	const struct in6_addr *lhs = ip1;
	const struct in6_addr *rhs = ip2;
	size_t pos;

	for (pos = 0; pos < sizeof(struct in6_addr); pos++) {
		if (lhs->s6_addr[pos] == rhs->s6_addr[pos])
			continue;
		return (lhs->s6_addr[pos] > rhs->s6_addr[pos] ? 1 : -1);
	}
	return (0);
}
201 #endif
202
203 #if defined(INET) || defined(INET6)
204 static int
205 prison_check_conflicting_ips(struct prison *p)
206 {
207 struct prison *pr;
208 int i;
209
210 sx_assert(&allprison_lock, SX_LOCKED);
211
212 if (p->pr_ip4s == 0 && p->pr_ip6s == 0)
213 return (0);
214
215 LIST_FOREACH(pr, &allprison, pr_list) {
216 /*
217 * Skip 'dying' prisons to avoid problems when
218 * restarting multi-IP jails.
219 */
220 if (pr->pr_state == PRISON_STATE_DYING)
221 continue;
222
223 /*
224 * We permit conflicting IPs if there is no
225 * more than 1 IP on eeach jail.
226 * In case there is one duplicate on a jail with
227 * more than one IP stop checking and return error.
228 */
229 #ifdef INET
230 if ((p->pr_ip4s >= 1 && pr->pr_ip4s > 1) ||
231 (p->pr_ip4s > 1 && pr->pr_ip4s >= 1)) {
232 for (i = 0; i < p->pr_ip4s; i++) {
233 if (_prison_check_ip4(pr, &p->pr_ip4[i]) == 0)
234 return (EINVAL);
235 }
236 }
237 #endif
238 #ifdef INET6
239 if ((p->pr_ip6s >= 1 && pr->pr_ip6s > 1) ||
240 (p->pr_ip6s > 1 && pr->pr_ip6s >= 1)) {
241 for (i = 0; i < p->pr_ip6s; i++) {
242 if (_prison_check_ip6(pr, &p->pr_ip6[i]) == 0)
243 return (EINVAL);
244 }
245 }
246 #endif
247 }
248
249 return (0);
250 }
251
/*
 * Copy the IPv4/IPv6 address lists referenced by j from userland into newly
 * allocated kernel buffers and validate them.
 *
 * On entry j->ip4 / j->ip6 are userland pointers and j->ip4s / j->ip6s the
 * respective element counts.  On success they are replaced by M_PRISON
 * allocated kernel arrays (or NULL for an empty list) which the caller owns
 * from then on.  On failure everything allocated here is released again and
 * j is left untouched, so the caller never sees a pointer to freed memory.
 *
 * Validation is deliberately simple: the unspecified address (and for IPv4
 * the broadcast address) is rejected, as are duplicates.  Other bogus
 * addresses are the user's problem.
 *
 * All addresses but the first are sorted.  The first one is kept in place
 * because it is the primary address as given by userland; it is used for
 * unbound outgoing connections as well as for "loopback" traffic.
 *
 * Returns 0 on success, the copyin() error, or EINVAL for a rejected
 * address.
 */
static int
jail_copyin_ips(struct jail *j)
{
#ifdef INET
	struct in_addr *ip4;
#endif
#ifdef INET6
	struct in6_addr *ip6;
#endif
	int error, i;

#ifdef INET
	ip4 = NULL;
#endif
#ifdef INET6
	ip6 = NULL;
#endif
#ifdef INET
	if (j->ip4s > 0) {
		ip4 = malloc(j->ip4s * sizeof(struct in_addr),
		    M_PRISON, M_WAITOK | M_ZERO);
		error = copyin(j->ip4, ip4, j->ip4s * sizeof(struct in_addr));
		if (error)
			goto e_free_ip;
		/* Sort all but the first IPv4 address. */
		if (j->ip4s > 1)
			qsort((ip4 + 1), j->ip4s - 1,
			    sizeof(struct in_addr), qcmp_v4);

		/*
		 * We do not have to care about byte order for these checks
		 * so we will do them in NBO.
		 */
		for (i = 0; i < j->ip4s; i++) {
			if (ip4[i].s_addr == htonl(INADDR_ANY) ||
			    ip4[i].s_addr == htonl(INADDR_BROADCAST)) {
				error = EINVAL;
				goto e_free_ip;
			}
			/*
			 * Duplicates: the (unsorted) primary address against
			 * every other entry, and neighbours in the sorted
			 * remainder against each other.
			 */
			if ((i + 1) < j->ip4s &&
			    (ip4[0].s_addr == ip4[i + 1].s_addr ||
			    ip4[i].s_addr == ip4[i + 1].s_addr)) {
				error = EINVAL;
				goto e_free_ip;
			}
		}
	}
#endif
#ifdef INET6
	if (j->ip6s > 0) {
		ip6 = malloc(j->ip6s * sizeof(struct in6_addr),
		    M_PRISON, M_WAITOK | M_ZERO);
		error = copyin(j->ip6, ip6, j->ip6s * sizeof(struct in6_addr));
		if (error)
			goto e_free_ip;
		/* Sort all but the first IPv6 address. */
		if (j->ip6s > 1)
			qsort((ip6 + 1), j->ip6s - 1,
			    sizeof(struct in6_addr), qcmp_v6);
		for (i = 0; i < j->ip6s; i++) {
			if (IN6_IS_ADDR_UNSPECIFIED(&ip6[i])) {
				error = EINVAL;
				goto e_free_ip;
			}
			if ((i + 1) < j->ip6s &&
			    (IN6_ARE_ADDR_EQUAL(&ip6[0], &ip6[i + 1]) ||
			    IN6_ARE_ADDR_EQUAL(&ip6[i], &ip6[i + 1]))) {
				error = EINVAL;
				goto e_free_ip;
			}
		}
	}
#endif

	/*
	 * Everything checked out.  Only now hand the kernel copies over to
	 * the caller; an empty list is represented by a NULL pointer.
	 */
#ifdef INET
	j->ip4 = ip4;
#endif
#ifdef INET6
	j->ip6 = ip6;
#endif
	return (0);

e_free_ip:
	/* Freeing a NULL pointer is a no-op, see malloc(9). */
#ifdef INET6
	free(ip6, M_PRISON);
#endif
#ifdef INET
	free(ip4, M_PRISON);
#endif
	return (error);
}
351 #endif /* INET || INET6 */
352
353 static int
354 jail_handle_ips(struct jail *j)
355 {
356 #if defined(INET) || defined(INET6)
357 int error;
358 #endif
359
360 /*
361 * Finish conversion for older versions, copyin and setup IPs.
362 */
363 switch (j->version) {
364 case 0:
365 {
366 #ifdef INET
367 /* FreeBSD single IPv4 jails. */
368 struct in_addr *ip4;
369
370 if (j->ip4s == INADDR_ANY || j->ip4s == INADDR_BROADCAST)
371 return (EINVAL);
372 ip4 = (struct in_addr *)malloc(sizeof(struct in_addr),
373 M_PRISON, M_WAITOK | M_ZERO);
374
375 /*
376 * Jail version 0 still used HBO for the IPv4 address.
377 */
378 ip4->s_addr = htonl(j->ip4s);
379 j->ip4s = 1;
380 j->ip4 = ip4;
381 break;
382 #else
383 return (EINVAL);
384 #endif
385 }
386
387 case 1:
388 /*
389 * Version 1 was used by multi-IPv4 jail implementations
390 * that never made it into the official kernel.
391 * We should never hit this here; jail() should catch it.
392 */
393 return (EINVAL);
394
395 case 2: /* JAIL_API_VERSION */
396 /* FreeBSD multi-IPv4/IPv6,noIP jails. */
397 #if defined(INET) || defined(INET6)
398 #ifdef INET
399 if (j->ip4s > jail_max_af_ips)
400 return (EINVAL);
401 #else
402 if (j->ip4s != 0)
403 return (EINVAL);
404 #endif
405 #ifdef INET6
406 if (j->ip6s > jail_max_af_ips)
407 return (EINVAL);
408 #else
409 if (j->ip6s != 0)
410 return (EINVAL);
411 #endif
412 error = jail_copyin_ips(j);
413 if (error)
414 return (error);
415 #endif
416 break;
417
418 default:
419 /* Sci-Fi jails are not supported, sorry. */
420 return (EINVAL);
421 }
422
423 return (0);
424 }
425
426
427 /*
428 * struct jail_args {
429 * struct jail *jail;
430 * };
431 */
432 int
433 jail(struct thread *td, struct jail_args *uap)
434 {
435 uint32_t version;
436 int error;
437 struct jail j;
438
439 error = copyin(uap->jail, &version, sizeof(uint32_t));
440 if (error)
441 return (error);
442
443 switch (version) {
444 case 0:
445 /* FreeBSD single IPv4 jails. */
446 {
447 struct jail_v0 j0;
448
449 bzero(&j, sizeof(struct jail));
450 error = copyin(uap->jail, &j0, sizeof(struct jail_v0));
451 if (error)
452 return (error);
453 j.version = j0.version;
454 j.path = j0.path;
455 j.hostname = j0.hostname;
456 j.ip4s = j0.ip_number;
457 break;
458 }
459
460 case 1:
461 /*
462 * Version 1 was used by multi-IPv4 jail implementations
463 * that never made it into the official kernel.
464 */
465 return (EINVAL);
466
467 case 2: /* JAIL_API_VERSION */
468 /* FreeBSD multi-IPv4/IPv6,noIP jails. */
469 error = copyin(uap->jail, &j, sizeof(struct jail));
470 if (error)
471 return (error);
472 break;
473
474 default:
475 /* Sci-Fi jails are not supported, sorry. */
476 return (EINVAL);
477 }
478 return (kern_jail(td, &j));
479 }
480
481 int
482 kern_jail(struct thread *td, struct jail *j)
483 {
484 struct nameidata nd;
485 struct prison *pr, *tpr;
486 struct prison_service *psrv;
487 struct jail_attach_args jaa;
488 int vfslocked, error, tryprid;
489
490 KASSERT(j != NULL, ("%s: j is NULL", __func__));
491
492 /* Handle addresses - convert old structs, copyin, check IPs. */
493 error = jail_handle_ips(j);
494 if (error)
495 return (error);
496
497 /* Allocate struct prison and fill it with life. */
498 pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
499 mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF);
500 pr->pr_ref = 1;
501 error = copyinstr(j->path, &pr->pr_path, sizeof(pr->pr_path), NULL);
502 if (error)
503 goto e_killmtx;
504 NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF, UIO_SYSSPACE,
505 pr->pr_path, td);
506 error = namei(&nd);
507 if (error)
508 goto e_killmtx;
509 vfslocked = NDHASGIANT(&nd);
510 pr->pr_root = nd.ni_vp;
511 VOP_UNLOCK(nd.ni_vp, 0, td);
512 NDFREE(&nd, NDF_ONLY_PNBUF);
513 VFS_UNLOCK_GIANT(vfslocked);
514 error = copyinstr(j->hostname, &pr->pr_host, sizeof(pr->pr_host), NULL);
515 if (error)
516 goto e_dropvnref;
517 if (j->jailname != NULL) {
518 error = copyinstr(j->jailname, &pr->pr_name,
519 sizeof(pr->pr_name), NULL);
520 if (error)
521 goto e_dropvnref;
522 }
523 if (j->ip4s > 0) {
524 pr->pr_ip4 = j->ip4;
525 pr->pr_ip4s = j->ip4s;
526 }
527 #ifdef INET6
528 if (j->ip6s > 0) {
529 pr->pr_ip6 = j->ip6;
530 pr->pr_ip6s = j->ip6s;
531 }
532 #endif
533 pr->pr_linux = NULL;
534 pr->pr_securelevel = securelevel;
535 if (prison_service_slots == 0)
536 pr->pr_slots = NULL;
537 else {
538 pr->pr_slots = malloc(sizeof(*pr->pr_slots) * prison_service_slots,
539 M_PRISON, M_ZERO | M_WAITOK);
540 }
541
542 /*
543 * Pre-set prison state to ALIVE upon cration. This is needed so we
544 * can later attach the process to it, etc (avoiding another extra
545 * state for ther process of creation, complicating things).
546 */
547 pr->pr_state = PRISON_STATE_ALIVE;
548
549 /* Allocate a dedicated cpuset for each jail. */
550 error = cpuset_create_root(td, &pr->pr_cpuset);
551 if (error)
552 goto e_dropvnref;
553
554 sx_xlock(&allprison_lock);
555 /* Make sure we cannot run into problems with ambiguous bind()ings. */
556 #if defined(INET) || defined(INET6)
557 error = prison_check_conflicting_ips(pr);
558 if (error) {
559 sx_xunlock(&allprison_lock);
560 goto e_dropcpuset;
561 }
562 #endif
563
564 /* Determine next pr_id and add prison to allprison list. */
565 tryprid = lastprid + 1;
566 if (tryprid == JAIL_MAX)
567 tryprid = 1;
568 next:
569 LIST_FOREACH(tpr, &allprison, pr_list) {
570 if (tpr->pr_id == tryprid) {
571 tryprid++;
572 if (tryprid == JAIL_MAX) {
573 sx_xunlock(&allprison_lock);
574 error = EAGAIN;
575 goto e_dropcpuset;
576 }
577 goto next;
578 }
579 }
580 pr->pr_id = jaa.jid = lastprid = tryprid;
581 LIST_INSERT_HEAD(&allprison, pr, pr_list);
582 prisoncount++;
583 sx_downgrade(&allprison_lock);
584 TAILQ_FOREACH(psrv, &prison_services, ps_next) {
585 psrv->ps_create(psrv, pr);
586 }
587 sx_sunlock(&allprison_lock);
588
589 error = jail_attach(td, &jaa);
590 if (error)
591 goto e_dropprref;
592 mtx_lock(&pr->pr_mtx);
593 pr->pr_ref--;
594 mtx_unlock(&pr->pr_mtx);
595 td->td_retval[0] = jaa.jid;
596 return (0);
597 e_dropprref:
598 sx_xlock(&allprison_lock);
599 LIST_REMOVE(pr, pr_list);
600 prisoncount--;
601 sx_downgrade(&allprison_lock);
602 TAILQ_FOREACH(psrv, &prison_services, ps_next) {
603 psrv->ps_destroy(psrv, pr);
604 }
605 sx_sunlock(&allprison_lock);
606 e_dropcpuset:
607 cpuset_rel(pr->pr_cpuset);
608 e_dropvnref:
609 if (pr->pr_slots != NULL)
610 free(pr->pr_slots, M_PRISON);
611 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
612 vrele(pr->pr_root);
613 VFS_UNLOCK_GIANT(vfslocked);
614 e_killmtx:
615 mtx_destroy(&pr->pr_mtx);
616 free(pr, M_PRISON);
617 #ifdef INET6
618 free(j->ip6, M_PRISON);
619 #endif
620 #ifdef INET
621 free(j->ip4, M_PRISON);
622 #endif
623 return (error);
624 }
625
626 /*
627 * struct jail_attach_args {
628 * int jid;
629 * };
630 */
631 int
632 jail_attach(struct thread *td, struct jail_attach_args *uap)
633 {
634 struct proc *p;
635 struct ucred *newcred, *oldcred;
636 struct prison *pr;
637 int vfslocked, error;
638
639 /*
640 * XXX: Note that there is a slight race here if two threads
641 * in the same privileged process attempt to attach to two
642 * different jails at the same time. It is important for
643 * user processes not to do this, or they might end up with
644 * a process root from one prison, but attached to the jail
645 * of another.
646 */
647 error = priv_check(td, PRIV_JAIL_ATTACH);
648 if (error)
649 return (error);
650
651 p = td->td_proc;
652 sx_slock(&allprison_lock);
653 pr = prison_find(uap->jid);
654 if (pr == NULL) {
655 sx_sunlock(&allprison_lock);
656 return (EINVAL);
657 }
658
659 /*
660 * Do not allow a process to attach to a prison that is not
661 * considered to be "ALIVE".
662 */
663 if (pr->pr_state != PRISON_STATE_ALIVE) {
664 mtx_unlock(&pr->pr_mtx);
665 sx_sunlock(&allprison_lock);
666 return (EINVAL);
667 }
668 pr->pr_ref++;
669 mtx_unlock(&pr->pr_mtx);
670 sx_sunlock(&allprison_lock);
671
672 /*
673 * Reparent the newly attached process to this jail.
674 */
675 error = cpuset_setproc_update_set(p, pr->pr_cpuset);
676 if (error)
677 goto e_unref;
678
679 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
680 vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY, td);
681 if ((error = change_dir(pr->pr_root, td)) != 0)
682 goto e_unlock;
683 #ifdef MAC
684 if ((error = mac_check_vnode_chroot(td->td_ucred, pr->pr_root)))
685 goto e_unlock;
686 #endif
687 VOP_UNLOCK(pr->pr_root, 0, td);
688 change_root(pr->pr_root, td);
689 VFS_UNLOCK_GIANT(vfslocked);
690
691 newcred = crget();
692 PROC_LOCK(p);
693 oldcred = p->p_ucred;
694 setsugid(p);
695 crcopy(newcred, oldcred);
696 newcred->cr_prison = pr;
697 p->p_ucred = newcred;
698 prison_proc_hold(pr);
699 PROC_UNLOCK(p);
700 crfree(oldcred);
701 return (0);
702 e_unlock:
703 VOP_UNLOCK(pr->pr_root, 0, td);
704 VFS_UNLOCK_GIANT(vfslocked);
705 e_unref:
706 mtx_lock(&pr->pr_mtx);
707 pr->pr_ref--;
708 mtx_unlock(&pr->pr_mtx);
709 return (error);
710 }
711
712 /*
713 * Returns a locked prison instance, or NULL on failure.
714 */
715 struct prison *
716 prison_find(int prid)
717 {
718 struct prison *pr;
719
720 sx_assert(&allprison_lock, SX_LOCKED);
721 LIST_FOREACH(pr, &allprison, pr_list) {
722 if (pr->pr_id == prid) {
723 mtx_lock(&pr->pr_mtx);
724 if (pr->pr_ref == 0) {
725 mtx_unlock(&pr->pr_mtx);
726 break;
727 }
728 return (pr);
729 }
730 }
731 return (NULL);
732 }
733
734 void
735 prison_free_locked(struct prison *pr)
736 {
737
738 mtx_assert(&pr->pr_mtx, MA_OWNED);
739 pr->pr_ref--;
740 if (pr->pr_ref == 0) {
741 mtx_unlock(&pr->pr_mtx);
742 TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
743 taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
744 return;
745 }
746 mtx_unlock(&pr->pr_mtx);
747 }
748
749 void
750 prison_free(struct prison *pr)
751 {
752
753 mtx_lock(&pr->pr_mtx);
754 prison_free_locked(pr);
755 }
756
757 static void
758 prison_complete(void *context, int pending)
759 {
760 struct prison_service *psrv;
761 struct prison *pr;
762 int vfslocked;
763
764 pr = (struct prison *)context;
765
766 sx_xlock(&allprison_lock);
767 LIST_REMOVE(pr, pr_list);
768 prisoncount--;
769 sx_downgrade(&allprison_lock);
770 TAILQ_FOREACH(psrv, &prison_services, ps_next) {
771 psrv->ps_destroy(psrv, pr);
772 }
773 sx_sunlock(&allprison_lock);
774
775 cpuset_rel(pr->pr_cpuset);
776
777 if (pr->pr_slots != NULL)
778 free(pr->pr_slots, M_PRISON);
779
780 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
781 vrele(pr->pr_root);
782 VFS_UNLOCK_GIANT(vfslocked);
783
784 mtx_destroy(&pr->pr_mtx);
785 free(pr->pr_linux, M_PRISON);
786 #ifdef INET6
787 free(pr->pr_ip6, M_PRISON);
788 #endif
789 #ifdef INET
790 free(pr->pr_ip4, M_PRISON);
791 #endif
792 free(pr, M_PRISON);
793 }
794
795 void
796 prison_hold_locked(struct prison *pr)
797 {
798
799 mtx_assert(&pr->pr_mtx, MA_OWNED);
800 KASSERT(pr->pr_ref > 0,
801 ("Trying to hold dead prison (id=%d).", pr->pr_id));
802 pr->pr_ref++;
803 }
804
805 void
806 prison_hold(struct prison *pr)
807 {
808
809 mtx_lock(&pr->pr_mtx);
810 prison_hold_locked(pr);
811 mtx_unlock(&pr->pr_mtx);
812 }
813
814 void
815 prison_proc_hold(struct prison *pr)
816 {
817
818 mtx_lock(&pr->pr_mtx);
819 KASSERT(pr->pr_state == PRISON_STATE_ALIVE,
820 ("Cannot add a process to a non-alive prison (id=%d).", pr->pr_id));
821 pr->pr_nprocs++;
822 mtx_unlock(&pr->pr_mtx);
823 }
824
825 void
826 prison_proc_free(struct prison *pr)
827 {
828
829 mtx_lock(&pr->pr_mtx);
830 KASSERT(pr->pr_state == PRISON_STATE_ALIVE && pr->pr_nprocs > 0,
831 ("Trying to kill a process in a dead prison (id=%d).", pr->pr_id));
832 pr->pr_nprocs--;
833 if (pr->pr_nprocs == 0)
834 pr->pr_state = PRISON_STATE_DYING;
835 mtx_unlock(&pr->pr_mtx);
836 }
837
838
839 #ifdef INET
840 /*
841 * Pass back primary IPv4 address of this jail.
842 *
843 * If not jailed return success but do not alter the address. Caller has to
844 * make sure to intialize it correctly (e.g. INADDR_ANY).
845 *
846 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4.
847 * Address returned in NBO.
848 */
849 int
850 prison_get_ip4(struct ucred *cred, struct in_addr *ia)
851 {
852
853 KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
854 KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
855
856 if (!jailed(cred))
857 /* Do not change address passed in. */
858 return (0);
859
860 if (cred->cr_prison->pr_ip4 == NULL)
861 return (EAFNOSUPPORT);
862
863 ia->s_addr = cred->cr_prison->pr_ip4[0].s_addr;
864 return (0);
865 }
866
867 /*
868 * Make sure our (source) address is set to something meaningful to this
869 * jail.
870 *
871 * Returns 0 if not jailed or if address belongs to jail, EADDRNOTAVAIL if
872 * the address doesn't belong, or EAFNOSUPPORT if the jail doesn't allow IPv4.
873 * Address passed in in NBO and returned in NBO.
874 */
875 int
876 prison_local_ip4(struct ucred *cred, struct in_addr *ia)
877 {
878 struct in_addr ia0;
879
880 KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
881 KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
882
883 if (!jailed(cred))
884 return (0);
885 if (cred->cr_prison->pr_ip4 == NULL)
886 return (EAFNOSUPPORT);
887
888 ia0.s_addr = ntohl(ia->s_addr);
889 if (ia0.s_addr == INADDR_LOOPBACK) {
890 ia->s_addr = cred->cr_prison->pr_ip4[0].s_addr;
891 return (0);
892 }
893
894 if (ia0.s_addr == INADDR_ANY) {
895 /*
896 * In case there is only 1 IPv4 address, bind directly.
897 */
898 if (cred->cr_prison->pr_ip4s == 1)
899 ia->s_addr = cred->cr_prison->pr_ip4[0].s_addr;
900 return (0);
901 }
902
903 return (_prison_check_ip4(cred->cr_prison, ia));
904 }
905
906 /*
907 * Rewrite destination address in case we will connect to loopback address.
908 *
909 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4.
910 * Address passed in in NBO and returned in NBO.
911 */
912 int
913 prison_remote_ip4(struct ucred *cred, struct in_addr *ia)
914 {
915
916 KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
917 KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
918
919 if (!jailed(cred))
920 return (0);
921 if (cred->cr_prison->pr_ip4 == NULL)
922 return (EAFNOSUPPORT);
923
924 if (ntohl(ia->s_addr) == INADDR_LOOPBACK) {
925 ia->s_addr = cred->cr_prison->pr_ip4[0].s_addr;
926 return (0);
927 }
928
929 /*
930 * Return success because nothing had to be changed.
931 */
932 return (0);
933 }
934
935 /*
936 * Check if given address belongs to the jail referenced by cred/prison.
937 *
938 * Returns 0 if not jailed or if address belongs to jail, EADDRNOTAVAIL if
939 * the address doesn't belong, or EAFNOSUPPORT if the jail doesn't allow IPv4.
940 * Address passed in in NBO.
941 */
942 static int
943 _prison_check_ip4(struct prison *pr, struct in_addr *ia)
944 {
945 int i, a, z, d;
946
947 /*
948 * Check the primary IP.
949 */
950 if (pr->pr_ip4[0].s_addr == ia->s_addr)
951 return (0);
952
953 /*
954 * All the other IPs are sorted so we can do a binary search.
955 */
956 a = 0;
957 z = pr->pr_ip4s - 2;
958 while (a <= z) {
959 i = (a + z) / 2;
960 d = qcmp_v4(&pr->pr_ip4[i+1], ia);
961 if (d > 0)
962 z = i - 1;
963 else if (d < 0)
964 a = i + 1;
965 else
966 return (0);
967 }
968
969 return (EADDRNOTAVAIL);
970 }
971
972 int
973 prison_check_ip4(struct ucred *cred, struct in_addr *ia)
974 {
975
976 KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
977 KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
978
979 if (!jailed(cred))
980 return (0);
981 if (cred->cr_prison->pr_ip4 == NULL)
982 return (EAFNOSUPPORT);
983
984 return (_prison_check_ip4(cred->cr_prison, ia));
985 }
986 #endif
987
988 #ifdef INET6
989 /*
990 * Pass back primary IPv6 address for this jail.
991 *
992 * If not jailed return success but do not alter the address. Caller has to
993 * make sure to intialize it correctly (e.g. IN6ADDR_ANY_INIT).
994 *
995 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv6.
996 */
997 int
998 prison_get_ip6(struct ucred *cred, struct in6_addr *ia6)
999 {
1000
1001 KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
1002 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
1003
1004 if (!jailed(cred))
1005 return (0);
1006 if (cred->cr_prison->pr_ip6 == NULL)
1007 return (EAFNOSUPPORT);
1008
1009 bcopy(&cred->cr_prison->pr_ip6[0], ia6, sizeof(struct in6_addr));
1010 return (0);
1011 }
1012
1013 /*
1014 * Make sure our (source) address is set to something meaningful to this jail.
1015 *
1016 * v6only should be set based on (inp->inp_flags & IN6P_IPV6_V6ONLY != 0)
1017 * when needed while binding.
1018 *
1019 * Returns 0 if not jailed or if address belongs to jail, EADDRNOTAVAIL if
1020 * the address doesn't belong, or EAFNOSUPPORT if the jail doesn't allow IPv6.
1021 */
1022 int
1023 prison_local_ip6(struct ucred *cred, struct in6_addr *ia6, int v6only)
1024 {
1025
1026 KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
1027 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
1028
1029 if (!jailed(cred))
1030 return (0);
1031 if (cred->cr_prison->pr_ip6 == NULL)
1032 return (EAFNOSUPPORT);
1033
1034 if (IN6_IS_ADDR_LOOPBACK(ia6)) {
1035 bcopy(&cred->cr_prison->pr_ip6[0], ia6,
1036 sizeof(struct in6_addr));
1037 return (0);
1038 }
1039
1040 if (IN6_IS_ADDR_UNSPECIFIED(ia6)) {
1041 /*
1042 * In case there is only 1 IPv6 address, and v6only is true,
1043 * then bind directly.
1044 */
1045 if (v6only != 0 && cred->cr_prison->pr_ip6s == 1)
1046 bcopy(&cred->cr_prison->pr_ip6[0], ia6,
1047 sizeof(struct in6_addr));
1048 return (0);
1049 }
1050
1051 return (_prison_check_ip6(cred->cr_prison, ia6));
1052 }
1053
1054 /*
1055 * Rewrite destination address in case we will connect to loopback address.
1056 *
1057 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv6.
1058 */
1059 int
1060 prison_remote_ip6(struct ucred *cred, struct in6_addr *ia6)
1061 {
1062
1063 KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
1064 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
1065
1066 if (!jailed(cred))
1067 return (0);
1068 if (cred->cr_prison->pr_ip6 == NULL)
1069 return (EAFNOSUPPORT);
1070
1071 if (IN6_IS_ADDR_LOOPBACK(ia6)) {
1072 bcopy(&cred->cr_prison->pr_ip6[0], ia6,
1073 sizeof(struct in6_addr));
1074 return (0);
1075 }
1076
1077 /*
1078 * Return success because nothing had to be changed.
1079 */
1080 return (0);
1081 }
1082
1083 /*
1084 * Check if given address belongs to the jail referenced by cred/prison.
1085 *
1086 * Returns 0 if not jailed or if address belongs to jail, EADDRNOTAVAIL if
1087 * the address doesn't belong, or EAFNOSUPPORT if the jail doesn't allow IPv6.
1088 */
1089 static int
1090 _prison_check_ip6(struct prison *pr, struct in6_addr *ia6)
1091 {
1092 int i, a, z, d;
1093
1094 /*
1095 * Check the primary IP.
1096 */
1097 if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[0], ia6))
1098 return (0);
1099
1100 /*
1101 * All the other IPs are sorted so we can do a binary search.
1102 */
1103 a = 0;
1104 z = pr->pr_ip6s - 2;
1105 while (a <= z) {
1106 i = (a + z) / 2;
1107 d = qcmp_v6(&pr->pr_ip6[i+1], ia6);
1108 if (d > 0)
1109 z = i - 1;
1110 else if (d < 0)
1111 a = i + 1;
1112 else
1113 return (0);
1114 }
1115
1116 return (EADDRNOTAVAIL);
1117 }
1118
1119 int
1120 prison_check_ip6(struct ucred *cred, struct in6_addr *ia6)
1121 {
1122
1123 KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
1124 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
1125
1126 if (!jailed(cred))
1127 return (0);
1128 if (cred->cr_prison->pr_ip6 == NULL)
1129 return (EAFNOSUPPORT);
1130
1131 return (_prison_check_ip6(cred->cr_prison, ia6));
1132 }
1133 #endif
1134
1135 /*
1136 * Check if a jail supports the given address family.
1137 *
1138 * Returns 0 if not jailed or the address family is supported, EAFNOSUPPORT
1139 * if not.
1140 */
1141 int
1142 prison_check_af(struct ucred *cred, int af)
1143 {
1144 int error;
1145
1146 KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
1147
1148
1149 if (!jailed(cred))
1150 return (0);
1151
1152 error = 0;
1153 switch (af)
1154 {
1155 #ifdef INET
1156 case AF_INET:
1157 if (cred->cr_prison->pr_ip4 == NULL)
1158 error = EAFNOSUPPORT;
1159 break;
1160 #endif
1161 #ifdef INET6
1162 case AF_INET6:
1163 if (cred->cr_prison->pr_ip6 == NULL)
1164 error = EAFNOSUPPORT;
1165 break;
1166 #endif
1167 case AF_LOCAL:
1168 case AF_ROUTE:
1169 break;
1170 default:
1171 if (jail_socket_unixiproute_only)
1172 error = EAFNOSUPPORT;
1173 }
1174 return (error);
1175 }
1176
1177 /*
1178 * Check if given address belongs to the jail referenced by cred (wrapper to
1179 * prison_check_ip[46]).
1180 *
1181 * Returns 0 if not jailed or if address belongs to jail, EADDRNOTAVAIL if
1182 * the address doesn't belong, or EAFNOSUPPORT if the jail doesn't allow
1183 * the address family. IPv4 Address passed in in NBO.
1184 */
1185 int
1186 prison_if(struct ucred *cred, struct sockaddr *sa)
1187 {
1188 #ifdef INET
1189 struct sockaddr_in *sai;
1190 #endif
1191 #ifdef INET6
1192 struct sockaddr_in6 *sai6;
1193 #endif
1194 int error;
1195
1196 KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
1197 KASSERT(sa != NULL, ("%s: sa is NULL", __func__));
1198
1199 error = 0;
1200 switch (sa->sa_family)
1201 {
1202 #ifdef INET
1203 case AF_INET:
1204 sai = (struct sockaddr_in *)sa;
1205 error = prison_check_ip4(cred, &sai->sin_addr);
1206 break;
1207 #endif
1208 #ifdef INET6
1209 case AF_INET6:
1210 sai6 = (struct sockaddr_in6 *)sa;
1211 error = prison_check_ip6(cred, &sai6->sin6_addr);
1212 break;
1213 #endif
1214 default:
1215 if (jailed(cred) && jail_socket_unixiproute_only)
1216 error = EAFNOSUPPORT;
1217 }
1218 return (error);
1219 }
1220
1221 /*
1222 * Return 0 if jails permit p1 to frob p2, otherwise ESRCH.
1223 */
1224 int
1225 prison_check(struct ucred *cred1, struct ucred *cred2)
1226 {
1227
1228 if (jailed(cred1)) {
1229 if (!jailed(cred2))
1230 return (ESRCH);
1231 if (cred2->cr_prison != cred1->cr_prison)
1232 return (ESRCH);
1233 }
1234
1235 return (0);
1236 }
1237
1238 /*
1239 * Return 1 if the passed credential is in a jail, otherwise 0.
1240 */
1241 int
1242 jailed(struct ucred *cred)
1243 {
1244
1245 return (cred->cr_prison != NULL);
1246 }
1247
1248 /*
1249 * Return the correct hostname for the passed credential.
1250 */
1251 void
1252 getcredhostname(struct ucred *cred, char *buf, size_t size)
1253 {
1254
1255 if (jailed(cred)) {
1256 mtx_lock(&cred->cr_prison->pr_mtx);
1257 strlcpy(buf, cred->cr_prison->pr_host, size);
1258 mtx_unlock(&cred->cr_prison->pr_mtx);
1259 } else
1260 strlcpy(buf, hostname, size);
1261 }
1262
1263 /*
1264 * Determine whether the subject represented by cred can "see"
1265 * status of a mount point.
1266 * Returns: 0 for permitted, ENOENT otherwise.
1267 * XXX: This function should be called cr_canseemount() and should be
1268 * placed in kern_prot.c.
1269 */
1270 int
1271 prison_canseemount(struct ucred *cred, struct mount *mp)
1272 {
1273 struct prison *pr;
1274 struct statfs *sp;
1275 size_t len;
1276
1277 if (!jailed(cred) || jail_enforce_statfs == 0)
1278 return (0);
1279 pr = cred->cr_prison;
1280 if (pr->pr_root->v_mount == mp)
1281 return (0);
1282 if (jail_enforce_statfs == 2)
1283 return (ENOENT);
1284 /*
1285 * If jail's chroot directory is set to "/" we should be able to see
1286 * all mount-points from inside a jail.
1287 * This is ugly check, but this is the only situation when jail's
1288 * directory ends with '/'.
1289 */
1290 if (strcmp(pr->pr_path, "/") == 0)
1291 return (0);
1292 len = strlen(pr->pr_path);
1293 sp = &mp->mnt_stat;
1294 if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0)
1295 return (ENOENT);
1296 /*
1297 * Be sure that we don't have situation where jail's root directory
1298 * is "/some/path" and mount point is "/some/pathpath".
1299 */
1300 if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/')
1301 return (ENOENT);
1302 return (0);
1303 }
1304
1305 void
1306 prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp)
1307 {
1308 char jpath[MAXPATHLEN];
1309 struct prison *pr;
1310 size_t len;
1311
1312 if (!jailed(cred) || jail_enforce_statfs == 0)
1313 return;
1314 pr = cred->cr_prison;
1315 if (prison_canseemount(cred, mp) != 0) {
1316 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
1317 strlcpy(sp->f_mntonname, "[restricted]",
1318 sizeof(sp->f_mntonname));
1319 return;
1320 }
1321 if (pr->pr_root->v_mount == mp) {
1322 /*
1323 * Clear current buffer data, so we are sure nothing from
1324 * the valid path left there.
1325 */
1326 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
1327 *sp->f_mntonname = '/';
1328 return;
1329 }
1330 /*
1331 * If jail's chroot directory is set to "/" we should be able to see
1332 * all mount-points from inside a jail.
1333 */
1334 if (strcmp(pr->pr_path, "/") == 0)
1335 return;
1336 len = strlen(pr->pr_path);
1337 strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath));
1338 /*
1339 * Clear current buffer data, so we are sure nothing from
1340 * the valid path left there.
1341 */
1342 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
1343 if (*jpath == '\0') {
1344 /* Should never happen. */
1345 *sp->f_mntonname = '/';
1346 } else {
1347 strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname));
1348 }
1349 }
1350
1351 /*
1352 * Check with permission for a specific privilege is granted within jail. We
1353 * have a specific list of accepted privileges; the rest are denied.
1354 */
1355 int
1356 prison_priv_check(struct ucred *cred, int priv)
1357 {
1358
1359 if (!jailed(cred))
1360 return (0);
1361
1362 switch (priv) {
1363
1364 /*
1365 * Allow ktrace privileges for root in jail.
1366 */
1367 case PRIV_KTRACE:
1368
1369 #if 0
1370 /*
1371 * Allow jailed processes to configure audit identity and
1372 * submit audit records (login, etc). In the future we may
1373 * want to further refine the relationship between audit and
1374 * jail.
1375 */
1376 case PRIV_AUDIT_GETAUDIT:
1377 case PRIV_AUDIT_SETAUDIT:
1378 case PRIV_AUDIT_SUBMIT:
1379 #endif
1380
1381 /*
1382 * Allow jailed processes to manipulate process UNIX
1383 * credentials in any way they see fit.
1384 */
1385 case PRIV_CRED_SETUID:
1386 case PRIV_CRED_SETEUID:
1387 case PRIV_CRED_SETGID:
1388 case PRIV_CRED_SETEGID:
1389 case PRIV_CRED_SETGROUPS:
1390 case PRIV_CRED_SETREUID:
1391 case PRIV_CRED_SETREGID:
1392 case PRIV_CRED_SETRESUID:
1393 case PRIV_CRED_SETRESGID:
1394
1395 /*
1396 * Jail implements visibility constraints already, so allow
1397 * jailed root to override uid/gid-based constraints.
1398 */
1399 case PRIV_SEEOTHERGIDS:
1400 case PRIV_SEEOTHERUIDS:
1401
1402 /*
1403 * Jail implements inter-process debugging limits already, so
1404 * allow jailed root various debugging privileges.
1405 */
1406 case PRIV_DEBUG_DIFFCRED:
1407 case PRIV_DEBUG_SUGID:
1408 case PRIV_DEBUG_UNPRIV:
1409
1410 /*
1411 * Allow jail to set various resource limits and login
1412 * properties, and for now, exceed process resource limits.
1413 */
1414 case PRIV_PROC_LIMIT:
1415 case PRIV_PROC_SETLOGIN:
1416 case PRIV_PROC_SETRLIMIT:
1417
1418 /*
1419 * System V and POSIX IPC privileges are granted in jail.
1420 */
1421 case PRIV_IPC_READ:
1422 case PRIV_IPC_WRITE:
1423 case PRIV_IPC_ADMIN:
1424 case PRIV_IPC_MSGSIZE:
1425 case PRIV_MQ_ADMIN:
1426
1427 /*
1428 * Jail implements its own inter-process limits, so allow
1429 * root processes in jail to change scheduling on other
1430 * processes in the same jail. Likewise for signalling.
1431 */
1432 case PRIV_SCHED_DIFFCRED:
1433 case PRIV_SCHED_CPUSET:
1434 case PRIV_SIGNAL_DIFFCRED:
1435 case PRIV_SIGNAL_SUGID:
1436
1437 /*
1438 * Allow jailed processes to write to sysctls marked as jail
1439 * writable.
1440 */
1441 case PRIV_SYSCTL_WRITEJAIL:
1442
1443 /*
1444 * Allow root in jail to manage a variety of quota
1445 * properties. These should likely be conditional on a
1446 * configuration option.
1447 */
1448 case PRIV_VFS_GETQUOTA:
1449 case PRIV_VFS_SETQUOTA:
1450
1451 /*
1452 * Since Jail relies on chroot() to implement file system
1453 * protections, grant many VFS privileges to root in jail.
1454 * Be careful to exclude mount-related and NFS-related
1455 * privileges.
1456 */
1457 case PRIV_VFS_READ:
1458 case PRIV_VFS_WRITE:
1459 case PRIV_VFS_ADMIN:
1460 case PRIV_VFS_EXEC:
1461 case PRIV_VFS_LOOKUP:
1462 case PRIV_VFS_BLOCKRESERVE: /* XXXRW: Slightly surprising. */
1463 case PRIV_VFS_CHFLAGS_DEV:
1464 case PRIV_VFS_CHOWN:
1465 case PRIV_VFS_CHROOT:
1466 case PRIV_VFS_RETAINSUGID:
1467 case PRIV_VFS_FCHROOT:
1468 case PRIV_VFS_LINK:
1469 case PRIV_VFS_SETGID:
1470 case PRIV_VFS_STICKYFILE:
1471 return (0);
1472
1473 /*
1474 * Depending on the global setting, allow privilege of
1475 * setting system flags.
1476 */
1477 case PRIV_VFS_SYSFLAGS:
1478 if (jail_chflags_allowed)
1479 return (0);
1480 else
1481 return (EPERM);
1482
1483 /*
1484 * Depending on the global setting, allow privilege of
1485 * mounting/unmounting file systems.
1486 */
1487 case PRIV_VFS_MOUNT:
1488 case PRIV_VFS_UNMOUNT:
1489 case PRIV_VFS_MOUNT_NONUSER:
1490 case PRIV_VFS_MOUNT_OWNER:
1491 if (jail_mount_allowed)
1492 return (0);
1493 else
1494 return (EPERM);
1495
1496 /*
1497 * Allow jailed root to bind reserved ports and reuse in-use
1498 * ports.
1499 */
1500 case PRIV_NETINET_RESERVEDPORT:
1501 case PRIV_NETINET_REUSEPORT:
1502 return (0);
1503
1504 /*
1505 * Allow jailed root to set certian IPv4/6 (option) headers.
1506 */
1507 case PRIV_NETINET_SETHDROPTS:
1508 return (0);
1509
1510 /*
1511 * Conditionally allow creating raw sockets in jail.
1512 */
1513 case PRIV_NETINET_RAW:
1514 if (jail_allow_raw_sockets)
1515 return (0);
1516 else
1517 return (EPERM);
1518
1519 /*
1520 * Since jail implements its own visibility limits on netstat
1521 * sysctls, allow getcred. This allows identd to work in
1522 * jail.
1523 */
1524 case PRIV_NETINET_GETCRED:
1525 return (0);
1526
1527 default:
1528 /*
1529 * In all remaining cases, deny the privilege request. This
1530 * includes almost all network privileges, many system
1531 * configuration privileges.
1532 */
1533 return (EPERM);
1534 }
1535 }
1536
1537 /*
1538 * Register jail service. Provides 'create' and 'destroy' methods.
1539 * 'create' method will be called for every existing jail and all
1540 * jails in the future as they beeing created.
1541 * 'destroy' method will be called for every jail going away and
1542 * for all existing jails at the time of service deregistration.
1543 */
1544 struct prison_service *
1545 prison_service_register(const char *name, prison_create_t create,
1546 prison_destroy_t destroy)
1547 {
1548 struct prison_service *psrv, *psrv2;
1549 struct prison *pr;
1550 int reallocate = 1, slotno = 0;
1551 void **slots, **oldslots;
1552
1553 psrv = malloc(sizeof(*psrv) + strlen(name) + 1, M_PRISON,
1554 M_WAITOK | M_ZERO);
1555 psrv->ps_create = create;
1556 psrv->ps_destroy = destroy;
1557 strcpy(psrv->ps_name, name);
1558 /*
1559 * Grab the allprison_lock here, so we won't miss any jail
1560 * creation/destruction.
1561 */
1562 sx_xlock(&allprison_lock);
1563 #ifdef INVARIANTS
1564 /*
1565 * Verify if service is not already registered.
1566 */
1567 TAILQ_FOREACH(psrv2, &prison_services, ps_next) {
1568 KASSERT(strcmp(psrv2->ps_name, name) != 0,
1569 ("jail service %s already registered", name));
1570 }
1571 #endif
1572 /*
1573 * Find free slot. When there is no existing free slot available,
1574 * allocate one at the end.
1575 */
1576 TAILQ_FOREACH(psrv2, &prison_services, ps_next) {
1577 if (psrv2->ps_slotno != slotno) {
1578 KASSERT(slotno < psrv2->ps_slotno,
1579 ("Invalid slotno (slotno=%d >= ps_slotno=%d",
1580 slotno, psrv2->ps_slotno));
1581 /* We found free slot. */
1582 reallocate = 0;
1583 break;
1584 }
1585 slotno++;
1586 }
1587 psrv->ps_slotno = slotno;
1588 /*
1589 * Keep the list sorted by slot number.
1590 */
1591 if (psrv2 != NULL) {
1592 KASSERT(reallocate == 0, ("psrv2 != NULL && reallocate != 0"));
1593 TAILQ_INSERT_BEFORE(psrv2, psrv, ps_next);
1594 } else {
1595 KASSERT(reallocate == 1, ("psrv2 == NULL && reallocate == 0"));
1596 TAILQ_INSERT_TAIL(&prison_services, psrv, ps_next);
1597 }
1598 prison_service_slots++;
1599 sx_downgrade(&allprison_lock);
1600 /*
1601 * Allocate memory for new slot if we didn't found empty one.
1602 * Do not use realloc(9), because pr_slots is protected with a mutex,
1603 * so we can't sleep.
1604 */
1605 LIST_FOREACH(pr, &allprison, pr_list) {
1606 if (reallocate) {
1607 /* First allocate memory with M_WAITOK. */
1608 slots = malloc(sizeof(*slots) * prison_service_slots,
1609 M_PRISON, M_WAITOK);
1610 /* Now grab the mutex and replace pr_slots. */
1611 mtx_lock(&pr->pr_mtx);
1612 oldslots = pr->pr_slots;
1613 if (psrv->ps_slotno > 0) {
1614 bcopy(oldslots, slots,
1615 sizeof(*slots) * (prison_service_slots - 1));
1616 }
1617 slots[psrv->ps_slotno] = NULL;
1618 pr->pr_slots = slots;
1619 mtx_unlock(&pr->pr_mtx);
1620 if (oldslots != NULL)
1621 free(oldslots, M_PRISON);
1622 }
1623 /*
1624 * Call 'create' method for each existing jail.
1625 */
1626 psrv->ps_create(psrv, pr);
1627 }
1628 sx_sunlock(&allprison_lock);
1629
1630 return (psrv);
1631 }
1632
1633 void
1634 prison_service_deregister(struct prison_service *psrv)
1635 {
1636 struct prison *pr;
1637 void **slots, **oldslots;
1638 int last = 0;
1639
1640 sx_xlock(&allprison_lock);
1641 if (TAILQ_LAST(&prison_services, prison_services_head) == psrv)
1642 last = 1;
1643 TAILQ_REMOVE(&prison_services, psrv, ps_next);
1644 prison_service_slots--;
1645 sx_downgrade(&allprison_lock);
1646 LIST_FOREACH(pr, &allprison, pr_list) {
1647 /*
1648 * Call 'destroy' method for every currently existing jail.
1649 */
1650 psrv->ps_destroy(psrv, pr);
1651 /*
1652 * If this is the last slot, free the memory allocated for it.
1653 */
1654 if (last) {
1655 if (prison_service_slots == 0)
1656 slots = NULL;
1657 else {
1658 slots = malloc(sizeof(*slots) * prison_service_slots,
1659 M_PRISON, M_WAITOK);
1660 }
1661 mtx_lock(&pr->pr_mtx);
1662 oldslots = pr->pr_slots;
1663 /*
1664 * We require setting slot to NULL after freeing it,
1665 * this way we can check for memory leaks here.
1666 */
1667 KASSERT(oldslots[psrv->ps_slotno] == NULL,
1668 ("Slot %d (service %s, jailid=%d) still contains data?",
1669 psrv->ps_slotno, psrv->ps_name, pr->pr_id));
1670 if (psrv->ps_slotno > 0) {
1671 bcopy(oldslots, slots,
1672 sizeof(*slots) * prison_service_slots);
1673 }
1674 pr->pr_slots = slots;
1675 mtx_unlock(&pr->pr_mtx);
1676 KASSERT(oldslots != NULL, ("oldslots == NULL"));
1677 free(oldslots, M_PRISON);
1678 }
1679 }
1680 sx_sunlock(&allprison_lock);
1681 free(psrv, M_PRISON);
1682 }
1683
1684 /*
1685 * Function sets data for the given jail in slot assigned for the given
1686 * jail service.
1687 */
1688 void
1689 prison_service_data_set(struct prison_service *psrv, struct prison *pr,
1690 void *data)
1691 {
1692
1693 mtx_assert(&pr->pr_mtx, MA_OWNED);
1694 pr->pr_slots[psrv->ps_slotno] = data;
1695 }
1696
1697 /*
1698 * Function clears slots assigned for the given jail service in the given
1699 * prison structure and returns current slot data.
1700 */
1701 void *
1702 prison_service_data_del(struct prison_service *psrv, struct prison *pr)
1703 {
1704 void *data;
1705
1706 mtx_assert(&pr->pr_mtx, MA_OWNED);
1707 data = pr->pr_slots[psrv->ps_slotno];
1708 pr->pr_slots[psrv->ps_slotno] = NULL;
1709 return (data);
1710 }
1711
1712 /*
1713 * Function returns current data from the slot assigned to the given jail
1714 * service for the given jail.
1715 */
1716 void *
1717 prison_service_data_get(struct prison_service *psrv, struct prison *pr)
1718 {
1719
1720 mtx_assert(&pr->pr_mtx, MA_OWNED);
1721 return (pr->pr_slots[psrv->ps_slotno]);
1722 }
1723
1724 static int
1725 sysctl_jail_list(SYSCTL_HANDLER_ARGS)
1726 {
1727 struct xprison *xp, *sxp;
1728 struct prison *pr;
1729 char *p;
1730 size_t len;
1731 int count, error;
1732
1733 if (jailed(req->td->td_ucred))
1734 return (0);
1735
1736 sx_slock(&allprison_lock);
1737 if ((count = prisoncount) == 0) {
1738 sx_sunlock(&allprison_lock);
1739 return (0);
1740 }
1741
1742 len = sizeof(*xp) * count;
1743 LIST_FOREACH(pr, &allprison, pr_list) {
1744 #ifdef INET
1745 len += pr->pr_ip4s * sizeof(struct in_addr);
1746 #endif
1747 #ifdef INET6
1748 len += pr->pr_ip6s * sizeof(struct in6_addr);
1749 #endif
1750 }
1751
1752 sxp = xp = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
1753
1754 LIST_FOREACH(pr, &allprison, pr_list) {
1755 xp->pr_version = XPRISON_VERSION;
1756 xp->pr_id = pr->pr_id;
1757 xp->pr_state = pr->pr_state;
1758 xp->pr_cpusetid = pr->pr_cpuset->cs_id;
1759 strlcpy(xp->pr_path, pr->pr_path, sizeof(xp->pr_path));
1760 mtx_lock(&pr->pr_mtx);
1761 strlcpy(xp->pr_host, pr->pr_host, sizeof(xp->pr_host));
1762 strlcpy(xp->pr_name, pr->pr_name, sizeof(xp->pr_name));
1763 mtx_unlock(&pr->pr_mtx);
1764 #ifdef INET
1765 xp->pr_ip4s = pr->pr_ip4s;
1766 #endif
1767 #ifdef INET6
1768 xp->pr_ip6s = pr->pr_ip6s;
1769 #endif
1770 p = (char *)(xp + 1);
1771 #ifdef INET
1772 if (pr->pr_ip4s > 0) {
1773 bcopy(pr->pr_ip4, (struct in_addr *)p,
1774 pr->pr_ip4s * sizeof(struct in_addr));
1775 p += (pr->pr_ip4s * sizeof(struct in_addr));
1776 }
1777 #endif
1778 #ifdef INET6
1779 if (pr->pr_ip6s > 0) {
1780 bcopy(pr->pr_ip6, (struct in6_addr *)p,
1781 pr->pr_ip6s * sizeof(struct in6_addr));
1782 p += (pr->pr_ip6s * sizeof(struct in6_addr));
1783 }
1784 #endif
1785 xp = (struct xprison *)p;
1786 }
1787 sx_sunlock(&allprison_lock);
1788
1789 error = SYSCTL_OUT(req, sxp, len);
1790 free(sxp, M_TEMP);
1791 return (error);
1792 }
1793
1794 SYSCTL_OID(_security_jail, OID_AUTO, list,
1795 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
1796 sysctl_jail_list, "S", "List of active jails");
1797
1798 static int
1799 sysctl_jail_jailed(SYSCTL_HANDLER_ARGS)
1800 {
1801 int error, injail;
1802
1803 injail = jailed(req->td->td_ucred);
1804 error = SYSCTL_OUT(req, &injail, sizeof(injail));
1805
1806 return (error);
1807 }
1808 SYSCTL_PROC(_security_jail, OID_AUTO, jailed,
1809 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
1810 sysctl_jail_jailed, "I", "Process in jail?");
1811
#ifdef DDB
/*
 * DDB "show jails" command: print a multi-line record for every jail on
 * the allprison list -- ids and counters, hostname and path, name and
 * state, cpuset id, then one line per IPv4 and IPv6 address.  The listing
 * stops early once the DDB pager reports that the user quit.
 *
 * NOTE(review): the column header strings are kept exactly as found here;
 * their spacing looks collapsed compared to the field widths used for the
 * data rows -- confirm against the upstream file.
 */
DB_SHOW_COMMAND(jails, db_show_jails)
{
	struct prison *pr;
#ifdef INET
	struct in_addr ia;
#endif
#ifdef INET6
	char ip6buf[INET6_ADDRSTRLEN];
#endif
	const char *state;
	int nstates;
#if defined(INET) || defined(INET6)
	int i;
#endif

	nstates = (int)((sizeof(prison_states) /
	    sizeof(struct prison_state)));

	db_printf(" JID pr_ref pr_nprocs pr_ip4s pr_ip6s\n");
	db_printf(" Hostname Path\n");
	db_printf(" Name State\n");
	db_printf(" Cpusetid\n");
	db_printf(" IP Address(es)\n");

	LIST_FOREACH(pr, &allprison, pr_list) {
		db_printf("%6d %6d %9d %7d %7d\n",
		    pr->pr_id, pr->pr_ref, pr->pr_nprocs,
		    pr->pr_ip4s, pr->pr_ip6s);
		db_printf("%6s %-29.29s %.74s\n",
		    "", pr->pr_host, pr->pr_path);

		/* Never index the state table with an out-of-range value. */
		if (pr->pr_state >= 0 && pr->pr_state < nstates)
			state = prison_states[pr->pr_state].state_name;
		else
			state = "(bogus)";
		db_printf("%6s %-29.29s %.74s\n",
		    "", pr->pr_name, state);
		db_printf("%6s %-6d\n",
		    "", pr->pr_cpuset->cs_id);
#ifdef INET
		for (i = 0; i < pr->pr_ip4s; i++) {
			ia = pr->pr_ip4[i];
			db_printf("%6s %s\n", "", inet_ntoa(ia));
		}
#endif
#ifdef INET6
		for (i = 0; i < pr->pr_ip6s; i++) {
			db_printf("%6s %s\n",
			    "", ip6_sprintf(ip6buf, &pr->pr_ip6[i]));
		}
#endif /* INET6 */
		if (db_pager_quit)
			break;
	}
}
#endif /* DDB */
Cache object: 4efe60db20413aa62fc513b42895ab0a
|