FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_jail.c
1 /*-
2 * Copyright (c) 1999 Poul-Henning Kamp.
3 * Copyright (c) 2008 Bjoern A. Zeeb.
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD: releng/7.4/sys/kern/kern_jail.c 202924 2010-01-24 14:05:56Z bz $");
30
31 #include "opt_ddb.h"
32 #include "opt_inet.h"
33 #include "opt_inet6.h"
34 #include "opt_mac.h"
35
36 #include <sys/param.h>
37 #include <sys/types.h>
38 #include <sys/kernel.h>
39 #include <sys/systm.h>
40 #include <sys/errno.h>
41 #include <sys/sysproto.h>
42 #include <sys/malloc.h>
43 #include <sys/priv.h>
44 #include <sys/proc.h>
45 #include <sys/taskqueue.h>
46 #include <sys/jail.h>
47 #include <sys/lock.h>
48 #include <sys/mutex.h>
49 #include <sys/sx.h>
50 #include <sys/namei.h>
51 #include <sys/mount.h>
52 #include <sys/queue.h>
53 #include <sys/socket.h>
54 #include <sys/syscallsubr.h>
55 #include <sys/sysctl.h>
56 #include <sys/vnode.h>
57 #include <net/if.h>
58 #include <netinet/in.h>
59 #ifdef DDB
60 #include <ddb/ddb.h>
61 #ifdef INET6
62 #include <netinet6/in6_var.h>
63 #endif /* INET6 */
64 #endif /* DDB */
65
66 #include <security/mac/mac_framework.h>
67
68 MALLOC_DEFINE(M_PRISON, "prison", "Prison structures");
69
70 SYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0,
71 "Jail rules");
72
73 int jail_set_hostname_allowed = 1;
74 SYSCTL_INT(_security_jail, OID_AUTO, set_hostname_allowed, CTLFLAG_RW,
75 &jail_set_hostname_allowed, 0,
76 "Processes in jail can set their hostnames");
77
78 int jail_socket_unixiproute_only = 1;
79 SYSCTL_INT(_security_jail, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW,
80 &jail_socket_unixiproute_only, 0,
81 "Processes in jail are limited to creating UNIX/IP/route sockets only");
82
83 int jail_sysvipc_allowed = 0;
84 SYSCTL_INT(_security_jail, OID_AUTO, sysvipc_allowed, CTLFLAG_RW,
85 &jail_sysvipc_allowed, 0,
86 "Processes in jail can use System V IPC primitives");
87
88 static int jail_enforce_statfs = 2;
89 SYSCTL_INT(_security_jail, OID_AUTO, enforce_statfs, CTLFLAG_RW,
90 &jail_enforce_statfs, 0,
91 "Processes in jail cannot see all mounted file systems");
92
93 int jail_allow_raw_sockets = 0;
94 SYSCTL_INT(_security_jail, OID_AUTO, allow_raw_sockets, CTLFLAG_RW,
95 &jail_allow_raw_sockets, 0,
96 "Prison root can create raw sockets");
97
98 #ifdef INET
99 static int jail_ip4_saddrsel = 1;
100 SYSCTL_INT(_security_jail, OID_AUTO, ip4_saddrsel, CTLFLAG_RW,
101 &jail_ip4_saddrsel, 0,
102 "Do (not) use IPv4 source address selection rather than the "
103 "primary jail IPv4 address.");
104 #endif
105
106 #ifdef INET6
107 static int jail_ip6_saddrsel = 1;
108 SYSCTL_INT(_security_jail, OID_AUTO, ip6_saddrsel, CTLFLAG_RW,
109 &jail_ip6_saddrsel, 0,
110 "Do (not) use IPv6 source address selection rather than the "
111 "primary jail IPv6 address.");
112 #endif
113
114 int jail_chflags_allowed = 0;
115 SYSCTL_INT(_security_jail, OID_AUTO, chflags_allowed, CTLFLAG_RW,
116 &jail_chflags_allowed, 0,
117 "Processes in jail can alter system file flags");
118
119 int jail_mount_allowed = 0;
120 SYSCTL_INT(_security_jail, OID_AUTO, mount_allowed, CTLFLAG_RW,
121 &jail_mount_allowed, 0,
122 "Processes in jail can mount/unmount jail-friendly file systems");
123
124 int jail_max_af_ips = 255;
125 SYSCTL_INT(_security_jail, OID_AUTO, jail_max_af_ips, CTLFLAG_RW,
126 &jail_max_af_ips, 0,
127 "Number of IP addresses a jail may have at most per address family");
128
129 /* allprison, lastprid, and prisoncount are protected by allprison_lock. */
130 struct prisonlist allprison;
131 struct sx allprison_lock;
132 int lastprid = 0;
133 int prisoncount = 0;
134
135 /*
136 * List of jail services. Protected by allprison_lock.
137 */
138 TAILQ_HEAD(prison_services_head, prison_service);
139 static struct prison_services_head prison_services =
140 TAILQ_HEAD_INITIALIZER(prison_services);
141 static int prison_service_slots = 0;
142
143 struct prison_service {
144 prison_create_t ps_create;
145 prison_destroy_t ps_destroy;
146 int ps_slotno;
147 TAILQ_ENTRY(prison_service) ps_next;
148 char ps_name[0];
149 };
150
/* Forward declarations of file-local helpers. */
static void	init_prison(void *data);
static void	prison_complete(void *context, int pending);
static int	sysctl_jail_list(SYSCTL_HANDLER_ARGS);
#ifdef INET
static int	_prison_check_ip4(struct prison *pr, struct in_addr *ia);
#endif
#ifdef INET6
static int	_prison_check_ip6(struct prison *pr, struct in6_addr *ia6);
#endif
160
161 static void
162 init_prison(void *data __unused)
163 {
164
165 sx_init(&allprison_lock, "allprison");
166 LIST_INIT(&allprison);
167 }
168
169 SYSINIT(prison, SI_SUB_INTRINSIC, SI_ORDER_ANY, init_prison, NULL);
170
171 #ifdef INET
/*
 * qsort()-style comparator for two IPv4 addresses (struct in_addr) held in
 * network byte order.  Returns -1, 0 or 1.
 */
static int
qcmp_v4(const void *ip1, const void *ip2)
{
	const struct in_addr *lhs = ip1;
	const struct in_addr *rhs = ip2;
	in_addr_t left, right;

	/*
	 * Order by the host-byte-order value; comparing the raw network
	 * byte order words would not yield a numerically sorted list.
	 */
	left = ntohl(lhs->s_addr);
	right = ntohl(rhs->s_addr);

	/*
	 * Compare explicitly instead of subtracting: the difference of two
	 * 32-bit unsigned values does not fit into an int.
	 */
	if (left == right)
		return (0);
	return (left > right ? 1 : -1);
}
196 #endif
197
198 #ifdef INET6
/*
 * qsort()-style comparator for two IPv6 addresses (struct in6_addr).
 * The addresses are compared byte by byte from the first octet on; the
 * first differing octet decides.  Returns -1, 0 or 1.
 */
static int
qcmp_v6(const void *ip1, const void *ip2)
{
	const struct in6_addr *lhs = ip1;
	const struct in6_addr *rhs = ip2;
	size_t idx;

	for (idx = 0; idx < sizeof(struct in6_addr); idx++) {
		if (lhs->s6_addr[idx] == rhs->s6_addr[idx])
			continue;
		return (lhs->s6_addr[idx] > rhs->s6_addr[idx] ? 1 : -1);
	}
	return (0);
}
217 #endif
218
219 #if defined(INET) || defined(INET6)
220 static int
221 prison_check_conflicting_ips(struct prison *p)
222 {
223 struct prison *pr;
224 int i;
225
226 sx_assert(&allprison_lock, SX_LOCKED);
227
228 if (p->pr_ip4s == 0 && p->pr_ip6s == 0)
229 return (0);
230
231 LIST_FOREACH(pr, &allprison, pr_list) {
232 /*
233 * Skip 'dying' prisons to avoid problems when
234 * restarting multi-IP jails.
235 */
236 if (pr->pr_state == PRISON_STATE_DYING)
237 continue;
238
239 /*
240 * We permit conflicting IPs if there is no
241 * more than 1 IP on eeach jail.
242 * In case there is one duplicate on a jail with
243 * more than one IP stop checking and return error.
244 */
245 #ifdef INET
246 if ((p->pr_ip4s >= 1 && pr->pr_ip4s > 1) ||
247 (p->pr_ip4s > 1 && pr->pr_ip4s >= 1)) {
248 for (i = 0; i < p->pr_ip4s; i++) {
249 if (_prison_check_ip4(pr, &p->pr_ip4[i]) == 0)
250 return (EINVAL);
251 }
252 }
253 #endif
254 #ifdef INET6
255 if ((p->pr_ip6s >= 1 && pr->pr_ip6s > 1) ||
256 (p->pr_ip6s > 1 && pr->pr_ip6s >= 1)) {
257 for (i = 0; i < p->pr_ip6s; i++) {
258 if (_prison_check_ip6(pr, &p->pr_ip6[i]) == 0)
259 return (EINVAL);
260 }
261 }
262 #endif
263 }
264
265 return (0);
266 }
267
/*
 * Copy the IPv4/IPv6 address arrays referenced by *j in from userland,
 * sort and validate them.
 *
 * On entry j->ip4 / j->ip6 are userland pointers and j->ip4s / j->ip6s the
 * element counts.  On success (return 0) j->ip4 / j->ip6 are replaced by
 * M_PRISON kernel buffers (or NULL when the respective count is zero); the
 * caller then owns those buffers.
 *
 * On failure an errno value is returned, every buffer allocated here has
 * been freed again and *j is left untouched, so the caller never sees a
 * pointer to freed memory.
 */
static int
jail_copyin_ips(struct jail *j)
{
#ifdef INET
	struct in_addr *ip4;
#endif
#ifdef INET6
	struct in6_addr *ip6;
#endif
	int error, i;

	/*
	 * Copy in addresses, check for duplicate addresses and do some
	 * simple 0 and broadcast checks.  If users give other bogus addresses
	 * it is their problem.
	 *
	 * All addresses but ip[0] are sorted; ip[0] stays in place to
	 * preserve the primary IP address as given from userland.  This
	 * special IP is used for unbound outgoing connections as well as for
	 * "loopback" traffic in case source address selection cannot find
	 * any more fitting address to connect from.
	 */
#ifdef INET
	ip4 = NULL;
#endif
#ifdef INET6
	ip6 = NULL;
#endif
#ifdef INET
	if (j->ip4s > 0) {
		ip4 = malloc(j->ip4s * sizeof(struct in_addr),
		    M_PRISON, M_WAITOK | M_ZERO);
		error = copyin(j->ip4, ip4, j->ip4s * sizeof(struct in_addr));
		if (error)
			goto e_free_ip;
		/* Sort all but the first IPv4 address. */
		if (j->ip4s > 1)
			qsort((ip4 + 1), j->ip4s - 1,
			    sizeof(struct in_addr), qcmp_v4);

		/*
		 * We do not have to care about byte order for these checks
		 * so we will do them in NBO.
		 */
		for (i = 0; i < j->ip4s; i++) {
			if (ip4[i].s_addr == htonl(INADDR_ANY) ||
			    ip4[i].s_addr == htonl(INADDR_BROADCAST)) {
				error = EINVAL;
				goto e_free_ip;
			}
			/*
			 * Duplicates: the tail is sorted, so equal entries
			 * are adjacent; the unsorted primary is compared
			 * against every tail entry.
			 */
			if ((i + 1) < j->ip4s &&
			    (ip4[0].s_addr == ip4[i + 1].s_addr ||
			    ip4[i].s_addr == ip4[i + 1].s_addr)) {
				error = EINVAL;
				goto e_free_ip;
			}
		}
	}
#endif
#ifdef INET6
	if (j->ip6s > 0) {
		ip6 = malloc(j->ip6s * sizeof(struct in6_addr),
		    M_PRISON, M_WAITOK | M_ZERO);
		error = copyin(j->ip6, ip6, j->ip6s * sizeof(struct in6_addr));
		if (error)
			goto e_free_ip;
		/* Sort all but the first IPv6 address. */
		if (j->ip6s > 1)
			qsort((ip6 + 1), j->ip6s - 1,
			    sizeof(struct in6_addr), qcmp_v6);
		for (i = 0; i < j->ip6s; i++) {
			if (IN6_IS_ADDR_UNSPECIFIED(&ip6[i])) {
				error = EINVAL;
				goto e_free_ip;
			}
			if ((i + 1) < j->ip6s &&
			    (IN6_ARE_ADDR_EQUAL(&ip6[0], &ip6[i + 1]) ||
			    IN6_ARE_ADDR_EQUAL(&ip6[i], &ip6[i + 1]))) {
				error = EINVAL;
				goto e_free_ip;
			}
		}
	}
#endif

	/*
	 * Everything validated: only now hand the kernel buffers to the
	 * caller.  Publishing them earlier would leave j->ip4 pointing at
	 * freed memory if the IPv6 half failed afterwards.
	 */
#ifdef INET
	j->ip4 = ip4;
#endif
#ifdef INET6
	j->ip6 = ip6;
#endif
	return (0);

e_free_ip:
#ifdef INET6
	free(ip6, M_PRISON);
#endif
#ifdef INET
	free(ip4, M_PRISON);
#endif
	return (error);
}
369 #endif /* INET || INET6 */
370
371 static int
372 jail_handle_ips(struct jail *j)
373 {
374 #if defined(INET) || defined(INET6)
375 int error;
376 #endif
377
378 /*
379 * Finish conversion for older versions, copyin and setup IPs.
380 */
381 switch (j->version) {
382 case 0:
383 {
384 #ifdef INET
385 /* FreeBSD single IPv4 jails. */
386 struct in_addr *ip4;
387
388 if (j->ip4s == INADDR_ANY || j->ip4s == INADDR_BROADCAST)
389 return (EINVAL);
390 ip4 = (struct in_addr *)malloc(sizeof(struct in_addr),
391 M_PRISON, M_WAITOK | M_ZERO);
392
393 /*
394 * Jail version 0 still used HBO for the IPv4 address.
395 */
396 ip4->s_addr = htonl(j->ip4s);
397 j->ip4s = 1;
398 j->ip4 = ip4;
399 break;
400 #else
401 return (EINVAL);
402 #endif
403 }
404
405 case 1:
406 /*
407 * Version 1 was used by multi-IPv4 jail implementations
408 * that never made it into the official kernel.
409 * We should never hit this here; jail() should catch it.
410 */
411 return (EINVAL);
412
413 case 2: /* JAIL_API_VERSION */
414 /* FreeBSD multi-IPv4/IPv6,noIP jails. */
415 #if defined(INET) || defined(INET6)
416 #ifdef INET
417 if (j->ip4s > jail_max_af_ips)
418 return (EINVAL);
419 #else
420 if (j->ip4s != 0)
421 return (EINVAL);
422 #endif
423 #ifdef INET6
424 if (j->ip6s > jail_max_af_ips)
425 return (EINVAL);
426 #else
427 if (j->ip6s != 0)
428 return (EINVAL);
429 #endif
430 error = jail_copyin_ips(j);
431 if (error)
432 return (error);
433 #endif
434 break;
435
436 default:
437 /* Sci-Fi jails are not supported, sorry. */
438 return (EINVAL);
439 }
440
441 return (0);
442 }
443
444
445 /*
446 * struct jail_args {
447 * struct jail *jail;
448 * };
449 */
450 int
451 jail(struct thread *td, struct jail_args *uap)
452 {
453 uint32_t version;
454 int error;
455 struct jail j;
456
457 error = copyin(uap->jail, &version, sizeof(uint32_t));
458 if (error)
459 return (error);
460
461 switch (version) {
462 case 0:
463 /* FreeBSD single IPv4 jails. */
464 {
465 struct jail_v0 j0;
466
467 bzero(&j, sizeof(struct jail));
468 error = copyin(uap->jail, &j0, sizeof(struct jail_v0));
469 if (error)
470 return (error);
471 j.version = j0.version;
472 j.path = j0.path;
473 j.hostname = j0.hostname;
474 j.ip4s = j0.ip_number;
475 break;
476 }
477
478 case 1:
479 /*
480 * Version 1 was used by multi-IPv4 jail implementations
481 * that never made it into the official kernel.
482 */
483 return (EINVAL);
484
485 case 2: /* JAIL_API_VERSION */
486 /* FreeBSD multi-IPv4/IPv6,noIP jails. */
487 error = copyin(uap->jail, &j, sizeof(struct jail));
488 if (error)
489 return (error);
490 break;
491
492 default:
493 /* Sci-Fi jails are not supported, sorry. */
494 return (EINVAL);
495 }
496 return (kern_jail(td, &j));
497 }
498
499 int
500 kern_jail(struct thread *td, struct jail *j)
501 {
502 struct nameidata nd;
503 struct prison *pr, *tpr;
504 struct prison_service *psrv;
505 struct jail_attach_args jaa;
506 int vfslocked, error, tryprid;
507
508 KASSERT(j != NULL, ("%s: j is NULL", __func__));
509
510 /* Handle addresses - convert old structs, copyin, check IPs. */
511 error = jail_handle_ips(j);
512 if (error)
513 return (error);
514
515 /* Allocate struct prison and fill it with life. */
516 pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
517 mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF);
518 pr->pr_ref = 1;
519 error = copyinstr(j->path, &pr->pr_path, sizeof(pr->pr_path), NULL);
520 if (error)
521 goto e_killmtx;
522 NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF, UIO_SYSSPACE,
523 pr->pr_path, td);
524 error = namei(&nd);
525 if (error)
526 goto e_killmtx;
527 vfslocked = NDHASGIANT(&nd);
528 pr->pr_root = nd.ni_vp;
529 VOP_UNLOCK(nd.ni_vp, 0, td);
530 NDFREE(&nd, NDF_ONLY_PNBUF);
531 VFS_UNLOCK_GIANT(vfslocked);
532 error = copyinstr(j->hostname, &pr->pr_host, sizeof(pr->pr_host), NULL);
533 if (error)
534 goto e_dropvnref;
535 if (j->jailname != NULL) {
536 error = copyinstr(j->jailname, &pr->pr_name,
537 sizeof(pr->pr_name), NULL);
538 if (error)
539 goto e_dropvnref;
540 }
541 if (j->ip4s > 0) {
542 pr->pr_ip4 = j->ip4;
543 pr->pr_ip4s = j->ip4s;
544 }
545 #ifdef INET6
546 if (j->ip6s > 0) {
547 pr->pr_ip6 = j->ip6;
548 pr->pr_ip6s = j->ip6s;
549 }
550 #endif
551 pr->pr_linux = NULL;
552 pr->pr_securelevel = securelevel;
553 if (prison_service_slots == 0)
554 pr->pr_slots = NULL;
555 else {
556 pr->pr_slots = malloc(sizeof(*pr->pr_slots) * prison_service_slots,
557 M_PRISON, M_ZERO | M_WAITOK);
558 }
559
560 /*
561 * Pre-set prison state to ALIVE upon cration. This is needed so we
562 * can later attach the process to it, etc (avoiding another extra
563 * state for ther process of creation, complicating things).
564 */
565 pr->pr_state = PRISON_STATE_ALIVE;
566
567 /* Allocate a dedicated cpuset for each jail. */
568 error = cpuset_create_root(td, &pr->pr_cpuset);
569 if (error)
570 goto e_dropvnref;
571
572 sx_xlock(&allprison_lock);
573 /* Make sure we cannot run into problems with ambiguous bind()ings. */
574 #if defined(INET) || defined(INET6)
575 error = prison_check_conflicting_ips(pr);
576 if (error) {
577 sx_xunlock(&allprison_lock);
578 goto e_dropcpuset;
579 }
580 #endif
581
582 /* Determine next pr_id and add prison to allprison list. */
583 tryprid = lastprid + 1;
584 if (tryprid == JAIL_MAX)
585 tryprid = 1;
586 next:
587 LIST_FOREACH(tpr, &allprison, pr_list) {
588 if (tpr->pr_id == tryprid) {
589 tryprid++;
590 if (tryprid == JAIL_MAX) {
591 sx_xunlock(&allprison_lock);
592 error = EAGAIN;
593 goto e_dropcpuset;
594 }
595 goto next;
596 }
597 }
598 pr->pr_id = jaa.jid = lastprid = tryprid;
599 LIST_INSERT_HEAD(&allprison, pr, pr_list);
600 prisoncount++;
601 sx_downgrade(&allprison_lock);
602 TAILQ_FOREACH(psrv, &prison_services, ps_next) {
603 psrv->ps_create(psrv, pr);
604 }
605 sx_sunlock(&allprison_lock);
606
607 error = jail_attach(td, &jaa);
608 if (error)
609 goto e_dropprref;
610 mtx_lock(&pr->pr_mtx);
611 pr->pr_ref--;
612 mtx_unlock(&pr->pr_mtx);
613 td->td_retval[0] = jaa.jid;
614 return (0);
615 e_dropprref:
616 sx_xlock(&allprison_lock);
617 LIST_REMOVE(pr, pr_list);
618 prisoncount--;
619 sx_downgrade(&allprison_lock);
620 TAILQ_FOREACH(psrv, &prison_services, ps_next) {
621 psrv->ps_destroy(psrv, pr);
622 }
623 sx_sunlock(&allprison_lock);
624 e_dropcpuset:
625 cpuset_rel(pr->pr_cpuset);
626 e_dropvnref:
627 if (pr->pr_slots != NULL)
628 free(pr->pr_slots, M_PRISON);
629 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
630 vrele(pr->pr_root);
631 VFS_UNLOCK_GIANT(vfslocked);
632 e_killmtx:
633 mtx_destroy(&pr->pr_mtx);
634 free(pr, M_PRISON);
635 #ifdef INET6
636 free(j->ip6, M_PRISON);
637 #endif
638 #ifdef INET
639 free(j->ip4, M_PRISON);
640 #endif
641 return (error);
642 }
643
644 /*
645 * struct jail_attach_args {
646 * int jid;
647 * };
648 */
649 int
650 jail_attach(struct thread *td, struct jail_attach_args *uap)
651 {
652 struct proc *p;
653 struct ucred *newcred, *oldcred;
654 struct prison *pr;
655 int vfslocked, error;
656
657 /*
658 * XXX: Note that there is a slight race here if two threads
659 * in the same privileged process attempt to attach to two
660 * different jails at the same time. It is important for
661 * user processes not to do this, or they might end up with
662 * a process root from one prison, but attached to the jail
663 * of another.
664 */
665 error = priv_check(td, PRIV_JAIL_ATTACH);
666 if (error)
667 return (error);
668
669 p = td->td_proc;
670 sx_slock(&allprison_lock);
671 pr = prison_find(uap->jid);
672 if (pr == NULL) {
673 sx_sunlock(&allprison_lock);
674 return (EINVAL);
675 }
676
677 /*
678 * Do not allow a process to attach to a prison that is not
679 * considered to be "ALIVE".
680 */
681 if (pr->pr_state != PRISON_STATE_ALIVE) {
682 mtx_unlock(&pr->pr_mtx);
683 sx_sunlock(&allprison_lock);
684 return (EINVAL);
685 }
686 pr->pr_ref++;
687 mtx_unlock(&pr->pr_mtx);
688 sx_sunlock(&allprison_lock);
689
690 /*
691 * Reparent the newly attached process to this jail.
692 */
693 error = cpuset_setproc_update_set(p, pr->pr_cpuset);
694 if (error)
695 goto e_unref;
696
697 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
698 vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY, td);
699 if ((error = change_dir(pr->pr_root, td)) != 0)
700 goto e_unlock;
701 #ifdef MAC
702 if ((error = mac_check_vnode_chroot(td->td_ucred, pr->pr_root)))
703 goto e_unlock;
704 #endif
705 VOP_UNLOCK(pr->pr_root, 0, td);
706 change_root(pr->pr_root, td);
707 VFS_UNLOCK_GIANT(vfslocked);
708
709 newcred = crget();
710 PROC_LOCK(p);
711 oldcred = p->p_ucred;
712 setsugid(p);
713 crcopy(newcred, oldcred);
714 newcred->cr_prison = pr;
715 p->p_ucred = newcred;
716 prison_proc_hold(pr);
717 PROC_UNLOCK(p);
718 crfree(oldcred);
719 return (0);
720 e_unlock:
721 VOP_UNLOCK(pr->pr_root, 0, td);
722 VFS_UNLOCK_GIANT(vfslocked);
723 e_unref:
724 mtx_lock(&pr->pr_mtx);
725 pr->pr_ref--;
726 mtx_unlock(&pr->pr_mtx);
727 return (error);
728 }
729
730 /*
731 * Returns a locked prison instance, or NULL on failure.
732 */
733 struct prison *
734 prison_find(int prid)
735 {
736 struct prison *pr;
737
738 sx_assert(&allprison_lock, SX_LOCKED);
739 LIST_FOREACH(pr, &allprison, pr_list) {
740 if (pr->pr_id == prid) {
741 mtx_lock(&pr->pr_mtx);
742 if (pr->pr_ref == 0) {
743 mtx_unlock(&pr->pr_mtx);
744 break;
745 }
746 return (pr);
747 }
748 }
749 return (NULL);
750 }
751
752 void
753 prison_free_locked(struct prison *pr)
754 {
755
756 mtx_assert(&pr->pr_mtx, MA_OWNED);
757 pr->pr_ref--;
758 if (pr->pr_ref == 0) {
759 mtx_unlock(&pr->pr_mtx);
760 TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
761 taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
762 return;
763 }
764 mtx_unlock(&pr->pr_mtx);
765 }
766
767 void
768 prison_free(struct prison *pr)
769 {
770
771 mtx_lock(&pr->pr_mtx);
772 prison_free_locked(pr);
773 }
774
775 static void
776 prison_complete(void *context, int pending)
777 {
778 struct prison_service *psrv;
779 struct prison *pr;
780 int vfslocked;
781
782 pr = (struct prison *)context;
783
784 sx_xlock(&allprison_lock);
785 LIST_REMOVE(pr, pr_list);
786 prisoncount--;
787 sx_downgrade(&allprison_lock);
788 TAILQ_FOREACH(psrv, &prison_services, ps_next) {
789 psrv->ps_destroy(psrv, pr);
790 }
791 sx_sunlock(&allprison_lock);
792
793 cpuset_rel(pr->pr_cpuset);
794
795 if (pr->pr_slots != NULL)
796 free(pr->pr_slots, M_PRISON);
797
798 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
799 vrele(pr->pr_root);
800 VFS_UNLOCK_GIANT(vfslocked);
801
802 mtx_destroy(&pr->pr_mtx);
803 free(pr->pr_linux, M_PRISON);
804 #ifdef INET6
805 free(pr->pr_ip6, M_PRISON);
806 #endif
807 #ifdef INET
808 free(pr->pr_ip4, M_PRISON);
809 #endif
810 free(pr, M_PRISON);
811 }
812
813 void
814 prison_hold_locked(struct prison *pr)
815 {
816
817 mtx_assert(&pr->pr_mtx, MA_OWNED);
818 KASSERT(pr->pr_ref > 0,
819 ("Trying to hold dead prison (id=%d).", pr->pr_id));
820 pr->pr_ref++;
821 }
822
823 void
824 prison_hold(struct prison *pr)
825 {
826
827 mtx_lock(&pr->pr_mtx);
828 prison_hold_locked(pr);
829 mtx_unlock(&pr->pr_mtx);
830 }
831
832 void
833 prison_proc_hold(struct prison *pr)
834 {
835
836 mtx_lock(&pr->pr_mtx);
837 KASSERT(pr->pr_state == PRISON_STATE_ALIVE,
838 ("Cannot add a process to a non-alive prison (id=%d).", pr->pr_id));
839 pr->pr_nprocs++;
840 mtx_unlock(&pr->pr_mtx);
841 }
842
843 void
844 prison_proc_free(struct prison *pr)
845 {
846
847 mtx_lock(&pr->pr_mtx);
848 KASSERT(pr->pr_state == PRISON_STATE_ALIVE && pr->pr_nprocs > 0,
849 ("Trying to kill a process in a dead prison (id=%d).", pr->pr_id));
850 pr->pr_nprocs--;
851 if (pr->pr_nprocs == 0)
852 pr->pr_state = PRISON_STATE_DYING;
853 mtx_unlock(&pr->pr_mtx);
854 }
855
856
857 #ifdef INET
858 /*
859 * Pass back primary IPv4 address of this jail.
860 *
861 * If not jailed return success but do not alter the address. Caller has to
862 * make sure to intialize it correctly (e.g. INADDR_ANY).
863 *
864 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4.
865 * Address returned in NBO.
866 */
867 int
868 prison_get_ip4(struct ucred *cred, struct in_addr *ia)
869 {
870
871 KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
872 KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
873
874 if (!jailed(cred))
875 /* Do not change address passed in. */
876 return (0);
877
878 if (cred->cr_prison->pr_ip4 == NULL)
879 return (EAFNOSUPPORT);
880
881 ia->s_addr = cred->cr_prison->pr_ip4[0].s_addr;
882 return (0);
883 }
884
885 /*
886 * Return 1 if we should do proper source address selection or are not jailed.
887 * We will return 0 if we should bypass source address selection in favour
888 * of the primary jail IPv4 address. Only in this case *ia will be updated and
889 * returned in NBO.
890 * Return EAFNOSUPPORT, in case this jail does not allow IPv4.
891 */
892 int
893 prison_saddrsel_ip4(struct ucred *cred, struct in_addr *ia)
894 {
895 struct in_addr lia;
896 int error;
897
898 KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
899 KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
900
901 if (!jailed(cred))
902 return (1);
903
904 if (jail_ip4_saddrsel != 0)
905 return (1);
906
907 lia.s_addr = INADDR_ANY;
908 error = prison_get_ip4(cred, &lia);
909 if (error)
910 return (error);
911 if (lia.s_addr == INADDR_ANY)
912 return (1);
913
914 ia->s_addr = lia.s_addr;
915 return (0);
916 }
917
918 /*
919 * Make sure our (source) address is set to something meaningful to this
920 * jail.
921 *
922 * Returns 0 if not jailed or if address belongs to jail, EADDRNOTAVAIL if
923 * the address doesn't belong, or EAFNOSUPPORT if the jail doesn't allow IPv4.
924 * Address passed in in NBO and returned in NBO.
925 */
926 int
927 prison_local_ip4(struct ucred *cred, struct in_addr *ia)
928 {
929 struct in_addr ia0;
930
931 KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
932 KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
933
934 if (!jailed(cred))
935 return (0);
936 if (cred->cr_prison->pr_ip4 == NULL)
937 return (EAFNOSUPPORT);
938
939 ia0.s_addr = ntohl(ia->s_addr);
940 if (ia0.s_addr == INADDR_LOOPBACK) {
941 ia->s_addr = cred->cr_prison->pr_ip4[0].s_addr;
942 return (0);
943 }
944
945 if (ia0.s_addr == INADDR_ANY) {
946 /*
947 * In case there is only 1 IPv4 address, bind directly.
948 */
949 if (cred->cr_prison->pr_ip4s == 1)
950 ia->s_addr = cred->cr_prison->pr_ip4[0].s_addr;
951 return (0);
952 }
953
954 return (_prison_check_ip4(cred->cr_prison, ia));
955 }
956
957 /*
958 * Rewrite destination address in case we will connect to loopback address.
959 *
960 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4.
961 * Address passed in in NBO and returned in NBO.
962 */
963 int
964 prison_remote_ip4(struct ucred *cred, struct in_addr *ia)
965 {
966
967 KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
968 KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
969
970 if (!jailed(cred))
971 return (0);
972 if (cred->cr_prison->pr_ip4 == NULL)
973 return (EAFNOSUPPORT);
974
975 if (ntohl(ia->s_addr) == INADDR_LOOPBACK) {
976 ia->s_addr = cred->cr_prison->pr_ip4[0].s_addr;
977 return (0);
978 }
979
980 /*
981 * Return success because nothing had to be changed.
982 */
983 return (0);
984 }
985
986 /*
987 * Check if given address belongs to the jail referenced by cred/prison.
988 *
989 * Returns 0 if not jailed or if address belongs to jail, EADDRNOTAVAIL if
990 * the address doesn't belong, or EAFNOSUPPORT if the jail doesn't allow IPv4.
991 * Address passed in in NBO.
992 */
993 static int
994 _prison_check_ip4(struct prison *pr, struct in_addr *ia)
995 {
996 int i, a, z, d;
997
998 /*
999 * Check the primary IP.
1000 */
1001 if (pr->pr_ip4[0].s_addr == ia->s_addr)
1002 return (0);
1003
1004 /*
1005 * All the other IPs are sorted so we can do a binary search.
1006 */
1007 a = 0;
1008 z = pr->pr_ip4s - 2;
1009 while (a <= z) {
1010 i = (a + z) / 2;
1011 d = qcmp_v4(&pr->pr_ip4[i+1], ia);
1012 if (d > 0)
1013 z = i - 1;
1014 else if (d < 0)
1015 a = i + 1;
1016 else
1017 return (0);
1018 }
1019
1020 return (EADDRNOTAVAIL);
1021 }
1022
1023 int
1024 prison_check_ip4(struct ucred *cred, struct in_addr *ia)
1025 {
1026
1027 KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
1028 KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
1029
1030 if (!jailed(cred))
1031 return (0);
1032 if (cred->cr_prison->pr_ip4 == NULL)
1033 return (EAFNOSUPPORT);
1034
1035 return (_prison_check_ip4(cred->cr_prison, ia));
1036 }
1037 #endif
1038
1039 #ifdef INET6
1040 /*
1041 * Pass back primary IPv6 address for this jail.
1042 *
1043 * If not jailed return success but do not alter the address. Caller has to
1044 * make sure to intialize it correctly (e.g. IN6ADDR_ANY_INIT).
1045 *
1046 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv6.
1047 */
1048 int
1049 prison_get_ip6(struct ucred *cred, struct in6_addr *ia6)
1050 {
1051
1052 KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
1053 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
1054
1055 if (!jailed(cred))
1056 return (0);
1057 if (cred->cr_prison->pr_ip6 == NULL)
1058 return (EAFNOSUPPORT);
1059
1060 bcopy(&cred->cr_prison->pr_ip6[0], ia6, sizeof(struct in6_addr));
1061 return (0);
1062 }
1063
1064 /*
1065 * Return 1 if we should do proper source address selection or are not jailed.
1066 * We will return 0 if we should bypass source address selection in favour
1067 * of the primary jail IPv6 address. Only in this case *ia will be updated and
1068 * returned in NBO.
1069 * Return EAFNOSUPPORT, in case this jail does not allow IPv6.
1070 */
1071 int
1072 prison_saddrsel_ip6(struct ucred *cred, struct in6_addr *ia6)
1073 {
1074 struct in6_addr lia6;
1075 int error;
1076
1077 KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
1078 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
1079
1080 if (!jailed(cred))
1081 return (1);
1082
1083 if (jail_ip6_saddrsel != 0)
1084 return (1);
1085
1086 lia6 = in6addr_any;
1087 error = prison_get_ip6(cred, &lia6);
1088 if (error)
1089 return (error);
1090 if (IN6_IS_ADDR_UNSPECIFIED(&lia6))
1091 return (1);
1092
1093 bcopy(&lia6, ia6, sizeof(struct in6_addr));
1094 return (0);
1095 }
1096
1097 /*
1098 * Make sure our (source) address is set to something meaningful to this jail.
1099 *
1100 * v6only should be set based on (inp->inp_flags & IN6P_IPV6_V6ONLY != 0)
1101 * when needed while binding.
1102 *
1103 * Returns 0 if not jailed or if address belongs to jail, EADDRNOTAVAIL if
1104 * the address doesn't belong, or EAFNOSUPPORT if the jail doesn't allow IPv6.
1105 */
1106 int
1107 prison_local_ip6(struct ucred *cred, struct in6_addr *ia6, int v6only)
1108 {
1109
1110 KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
1111 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
1112
1113 if (!jailed(cred))
1114 return (0);
1115 if (cred->cr_prison->pr_ip6 == NULL)
1116 return (EAFNOSUPPORT);
1117
1118 if (IN6_IS_ADDR_LOOPBACK(ia6)) {
1119 bcopy(&cred->cr_prison->pr_ip6[0], ia6,
1120 sizeof(struct in6_addr));
1121 return (0);
1122 }
1123
1124 if (IN6_IS_ADDR_UNSPECIFIED(ia6)) {
1125 /*
1126 * In case there is only 1 IPv6 address, and v6only is true,
1127 * then bind directly.
1128 */
1129 if (v6only != 0 && cred->cr_prison->pr_ip6s == 1)
1130 bcopy(&cred->cr_prison->pr_ip6[0], ia6,
1131 sizeof(struct in6_addr));
1132 return (0);
1133 }
1134
1135 return (_prison_check_ip6(cred->cr_prison, ia6));
1136 }
1137
1138 /*
1139 * Rewrite destination address in case we will connect to loopback address.
1140 *
1141 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv6.
1142 */
1143 int
1144 prison_remote_ip6(struct ucred *cred, struct in6_addr *ia6)
1145 {
1146
1147 KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
1148 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
1149
1150 if (!jailed(cred))
1151 return (0);
1152 if (cred->cr_prison->pr_ip6 == NULL)
1153 return (EAFNOSUPPORT);
1154
1155 if (IN6_IS_ADDR_LOOPBACK(ia6)) {
1156 bcopy(&cred->cr_prison->pr_ip6[0], ia6,
1157 sizeof(struct in6_addr));
1158 return (0);
1159 }
1160
1161 /*
1162 * Return success because nothing had to be changed.
1163 */
1164 return (0);
1165 }
1166
1167 /*
1168 * Check if given address belongs to the jail referenced by cred/prison.
1169 *
1170 * Returns 0 if not jailed or if address belongs to jail, EADDRNOTAVAIL if
1171 * the address doesn't belong, or EAFNOSUPPORT if the jail doesn't allow IPv6.
1172 */
1173 static int
1174 _prison_check_ip6(struct prison *pr, struct in6_addr *ia6)
1175 {
1176 int i, a, z, d;
1177
1178 /*
1179 * Check the primary IP.
1180 */
1181 if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[0], ia6))
1182 return (0);
1183
1184 /*
1185 * All the other IPs are sorted so we can do a binary search.
1186 */
1187 a = 0;
1188 z = pr->pr_ip6s - 2;
1189 while (a <= z) {
1190 i = (a + z) / 2;
1191 d = qcmp_v6(&pr->pr_ip6[i+1], ia6);
1192 if (d > 0)
1193 z = i - 1;
1194 else if (d < 0)
1195 a = i + 1;
1196 else
1197 return (0);
1198 }
1199
1200 return (EADDRNOTAVAIL);
1201 }
1202
1203 int
1204 prison_check_ip6(struct ucred *cred, struct in6_addr *ia6)
1205 {
1206
1207 KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
1208 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
1209
1210 if (!jailed(cred))
1211 return (0);
1212 if (cred->cr_prison->pr_ip6 == NULL)
1213 return (EAFNOSUPPORT);
1214
1215 return (_prison_check_ip6(cred->cr_prison, ia6));
1216 }
1217 #endif
1218
1219 /*
1220 * Check if a jail supports the given address family.
1221 *
1222 * Returns 0 if not jailed or the address family is supported, EAFNOSUPPORT
1223 * if not.
1224 */
1225 int
1226 prison_check_af(struct ucred *cred, int af)
1227 {
1228 int error;
1229
1230 KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
1231
1232
1233 if (!jailed(cred))
1234 return (0);
1235
1236 error = 0;
1237 switch (af)
1238 {
1239 #ifdef INET
1240 case AF_INET:
1241 if (cred->cr_prison->pr_ip4 == NULL)
1242 error = EAFNOSUPPORT;
1243 break;
1244 #endif
1245 #ifdef INET6
1246 case AF_INET6:
1247 if (cred->cr_prison->pr_ip6 == NULL)
1248 error = EAFNOSUPPORT;
1249 break;
1250 #endif
1251 case AF_LOCAL:
1252 case AF_ROUTE:
1253 break;
1254 default:
1255 if (jail_socket_unixiproute_only)
1256 error = EAFNOSUPPORT;
1257 }
1258 return (error);
1259 }
1260
1261 /*
1262 * Check if given address belongs to the jail referenced by cred (wrapper to
1263 * prison_check_ip[46]).
1264 *
1265 * Returns 0 if not jailed or if address belongs to jail, EADDRNOTAVAIL if
1266 * the address doesn't belong, or EAFNOSUPPORT if the jail doesn't allow
1267 * the address family. IPv4 Address passed in in NBO.
1268 */
1269 int
1270 prison_if(struct ucred *cred, struct sockaddr *sa)
1271 {
1272 #ifdef INET
1273 struct sockaddr_in *sai;
1274 #endif
1275 #ifdef INET6
1276 struct sockaddr_in6 *sai6;
1277 #endif
1278 int error;
1279
1280 KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
1281 KASSERT(sa != NULL, ("%s: sa is NULL", __func__));
1282
1283 error = 0;
1284 switch (sa->sa_family)
1285 {
1286 #ifdef INET
1287 case AF_INET:
1288 sai = (struct sockaddr_in *)sa;
1289 error = prison_check_ip4(cred, &sai->sin_addr);
1290 break;
1291 #endif
1292 #ifdef INET6
1293 case AF_INET6:
1294 sai6 = (struct sockaddr_in6 *)sa;
1295 error = prison_check_ip6(cred, &sai6->sin6_addr);
1296 break;
1297 #endif
1298 default:
1299 if (jailed(cred) && jail_socket_unixiproute_only)
1300 error = EAFNOSUPPORT;
1301 }
1302 return (error);
1303 }
1304
1305 /*
1306 * Return 0 if jails permit p1 to frob p2, otherwise ESRCH.
1307 */
1308 int
1309 prison_check(struct ucred *cred1, struct ucred *cred2)
1310 {
1311
1312 if (jailed(cred1)) {
1313 if (!jailed(cred2))
1314 return (ESRCH);
1315 if (cred2->cr_prison != cred1->cr_prison)
1316 return (ESRCH);
1317 }
1318
1319 return (0);
1320 }
1321
1322 /*
1323 * Return 1 if the passed credential is in a jail, otherwise 0.
1324 */
1325 int
1326 jailed(struct ucred *cred)
1327 {
1328
1329 return (cred->cr_prison != NULL);
1330 }
1331
1332 /*
1333 * Return the correct hostname for the passed credential.
1334 */
1335 void
1336 getcredhostname(struct ucred *cred, char *buf, size_t size)
1337 {
1338
1339 if (jailed(cred)) {
1340 mtx_lock(&cred->cr_prison->pr_mtx);
1341 strlcpy(buf, cred->cr_prison->pr_host, size);
1342 mtx_unlock(&cred->cr_prison->pr_mtx);
1343 } else
1344 strlcpy(buf, hostname, size);
1345 }
1346
1347 /*
1348 * Determine whether the subject represented by cred can "see"
1349 * status of a mount point.
1350 * Returns: 0 for permitted, ENOENT otherwise.
1351 * XXX: This function should be called cr_canseemount() and should be
1352 * placed in kern_prot.c.
1353 */
1354 int
1355 prison_canseemount(struct ucred *cred, struct mount *mp)
1356 {
1357 struct prison *pr;
1358 struct statfs *sp;
1359 size_t len;
1360
1361 if (!jailed(cred) || jail_enforce_statfs == 0)
1362 return (0);
1363 pr = cred->cr_prison;
1364 if (pr->pr_root->v_mount == mp)
1365 return (0);
1366 if (jail_enforce_statfs == 2)
1367 return (ENOENT);
1368 /*
1369 * If jail's chroot directory is set to "/" we should be able to see
1370 * all mount-points from inside a jail.
1371 * This is ugly check, but this is the only situation when jail's
1372 * directory ends with '/'.
1373 */
1374 if (strcmp(pr->pr_path, "/") == 0)
1375 return (0);
1376 len = strlen(pr->pr_path);
1377 sp = &mp->mnt_stat;
1378 if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0)
1379 return (ENOENT);
1380 /*
1381 * Be sure that we don't have situation where jail's root directory
1382 * is "/some/path" and mount point is "/some/pathpath".
1383 */
1384 if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/')
1385 return (ENOENT);
1386 return (0);
1387 }
1388
1389 void
1390 prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp)
1391 {
1392 char jpath[MAXPATHLEN];
1393 struct prison *pr;
1394 size_t len;
1395
1396 if (!jailed(cred) || jail_enforce_statfs == 0)
1397 return;
1398 pr = cred->cr_prison;
1399 if (prison_canseemount(cred, mp) != 0) {
1400 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
1401 strlcpy(sp->f_mntonname, "[restricted]",
1402 sizeof(sp->f_mntonname));
1403 return;
1404 }
1405 if (pr->pr_root->v_mount == mp) {
1406 /*
1407 * Clear current buffer data, so we are sure nothing from
1408 * the valid path left there.
1409 */
1410 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
1411 *sp->f_mntonname = '/';
1412 return;
1413 }
1414 /*
1415 * If jail's chroot directory is set to "/" we should be able to see
1416 * all mount-points from inside a jail.
1417 */
1418 if (strcmp(pr->pr_path, "/") == 0)
1419 return;
1420 len = strlen(pr->pr_path);
1421 strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath));
1422 /*
1423 * Clear current buffer data, so we are sure nothing from
1424 * the valid path left there.
1425 */
1426 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
1427 if (*jpath == '\0') {
1428 /* Should never happen. */
1429 *sp->f_mntonname = '/';
1430 } else {
1431 strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname));
1432 }
1433 }
1434
1435 /*
1436 * Check with permission for a specific privilege is granted within jail. We
1437 * have a specific list of accepted privileges; the rest are denied.
1438 */
1439 int
1440 prison_priv_check(struct ucred *cred, int priv)
1441 {
1442
1443 if (!jailed(cred))
1444 return (0);
1445
1446 switch (priv) {
1447
1448 /*
1449 * Allow ktrace privileges for root in jail.
1450 */
1451 case PRIV_KTRACE:
1452
1453 #if 0
1454 /*
1455 * Allow jailed processes to configure audit identity and
1456 * submit audit records (login, etc). In the future we may
1457 * want to further refine the relationship between audit and
1458 * jail.
1459 */
1460 case PRIV_AUDIT_GETAUDIT:
1461 case PRIV_AUDIT_SETAUDIT:
1462 case PRIV_AUDIT_SUBMIT:
1463 #endif
1464
1465 /*
1466 * Allow jailed processes to manipulate process UNIX
1467 * credentials in any way they see fit.
1468 */
1469 case PRIV_CRED_SETUID:
1470 case PRIV_CRED_SETEUID:
1471 case PRIV_CRED_SETGID:
1472 case PRIV_CRED_SETEGID:
1473 case PRIV_CRED_SETGROUPS:
1474 case PRIV_CRED_SETREUID:
1475 case PRIV_CRED_SETREGID:
1476 case PRIV_CRED_SETRESUID:
1477 case PRIV_CRED_SETRESGID:
1478
1479 /*
1480 * Jail implements visibility constraints already, so allow
1481 * jailed root to override uid/gid-based constraints.
1482 */
1483 case PRIV_SEEOTHERGIDS:
1484 case PRIV_SEEOTHERUIDS:
1485
1486 /*
1487 * Jail implements inter-process debugging limits already, so
1488 * allow jailed root various debugging privileges.
1489 */
1490 case PRIV_DEBUG_DIFFCRED:
1491 case PRIV_DEBUG_SUGID:
1492 case PRIV_DEBUG_UNPRIV:
1493
1494 /*
1495 * Allow jail to set various resource limits and login
1496 * properties, and for now, exceed process resource limits.
1497 */
1498 case PRIV_PROC_LIMIT:
1499 case PRIV_PROC_SETLOGIN:
1500 case PRIV_PROC_SETRLIMIT:
1501
1502 /*
1503 * System V and POSIX IPC privileges are granted in jail.
1504 */
1505 case PRIV_IPC_READ:
1506 case PRIV_IPC_WRITE:
1507 case PRIV_IPC_ADMIN:
1508 case PRIV_IPC_MSGSIZE:
1509 case PRIV_MQ_ADMIN:
1510
1511 /*
1512 * Jail implements its own inter-process limits, so allow
1513 * root processes in jail to change scheduling on other
1514 * processes in the same jail. Likewise for signalling.
1515 */
1516 case PRIV_SCHED_DIFFCRED:
1517 case PRIV_SCHED_CPUSET:
1518 case PRIV_SIGNAL_DIFFCRED:
1519 case PRIV_SIGNAL_SUGID:
1520
1521 /*
1522 * Allow jailed processes to write to sysctls marked as jail
1523 * writable.
1524 */
1525 case PRIV_SYSCTL_WRITEJAIL:
1526
1527 /*
1528 * Allow root in jail to manage a variety of quota
1529 * properties. These should likely be conditional on a
1530 * configuration option.
1531 */
1532 case PRIV_VFS_GETQUOTA:
1533 case PRIV_VFS_SETQUOTA:
1534
1535 /*
1536 * Since Jail relies on chroot() to implement file system
1537 * protections, grant many VFS privileges to root in jail.
1538 * Be careful to exclude mount-related and NFS-related
1539 * privileges.
1540 */
1541 case PRIV_VFS_READ:
1542 case PRIV_VFS_WRITE:
1543 case PRIV_VFS_ADMIN:
1544 case PRIV_VFS_EXEC:
1545 case PRIV_VFS_LOOKUP:
1546 case PRIV_VFS_BLOCKRESERVE: /* XXXRW: Slightly surprising. */
1547 case PRIV_VFS_CHFLAGS_DEV:
1548 case PRIV_VFS_CHOWN:
1549 case PRIV_VFS_CHROOT:
1550 case PRIV_VFS_RETAINSUGID:
1551 case PRIV_VFS_FCHROOT:
1552 case PRIV_VFS_LINK:
1553 case PRIV_VFS_SETGID:
1554 case PRIV_VFS_STICKYFILE:
1555 return (0);
1556
1557 /*
1558 * Depending on the global setting, allow privilege of
1559 * setting system flags.
1560 */
1561 case PRIV_VFS_SYSFLAGS:
1562 if (jail_chflags_allowed)
1563 return (0);
1564 else
1565 return (EPERM);
1566
1567 /*
1568 * Depending on the global setting, allow privilege of
1569 * mounting/unmounting file systems.
1570 */
1571 case PRIV_VFS_MOUNT:
1572 case PRIV_VFS_UNMOUNT:
1573 case PRIV_VFS_MOUNT_NONUSER:
1574 case PRIV_VFS_MOUNT_OWNER:
1575 if (jail_mount_allowed)
1576 return (0);
1577 else
1578 return (EPERM);
1579
1580 /*
1581 * Allow jailed root to bind reserved ports and reuse in-use
1582 * ports.
1583 */
1584 case PRIV_NETINET_RESERVEDPORT:
1585 case PRIV_NETINET_REUSEPORT:
1586 return (0);
1587
1588 /*
1589 * Allow jailed root to set certian IPv4/6 (option) headers.
1590 */
1591 case PRIV_NETINET_SETHDROPTS:
1592 return (0);
1593
1594 /*
1595 * Conditionally allow creating raw sockets in jail.
1596 */
1597 case PRIV_NETINET_RAW:
1598 if (jail_allow_raw_sockets)
1599 return (0);
1600 else
1601 return (EPERM);
1602
1603 /*
1604 * Since jail implements its own visibility limits on netstat
1605 * sysctls, allow getcred. This allows identd to work in
1606 * jail.
1607 */
1608 case PRIV_NETINET_GETCRED:
1609 return (0);
1610
1611 default:
1612 /*
1613 * In all remaining cases, deny the privilege request. This
1614 * includes almost all network privileges, many system
1615 * configuration privileges.
1616 */
1617 return (EPERM);
1618 }
1619 }
1620
1621 /*
1622 * Register jail service. Provides 'create' and 'destroy' methods.
1623 * 'create' method will be called for every existing jail and all
1624 * jails in the future as they beeing created.
1625 * 'destroy' method will be called for every jail going away and
1626 * for all existing jails at the time of service deregistration.
1627 */
1628 struct prison_service *
1629 prison_service_register(const char *name, prison_create_t create,
1630 prison_destroy_t destroy)
1631 {
1632 struct prison_service *psrv, *psrv2;
1633 struct prison *pr;
1634 int reallocate = 1, slotno = 0;
1635 void **slots, **oldslots;
1636
1637 psrv = malloc(sizeof(*psrv) + strlen(name) + 1, M_PRISON,
1638 M_WAITOK | M_ZERO);
1639 psrv->ps_create = create;
1640 psrv->ps_destroy = destroy;
1641 strcpy(psrv->ps_name, name);
1642 /*
1643 * Grab the allprison_lock here, so we won't miss any jail
1644 * creation/destruction.
1645 */
1646 sx_xlock(&allprison_lock);
1647 #ifdef INVARIANTS
1648 /*
1649 * Verify if service is not already registered.
1650 */
1651 TAILQ_FOREACH(psrv2, &prison_services, ps_next) {
1652 KASSERT(strcmp(psrv2->ps_name, name) != 0,
1653 ("jail service %s already registered", name));
1654 }
1655 #endif
1656 /*
1657 * Find free slot. When there is no existing free slot available,
1658 * allocate one at the end.
1659 */
1660 TAILQ_FOREACH(psrv2, &prison_services, ps_next) {
1661 if (psrv2->ps_slotno != slotno) {
1662 KASSERT(slotno < psrv2->ps_slotno,
1663 ("Invalid slotno (slotno=%d >= ps_slotno=%d",
1664 slotno, psrv2->ps_slotno));
1665 /* We found free slot. */
1666 reallocate = 0;
1667 break;
1668 }
1669 slotno++;
1670 }
1671 psrv->ps_slotno = slotno;
1672 /*
1673 * Keep the list sorted by slot number.
1674 */
1675 if (psrv2 != NULL) {
1676 KASSERT(reallocate == 0, ("psrv2 != NULL && reallocate != 0"));
1677 TAILQ_INSERT_BEFORE(psrv2, psrv, ps_next);
1678 } else {
1679 KASSERT(reallocate == 1, ("psrv2 == NULL && reallocate == 0"));
1680 TAILQ_INSERT_TAIL(&prison_services, psrv, ps_next);
1681 }
1682 prison_service_slots++;
1683 sx_downgrade(&allprison_lock);
1684 /*
1685 * Allocate memory for new slot if we didn't found empty one.
1686 * Do not use realloc(9), because pr_slots is protected with a mutex,
1687 * so we can't sleep.
1688 */
1689 LIST_FOREACH(pr, &allprison, pr_list) {
1690 if (reallocate) {
1691 /* First allocate memory with M_WAITOK. */
1692 slots = malloc(sizeof(*slots) * prison_service_slots,
1693 M_PRISON, M_WAITOK);
1694 /* Now grab the mutex and replace pr_slots. */
1695 mtx_lock(&pr->pr_mtx);
1696 oldslots = pr->pr_slots;
1697 if (psrv->ps_slotno > 0) {
1698 bcopy(oldslots, slots,
1699 sizeof(*slots) * (prison_service_slots - 1));
1700 }
1701 slots[psrv->ps_slotno] = NULL;
1702 pr->pr_slots = slots;
1703 mtx_unlock(&pr->pr_mtx);
1704 if (oldslots != NULL)
1705 free(oldslots, M_PRISON);
1706 }
1707 /*
1708 * Call 'create' method for each existing jail.
1709 */
1710 psrv->ps_create(psrv, pr);
1711 }
1712 sx_sunlock(&allprison_lock);
1713
1714 return (psrv);
1715 }
1716
1717 void
1718 prison_service_deregister(struct prison_service *psrv)
1719 {
1720 struct prison *pr;
1721 void **slots, **oldslots;
1722 int last = 0;
1723
1724 sx_xlock(&allprison_lock);
1725 if (TAILQ_LAST(&prison_services, prison_services_head) == psrv)
1726 last = 1;
1727 TAILQ_REMOVE(&prison_services, psrv, ps_next);
1728 prison_service_slots--;
1729 sx_downgrade(&allprison_lock);
1730 LIST_FOREACH(pr, &allprison, pr_list) {
1731 /*
1732 * Call 'destroy' method for every currently existing jail.
1733 */
1734 psrv->ps_destroy(psrv, pr);
1735 /*
1736 * If this is the last slot, free the memory allocated for it.
1737 */
1738 if (last) {
1739 if (prison_service_slots == 0)
1740 slots = NULL;
1741 else {
1742 slots = malloc(sizeof(*slots) * prison_service_slots,
1743 M_PRISON, M_WAITOK);
1744 }
1745 mtx_lock(&pr->pr_mtx);
1746 oldslots = pr->pr_slots;
1747 /*
1748 * We require setting slot to NULL after freeing it,
1749 * this way we can check for memory leaks here.
1750 */
1751 KASSERT(oldslots[psrv->ps_slotno] == NULL,
1752 ("Slot %d (service %s, jailid=%d) still contains data?",
1753 psrv->ps_slotno, psrv->ps_name, pr->pr_id));
1754 if (psrv->ps_slotno > 0) {
1755 bcopy(oldslots, slots,
1756 sizeof(*slots) * prison_service_slots);
1757 }
1758 pr->pr_slots = slots;
1759 mtx_unlock(&pr->pr_mtx);
1760 KASSERT(oldslots != NULL, ("oldslots == NULL"));
1761 free(oldslots, M_PRISON);
1762 }
1763 }
1764 sx_sunlock(&allprison_lock);
1765 free(psrv, M_PRISON);
1766 }
1767
1768 /*
1769 * Function sets data for the given jail in slot assigned for the given
1770 * jail service.
1771 */
1772 void
1773 prison_service_data_set(struct prison_service *psrv, struct prison *pr,
1774 void *data)
1775 {
1776
1777 mtx_assert(&pr->pr_mtx, MA_OWNED);
1778 pr->pr_slots[psrv->ps_slotno] = data;
1779 }
1780
1781 /*
1782 * Function clears slots assigned for the given jail service in the given
1783 * prison structure and returns current slot data.
1784 */
1785 void *
1786 prison_service_data_del(struct prison_service *psrv, struct prison *pr)
1787 {
1788 void *data;
1789
1790 mtx_assert(&pr->pr_mtx, MA_OWNED);
1791 data = pr->pr_slots[psrv->ps_slotno];
1792 pr->pr_slots[psrv->ps_slotno] = NULL;
1793 return (data);
1794 }
1795
1796 /*
1797 * Function returns current data from the slot assigned to the given jail
1798 * service for the given jail.
1799 */
1800 void *
1801 prison_service_data_get(struct prison_service *psrv, struct prison *pr)
1802 {
1803
1804 mtx_assert(&pr->pr_mtx, MA_OWNED);
1805 return (pr->pr_slots[psrv->ps_slotno]);
1806 }
1807
1808 static int
1809 sysctl_jail_list(SYSCTL_HANDLER_ARGS)
1810 {
1811 struct xprison *xp, *sxp;
1812 struct prison *pr;
1813 char *p;
1814 size_t len;
1815 int count, error;
1816
1817 if (jailed(req->td->td_ucred))
1818 return (0);
1819
1820 sx_slock(&allprison_lock);
1821 if ((count = prisoncount) == 0) {
1822 sx_sunlock(&allprison_lock);
1823 return (0);
1824 }
1825
1826 len = sizeof(*xp) * count;
1827 LIST_FOREACH(pr, &allprison, pr_list) {
1828 #ifdef INET
1829 len += pr->pr_ip4s * sizeof(struct in_addr);
1830 #endif
1831 #ifdef INET6
1832 len += pr->pr_ip6s * sizeof(struct in6_addr);
1833 #endif
1834 }
1835
1836 sxp = xp = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
1837
1838 LIST_FOREACH(pr, &allprison, pr_list) {
1839 xp->pr_version = XPRISON_VERSION;
1840 xp->pr_id = pr->pr_id;
1841 xp->pr_state = pr->pr_state;
1842 xp->pr_cpusetid = pr->pr_cpuset->cs_id;
1843 strlcpy(xp->pr_path, pr->pr_path, sizeof(xp->pr_path));
1844 mtx_lock(&pr->pr_mtx);
1845 strlcpy(xp->pr_host, pr->pr_host, sizeof(xp->pr_host));
1846 strlcpy(xp->pr_name, pr->pr_name, sizeof(xp->pr_name));
1847 mtx_unlock(&pr->pr_mtx);
1848 #ifdef INET
1849 xp->pr_ip4s = pr->pr_ip4s;
1850 #endif
1851 #ifdef INET6
1852 xp->pr_ip6s = pr->pr_ip6s;
1853 #endif
1854 p = (char *)(xp + 1);
1855 #ifdef INET
1856 if (pr->pr_ip4s > 0) {
1857 bcopy(pr->pr_ip4, (struct in_addr *)p,
1858 pr->pr_ip4s * sizeof(struct in_addr));
1859 p += (pr->pr_ip4s * sizeof(struct in_addr));
1860 }
1861 #endif
1862 #ifdef INET6
1863 if (pr->pr_ip6s > 0) {
1864 bcopy(pr->pr_ip6, (struct in6_addr *)p,
1865 pr->pr_ip6s * sizeof(struct in6_addr));
1866 p += (pr->pr_ip6s * sizeof(struct in6_addr));
1867 }
1868 #endif
1869 xp = (struct xprison *)p;
1870 }
1871 sx_sunlock(&allprison_lock);
1872
1873 error = SYSCTL_OUT(req, sxp, len);
1874 free(sxp, M_TEMP);
1875 return (error);
1876 }
1877
1878 SYSCTL_OID(_security_jail, OID_AUTO, list,
1879 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
1880 sysctl_jail_list, "S", "List of active jails");
1881
1882 static int
1883 sysctl_jail_jailed(SYSCTL_HANDLER_ARGS)
1884 {
1885 int error, injail;
1886
1887 injail = jailed(req->td->td_ucred);
1888 error = SYSCTL_OUT(req, &injail, sizeof(injail));
1889
1890 return (error);
1891 }
1892 SYSCTL_PROC(_security_jail, OID_AUTO, jailed,
1893 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
1894 sysctl_jail_jailed, "I", "Process in jail?");
1895
#ifdef DDB
/*
 * DDB "show jails" command: print a header followed by one group of lines
 * per prison (counters, hostname and path, name and state, cpuset id and
 * all IP addresses).  The listing stops early once the pager asks to quit.
 */
DB_SHOW_COMMAND(jails, db_show_jails)
{
	struct prison *pr;
#ifdef INET6
	char ip6buf[INET6_ADDRSTRLEN];
#endif
	const char *state;
#if defined(INET) || defined(INET6)
	int i;
#endif

	db_printf(
	    " JID pr_ref pr_nprocs pr_ip4s pr_ip6s\n");
	db_printf(
	    " Hostname Path\n");
	db_printf(
	    " Name State\n");
	db_printf(
	    " Cpusetid\n");
	db_printf(
	    " IP Address(es)\n");

	LIST_FOREACH(pr, &allprison, pr_list) {
		db_printf("%6d %6d %9d %7d %7d\n",
		    pr->pr_id, pr->pr_ref, pr->pr_nprocs,
		    pr->pr_ip4s, pr->pr_ip6s);
		db_printf("%6s %-29.29s %.74s\n",
		    "", pr->pr_host, pr->pr_path);

		/* Only index the state table with a value it really has. */
		if (pr->pr_state >= 0 && pr->pr_state < (int)((sizeof(
		    prison_states) / sizeof(struct prison_state))))
			state = prison_states[pr->pr_state].state_name;
		else
			state = "(bogus)";
		db_printf("%6s %-29.29s %.74s\n",
		    "", pr->pr_name, state);
		db_printf("%6s %-6d\n",
		    "", pr->pr_cpuset->cs_id);
#ifdef INET
		for (i = 0; i < pr->pr_ip4s; i++)
			db_printf("%6s %s\n", "", inet_ntoa(pr->pr_ip4[i]));
#endif
#ifdef INET6
		for (i = 0; i < pr->pr_ip6s; i++) {
			db_printf("%6s %s\n",
			    "", ip6_sprintf(ip6buf, &pr->pr_ip6[i]));
		}
#endif /* INET6 */
		if (db_pager_quit)
			break;
	}
}
#endif /* DDB */
Cache object: 0c87045209605d41bd04965502b2d90e
|