1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 1999 Marcel Moolenaar
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31
32 #include <sys/param.h>
33 #include <sys/kernel.h>
34 #include <sys/sdt.h>
35 #include <sys/systm.h>
36 #include <sys/sysctl.h>
37 #include <sys/proc.h>
38 #include <sys/malloc.h>
39 #include <sys/mount.h>
40 #include <sys/jail.h>
41 #include <sys/lock.h>
42 #include <sys/sx.h>
43
44 #include <compat/linux/linux_mib.h>
45 #include <compat/linux/linux_misc.h>
46
/*
 * Per-prison Linux emulation identity.  One instance (lprison0) serves the
 * host; jails that have their own copy keep it in a jail OSD slot.
 */
struct linux_prison {
	char	pr_osname[LINUX_MAX_UTSNAME];	/* reported OS name */
	char	pr_osrelease[LINUX_MAX_UTSNAME];	/* reported release string */
	int	pr_oss_version;			/* reported OSS version */
	int	pr_osrel;	/* pr_osrelease packed by linux_map_osrel() */
};
53
/* Linux info used for prison0, and the values other prisons start from. */
static struct linux_prison lprison0 = {
	.pr_osname =		"Linux",
	.pr_osrelease =		LINUX_VERSION_STR,
	.pr_oss_version =	0x030600,
	.pr_osrel =		LINUX_VERSION_CODE
};

/* Jail OSD slot holding a prison's struct linux_prison, if it has one. */
static unsigned linux_osd_jail_slot;
62
/* Root of the compat.linux sysctl tree; the knobs below hang off it. */
SYSCTL_NODE(_compat, OID_AUTO, linux, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Linux mode");

/* compat.linux.debug */
int linux_debug = 3;
SYSCTL_INT(_compat_linux, OID_AUTO, debug, CTLFLAG_RWTUN,
    &linux_debug, 0, "Log warnings from linux(4); or 0 to disable");

/* compat.linux.default_openfiles */
int linux_default_openfiles = 1024;
SYSCTL_INT(_compat_linux, OID_AUTO, default_openfiles, CTLFLAG_RWTUN,
    &linux_default_openfiles, 0,
    "Default soft openfiles resource limit, or -1 for unlimited");

/* compat.linux.default_stacksize; the default is 8 MiB. */
int linux_default_stacksize = 8 * 1024 * 1024;
SYSCTL_INT(_compat_linux, OID_AUTO, default_stacksize, CTLFLAG_RWTUN,
    &linux_default_stacksize, 0,
    "Default soft stack size resource limit, or -1 for unlimited");

/* compat.linux.dummy_rlimits */
int linux_dummy_rlimits = 0;
SYSCTL_INT(_compat_linux, OID_AUTO, dummy_rlimits, CTLFLAG_RWTUN,
    &linux_dummy_rlimits, 0,
    "Return dummy values for unsupported Linux-specific rlimits");

/* compat.linux.ignore_ip_recverr */
int linux_ignore_ip_recverr = 1;
SYSCTL_INT(_compat_linux, OID_AUTO, ignore_ip_recverr, CTLFLAG_RWTUN,
    &linux_ignore_ip_recverr, 0, "Ignore enabling IP_RECVERR");

/* compat.linux.preserve_vstatus */
int linux_preserve_vstatus = 1;
SYSCTL_INT(_compat_linux, OID_AUTO, preserve_vstatus, CTLFLAG_RWTUN,
    &linux_preserve_vstatus, 0, "Preserve VSTATUS termios(4) flag");

/*
 * compat.linux.map_sched_prio; unlike the other knobs here it is declared
 * CTLFLAG_RDTUN rather than CTLFLAG_RWTUN.
 */
bool linux_map_sched_prio = true;
SYSCTL_BOOL(_compat_linux, OID_AUTO, map_sched_prio, CTLFLAG_RDTUN,
    &linux_map_sched_prio, 0, "Map scheduler priorities to Linux priorities "
    "(not POSIX compliant)");

/* compat.linux.use_emul_path */
int linux_use_emul_path = 1;
SYSCTL_INT(_compat_linux, OID_AUTO, use_emul_path, CTLFLAG_RWTUN,
    &linux_use_emul_path, 0, "Use linux.compat.emul_path");

/*
 * compat.linux.setid_allowed; file-local, read by the rest of the kernel
 * through linux_setid_allowed_query().
 */
static bool linux_setid_allowed = true;
SYSCTL_BOOL(_compat_linux, OID_AUTO, setid_allowed, CTLFLAG_RWTUN,
    &linux_setid_allowed, 0,
    "Allow setuid/setgid on execve of Linux binary");
106
107 int
108 linux_setid_allowed_query(struct thread *td __unused,
109 struct image_params *imgp __unused)
110 {
111 return (linux_setid_allowed);
112 }
113
/* Setters used by the sysctl handlers below; defined at the end of the file. */
static int linux_set_osname(struct thread *td, char *osname);
static int linux_set_osrelease(struct thread *td, char *osrelease);
static int linux_set_oss_version(struct thread *td, int oss_version);
117
118 static int
119 linux_sysctl_osname(SYSCTL_HANDLER_ARGS)
120 {
121 char osname[LINUX_MAX_UTSNAME];
122 int error;
123
124 linux_get_osname(req->td, osname);
125 error = sysctl_handle_string(oidp, osname, LINUX_MAX_UTSNAME, req);
126 if (error != 0 || req->newptr == NULL)
127 return (error);
128 error = linux_set_osname(req->td, osname);
129
130 return (error);
131 }
132
133 SYSCTL_PROC(_compat_linux, OID_AUTO, osname,
134 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
135 0, 0, linux_sysctl_osname, "A",
136 "Linux kernel OS name");
137
138 static int
139 linux_sysctl_osrelease(SYSCTL_HANDLER_ARGS)
140 {
141 char osrelease[LINUX_MAX_UTSNAME];
142 int error;
143
144 linux_get_osrelease(req->td, osrelease);
145 error = sysctl_handle_string(oidp, osrelease, LINUX_MAX_UTSNAME, req);
146 if (error != 0 || req->newptr == NULL)
147 return (error);
148 error = linux_set_osrelease(req->td, osrelease);
149
150 return (error);
151 }
152
153 SYSCTL_PROC(_compat_linux, OID_AUTO, osrelease,
154 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
155 0, 0, linux_sysctl_osrelease, "A",
156 "Linux kernel OS release");
157
158 static int
159 linux_sysctl_oss_version(SYSCTL_HANDLER_ARGS)
160 {
161 int oss_version;
162 int error;
163
164 oss_version = linux_get_oss_version(req->td);
165 error = sysctl_handle_int(oidp, &oss_version, 0, req);
166 if (error != 0 || req->newptr == NULL)
167 return (error);
168 error = linux_set_oss_version(req->td, oss_version);
169
170 return (error);
171 }
172
173 SYSCTL_PROC(_compat_linux, OID_AUTO, oss_version,
174 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
175 0, 0, linux_sysctl_oss_version, "I",
176 "Linux OSS version");
177
/*
 * Largest value accepted for one component of a release string.
 * NOTE(review): chosen so that packing three such components cannot overflow
 * an int, assuming LINUX_KERNVER(a, b, c) is (a << 16) + (b << 8) + c --
 * confirm against linux_mib.h.
 */
#define	LINUX_OSREL_PART_MAX	0x3fff

/*
 * Map the osrelease into integer.
 *
 * The string must have the form "major.minor.patch", optionally followed by
 * '-' and at least one more character.  The three numbers are packed with
 * LINUX_KERNVER().
 *
 * osrelease: NUL-terminated release string to parse.
 * osrel:     if not NULL, receives the packed version on success; it is not
 *            written on failure.
 *
 * Returns 0 on success.  Returns EINVAL if the string is malformed, if any
 * component is negative or exceeds LINUX_OSREL_PART_MAX, or if the packed
 * version is lower than 1.0.0.
 */
static int
linux_map_osrel(char *osrelease, int *osrel)
{
	char *sep, *eosrelease;
	long v0, v1, v2;
	int v;

	eosrelease = osrelease + strlen(osrelease);

	/* Major: digits, then a '.' that is not the last character. */
	v0 = strtol(osrelease, &sep, 10);
	if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.')
		return (EINVAL);

	/* Minor: same rule as major. */
	osrelease = sep + 1;
	v1 = strtol(osrelease, &sep, 10);
	if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.')
		return (EINVAL);

	/* Patch: may end the string or be followed by "-<suffix>". */
	osrelease = sep + 1;
	v2 = strtol(osrelease, &sep, 10);
	if (osrelease == sep ||
	    (sep != eosrelease && (sep + 1 >= eosrelease || *sep != '-')))
		return (EINVAL);

	/*
	 * strtol() accepts a sign and returns a long.  Refuse components that
	 * are negative or too large before narrowing them to int, so that
	 * the packing below never shifts a negative value or overflows.
	 */
	if (v0 < 0 || v0 > LINUX_OSREL_PART_MAX ||
	    v1 < 0 || v1 > LINUX_OSREL_PART_MAX ||
	    v2 < 0 || v2 > LINUX_OSREL_PART_MAX)
		return (EINVAL);

	v = LINUX_KERNVER((int)v0, (int)v1, (int)v2);
	if (v < LINUX_KERNVER(1, 0, 0))
		return (EINVAL);

	if (osrel != NULL)
		*osrel = v;

	return (0);
}
211
212 /*
213 * Find a prison with Linux info.
214 * Return the Linux info and the (locked) prison.
215 */
216 static struct linux_prison *
217 linux_find_prison(struct prison *spr, struct prison **prp)
218 {
219 struct prison *pr;
220 struct linux_prison *lpr;
221
222 for (pr = spr;; pr = pr->pr_parent) {
223 mtx_lock(&pr->pr_mtx);
224 lpr = (pr == &prison0)
225 ? &lprison0
226 : osd_jail_get(pr, linux_osd_jail_slot);
227 if (lpr != NULL)
228 break;
229 mtx_unlock(&pr->pr_mtx);
230 }
231 *prp = pr;
232
233 return (lpr);
234 }
235
/*
 * Ensure a prison has its own Linux info.  If lprp is non-null, point it to
 * the Linux info and lock the prison.
 *
 * pr:   prison that should own a private struct linux_prison.
 * lprp: if non-null, receives the prison's Linux info and pr->pr_mtx is left
 *       held for the caller to release; if null, pr->pr_mtx is released
 *       before returning.
 *
 * A newly created record starts as a copy of the nearest ancestor's info.
 */
static void
linux_alloc_prison(struct prison *pr, struct linux_prison **lprp)
{
	struct prison *ppr;
	struct linux_prison *lpr, *nlpr;
	void **rsv;

	/* If this prison already has Linux info, return that. */
	lpr = linux_find_prison(pr, &ppr);
	if (ppr == pr)
		goto done;
	/*
	 * Allocate a new info record.  Then check again, in case something
	 * changed during the allocation.
	 */
	/* The ancestor's mutex is dropped before the M_WAITOK allocation. */
	mtx_unlock(&ppr->pr_mtx);
	nlpr = malloc(sizeof(struct linux_prison), M_PRISON, M_WAITOK);
	rsv = osd_reserve(linux_osd_jail_slot);
	lpr = linux_find_prison(pr, &ppr);
	if (ppr == pr) {
		/* The prison gained its own info meanwhile; discard ours. */
		free(nlpr, M_PRISON);
		osd_free_reserved(rsv);
		goto done;
	}
	/* Inherit the initial values from the ancestor. */
	/* ppr (an ancestor) is still locked here; take pr's lock as well. */
	mtx_lock(&pr->pr_mtx);
	(void)osd_jail_set_reserved(pr, linux_osd_jail_slot, rsv, nlpr);
	bcopy(lpr, nlpr, sizeof(*lpr));
	lpr = nlpr;
	mtx_unlock(&ppr->pr_mtx);
 done:
	/* On every path reaching this label pr->pr_mtx is held. */
	if (lprp != NULL)
		*lprp = lpr;
	else
		mtx_unlock(&pr->pr_mtx);
}
276
277 /*
278 * Jail OSD methods for Linux prison data.
279 */
280 static int
281 linux_prison_create(void *obj, void *data)
282 {
283 struct prison *pr = obj;
284 struct vfsoptlist *opts = data;
285 int jsys;
286
287 if (vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)) == 0 &&
288 jsys == JAIL_SYS_INHERIT)
289 return (0);
290 /*
291 * Inherit a prison's initial values from its parent
292 * (different from JAIL_SYS_INHERIT which also inherits changes).
293 */
294 linux_alloc_prison(pr, NULL);
295 return (0);
296 }
297
298 static int
299 linux_prison_check(void *obj __unused, void *data)
300 {
301 struct vfsoptlist *opts = data;
302 char *osname, *osrelease;
303 int error, jsys, len, oss_version;
304
305 /* Check that the parameters are correct. */
306 error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys));
307 if (error != ENOENT) {
308 if (error != 0)
309 return (error);
310 if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT)
311 return (EINVAL);
312 }
313 error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len);
314 if (error != ENOENT) {
315 if (error != 0)
316 return (error);
317 if (len == 0 || osname[len - 1] != '\0')
318 return (EINVAL);
319 if (len > LINUX_MAX_UTSNAME) {
320 vfs_opterror(opts, "linux.osname too long");
321 return (ENAMETOOLONG);
322 }
323 }
324 error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len);
325 if (error != ENOENT) {
326 if (error != 0)
327 return (error);
328 if (len == 0 || osrelease[len - 1] != '\0')
329 return (EINVAL);
330 if (len > LINUX_MAX_UTSNAME) {
331 vfs_opterror(opts, "linux.osrelease too long");
332 return (ENAMETOOLONG);
333 }
334 error = linux_map_osrel(osrelease, NULL);
335 if (error != 0) {
336 vfs_opterror(opts, "linux.osrelease format error");
337 return (error);
338 }
339 }
340 error = vfs_copyopt(opts, "linux.oss_version", &oss_version,
341 sizeof(oss_version));
342
343 if (error == ENOENT)
344 error = 0;
345 return (error);
346 }
347
348 static int
349 linux_prison_set(void *obj, void *data)
350 {
351 struct linux_prison *lpr;
352 struct prison *pr = obj;
353 struct vfsoptlist *opts = data;
354 char *osname, *osrelease;
355 int error, gotversion, jsys, len, oss_version;
356
357 /* Set the parameters, which should be correct. */
358 error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys));
359 if (error == ENOENT)
360 jsys = -1;
361 error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len);
362 if (error == ENOENT)
363 osname = NULL;
364 else
365 jsys = JAIL_SYS_NEW;
366 error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len);
367 if (error == ENOENT)
368 osrelease = NULL;
369 else
370 jsys = JAIL_SYS_NEW;
371 error = vfs_copyopt(opts, "linux.oss_version", &oss_version,
372 sizeof(oss_version));
373 if (error == ENOENT)
374 gotversion = 0;
375 else {
376 gotversion = 1;
377 jsys = JAIL_SYS_NEW;
378 }
379 switch (jsys) {
380 case JAIL_SYS_INHERIT:
381 /* "linux=inherit": inherit the parent's Linux info. */
382 mtx_lock(&pr->pr_mtx);
383 osd_jail_del(pr, linux_osd_jail_slot);
384 mtx_unlock(&pr->pr_mtx);
385 break;
386 case JAIL_SYS_NEW:
387 /*
388 * "linux=new" or "linux.*":
389 * the prison gets its own Linux info.
390 */
391 linux_alloc_prison(pr, &lpr);
392 if (osrelease) {
393 (void)linux_map_osrel(osrelease, &lpr->pr_osrel);
394 strlcpy(lpr->pr_osrelease, osrelease,
395 LINUX_MAX_UTSNAME);
396 }
397 if (osname)
398 strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME);
399 if (gotversion)
400 lpr->pr_oss_version = oss_version;
401 mtx_unlock(&pr->pr_mtx);
402 }
403
404 return (0);
405 }
406
/*
 * Jail parameter declarations for "linux", "linux.osname",
 * "linux.osrelease" and "linux.oss_version"; the OSD methods in this file
 * check, set and report them.
 */
SYSCTL_JAIL_PARAM_SYS_NODE(linux, CTLFLAG_RW, "Jail Linux parameters");
SYSCTL_JAIL_PARAM_STRING(_linux, osname, CTLFLAG_RW, LINUX_MAX_UTSNAME,
    "Jail Linux kernel OS name");
SYSCTL_JAIL_PARAM_STRING(_linux, osrelease, CTLFLAG_RW, LINUX_MAX_UTSNAME,
    "Jail Linux kernel OS release");
SYSCTL_JAIL_PARAM(_linux, oss_version, CTLTYPE_INT | CTLFLAG_RW,
    "I", "Jail Linux OSS version");
414
415 static int
416 linux_prison_get(void *obj, void *data)
417 {
418 struct linux_prison *lpr;
419 struct prison *ppr;
420 struct prison *pr = obj;
421 struct vfsoptlist *opts = data;
422 int error, i;
423
424 static int version0;
425
426 /* See if this prison is the one with the Linux info. */
427 lpr = linux_find_prison(pr, &ppr);
428 i = (ppr == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT;
429 error = vfs_setopt(opts, "linux", &i, sizeof(i));
430 if (error != 0 && error != ENOENT)
431 goto done;
432 if (i) {
433 error = vfs_setopts(opts, "linux.osname", lpr->pr_osname);
434 if (error != 0 && error != ENOENT)
435 goto done;
436 error = vfs_setopts(opts, "linux.osrelease", lpr->pr_osrelease);
437 if (error != 0 && error != ENOENT)
438 goto done;
439 error = vfs_setopt(opts, "linux.oss_version",
440 &lpr->pr_oss_version, sizeof(lpr->pr_oss_version));
441 if (error != 0 && error != ENOENT)
442 goto done;
443 } else {
444 /*
445 * If this prison is inheriting its Linux info, report
446 * empty/zero parameters.
447 */
448 error = vfs_setopts(opts, "linux.osname", "");
449 if (error != 0 && error != ENOENT)
450 goto done;
451 error = vfs_setopts(opts, "linux.osrelease", "");
452 if (error != 0 && error != ENOENT)
453 goto done;
454 error = vfs_setopt(opts, "linux.oss_version", &version0,
455 sizeof(lpr->pr_oss_version));
456 if (error != 0 && error != ENOENT)
457 goto done;
458 }
459 error = 0;
460
461 done:
462 mtx_unlock(&ppr->pr_mtx);
463
464 return (error);
465 }
466
467 static void
468 linux_prison_destructor(void *data)
469 {
470
471 free(data, M_PRISON);
472 }
473
474 void
475 linux_osd_jail_register(void)
476 {
477 struct prison *pr;
478 osd_method_t methods[PR_MAXMETHOD] = {
479 [PR_METHOD_CREATE] = linux_prison_create,
480 [PR_METHOD_GET] = linux_prison_get,
481 [PR_METHOD_SET] = linux_prison_set,
482 [PR_METHOD_CHECK] = linux_prison_check
483 };
484
485 linux_osd_jail_slot =
486 osd_jail_register(linux_prison_destructor, methods);
487 /* Copy the system Linux info to any current prisons. */
488 sx_slock(&allprison_lock);
489 TAILQ_FOREACH(pr, &allprison, pr_list)
490 linux_alloc_prison(pr, NULL);
491 sx_sunlock(&allprison_lock);
492 }
493
/*
 * Release the jail OSD slot obtained by linux_osd_jail_register().
 */
void
linux_osd_jail_deregister(void)
{

	osd_jail_deregister(linux_osd_jail_slot);
}
500
501 void
502 linux_get_osname(struct thread *td, char *dst)
503 {
504 struct prison *pr;
505 struct linux_prison *lpr;
506
507 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
508 bcopy(lpr->pr_osname, dst, LINUX_MAX_UTSNAME);
509 mtx_unlock(&pr->pr_mtx);
510 }
511
512 static int
513 linux_set_osname(struct thread *td, char *osname)
514 {
515 struct prison *pr;
516 struct linux_prison *lpr;
517
518 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
519 strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME);
520 mtx_unlock(&pr->pr_mtx);
521
522 return (0);
523 }
524
525 void
526 linux_get_osrelease(struct thread *td, char *dst)
527 {
528 struct prison *pr;
529 struct linux_prison *lpr;
530
531 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
532 bcopy(lpr->pr_osrelease, dst, LINUX_MAX_UTSNAME);
533 mtx_unlock(&pr->pr_mtx);
534 }
535
536 int
537 linux_kernver(struct thread *td)
538 {
539 struct prison *pr;
540 struct linux_prison *lpr;
541 int osrel;
542
543 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
544 osrel = lpr->pr_osrel;
545 mtx_unlock(&pr->pr_mtx);
546
547 return (osrel);
548 }
549
550 static int
551 linux_set_osrelease(struct thread *td, char *osrelease)
552 {
553 struct prison *pr;
554 struct linux_prison *lpr;
555 int error;
556
557 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
558 error = linux_map_osrel(osrelease, &lpr->pr_osrel);
559 if (error == 0)
560 strlcpy(lpr->pr_osrelease, osrelease, LINUX_MAX_UTSNAME);
561 mtx_unlock(&pr->pr_mtx);
562
563 return (error);
564 }
565
566 int
567 linux_get_oss_version(struct thread *td)
568 {
569 struct prison *pr;
570 struct linux_prison *lpr;
571 int version;
572
573 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
574 version = lpr->pr_oss_version;
575 mtx_unlock(&pr->pr_mtx);
576
577 return (version);
578 }
579
580 static int
581 linux_set_oss_version(struct thread *td, int oss_version)
582 {
583 struct prison *pr;
584 struct linux_prison *lpr;
585
586 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
587 lpr->pr_oss_version = oss_version;
588 mtx_unlock(&pr->pr_mtx);
589
590 return (0);
591 }
Cache object: 9090e0a2bffb32f4231e17532f6bf558
|