1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 2002 Doug Rabson
5 * Copyright (c) 1994-1995 Søren Schmidt
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer
13 * in this position and unchanged.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. The name of the author may not be used to endorse or promote products
18 * derived from this software without specific prior written permission
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD: releng/12.0/sys/compat/linux/linux_misc.c 336914 2018-07-30 15:46:40Z asomers $");
34
35 #include "opt_compat.h"
36
37 #include <sys/param.h>
38 #include <sys/blist.h>
39 #include <sys/fcntl.h>
40 #if defined(__i386__)
41 #include <sys/imgact_aout.h>
42 #endif
43 #include <sys/jail.h>
44 #include <sys/kernel.h>
45 #include <sys/limits.h>
46 #include <sys/lock.h>
47 #include <sys/malloc.h>
48 #include <sys/mman.h>
49 #include <sys/mount.h>
50 #include <sys/mutex.h>
51 #include <sys/namei.h>
52 #include <sys/priv.h>
53 #include <sys/proc.h>
54 #include <sys/reboot.h>
55 #include <sys/racct.h>
56 #include <sys/random.h>
57 #include <sys/resourcevar.h>
58 #include <sys/sched.h>
59 #include <sys/sdt.h>
60 #include <sys/signalvar.h>
61 #include <sys/stat.h>
62 #include <sys/syscallsubr.h>
63 #include <sys/sysctl.h>
64 #include <sys/sysproto.h>
65 #include <sys/systm.h>
66 #include <sys/time.h>
67 #include <sys/vmmeter.h>
68 #include <sys/vnode.h>
69 #include <sys/wait.h>
70 #include <sys/cpuset.h>
71 #include <sys/uio.h>
72
73 #include <security/mac/mac_framework.h>
74
75 #include <vm/vm.h>
76 #include <vm/pmap.h>
77 #include <vm/vm_kern.h>
78 #include <vm/vm_map.h>
79 #include <vm/vm_extern.h>
80 #include <vm/vm_object.h>
81 #include <vm/swap_pager.h>
82
83 #ifdef COMPAT_LINUX32
84 #include <machine/../linux32/linux.h>
85 #include <machine/../linux32/linux32_proto.h>
86 #else
87 #include <machine/../linux/linux.h>
88 #include <machine/../linux/linux_proto.h>
89 #endif
90
91 #include <compat/linux/linux_dtrace.h>
92 #include <compat/linux/linux_file.h>
93 #include <compat/linux/linux_mib.h>
94 #include <compat/linux/linux_signal.h>
95 #include <compat/linux/linux_timer.h>
96 #include <compat/linux/linux_util.h>
97 #include <compat/linux/linux_sysproto.h>
98 #include <compat/linux/linux_emul.h>
99 #include <compat/linux/linux_misc.h>
100
101 /**
102 * Special DTrace provider for the linuxulator.
103 *
104 * In this file we define the provider for the entire linuxulator. All
105 * modules (= files of the linuxulator) use it.
106 *
107 * We define a different name depending on the emulated bitsize, see
108 * ../../<ARCH>/linux{,32}/linux.h, e.g.:
109 * native bitsize = linuxulator
110 * amd64, 32bit emulation = linuxulator32
111 */
/* Define the single DTrace provider used by all linuxulator source files. */
LIN_SDT_PROVIDER_DEFINE(LINUX_DTRACE);

int stclohz;				/* Statistics clock frequency */

/*
 * Native RLIMIT_* identifiers, one slot per Linux resource limit
 * (LINUX_RLIM_NLIMITS entries).  Presumably indexed by the Linux resource
 * number; the consumer is not visible in this part of the file.
 */
static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = {
	RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
	RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
	RLIMIT_MEMLOCK, RLIMIT_AS
};
121
/*
 * Result structure filled in by linux_sysinfo() and copied out to the
 * caller verbatim.  The field order and the trailing padding define the
 * user-visible layout, so they must not be rearranged.
 */
struct l_sysinfo {
	l_long		uptime;		/* Seconds since boot */
	l_ulong		loads[3];	/* 1, 5, and 15 minute load averages */
/* Fixed-point scale applied to the values stored in loads[]. */
#define LINUX_SYSINFO_LOADS_SCALE 65536
	l_ulong		totalram;	/* Total usable main memory size */
	l_ulong		freeram;	/* Available memory size */
	l_ulong		sharedram;	/* Amount of shared memory */
	l_ulong		bufferram;	/* Memory used by buffers */
	l_ulong		totalswap;	/* Total swap space size */
	l_ulong		freeswap;	/* swap space still available */
	l_ushort	procs;		/* Number of current processes */
	l_ushort	pads;		/* Explicit padding, left zeroed */
	l_ulong		totalbig;	/* Always reported as 0 here */
	l_ulong		freebig;	/* Always reported as 0 here */
	l_uint		mem_unit;	/* Size in bytes of one memory unit */
	char		_f[20-2*sizeof(l_long)-sizeof(l_int)];	/* padding */
};
139
/*
 * Pointer/length pair; presumably the packed signal-set argument of the
 * Linux pselect6 system call (its user is outside the visible part of
 * this file -- confirm before relying on this description).
 */
struct l_pselect6arg {
	l_uintptr_t	ss;	/* user address stored as an integer */
	l_size_t	ss_len;	/* length in bytes of the object at ss */
};

/* Forward declaration; the definition appears further down in this file. */
static int	linux_utimensat_nsec_valid(l_long);
146
147
148 int
149 linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args)
150 {
151 struct l_sysinfo sysinfo;
152 vm_object_t object;
153 int i, j;
154 struct timespec ts;
155
156 bzero(&sysinfo, sizeof(sysinfo));
157 getnanouptime(&ts);
158 if (ts.tv_nsec != 0)
159 ts.tv_sec++;
160 sysinfo.uptime = ts.tv_sec;
161
162 /* Use the information from the mib to get our load averages */
163 for (i = 0; i < 3; i++)
164 sysinfo.loads[i] = averunnable.ldavg[i] *
165 LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale;
166
167 sysinfo.totalram = physmem * PAGE_SIZE;
168 sysinfo.freeram = sysinfo.totalram - vm_wire_count() * PAGE_SIZE;
169
170 sysinfo.sharedram = 0;
171 mtx_lock(&vm_object_list_mtx);
172 TAILQ_FOREACH(object, &vm_object_list, object_list)
173 if (object->shadow_count > 1)
174 sysinfo.sharedram += object->resident_page_count;
175 mtx_unlock(&vm_object_list_mtx);
176
177 sysinfo.sharedram *= PAGE_SIZE;
178 sysinfo.bufferram = 0;
179
180 swap_pager_status(&i, &j);
181 sysinfo.totalswap = i * PAGE_SIZE;
182 sysinfo.freeswap = (i - j) * PAGE_SIZE;
183
184 sysinfo.procs = nprocs;
185
186 /* The following are only present in newer Linux kernels. */
187 sysinfo.totalbig = 0;
188 sysinfo.freebig = 0;
189 sysinfo.mem_unit = 1;
190
191 return (copyout(&sysinfo, args->info, sizeof(sysinfo)));
192 }
193
194 #ifdef LINUX_LEGACY_SYSCALLS
195 int
196 linux_alarm(struct thread *td, struct linux_alarm_args *args)
197 {
198 struct itimerval it, old_it;
199 u_int secs;
200 int error;
201
202 #ifdef DEBUG
203 if (ldebug(alarm))
204 printf(ARGS(alarm, "%u"), args->secs);
205 #endif
206 secs = args->secs;
207 /*
208 * Linux alarm() is always successful. Limit secs to INT32_MAX / 2
209 * to match kern_setitimer()'s limit to avoid error from it.
210 *
211 * XXX. Linux limit secs to INT_MAX on 32 and does not limit on 64-bit
212 * platforms.
213 */
214 if (secs > INT32_MAX / 2)
215 secs = INT32_MAX / 2;
216
217 it.it_value.tv_sec = secs;
218 it.it_value.tv_usec = 0;
219 timevalclear(&it.it_interval);
220 error = kern_setitimer(td, ITIMER_REAL, &it, &old_it);
221 KASSERT(error == 0, ("kern_setitimer returns %d", error));
222
223 if ((old_it.it_value.tv_sec == 0 && old_it.it_value.tv_usec > 0) ||
224 old_it.it_value.tv_usec >= 500000)
225 old_it.it_value.tv_sec++;
226 td->td_retval[0] = old_it.it_value.tv_sec;
227 return (0);
228 }
229 #endif
230
231 int
232 linux_brk(struct thread *td, struct linux_brk_args *args)
233 {
234 struct vmspace *vm = td->td_proc->p_vmspace;
235 uintptr_t new, old;
236
237 #ifdef DEBUG
238 if (ldebug(brk))
239 printf(ARGS(brk, "%p"), (void *)(uintptr_t)args->dsend);
240 #endif
241 old = (uintptr_t)vm->vm_daddr + ctob(vm->vm_dsize);
242 new = (uintptr_t)args->dsend;
243 if ((caddr_t)new > vm->vm_daddr && !kern_break(td, &new))
244 td->td_retval[0] = (register_t)new;
245 else
246 td->td_retval[0] = (register_t)old;
247
248 return (0);
249 }
250
251 #if defined(__i386__)
252 /* XXX: what about amd64/linux32? */
253
/*
 * Linux uselib(2): map an a.out shared library named by args->library into
 * the calling process.
 *
 * The file is looked up, checked for write count, type, exec permission and
 * size, opened for reading, and its first page is mapped temporarily into
 * exec_map so the a.out header can be inspected.  Text+data are then placed
 * at trunc_page(a_entry) in the process map (read in for a non page aligned
 * file offset, mmap'ed otherwise) followed by an anonymous BSS region.
 *
 * Returns 0 on success or an errno value.
 *
 * NOTE(review): the cleanup path below only unlocks the vnode and frees the
 * temporary header mapping; no VOP_CLOSE() or vrele()/vput() is visible in
 * this function -- confirm that the open and the reference obtained from
 * namei() are released as intended.
 */
int
linux_uselib(struct thread *td, struct linux_uselib_args *args)
{
	struct nameidata ni;
	struct vnode *vp;
	struct exec *a_out;
	struct vattr attr;
	vm_offset_t vmaddr;
	unsigned long file_offset;
	unsigned long bss_size;
	char *library;
	ssize_t aresid;
	int error, locked, writecount;

	LCONVPATHEXIST(td, args->library, &library);

#ifdef DEBUG
	if (ldebug(uselib))
		printf(ARGS(uselib, "%s"), library);
#endif

	/* State inspected by the cleanup code at the end of the function. */
	a_out = NULL;
	locked = 0;
	vp = NULL;

	NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1,
	    UIO_SYSSPACE, library, td);
	error = namei(&ni);
	LFREEPATH(library);
	if (error)
		goto cleanup;

	vp = ni.ni_vp;
	NDFREE(&ni, NDF_ONLY_PNBUF);

	/*
	 * From here on down, we have a locked vnode that must be unlocked.
	 * XXX: The code below largely duplicates exec_check_permissions().
	 */
	locked = 1;

	/* Writable? */
	error = VOP_GET_WRITECOUNT(vp, &writecount);
	if (error != 0)
		goto cleanup;
	if (writecount != 0) {
		error = ETXTBSY;
		goto cleanup;
	}

	/* Executable? */
	error = VOP_GETATTR(vp, &attr, td->td_ucred);
	if (error)
		goto cleanup;

	if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
	    ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) {
		/* exec(2) would return EACCES here; this path uses ENOEXEC. */
		error = ENOEXEC;
		goto cleanup;
	}

	/* Sensible size? */
	if (attr.va_size == 0) {
		error = ENOEXEC;
		goto cleanup;
	}

	/* Can we access it? */
	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
	if (error)
		goto cleanup;

	/*
	 * XXX: This should use vn_open() so that it is properly authorized,
	 * and to reduce code redundancy all over the place here.
	 * XXX: Not really, it duplicates far more of exec_check_permissions()
	 * than vn_open().
	 */
#ifdef MAC
	error = mac_vnode_check_open(td->td_ucred, vp, VREAD);
	if (error)
		goto cleanup;
#endif
	error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL);
	if (error)
		goto cleanup;

	/* Pull in executable header into exec_map */
	error = vm_mmap(exec_map, (vm_offset_t *)&a_out, PAGE_SIZE,
	    VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0);
	if (error)
		goto cleanup;

	/* Is it a Linux binary ? */
	if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
		error = ENOEXEC;
		goto cleanup;
	}

	/*
	 * While we are here, we should REALLY do some more checks
	 */

	/* Set file/virtual offset based on a.out variant. */
	switch ((int)(a_out->a_magic & 0xffff)) {
	case 0413:			/* ZMAGIC */
		file_offset = 1024;
		break;
	case 0314:			/* QMAGIC */
		file_offset = 0;
		break;
	default:
		error = ENOEXEC;
		goto cleanup;
	}

	bss_size = round_page(a_out->a_bss);

	/* Check various fields in header for validity/bounds. */
	if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
		error = ENOEXEC;
		goto cleanup;
	}

	/* text + data can't exceed file size */
	if (a_out->a_data + a_out->a_text > attr.va_size) {
		error = EFAULT;
		goto cleanup;
	}

	/*
	 * text/data/bss must not exceed limits
	 * XXX - this is not complete. it should check current usage PLUS
	 * the resources needed by this library.
	 */
	PROC_LOCK(td->td_proc);
	if (a_out->a_text > maxtsiz ||
	    a_out->a_data + bss_size > lim_cur_proc(td->td_proc, RLIMIT_DATA) ||
	    racct_set(td->td_proc, RACCT_DATA, a_out->a_data +
	    bss_size) != 0) {
		PROC_UNLOCK(td->td_proc);
		error = ENOMEM;
		goto cleanup;
	}
	PROC_UNLOCK(td->td_proc);

	/*
	 * Prevent more writers.
	 * XXX: Note that if any of the VM operations fail below we don't
	 * clear this flag.
	 */
	VOP_SET_TEXT(vp);

	/*
	 * Lock no longer needed
	 */
	locked = 0;
	VOP_UNLOCK(vp, 0);

	/*
	 * Check if file_offset page aligned. Currently we cannot handle
	 * misaligned file offsets, and so we read in the entire image
	 * (what a waste).
	 */
	if (file_offset & PAGE_MASK) {
#ifdef DEBUG
		printf("uselib: Non page aligned binary %lu\n", file_offset);
#endif
		/* Map text+data read/write/execute */

		/* a_entry is the load address and is page aligned */
		vmaddr = trunc_page(a_out->a_entry);

		/* get anon user mapping, read+write+execute */
		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
		    &vmaddr, a_out->a_text + a_out->a_data, 0, VMFS_NO_SPACE,
		    VM_PROT_ALL, VM_PROT_ALL, 0);
		if (error)
			goto cleanup;

		/* Copy text+data from the file into the new user mapping. */
		error = vn_rdwr(UIO_READ, vp, (void *)vmaddr, file_offset,
		    a_out->a_text + a_out->a_data, UIO_USERSPACE, 0,
		    td->td_ucred, NOCRED, &aresid, td);
		if (error != 0)
			goto cleanup;
		/* A short read means the file is smaller than the header says. */
		if (aresid != 0) {
			error = ENOEXEC;
			goto cleanup;
		}
	} else {
#ifdef DEBUG
		printf("uselib: Page aligned binary %lu\n", file_offset);
#endif
		/*
		 * for QMAGIC, a_entry is 20 bytes beyond the load address
		 * to skip the executable header
		 */
		vmaddr = trunc_page(a_out->a_entry);

		/*
		 * Map it all into the process's space as a single
		 * copy-on-write "data" segment.
		 */
		error = vm_mmap(&td->td_proc->p_vmspace->vm_map, &vmaddr,
		    a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL,
		    MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset);
		if (error)
			goto cleanup;
	}
#ifdef DEBUG
	printf("mem=%08lx = %08lx %08lx\n", (long)vmaddr, ((long *)vmaddr)[0],
	    ((long *)vmaddr)[1]);
#endif
	if (bss_size != 0) {
		/* Calculate BSS start address */
		vmaddr = trunc_page(a_out->a_entry) + a_out->a_text +
		    a_out->a_data;

		/* allocate some 'anon' space */
		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
		    &vmaddr, bss_size, 0, VMFS_NO_SPACE, VM_PROT_ALL,
		    VM_PROT_ALL, 0);
		if (error)
			goto cleanup;
	}

cleanup:
	/* Unlock vnode if needed */
	if (locked)
		VOP_UNLOCK(vp, 0);

	/* Release the temporary mapping. */
	if (a_out)
		kmap_free_wakeup(exec_map, (vm_offset_t)a_out, PAGE_SIZE);

	return (error);
}
492
493 #endif /* __i386__ */
494
495 #ifdef LINUX_LEGACY_SYSCALLS
496 int
497 linux_select(struct thread *td, struct linux_select_args *args)
498 {
499 l_timeval ltv;
500 struct timeval tv0, tv1, utv, *tvp;
501 int error;
502
503 #ifdef DEBUG
504 if (ldebug(select))
505 printf(ARGS(select, "%d, %p, %p, %p, %p"), args->nfds,
506 (void *)args->readfds, (void *)args->writefds,
507 (void *)args->exceptfds, (void *)args->timeout);
508 #endif
509
510 /*
511 * Store current time for computation of the amount of
512 * time left.
513 */
514 if (args->timeout) {
515 if ((error = copyin(args->timeout, <v, sizeof(ltv))))
516 goto select_out;
517 utv.tv_sec = ltv.tv_sec;
518 utv.tv_usec = ltv.tv_usec;
519 #ifdef DEBUG
520 if (ldebug(select))
521 printf(LMSG("incoming timeout (%jd/%ld)"),
522 (intmax_t)utv.tv_sec, utv.tv_usec);
523 #endif
524
525 if (itimerfix(&utv)) {
526 /*
527 * The timeval was invalid. Convert it to something
528 * valid that will act as it does under Linux.
529 */
530 utv.tv_sec += utv.tv_usec / 1000000;
531 utv.tv_usec %= 1000000;
532 if (utv.tv_usec < 0) {
533 utv.tv_sec -= 1;
534 utv.tv_usec += 1000000;
535 }
536 if (utv.tv_sec < 0)
537 timevalclear(&utv);
538 }
539 microtime(&tv0);
540 tvp = &utv;
541 } else
542 tvp = NULL;
543
544 error = kern_select(td, args->nfds, args->readfds, args->writefds,
545 args->exceptfds, tvp, LINUX_NFDBITS);
546
547 #ifdef DEBUG
548 if (ldebug(select))
549 printf(LMSG("real select returns %d"), error);
550 #endif
551 if (error)
552 goto select_out;
553
554 if (args->timeout) {
555 if (td->td_retval[0]) {
556 /*
557 * Compute how much time was left of the timeout,
558 * by subtracting the current time and the time
559 * before we started the call, and subtracting
560 * that result from the user-supplied value.
561 */
562 microtime(&tv1);
563 timevalsub(&tv1, &tv0);
564 timevalsub(&utv, &tv1);
565 if (utv.tv_sec < 0)
566 timevalclear(&utv);
567 } else
568 timevalclear(&utv);
569 #ifdef DEBUG
570 if (ldebug(select))
571 printf(LMSG("outgoing timeout (%jd/%ld)"),
572 (intmax_t)utv.tv_sec, utv.tv_usec);
573 #endif
574 ltv.tv_sec = utv.tv_sec;
575 ltv.tv_usec = utv.tv_usec;
576 if ((error = copyout(<v, args->timeout, sizeof(ltv))))
577 goto select_out;
578 }
579
580 select_out:
581 #ifdef DEBUG
582 if (ldebug(select))
583 printf(LMSG("select_out -> %d"), error);
584 #endif
585 return (error);
586 }
587 #endif
588
589 int
590 linux_mremap(struct thread *td, struct linux_mremap_args *args)
591 {
592 uintptr_t addr;
593 size_t len;
594 int error = 0;
595
596 #ifdef DEBUG
597 if (ldebug(mremap))
598 printf(ARGS(mremap, "%p, %08lx, %08lx, %08lx"),
599 (void *)(uintptr_t)args->addr,
600 (unsigned long)args->old_len,
601 (unsigned long)args->new_len,
602 (unsigned long)args->flags);
603 #endif
604
605 if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) {
606 td->td_retval[0] = 0;
607 return (EINVAL);
608 }
609
610 /*
611 * Check for the page alignment.
612 * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK.
613 */
614 if (args->addr & PAGE_MASK) {
615 td->td_retval[0] = 0;
616 return (EINVAL);
617 }
618
619 args->new_len = round_page(args->new_len);
620 args->old_len = round_page(args->old_len);
621
622 if (args->new_len > args->old_len) {
623 td->td_retval[0] = 0;
624 return (ENOMEM);
625 }
626
627 if (args->new_len < args->old_len) {
628 addr = args->addr + args->new_len;
629 len = args->old_len - args->new_len;
630 error = kern_munmap(td, addr, len);
631 }
632
633 td->td_retval[0] = error ? 0 : (uintptr_t)args->addr;
634 return (error);
635 }
636
637 #define LINUX_MS_ASYNC 0x0001
638 #define LINUX_MS_INVALIDATE 0x0002
639 #define LINUX_MS_SYNC 0x0004
640
641 int
642 linux_msync(struct thread *td, struct linux_msync_args *args)
643 {
644
645 return (kern_msync(td, args->addr, args->len,
646 args->fl & ~LINUX_MS_SYNC));
647 }
648
649 #ifdef LINUX_LEGACY_SYSCALLS
650 int
651 linux_time(struct thread *td, struct linux_time_args *args)
652 {
653 struct timeval tv;
654 l_time_t tm;
655 int error;
656
657 #ifdef DEBUG
658 if (ldebug(time))
659 printf(ARGS(time, "*"));
660 #endif
661
662 microtime(&tv);
663 tm = tv.tv_sec;
664 if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm))))
665 return (error);
666 td->td_retval[0] = tm;
667 return (0);
668 }
669 #endif
670
/*
 * Buffer layout copied out by linux_times(); all values are expressed in
 * clock ticks as produced by CONVTCK().
 */
struct l_times_argv {
	l_clock_t	tms_utime;	/* user time of the process */
	l_clock_t	tms_stime;	/* system time of the process */
	l_clock_t	tms_cutime;	/* user time from calccru() */
	l_clock_t	tms_cstime;	/* system time from calccru() */
};
677
678
/*
 * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value.
 * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK
 * auxiliary vector entry.
 */
#define	CLK_TCK 100

/*
 * Convert a struct timeval (passed by value/lvalue, not by pointer) into
 * clock ticks.  CONVOTCK uses the fixed legacy CLK_TCK rate, CONVNTCK uses
 * the stclohz variable.  The argument is parenthesized so that any
 * expression of struct type (e.g. *ptr or arr[i]) expands correctly.
 */
#define	CONVOTCK(r)	((r).tv_sec * CLK_TCK + (r).tv_usec / (1000000 / CLK_TCK))
#define	CONVNTCK(r)	((r).tv_sec * stclohz + (r).tv_usec / (1000000 / stclohz))

/*
 * Pick the tick rate by emulated kernel version.  NOTE: this macro expects
 * a `struct thread *td' to be in scope at the point of use.
 */
#define	CONVTCK(r)	(linux_kernver(td) >= LINUX_KERNVER_2004000 ?	\
			    CONVNTCK(r) : CONVOTCK(r))
691
692 int
693 linux_times(struct thread *td, struct linux_times_args *args)
694 {
695 struct timeval tv, utime, stime, cutime, cstime;
696 struct l_times_argv tms;
697 struct proc *p;
698 int error;
699
700 #ifdef DEBUG
701 if (ldebug(times))
702 printf(ARGS(times, "*"));
703 #endif
704
705 if (args->buf != NULL) {
706 p = td->td_proc;
707 PROC_LOCK(p);
708 PROC_STATLOCK(p);
709 calcru(p, &utime, &stime);
710 PROC_STATUNLOCK(p);
711 calccru(p, &cutime, &cstime);
712 PROC_UNLOCK(p);
713
714 tms.tms_utime = CONVTCK(utime);
715 tms.tms_stime = CONVTCK(stime);
716
717 tms.tms_cutime = CONVTCK(cutime);
718 tms.tms_cstime = CONVTCK(cstime);
719
720 if ((error = copyout(&tms, args->buf, sizeof(tms))))
721 return (error);
722 }
723
724 microuptime(&tv);
725 td->td_retval[0] = (int)CONVTCK(tv);
726 return (0);
727 }
728
729 int
730 linux_newuname(struct thread *td, struct linux_newuname_args *args)
731 {
732 struct l_new_utsname utsname;
733 char osname[LINUX_MAX_UTSNAME];
734 char osrelease[LINUX_MAX_UTSNAME];
735 char *p;
736
737 #ifdef DEBUG
738 if (ldebug(newuname))
739 printf(ARGS(newuname, "*"));
740 #endif
741
742 linux_get_osname(td, osname);
743 linux_get_osrelease(td, osrelease);
744
745 bzero(&utsname, sizeof(utsname));
746 strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME);
747 getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME);
748 getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME);
749 strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME);
750 strlcpy(utsname.version, version, LINUX_MAX_UTSNAME);
751 for (p = utsname.version; *p != '\0'; ++p)
752 if (*p == '\n') {
753 *p = '\0';
754 break;
755 }
756 strlcpy(utsname.machine, linux_kplatform, LINUX_MAX_UTSNAME);
757
758 return (copyout(&utsname, args->buf, sizeof(utsname)));
759 }
760
/* Argument structure copied in by linux_utime(); times are in seconds. */
struct l_utimbuf {
	l_time_t	l_actime;	/* new access time */
	l_time_t	l_modtime;	/* new modification time */
};
765
766 #ifdef LINUX_LEGACY_SYSCALLS
767 int
768 linux_utime(struct thread *td, struct linux_utime_args *args)
769 {
770 struct timeval tv[2], *tvp;
771 struct l_utimbuf lut;
772 char *fname;
773 int error;
774
775 LCONVPATHEXIST(td, args->fname, &fname);
776
777 #ifdef DEBUG
778 if (ldebug(utime))
779 printf(ARGS(utime, "%s, *"), fname);
780 #endif
781
782 if (args->times) {
783 if ((error = copyin(args->times, &lut, sizeof lut))) {
784 LFREEPATH(fname);
785 return (error);
786 }
787 tv[0].tv_sec = lut.l_actime;
788 tv[0].tv_usec = 0;
789 tv[1].tv_sec = lut.l_modtime;
790 tv[1].tv_usec = 0;
791 tvp = tv;
792 } else
793 tvp = NULL;
794
795 error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, tvp,
796 UIO_SYSSPACE);
797 LFREEPATH(fname);
798 return (error);
799 }
800 #endif
801
802 #ifdef LINUX_LEGACY_SYSCALLS
803 int
804 linux_utimes(struct thread *td, struct linux_utimes_args *args)
805 {
806 l_timeval ltv[2];
807 struct timeval tv[2], *tvp = NULL;
808 char *fname;
809 int error;
810
811 LCONVPATHEXIST(td, args->fname, &fname);
812
813 #ifdef DEBUG
814 if (ldebug(utimes))
815 printf(ARGS(utimes, "%s, *"), fname);
816 #endif
817
818 if (args->tptr != NULL) {
819 if ((error = copyin(args->tptr, ltv, sizeof ltv))) {
820 LFREEPATH(fname);
821 return (error);
822 }
823 tv[0].tv_sec = ltv[0].tv_sec;
824 tv[0].tv_usec = ltv[0].tv_usec;
825 tv[1].tv_sec = ltv[1].tv_sec;
826 tv[1].tv_usec = ltv[1].tv_usec;
827 tvp = tv;
828 }
829
830 error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE,
831 tvp, UIO_SYSSPACE);
832 LFREEPATH(fname);
833 return (error);
834 }
835 #endif
836
837 static int
838 linux_utimensat_nsec_valid(l_long nsec)
839 {
840
841 if (nsec == LINUX_UTIME_OMIT || nsec == LINUX_UTIME_NOW)
842 return (0);
843 if (nsec >= 0 && nsec <= 999999999)
844 return (0);
845 return (1);
846 }
847
848 int
849 linux_utimensat(struct thread *td, struct linux_utimensat_args *args)
850 {
851 struct l_timespec l_times[2];
852 struct timespec times[2], *timesp = NULL;
853 char *path = NULL;
854 int error, dfd, flags = 0;
855
856 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
857
858 #ifdef DEBUG
859 if (ldebug(utimensat))
860 printf(ARGS(utimensat, "%d, *"), dfd);
861 #endif
862
863 if (args->flags & ~LINUX_AT_SYMLINK_NOFOLLOW)
864 return (EINVAL);
865
866 if (args->times != NULL) {
867 error = copyin(args->times, l_times, sizeof(l_times));
868 if (error != 0)
869 return (error);
870
871 if (linux_utimensat_nsec_valid(l_times[0].tv_nsec) != 0 ||
872 linux_utimensat_nsec_valid(l_times[1].tv_nsec) != 0)
873 return (EINVAL);
874
875 times[0].tv_sec = l_times[0].tv_sec;
876 switch (l_times[0].tv_nsec)
877 {
878 case LINUX_UTIME_OMIT:
879 times[0].tv_nsec = UTIME_OMIT;
880 break;
881 case LINUX_UTIME_NOW:
882 times[0].tv_nsec = UTIME_NOW;
883 break;
884 default:
885 times[0].tv_nsec = l_times[0].tv_nsec;
886 }
887
888 times[1].tv_sec = l_times[1].tv_sec;
889 switch (l_times[1].tv_nsec)
890 {
891 case LINUX_UTIME_OMIT:
892 times[1].tv_nsec = UTIME_OMIT;
893 break;
894 case LINUX_UTIME_NOW:
895 times[1].tv_nsec = UTIME_NOW;
896 break;
897 default:
898 times[1].tv_nsec = l_times[1].tv_nsec;
899 break;
900 }
901 timesp = times;
902
903 /* This breaks POSIX, but is what the Linux kernel does
904 * _on purpose_ (documented in the man page for utimensat(2)),
905 * so we must follow that behaviour. */
906 if (times[0].tv_nsec == UTIME_OMIT &&
907 times[1].tv_nsec == UTIME_OMIT)
908 return (0);
909 }
910
911 if (args->pathname != NULL)
912 LCONVPATHEXIST_AT(td, args->pathname, &path, dfd);
913 else if (args->flags != 0)
914 return (EINVAL);
915
916 if (args->flags & LINUX_AT_SYMLINK_NOFOLLOW)
917 flags |= AT_SYMLINK_NOFOLLOW;
918
919 if (path == NULL)
920 error = kern_futimens(td, dfd, timesp, UIO_SYSSPACE);
921 else {
922 error = kern_utimensat(td, dfd, path, UIO_SYSSPACE, timesp,
923 UIO_SYSSPACE, flags);
924 LFREEPATH(path);
925 }
926
927 return (error);
928 }
929
930 #ifdef LINUX_LEGACY_SYSCALLS
931 int
932 linux_futimesat(struct thread *td, struct linux_futimesat_args *args)
933 {
934 l_timeval ltv[2];
935 struct timeval tv[2], *tvp = NULL;
936 char *fname;
937 int error, dfd;
938
939 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
940 LCONVPATHEXIST_AT(td, args->filename, &fname, dfd);
941
942 #ifdef DEBUG
943 if (ldebug(futimesat))
944 printf(ARGS(futimesat, "%s, *"), fname);
945 #endif
946
947 if (args->utimes != NULL) {
948 if ((error = copyin(args->utimes, ltv, sizeof ltv))) {
949 LFREEPATH(fname);
950 return (error);
951 }
952 tv[0].tv_sec = ltv[0].tv_sec;
953 tv[0].tv_usec = ltv[0].tv_usec;
954 tv[1].tv_sec = ltv[1].tv_sec;
955 tv[1].tv_usec = ltv[1].tv_usec;
956 tvp = tv;
957 }
958
959 error = kern_utimesat(td, dfd, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE);
960 LFREEPATH(fname);
961 return (error);
962 }
963 #endif
964
965 int
966 linux_common_wait(struct thread *td, int pid, int *status,
967 int options, struct rusage *ru)
968 {
969 int error, tmpstat;
970
971 error = kern_wait(td, pid, &tmpstat, options, ru);
972 if (error)
973 return (error);
974
975 if (status) {
976 tmpstat &= 0xffff;
977 if (WIFSIGNALED(tmpstat))
978 tmpstat = (tmpstat & 0xffffff80) |
979 bsd_to_linux_signal(WTERMSIG(tmpstat));
980 else if (WIFSTOPPED(tmpstat))
981 tmpstat = (tmpstat & 0xffff00ff) |
982 (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8);
983 else if (WIFCONTINUED(tmpstat))
984 tmpstat = 0xffff;
985 error = copyout(&tmpstat, status, sizeof(int));
986 }
987
988 return (error);
989 }
990
991 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
992 int
993 linux_waitpid(struct thread *td, struct linux_waitpid_args *args)
994 {
995 struct linux_wait4_args wait4_args;
996
997 #ifdef DEBUG
998 if (ldebug(waitpid))
999 printf(ARGS(waitpid, "%d, %p, %d"),
1000 args->pid, (void *)args->status, args->options);
1001 #endif
1002
1003 wait4_args.pid = args->pid;
1004 wait4_args.status = args->status;
1005 wait4_args.options = args->options;
1006 wait4_args.rusage = NULL;
1007
1008 return (linux_wait4(td, &wait4_args));
1009 }
1010 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1011
1012 int
1013 linux_wait4(struct thread *td, struct linux_wait4_args *args)
1014 {
1015 int error, options;
1016 struct rusage ru, *rup;
1017
1018 #ifdef DEBUG
1019 if (ldebug(wait4))
1020 printf(ARGS(wait4, "%d, %p, %d, %p"),
1021 args->pid, (void *)args->status, args->options,
1022 (void *)args->rusage);
1023 #endif
1024 if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG |
1025 LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL))
1026 return (EINVAL);
1027
1028 options = WEXITED;
1029 linux_to_bsd_waitopts(args->options, &options);
1030
1031 if (args->rusage != NULL)
1032 rup = &ru;
1033 else
1034 rup = NULL;
1035 error = linux_common_wait(td, args->pid, args->status, options, rup);
1036 if (error != 0)
1037 return (error);
1038 if (args->rusage != NULL)
1039 error = linux_copyout_rusage(&ru, args->rusage);
1040 return (error);
1041 }
1042
1043 int
1044 linux_waitid(struct thread *td, struct linux_waitid_args *args)
1045 {
1046 int status, options, sig;
1047 struct __wrusage wru;
1048 siginfo_t siginfo;
1049 l_siginfo_t lsi;
1050 idtype_t idtype;
1051 struct proc *p;
1052 int error;
1053
1054 options = 0;
1055 linux_to_bsd_waitopts(args->options, &options);
1056
1057 if (options & ~(WNOHANG | WNOWAIT | WEXITED | WUNTRACED | WCONTINUED))
1058 return (EINVAL);
1059 if (!(options & (WEXITED | WUNTRACED | WCONTINUED)))
1060 return (EINVAL);
1061
1062 switch (args->idtype) {
1063 case LINUX_P_ALL:
1064 idtype = P_ALL;
1065 break;
1066 case LINUX_P_PID:
1067 if (args->id <= 0)
1068 return (EINVAL);
1069 idtype = P_PID;
1070 break;
1071 case LINUX_P_PGID:
1072 if (args->id <= 0)
1073 return (EINVAL);
1074 idtype = P_PGID;
1075 break;
1076 default:
1077 return (EINVAL);
1078 }
1079
1080 error = kern_wait6(td, idtype, args->id, &status, options,
1081 &wru, &siginfo);
1082 if (error != 0)
1083 return (error);
1084 if (args->rusage != NULL) {
1085 error = linux_copyout_rusage(&wru.wru_children,
1086 args->rusage);
1087 if (error != 0)
1088 return (error);
1089 }
1090 if (args->info != NULL) {
1091 p = td->td_proc;
1092 if (td->td_retval[0] == 0)
1093 bzero(&lsi, sizeof(lsi));
1094 else {
1095 sig = bsd_to_linux_signal(siginfo.si_signo);
1096 siginfo_to_lsiginfo(&siginfo, &lsi, sig);
1097 }
1098 error = copyout(&lsi, args->info, sizeof(lsi));
1099 }
1100 td->td_retval[0] = 0;
1101
1102 return (error);
1103 }
1104
1105 #ifdef LINUX_LEGACY_SYSCALLS
1106 int
1107 linux_mknod(struct thread *td, struct linux_mknod_args *args)
1108 {
1109 char *path;
1110 int error;
1111
1112 LCONVPATHCREAT(td, args->path, &path);
1113
1114 #ifdef DEBUG
1115 if (ldebug(mknod))
1116 printf(ARGS(mknod, "%s, %d, %ju"), path, args->mode,
1117 (uintmax_t)args->dev);
1118 #endif
1119
1120 switch (args->mode & S_IFMT) {
1121 case S_IFIFO:
1122 case S_IFSOCK:
1123 error = kern_mkfifoat(td, AT_FDCWD, path, UIO_SYSSPACE,
1124 args->mode);
1125 break;
1126
1127 case S_IFCHR:
1128 case S_IFBLK:
1129 error = kern_mknodat(td, AT_FDCWD, path, UIO_SYSSPACE,
1130 args->mode, args->dev);
1131 break;
1132
1133 case S_IFDIR:
1134 error = EPERM;
1135 break;
1136
1137 case 0:
1138 args->mode |= S_IFREG;
1139 /* FALLTHROUGH */
1140 case S_IFREG:
1141 error = kern_openat(td, AT_FDCWD, path, UIO_SYSSPACE,
1142 O_WRONLY | O_CREAT | O_TRUNC, args->mode);
1143 if (error == 0)
1144 kern_close(td, td->td_retval[0]);
1145 break;
1146
1147 default:
1148 error = EINVAL;
1149 break;
1150 }
1151 LFREEPATH(path);
1152 return (error);
1153 }
1154 #endif
1155
1156 int
1157 linux_mknodat(struct thread *td, struct linux_mknodat_args *args)
1158 {
1159 char *path;
1160 int error, dfd;
1161
1162 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
1163 LCONVPATHCREAT_AT(td, args->filename, &path, dfd);
1164
1165 #ifdef DEBUG
1166 if (ldebug(mknodat))
1167 printf(ARGS(mknodat, "%s, %d, %d"), path, args->mode, args->dev);
1168 #endif
1169
1170 switch (args->mode & S_IFMT) {
1171 case S_IFIFO:
1172 case S_IFSOCK:
1173 error = kern_mkfifoat(td, dfd, path, UIO_SYSSPACE, args->mode);
1174 break;
1175
1176 case S_IFCHR:
1177 case S_IFBLK:
1178 error = kern_mknodat(td, dfd, path, UIO_SYSSPACE, args->mode,
1179 args->dev);
1180 break;
1181
1182 case S_IFDIR:
1183 error = EPERM;
1184 break;
1185
1186 case 0:
1187 args->mode |= S_IFREG;
1188 /* FALLTHROUGH */
1189 case S_IFREG:
1190 error = kern_openat(td, dfd, path, UIO_SYSSPACE,
1191 O_WRONLY | O_CREAT | O_TRUNC, args->mode);
1192 if (error == 0)
1193 kern_close(td, td->td_retval[0]);
1194 break;
1195
1196 default:
1197 error = EINVAL;
1198 break;
1199 }
1200 LFREEPATH(path);
1201 return (error);
1202 }
1203
1204 /*
1205 * UGH! This is just about the dumbest idea I've ever heard!!
1206 */
1207 int
1208 linux_personality(struct thread *td, struct linux_personality_args *args)
1209 {
1210 struct linux_pemuldata *pem;
1211 struct proc *p = td->td_proc;
1212 uint32_t old;
1213
1214 #ifdef DEBUG
1215 if (ldebug(personality))
1216 printf(ARGS(personality, "%u"), args->per);
1217 #endif
1218
1219 PROC_LOCK(p);
1220 pem = pem_find(p);
1221 old = pem->persona;
1222 if (args->per != 0xffffffff)
1223 pem->persona = args->per;
1224 PROC_UNLOCK(p);
1225
1226 td->td_retval[0] = old;
1227 return (0);
1228 }
1229
/*
 * Linux-ABI image of an interval timer value.  linux_setitimer() and
 * linux_getitimer() copy this structure to and from user space and
 * convert it field by field with B2L_ITIMERVAL().
 */
struct l_itimerval {
	l_timeval it_interval;
	l_timeval it_value;
};
1234
/*
 * Copy the four tv_sec/tv_usec members of the interval timer value *lip
 * into *bip.  Both arguments are pointers; the two structures only need
 * matching member names, so the macro is used for both conversion
 * directions (Linux -> native and native -> Linux).
 *
 * The body is wrapped in do { } while (0) so that an invocation followed
 * by a semicolon is exactly one statement and stays correct inside an
 * unbraced if/else.
 */
#define	B2L_ITIMERVAL(bip, lip)						\
	do {								\
		(bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec;	\
		(bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec; \
		(bip)->it_value.tv_sec = (lip)->it_value.tv_sec;	\
		(bip)->it_value.tv_usec = (lip)->it_value.tv_usec;	\
	} while (0)
1240
1241 int
1242 linux_setitimer(struct thread *td, struct linux_setitimer_args *uap)
1243 {
1244 int error;
1245 struct l_itimerval ls;
1246 struct itimerval aitv, oitv;
1247
1248 #ifdef DEBUG
1249 if (ldebug(setitimer))
1250 printf(ARGS(setitimer, "%p, %p"),
1251 (void *)uap->itv, (void *)uap->oitv);
1252 #endif
1253
1254 if (uap->itv == NULL) {
1255 uap->itv = uap->oitv;
1256 return (linux_getitimer(td, (struct linux_getitimer_args *)uap));
1257 }
1258
1259 error = copyin(uap->itv, &ls, sizeof(ls));
1260 if (error != 0)
1261 return (error);
1262 B2L_ITIMERVAL(&aitv, &ls);
1263 #ifdef DEBUG
1264 if (ldebug(setitimer)) {
1265 printf("setitimer: value: sec: %jd, usec: %ld\n",
1266 (intmax_t)aitv.it_value.tv_sec, aitv.it_value.tv_usec);
1267 printf("setitimer: interval: sec: %jd, usec: %ld\n",
1268 (intmax_t)aitv.it_interval.tv_sec, aitv.it_interval.tv_usec);
1269 }
1270 #endif
1271 error = kern_setitimer(td, uap->which, &aitv, &oitv);
1272 if (error != 0 || uap->oitv == NULL)
1273 return (error);
1274 B2L_ITIMERVAL(&ls, &oitv);
1275
1276 return (copyout(&ls, uap->oitv, sizeof(ls)));
1277 }
1278
1279 int
1280 linux_getitimer(struct thread *td, struct linux_getitimer_args *uap)
1281 {
1282 int error;
1283 struct l_itimerval ls;
1284 struct itimerval aitv;
1285
1286 #ifdef DEBUG
1287 if (ldebug(getitimer))
1288 printf(ARGS(getitimer, "%p"), (void *)uap->itv);
1289 #endif
1290 error = kern_getitimer(td, uap->which, &aitv);
1291 if (error != 0)
1292 return (error);
1293 B2L_ITIMERVAL(&ls, &aitv);
1294 return (copyout(&ls, uap->itv, sizeof(ls)));
1295 }
1296
#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/*
 * Linux nice(2): forward args->inc to the native setpriority(2) handler
 * as the priority of the calling process (PRIO_PROCESS, who == 0).
 */
int
linux_nice(struct thread *td, struct linux_nice_args *args)
{
	struct setpriority_args native;

	native.prio = args->inc;
	native.who = 0;		/* current process */
	native.which = PRIO_PROCESS;

	return (sys_setpriority(td, &native));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1309
1310 int
1311 linux_setgroups(struct thread *td, struct linux_setgroups_args *args)
1312 {
1313 struct ucred *newcred, *oldcred;
1314 l_gid_t *linux_gidset;
1315 gid_t *bsd_gidset;
1316 int ngrp, error;
1317 struct proc *p;
1318
1319 ngrp = args->gidsetsize;
1320 if (ngrp < 0 || ngrp >= ngroups_max + 1)
1321 return (EINVAL);
1322 linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK);
1323 error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t));
1324 if (error)
1325 goto out;
1326 newcred = crget();
1327 crextend(newcred, ngrp + 1);
1328 p = td->td_proc;
1329 PROC_LOCK(p);
1330 oldcred = p->p_ucred;
1331 crcopy(newcred, oldcred);
1332
1333 /*
1334 * cr_groups[0] holds egid. Setting the whole set from
1335 * the supplied set will cause egid to be changed too.
1336 * Keep cr_groups[0] unchanged to prevent that.
1337 */
1338
1339 if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS, 0)) != 0) {
1340 PROC_UNLOCK(p);
1341 crfree(newcred);
1342 goto out;
1343 }
1344
1345 if (ngrp > 0) {
1346 newcred->cr_ngroups = ngrp + 1;
1347
1348 bsd_gidset = newcred->cr_groups;
1349 ngrp--;
1350 while (ngrp >= 0) {
1351 bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
1352 ngrp--;
1353 }
1354 } else
1355 newcred->cr_ngroups = 1;
1356
1357 setsugid(p);
1358 proc_set_cred(p, newcred);
1359 PROC_UNLOCK(p);
1360 crfree(oldcred);
1361 error = 0;
1362 out:
1363 free(linux_gidset, M_LINUX);
1364 return (error);
1365 }
1366
1367 int
1368 linux_getgroups(struct thread *td, struct linux_getgroups_args *args)
1369 {
1370 struct ucred *cred;
1371 l_gid_t *linux_gidset;
1372 gid_t *bsd_gidset;
1373 int bsd_gidsetsz, ngrp, error;
1374
1375 cred = td->td_ucred;
1376 bsd_gidset = cred->cr_groups;
1377 bsd_gidsetsz = cred->cr_ngroups - 1;
1378
1379 /*
1380 * cr_groups[0] holds egid. Returning the whole set
1381 * here will cause a duplicate. Exclude cr_groups[0]
1382 * to prevent that.
1383 */
1384
1385 if ((ngrp = args->gidsetsize) == 0) {
1386 td->td_retval[0] = bsd_gidsetsz;
1387 return (0);
1388 }
1389
1390 if (ngrp < bsd_gidsetsz)
1391 return (EINVAL);
1392
1393 ngrp = 0;
1394 linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset),
1395 M_LINUX, M_WAITOK);
1396 while (ngrp < bsd_gidsetsz) {
1397 linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
1398 ngrp++;
1399 }
1400
1401 error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t));
1402 free(linux_gidset, M_LINUX);
1403 if (error)
1404 return (error);
1405
1406 td->td_retval[0] = ngrp;
1407 return (0);
1408 }
1409
1410 int
1411 linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args)
1412 {
1413 struct rlimit bsd_rlim;
1414 struct l_rlimit rlim;
1415 u_int which;
1416 int error;
1417
1418 #ifdef DEBUG
1419 if (ldebug(setrlimit))
1420 printf(ARGS(setrlimit, "%d, %p"),
1421 args->resource, (void *)args->rlim);
1422 #endif
1423
1424 if (args->resource >= LINUX_RLIM_NLIMITS)
1425 return (EINVAL);
1426
1427 which = linux_to_bsd_resource[args->resource];
1428 if (which == -1)
1429 return (EINVAL);
1430
1431 error = copyin(args->rlim, &rlim, sizeof(rlim));
1432 if (error)
1433 return (error);
1434
1435 bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur;
1436 bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max;
1437 return (kern_setrlimit(td, which, &bsd_rlim));
1438 }
1439
#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/*
 * Legacy Linux getrlimit(2): like linux_getrlimit(), except that a limit
 * which truncates to the all-ones unsigned value is reported as the
 * largest signed value instead.
 */
int
linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args)
{
	struct rlimit native_lim;
	struct l_rlimit l_lim;
	u_int which;

#ifdef DEBUG
	if (ldebug(old_getrlimit))
		printf(ARGS(old_getrlimit, "%d, %p"),
		    args->resource, (void *)args->rlim);
#endif

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);
	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	lim_rlimit(td, which, &native_lim);

#ifdef COMPAT_LINUX32
	/* 32-bit ABI: clamp UINT_MAX down to INT_MAX. */
	l_lim.rlim_cur = (unsigned int)native_lim.rlim_cur;
	l_lim.rlim_max = (unsigned int)native_lim.rlim_max;
	if (l_lim.rlim_cur == UINT_MAX)
		l_lim.rlim_cur = INT_MAX;
	if (l_lim.rlim_max == UINT_MAX)
		l_lim.rlim_max = INT_MAX;
#else
	/* Native word size: clamp ULONG_MAX down to LONG_MAX. */
	l_lim.rlim_cur = (unsigned long)native_lim.rlim_cur;
	l_lim.rlim_max = (unsigned long)native_lim.rlim_max;
	if (l_lim.rlim_cur == ULONG_MAX)
		l_lim.rlim_cur = LONG_MAX;
	if (l_lim.rlim_max == ULONG_MAX)
		l_lim.rlim_max = LONG_MAX;
#endif

	return (copyout(&l_lim, args->rlim, sizeof(l_lim)));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1481
1482 int
1483 linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args)
1484 {
1485 struct l_rlimit rlim;
1486 struct rlimit bsd_rlim;
1487 u_int which;
1488
1489 #ifdef DEBUG
1490 if (ldebug(getrlimit))
1491 printf(ARGS(getrlimit, "%d, %p"),
1492 args->resource, (void *)args->rlim);
1493 #endif
1494
1495 if (args->resource >= LINUX_RLIM_NLIMITS)
1496 return (EINVAL);
1497
1498 which = linux_to_bsd_resource[args->resource];
1499 if (which == -1)
1500 return (EINVAL);
1501
1502 lim_rlimit(td, which, &bsd_rlim);
1503
1504 rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur;
1505 rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max;
1506 return (copyout(&rlim, args->rlim, sizeof(rlim)));
1507 }
1508
1509 int
1510 linux_sched_setscheduler(struct thread *td,
1511 struct linux_sched_setscheduler_args *args)
1512 {
1513 struct sched_param sched_param;
1514 struct thread *tdt;
1515 int error, policy;
1516
1517 #ifdef DEBUG
1518 if (ldebug(sched_setscheduler))
1519 printf(ARGS(sched_setscheduler, "%d, %d, %p"),
1520 args->pid, args->policy, (const void *)args->param);
1521 #endif
1522
1523 switch (args->policy) {
1524 case LINUX_SCHED_OTHER:
1525 policy = SCHED_OTHER;
1526 break;
1527 case LINUX_SCHED_FIFO:
1528 policy = SCHED_FIFO;
1529 break;
1530 case LINUX_SCHED_RR:
1531 policy = SCHED_RR;
1532 break;
1533 default:
1534 return (EINVAL);
1535 }
1536
1537 error = copyin(args->param, &sched_param, sizeof(sched_param));
1538 if (error)
1539 return (error);
1540
1541 tdt = linux_tdfind(td, args->pid, -1);
1542 if (tdt == NULL)
1543 return (ESRCH);
1544
1545 error = kern_sched_setscheduler(td, tdt, policy, &sched_param);
1546 PROC_UNLOCK(tdt->td_proc);
1547 return (error);
1548 }
1549
1550 int
1551 linux_sched_getscheduler(struct thread *td,
1552 struct linux_sched_getscheduler_args *args)
1553 {
1554 struct thread *tdt;
1555 int error, policy;
1556
1557 #ifdef DEBUG
1558 if (ldebug(sched_getscheduler))
1559 printf(ARGS(sched_getscheduler, "%d"), args->pid);
1560 #endif
1561
1562 tdt = linux_tdfind(td, args->pid, -1);
1563 if (tdt == NULL)
1564 return (ESRCH);
1565
1566 error = kern_sched_getscheduler(td, tdt, &policy);
1567 PROC_UNLOCK(tdt->td_proc);
1568
1569 switch (policy) {
1570 case SCHED_OTHER:
1571 td->td_retval[0] = LINUX_SCHED_OTHER;
1572 break;
1573 case SCHED_FIFO:
1574 td->td_retval[0] = LINUX_SCHED_FIFO;
1575 break;
1576 case SCHED_RR:
1577 td->td_retval[0] = LINUX_SCHED_RR;
1578 break;
1579 }
1580 return (error);
1581 }
1582
1583 int
1584 linux_sched_get_priority_max(struct thread *td,
1585 struct linux_sched_get_priority_max_args *args)
1586 {
1587 struct sched_get_priority_max_args bsd;
1588
1589 #ifdef DEBUG
1590 if (ldebug(sched_get_priority_max))
1591 printf(ARGS(sched_get_priority_max, "%d"), args->policy);
1592 #endif
1593
1594 switch (args->policy) {
1595 case LINUX_SCHED_OTHER:
1596 bsd.policy = SCHED_OTHER;
1597 break;
1598 case LINUX_SCHED_FIFO:
1599 bsd.policy = SCHED_FIFO;
1600 break;
1601 case LINUX_SCHED_RR:
1602 bsd.policy = SCHED_RR;
1603 break;
1604 default:
1605 return (EINVAL);
1606 }
1607 return (sys_sched_get_priority_max(td, &bsd));
1608 }
1609
1610 int
1611 linux_sched_get_priority_min(struct thread *td,
1612 struct linux_sched_get_priority_min_args *args)
1613 {
1614 struct sched_get_priority_min_args bsd;
1615
1616 #ifdef DEBUG
1617 if (ldebug(sched_get_priority_min))
1618 printf(ARGS(sched_get_priority_min, "%d"), args->policy);
1619 #endif
1620
1621 switch (args->policy) {
1622 case LINUX_SCHED_OTHER:
1623 bsd.policy = SCHED_OTHER;
1624 break;
1625 case LINUX_SCHED_FIFO:
1626 bsd.policy = SCHED_FIFO;
1627 break;
1628 case LINUX_SCHED_RR:
1629 bsd.policy = SCHED_RR;
1630 break;
1631 default:
1632 return (EINVAL);
1633 }
1634 return (sys_sched_get_priority_min(td, &bsd));
1635 }
1636
/*
 * Values recognised by linux_reboot().
 *
 * REBOOT_CAD_ON .. REBOOT_POWEROFF are compared against the "cmd"
 * argument; REBOOT_MAGIC1 must match "magic1" and one of REBOOT_MAGIC2,
 * REBOOT_MAGIC2A or REBOOT_MAGIC2B must match "magic2", otherwise the
 * call is rejected with EINVAL.
 */
#define REBOOT_CAD_ON 0x89abcdef
#define REBOOT_CAD_OFF 0
#define REBOOT_HALT 0xcdef0123
#define REBOOT_RESTART 0x01234567
#define REBOOT_RESTART2 0xA1B2C3D4
#define REBOOT_POWEROFF 0x4321FEDC
#define REBOOT_MAGIC1 0xfee1dead
#define REBOOT_MAGIC2 0x28121969
#define REBOOT_MAGIC2A 0x05121996
#define REBOOT_MAGIC2B 0x16041998
1647
1648 int
1649 linux_reboot(struct thread *td, struct linux_reboot_args *args)
1650 {
1651 struct reboot_args bsd_args;
1652
1653 #ifdef DEBUG
1654 if (ldebug(reboot))
1655 printf(ARGS(reboot, "0x%x"), args->cmd);
1656 #endif
1657
1658 if (args->magic1 != REBOOT_MAGIC1)
1659 return (EINVAL);
1660
1661 switch (args->magic2) {
1662 case REBOOT_MAGIC2:
1663 case REBOOT_MAGIC2A:
1664 case REBOOT_MAGIC2B:
1665 break;
1666 default:
1667 return (EINVAL);
1668 }
1669
1670 switch (args->cmd) {
1671 case REBOOT_CAD_ON:
1672 case REBOOT_CAD_OFF:
1673 return (priv_check(td, PRIV_REBOOT));
1674 case REBOOT_HALT:
1675 bsd_args.opt = RB_HALT;
1676 break;
1677 case REBOOT_RESTART:
1678 case REBOOT_RESTART2:
1679 bsd_args.opt = 0;
1680 break;
1681 case REBOOT_POWEROFF:
1682 bsd_args.opt = RB_POWEROFF;
1683 break;
1684 default:
1685 return (EINVAL);
1686 }
1687 return (sys_reboot(td, &bsd_args));
1688 }
1689
1690
/*
 * The FreeBSD native getpid(2), getgid(2) and getuid(2) also modify
 * td->td_retval[1] when COMPAT_43 is defined. This clobbers registers that
 * are assumed to be preserved. The following lightweight syscalls fix
 * this. See also linux_getgid16() and linux_getuid16() in linux_uid16.c.
 *
 * linux_getpid() - MP SAFE
 * linux_getgid() - MP SAFE
 * linux_getuid() - MP SAFE
 */
1701
1702 int
1703 linux_getpid(struct thread *td, struct linux_getpid_args *args)
1704 {
1705
1706 #ifdef DEBUG
1707 if (ldebug(getpid))
1708 printf(ARGS(getpid, ""));
1709 #endif
1710 td->td_retval[0] = td->td_proc->p_pid;
1711
1712 return (0);
1713 }
1714
1715 int
1716 linux_gettid(struct thread *td, struct linux_gettid_args *args)
1717 {
1718 struct linux_emuldata *em;
1719
1720 #ifdef DEBUG
1721 if (ldebug(gettid))
1722 printf(ARGS(gettid, ""));
1723 #endif
1724
1725 em = em_find(td);
1726 KASSERT(em != NULL, ("gettid: emuldata not found.\n"));
1727
1728 td->td_retval[0] = em->em_tid;
1729
1730 return (0);
1731 }
1732
1733
1734 int
1735 linux_getppid(struct thread *td, struct linux_getppid_args *args)
1736 {
1737
1738 #ifdef DEBUG
1739 if (ldebug(getppid))
1740 printf(ARGS(getppid, ""));
1741 #endif
1742
1743 td->td_retval[0] = kern_getppid(td);
1744 return (0);
1745 }
1746
1747 int
1748 linux_getgid(struct thread *td, struct linux_getgid_args *args)
1749 {
1750
1751 #ifdef DEBUG
1752 if (ldebug(getgid))
1753 printf(ARGS(getgid, ""));
1754 #endif
1755
1756 td->td_retval[0] = td->td_ucred->cr_rgid;
1757 return (0);
1758 }
1759
1760 int
1761 linux_getuid(struct thread *td, struct linux_getuid_args *args)
1762 {
1763
1764 #ifdef DEBUG
1765 if (ldebug(getuid))
1766 printf(ARGS(getuid, ""));
1767 #endif
1768
1769 td->td_retval[0] = td->td_ucred->cr_ruid;
1770 return (0);
1771 }
1772
1773
1774 int
1775 linux_getsid(struct thread *td, struct linux_getsid_args *args)
1776 {
1777 struct getsid_args bsd;
1778
1779 #ifdef DEBUG
1780 if (ldebug(getsid))
1781 printf(ARGS(getsid, "%i"), args->pid);
1782 #endif
1783
1784 bsd.pid = args->pid;
1785 return (sys_getsid(td, &bsd));
1786 }
1787
1788 int
1789 linux_nosys(struct thread *td, struct nosys_args *ignore)
1790 {
1791
1792 return (ENOSYS);
1793 }
1794
1795 int
1796 linux_getpriority(struct thread *td, struct linux_getpriority_args *args)
1797 {
1798 struct getpriority_args bsd_args;
1799 int error;
1800
1801 #ifdef DEBUG
1802 if (ldebug(getpriority))
1803 printf(ARGS(getpriority, "%i, %i"), args->which, args->who);
1804 #endif
1805
1806 bsd_args.which = args->which;
1807 bsd_args.who = args->who;
1808 error = sys_getpriority(td, &bsd_args);
1809 td->td_retval[0] = 20 - td->td_retval[0];
1810 return (error);
1811 }
1812
1813 int
1814 linux_sethostname(struct thread *td, struct linux_sethostname_args *args)
1815 {
1816 int name[2];
1817
1818 #ifdef DEBUG
1819 if (ldebug(sethostname))
1820 printf(ARGS(sethostname, "*, %i"), args->len);
1821 #endif
1822
1823 name[0] = CTL_KERN;
1824 name[1] = KERN_HOSTNAME;
1825 return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname,
1826 args->len, 0, 0));
1827 }
1828
1829 int
1830 linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args)
1831 {
1832 int name[2];
1833
1834 #ifdef DEBUG
1835 if (ldebug(setdomainname))
1836 printf(ARGS(setdomainname, "*, %i"), args->len);
1837 #endif
1838
1839 name[0] = CTL_KERN;
1840 name[1] = KERN_NISDOMAINNAME;
1841 return (userland_sysctl(td, name, 2, 0, 0, 0, args->name,
1842 args->len, 0, 0));
1843 }
1844
1845 int
1846 linux_exit_group(struct thread *td, struct linux_exit_group_args *args)
1847 {
1848
1849 #ifdef DEBUG
1850 if (ldebug(exit_group))
1851 printf(ARGS(exit_group, "%i"), args->error_code);
1852 #endif
1853
1854 LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid,
1855 args->error_code);
1856
1857 /*
1858 * XXX: we should send a signal to the parent if
1859 * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?)
1860 * as it doesnt occur often.
1861 */
1862 exit1(td, args->error_code, 0);
1863 /* NOTREACHED */
1864 }
1865
/*
 * Capability ABI versions accepted in l_user_cap_header.version by
 * linux_capget() and linux_capset().  Version 1 makes those functions
 * transfer one l_user_cap_data element, versions 2 and 3 two elements.
 */
#define _LINUX_CAPABILITY_VERSION_1 0x19980330
#define _LINUX_CAPABILITY_VERSION_2 0x20071026
#define _LINUX_CAPABILITY_VERSION_3 0x20080522

/* Header copied in from the user's hdrp argument of capget/capset. */
struct l_user_cap_header {
	l_int version;
	l_int pid;
};

/* One element of the capability data array exchanged through datap. */
struct l_user_cap_data {
	l_int effective;
	l_int permitted;
	l_int inheritable;
};
1880
1881 int
1882 linux_capget(struct thread *td, struct linux_capget_args *uap)
1883 {
1884 struct l_user_cap_header luch;
1885 struct l_user_cap_data lucd[2];
1886 int error, u32s;
1887
1888 if (uap->hdrp == NULL)
1889 return (EFAULT);
1890
1891 error = copyin(uap->hdrp, &luch, sizeof(luch));
1892 if (error != 0)
1893 return (error);
1894
1895 switch (luch.version) {
1896 case _LINUX_CAPABILITY_VERSION_1:
1897 u32s = 1;
1898 break;
1899 case _LINUX_CAPABILITY_VERSION_2:
1900 case _LINUX_CAPABILITY_VERSION_3:
1901 u32s = 2;
1902 break;
1903 default:
1904 #ifdef DEBUG
1905 if (ldebug(capget))
1906 printf(LMSG("invalid capget capability version 0x%x"),
1907 luch.version);
1908 #endif
1909 luch.version = _LINUX_CAPABILITY_VERSION_1;
1910 error = copyout(&luch, uap->hdrp, sizeof(luch));
1911 if (error)
1912 return (error);
1913 return (EINVAL);
1914 }
1915
1916 if (luch.pid)
1917 return (EPERM);
1918
1919 if (uap->datap) {
1920 /*
1921 * The current implementation doesn't support setting
1922 * a capability (it's essentially a stub) so indicate
1923 * that no capabilities are currently set or available
1924 * to request.
1925 */
1926 memset(&lucd, 0, u32s * sizeof(lucd[0]));
1927 error = copyout(&lucd, uap->datap, u32s * sizeof(lucd[0]));
1928 }
1929
1930 return (error);
1931 }
1932
1933 int
1934 linux_capset(struct thread *td, struct linux_capset_args *uap)
1935 {
1936 struct l_user_cap_header luch;
1937 struct l_user_cap_data lucd[2];
1938 int error, i, u32s;
1939
1940 if (uap->hdrp == NULL || uap->datap == NULL)
1941 return (EFAULT);
1942
1943 error = copyin(uap->hdrp, &luch, sizeof(luch));
1944 if (error != 0)
1945 return (error);
1946
1947 switch (luch.version) {
1948 case _LINUX_CAPABILITY_VERSION_1:
1949 u32s = 1;
1950 break;
1951 case _LINUX_CAPABILITY_VERSION_2:
1952 case _LINUX_CAPABILITY_VERSION_3:
1953 u32s = 2;
1954 break;
1955 default:
1956 #ifdef DEBUG
1957 if (ldebug(capset))
1958 printf(LMSG("invalid capset capability version 0x%x"),
1959 luch.version);
1960 #endif
1961 luch.version = _LINUX_CAPABILITY_VERSION_1;
1962 error = copyout(&luch, uap->hdrp, sizeof(luch));
1963 if (error)
1964 return (error);
1965 return (EINVAL);
1966 }
1967
1968 if (luch.pid)
1969 return (EPERM);
1970
1971 error = copyin(uap->datap, &lucd, u32s * sizeof(lucd[0]));
1972 if (error != 0)
1973 return (error);
1974
1975 /* We currently don't support setting any capabilities. */
1976 for (i = 0; i < u32s; i++) {
1977 if (lucd[i].effective || lucd[i].permitted ||
1978 lucd[i].inheritable) {
1979 linux_msg(td,
1980 "capset[%d] effective=0x%x, permitted=0x%x, "
1981 "inheritable=0x%x is not implemented", i,
1982 (int)lucd[i].effective, (int)lucd[i].permitted,
1983 (int)lucd[i].inheritable);
1984 return (EPERM);
1985 }
1986 }
1987
1988 return (0);
1989 }
1990
1991 int
1992 linux_prctl(struct thread *td, struct linux_prctl_args *args)
1993 {
1994 int error = 0, max_size;
1995 struct proc *p = td->td_proc;
1996 char comm[LINUX_MAX_COMM_LEN];
1997 struct linux_emuldata *em;
1998 int pdeath_signal;
1999
2000 #ifdef DEBUG
2001 if (ldebug(prctl))
2002 printf(ARGS(prctl, "%d, %ju, %ju, %ju, %ju"), args->option,
2003 (uintmax_t)args->arg2, (uintmax_t)args->arg3,
2004 (uintmax_t)args->arg4, (uintmax_t)args->arg5);
2005 #endif
2006
2007 switch (args->option) {
2008 case LINUX_PR_SET_PDEATHSIG:
2009 if (!LINUX_SIG_VALID(args->arg2))
2010 return (EINVAL);
2011 em = em_find(td);
2012 KASSERT(em != NULL, ("prctl: emuldata not found.\n"));
2013 em->pdeath_signal = args->arg2;
2014 break;
2015 case LINUX_PR_GET_PDEATHSIG:
2016 em = em_find(td);
2017 KASSERT(em != NULL, ("prctl: emuldata not found.\n"));
2018 pdeath_signal = em->pdeath_signal;
2019 error = copyout(&pdeath_signal,
2020 (void *)(register_t)args->arg2,
2021 sizeof(pdeath_signal));
2022 break;
2023 case LINUX_PR_GET_KEEPCAPS:
2024 /*
2025 * Indicate that we always clear the effective and
2026 * permitted capability sets when the user id becomes
2027 * non-zero (actually the capability sets are simply
2028 * always zero in the current implementation).
2029 */
2030 td->td_retval[0] = 0;
2031 break;
2032 case LINUX_PR_SET_KEEPCAPS:
2033 /*
2034 * Ignore requests to keep the effective and permitted
2035 * capability sets when the user id becomes non-zero.
2036 */
2037 break;
2038 case LINUX_PR_SET_NAME:
2039 /*
2040 * To be on the safe side we need to make sure to not
2041 * overflow the size a Linux program expects. We already
2042 * do this here in the copyin, so that we don't need to
2043 * check on copyout.
2044 */
2045 max_size = MIN(sizeof(comm), sizeof(p->p_comm));
2046 error = copyinstr((void *)(register_t)args->arg2, comm,
2047 max_size, NULL);
2048
2049 /* Linux silently truncates the name if it is too long. */
2050 if (error == ENAMETOOLONG) {
2051 /*
2052 * XXX: copyinstr() isn't documented to populate the
2053 * array completely, so do a copyin() to be on the
2054 * safe side. This should be changed in case
2055 * copyinstr() is changed to guarantee this.
2056 */
2057 error = copyin((void *)(register_t)args->arg2, comm,
2058 max_size - 1);
2059 comm[max_size - 1] = '\0';
2060 }
2061 if (error)
2062 return (error);
2063
2064 PROC_LOCK(p);
2065 strlcpy(p->p_comm, comm, sizeof(p->p_comm));
2066 PROC_UNLOCK(p);
2067 break;
2068 case LINUX_PR_GET_NAME:
2069 PROC_LOCK(p);
2070 strlcpy(comm, p->p_comm, sizeof(comm));
2071 PROC_UNLOCK(p);
2072 error = copyout(comm, (void *)(register_t)args->arg2,
2073 strlen(comm) + 1);
2074 break;
2075 default:
2076 error = EINVAL;
2077 break;
2078 }
2079
2080 return (error);
2081 }
2082
2083 int
2084 linux_sched_setparam(struct thread *td,
2085 struct linux_sched_setparam_args *uap)
2086 {
2087 struct sched_param sched_param;
2088 struct thread *tdt;
2089 int error;
2090
2091 #ifdef DEBUG
2092 if (ldebug(sched_setparam))
2093 printf(ARGS(sched_setparam, "%d, *"), uap->pid);
2094 #endif
2095
2096 error = copyin(uap->param, &sched_param, sizeof(sched_param));
2097 if (error)
2098 return (error);
2099
2100 tdt = linux_tdfind(td, uap->pid, -1);
2101 if (tdt == NULL)
2102 return (ESRCH);
2103
2104 error = kern_sched_setparam(td, tdt, &sched_param);
2105 PROC_UNLOCK(tdt->td_proc);
2106 return (error);
2107 }
2108
2109 int
2110 linux_sched_getparam(struct thread *td,
2111 struct linux_sched_getparam_args *uap)
2112 {
2113 struct sched_param sched_param;
2114 struct thread *tdt;
2115 int error;
2116
2117 #ifdef DEBUG
2118 if (ldebug(sched_getparam))
2119 printf(ARGS(sched_getparam, "%d, *"), uap->pid);
2120 #endif
2121
2122 tdt = linux_tdfind(td, uap->pid, -1);
2123 if (tdt == NULL)
2124 return (ESRCH);
2125
2126 error = kern_sched_getparam(td, tdt, &sched_param);
2127 PROC_UNLOCK(tdt->td_proc);
2128 if (error == 0)
2129 error = copyout(&sched_param, uap->param,
2130 sizeof(sched_param));
2131 return (error);
2132 }
2133
2134 /*
2135 * Get affinity of a process.
2136 */
2137 int
2138 linux_sched_getaffinity(struct thread *td,
2139 struct linux_sched_getaffinity_args *args)
2140 {
2141 int error;
2142 struct thread *tdt;
2143
2144 #ifdef DEBUG
2145 if (ldebug(sched_getaffinity))
2146 printf(ARGS(sched_getaffinity, "%d, %d, *"), args->pid,
2147 args->len);
2148 #endif
2149 if (args->len < sizeof(cpuset_t))
2150 return (EINVAL);
2151
2152 tdt = linux_tdfind(td, args->pid, -1);
2153 if (tdt == NULL)
2154 return (ESRCH);
2155
2156 PROC_UNLOCK(tdt->td_proc);
2157
2158 error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
2159 tdt->td_tid, sizeof(cpuset_t), (cpuset_t *)args->user_mask_ptr);
2160 if (error == 0)
2161 td->td_retval[0] = sizeof(cpuset_t);
2162
2163 return (error);
2164 }
2165
2166 /*
2167 * Set affinity of a process.
2168 */
2169 int
2170 linux_sched_setaffinity(struct thread *td,
2171 struct linux_sched_setaffinity_args *args)
2172 {
2173 struct thread *tdt;
2174
2175 #ifdef DEBUG
2176 if (ldebug(sched_setaffinity))
2177 printf(ARGS(sched_setaffinity, "%d, %d, *"), args->pid,
2178 args->len);
2179 #endif
2180 if (args->len < sizeof(cpuset_t))
2181 return (EINVAL);
2182
2183 tdt = linux_tdfind(td, args->pid, -1);
2184 if (tdt == NULL)
2185 return (ESRCH);
2186
2187 PROC_UNLOCK(tdt->td_proc);
2188
2189 return (kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
2190 tdt->td_tid, sizeof(cpuset_t), (cpuset_t *) args->user_mask_ptr));
2191 }
2192
/*
 * Resource limit pair with unsigned 64-bit members, as copied out to the
 * "old" argument by linux_prlimit64().
 */
struct linux_rlimit64 {
	uint64_t rlim_cur;
	uint64_t rlim_max;
};
2197
2198 int
2199 linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args)
2200 {
2201 struct rlimit rlim, nrlim;
2202 struct linux_rlimit64 lrlim;
2203 struct proc *p;
2204 u_int which;
2205 int flags;
2206 int error;
2207
2208 #ifdef DEBUG
2209 if (ldebug(prlimit64))
2210 printf(ARGS(prlimit64, "%d, %d, %p, %p"), args->pid,
2211 args->resource, (void *)args->new, (void *)args->old);
2212 #endif
2213
2214 if (args->resource >= LINUX_RLIM_NLIMITS)
2215 return (EINVAL);
2216
2217 which = linux_to_bsd_resource[args->resource];
2218 if (which == -1)
2219 return (EINVAL);
2220
2221 if (args->new != NULL) {
2222 /*
2223 * Note. Unlike FreeBSD where rlim is signed 64-bit Linux
2224 * rlim is unsigned 64-bit. FreeBSD treats negative limits
2225 * as INFINITY so we do not need a conversion even.
2226 */
2227 error = copyin(args->new, &nrlim, sizeof(nrlim));
2228 if (error != 0)
2229 return (error);
2230 }
2231
2232 flags = PGET_HOLD | PGET_NOTWEXIT;
2233 if (args->new != NULL)
2234 flags |= PGET_CANDEBUG;
2235 else
2236 flags |= PGET_CANSEE;
2237 error = pget(args->pid, flags, &p);
2238 if (error != 0)
2239 return (error);
2240
2241 if (args->old != NULL) {
2242 PROC_LOCK(p);
2243 lim_rlimit_proc(p, which, &rlim);
2244 PROC_UNLOCK(p);
2245 if (rlim.rlim_cur == RLIM_INFINITY)
2246 lrlim.rlim_cur = LINUX_RLIM_INFINITY;
2247 else
2248 lrlim.rlim_cur = rlim.rlim_cur;
2249 if (rlim.rlim_max == RLIM_INFINITY)
2250 lrlim.rlim_max = LINUX_RLIM_INFINITY;
2251 else
2252 lrlim.rlim_max = rlim.rlim_max;
2253 error = copyout(&lrlim, args->old, sizeof(lrlim));
2254 if (error != 0)
2255 goto out;
2256 }
2257
2258 if (args->new != NULL)
2259 error = kern_proc_setrlimit(td, p, which, &nrlim);
2260
2261 out:
2262 PRELE(p);
2263 return (error);
2264 }
2265
2266 int
2267 linux_pselect6(struct thread *td, struct linux_pselect6_args *args)
2268 {
2269 struct timeval utv, tv0, tv1, *tvp;
2270 struct l_pselect6arg lpse6;
2271 struct l_timespec lts;
2272 struct timespec uts;
2273 l_sigset_t l_ss;
2274 sigset_t *ssp;
2275 sigset_t ss;
2276 int error;
2277
2278 ssp = NULL;
2279 if (args->sig != NULL) {
2280 error = copyin(args->sig, &lpse6, sizeof(lpse6));
2281 if (error != 0)
2282 return (error);
2283 if (lpse6.ss_len != sizeof(l_ss))
2284 return (EINVAL);
2285 if (lpse6.ss != 0) {
2286 error = copyin(PTRIN(lpse6.ss), &l_ss,
2287 sizeof(l_ss));
2288 if (error != 0)
2289 return (error);
2290 linux_to_bsd_sigset(&l_ss, &ss);
2291 ssp = &ss;
2292 }
2293 }
2294
2295 /*
2296 * Currently glibc changes nanosecond number to microsecond.
2297 * This mean losing precision but for now it is hardly seen.
2298 */
2299 if (args->tsp != NULL) {
2300 error = copyin(args->tsp, <s, sizeof(lts));
2301 if (error != 0)
2302 return (error);
2303 error = linux_to_native_timespec(&uts, <s);
2304 if (error != 0)
2305 return (error);
2306
2307 TIMESPEC_TO_TIMEVAL(&utv, &uts);
2308 if (itimerfix(&utv))
2309 return (EINVAL);
2310
2311 microtime(&tv0);
2312 tvp = &utv;
2313 } else
2314 tvp = NULL;
2315
2316 error = kern_pselect(td, args->nfds, args->readfds, args->writefds,
2317 args->exceptfds, tvp, ssp, LINUX_NFDBITS);
2318
2319 if (error == 0 && args->tsp != NULL) {
2320 if (td->td_retval[0] != 0) {
2321 /*
2322 * Compute how much time was left of the timeout,
2323 * by subtracting the current time and the time
2324 * before we started the call, and subtracting
2325 * that result from the user-supplied value.
2326 */
2327
2328 microtime(&tv1);
2329 timevalsub(&tv1, &tv0);
2330 timevalsub(&utv, &tv1);
2331 if (utv.tv_sec < 0)
2332 timevalclear(&utv);
2333 } else
2334 timevalclear(&utv);
2335
2336 TIMEVAL_TO_TIMESPEC(&utv, &uts);
2337
2338 error = native_to_linux_timespec(<s, &uts);
2339 if (error == 0)
2340 error = copyout(<s, args->tsp, sizeof(lts));
2341 }
2342
2343 return (error);
2344 }
2345
2346 int
2347 linux_ppoll(struct thread *td, struct linux_ppoll_args *args)
2348 {
2349 struct timespec ts0, ts1;
2350 struct l_timespec lts;
2351 struct timespec uts, *tsp;
2352 l_sigset_t l_ss;
2353 sigset_t *ssp;
2354 sigset_t ss;
2355 int error;
2356
2357 if (args->sset != NULL) {
2358 if (args->ssize != sizeof(l_ss))
2359 return (EINVAL);
2360 error = copyin(args->sset, &l_ss, sizeof(l_ss));
2361 if (error)
2362 return (error);
2363 linux_to_bsd_sigset(&l_ss, &ss);
2364 ssp = &ss;
2365 } else
2366 ssp = NULL;
2367 if (args->tsp != NULL) {
2368 error = copyin(args->tsp, <s, sizeof(lts));
2369 if (error)
2370 return (error);
2371 error = linux_to_native_timespec(&uts, <s);
2372 if (error != 0)
2373 return (error);
2374
2375 nanotime(&ts0);
2376 tsp = &uts;
2377 } else
2378 tsp = NULL;
2379
2380 error = kern_poll(td, args->fds, args->nfds, tsp, ssp);
2381
2382 if (error == 0 && args->tsp != NULL) {
2383 if (td->td_retval[0]) {
2384 nanotime(&ts1);
2385 timespecsub(&ts1, &ts0, &ts1);
2386 timespecsub(&uts, &ts1, &uts);
2387 if (uts.tv_sec < 0)
2388 timespecclear(&uts);
2389 } else
2390 timespecclear(&uts);
2391
2392 error = native_to_linux_timespec(<s, &uts);
2393 if (error == 0)
2394 error = copyout(<s, args->tsp, sizeof(lts));
2395 }
2396
2397 return (error);
2398 }
2399
2400 #if defined(DEBUG) || defined(KTR)
2401 /* XXX: can be removed when every ldebug(...) and KTR stuff are removed. */
2402
2403 #ifdef COMPAT_LINUX32
2404 #define L_MAXSYSCALL LINUX32_SYS_MAXSYSCALL
2405 #else
2406 #define L_MAXSYSCALL LINUX_SYS_MAXSYSCALL
2407 #endif
2408
/* Per-syscall debug flags; linux_debug() sets, clears or fills them. */
u_char linux_debug_map[howmany(L_MAXSYSCALL, sizeof(u_char))];
2410
2411 static int
2412 linux_debug(int syscall, int toggle, int global)
2413 {
2414
2415 if (global) {
2416 char c = toggle ? 0 : 0xff;
2417
2418 memset(linux_debug_map, c, sizeof(linux_debug_map));
2419 return (0);
2420 }
2421 if (syscall < 0 || syscall >= L_MAXSYSCALL)
2422 return (EINVAL);
2423 if (toggle)
2424 clrbit(linux_debug_map, syscall);
2425 else
2426 setbit(linux_debug_map, syscall);
2427 return (0);
2428 }
2429 #undef L_MAXSYSCALL
2430
2431 /*
2432 * Usage: sysctl linux.debug=<syscall_nr>.<0/1>
2433 *
2434 * E.g.: sysctl linux.debug=21.0
2435 *
2436 * As a special case, syscall "all" will apply to all syscalls globally.
2437 */
2438 #define LINUX_MAX_DEBUGSTR 16
2439 int
2440 linux_sysctl_debug(SYSCTL_HANDLER_ARGS)
2441 {
2442 char value[LINUX_MAX_DEBUGSTR], *p;
2443 int error, sysc, toggle;
2444 int global = 0;
2445
2446 value[0] = '\0';
2447 error = sysctl_handle_string(oidp, value, LINUX_MAX_DEBUGSTR, req);
2448 if (error || req->newptr == NULL)
2449 return (error);
2450 for (p = value; *p != '\0' && *p != '.'; p++);
2451 if (*p == '\0')
2452 return (EINVAL);
2453 *p++ = '\0';
2454 sysc = strtol(value, NULL, 0);
2455 toggle = strtol(p, NULL, 0);
2456 if (strcmp(value, "all") == 0)
2457 global = 1;
2458 error = linux_debug(sysc, toggle, global);
2459 return (error);
2460 }
2461
2462 #endif /* DEBUG || KTR */
2463
2464 int
2465 linux_sched_rr_get_interval(struct thread *td,
2466 struct linux_sched_rr_get_interval_args *uap)
2467 {
2468 struct timespec ts;
2469 struct l_timespec lts;
2470 struct thread *tdt;
2471 int error;
2472
2473 /*
2474 * According to man in case the invalid pid specified
2475 * EINVAL should be returned.
2476 */
2477 if (uap->pid < 0)
2478 return (EINVAL);
2479
2480 tdt = linux_tdfind(td, uap->pid, -1);
2481 if (tdt == NULL)
2482 return (ESRCH);
2483
2484 error = kern_sched_rr_get_interval_td(td, tdt, &ts);
2485 PROC_UNLOCK(tdt->td_proc);
2486 if (error != 0)
2487 return (error);
2488 error = native_to_linux_timespec(<s, &ts);
2489 if (error != 0)
2490 return (error);
2491 return (copyout(<s, uap->interval, sizeof(lts)));
2492 }
2493
2494 /*
2495 * In case when the Linux thread is the initial thread in
2496 * the thread group thread id is equal to the process id.
2497 * Glibc depends on this magic (assert in pthread_getattr_np.c).
2498 */
2499 struct thread *
2500 linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid)
2501 {
2502 struct linux_emuldata *em;
2503 struct thread *tdt;
2504 struct proc *p;
2505
2506 tdt = NULL;
2507 if (tid == 0 || tid == td->td_tid) {
2508 tdt = td;
2509 PROC_LOCK(tdt->td_proc);
2510 } else if (tid > PID_MAX)
2511 tdt = tdfind(tid, pid);
2512 else {
2513 /*
2514 * Initial thread where the tid equal to the pid.
2515 */
2516 p = pfind(tid);
2517 if (p != NULL) {
2518 if (SV_PROC_ABI(p) != SV_ABI_LINUX) {
2519 /*
2520 * p is not a Linuxulator process.
2521 */
2522 PROC_UNLOCK(p);
2523 return (NULL);
2524 }
2525 FOREACH_THREAD_IN_PROC(p, tdt) {
2526 em = em_find(tdt);
2527 if (tid == em->em_tid)
2528 return (tdt);
2529 }
2530 PROC_UNLOCK(p);
2531 }
2532 return (NULL);
2533 }
2534
2535 return (tdt);
2536 }
2537
2538 void
2539 linux_to_bsd_waitopts(int options, int *bsdopts)
2540 {
2541
2542 if (options & LINUX_WNOHANG)
2543 *bsdopts |= WNOHANG;
2544 if (options & LINUX_WUNTRACED)
2545 *bsdopts |= WUNTRACED;
2546 if (options & LINUX_WEXITED)
2547 *bsdopts |= WEXITED;
2548 if (options & LINUX_WCONTINUED)
2549 *bsdopts |= WCONTINUED;
2550 if (options & LINUX_WNOWAIT)
2551 *bsdopts |= WNOWAIT;
2552
2553 if (options & __WCLONE)
2554 *bsdopts |= WLINUXCLONE;
2555 }
2556
2557 int
2558 linux_getrandom(struct thread *td, struct linux_getrandom_args *args)
2559 {
2560 struct uio uio;
2561 struct iovec iov;
2562 int error;
2563
2564 if (args->flags & ~(LINUX_GRND_NONBLOCK|LINUX_GRND_RANDOM))
2565 return (EINVAL);
2566 if (args->count > INT_MAX)
2567 args->count = INT_MAX;
2568
2569 iov.iov_base = args->buf;
2570 iov.iov_len = args->count;
2571
2572 uio.uio_iov = &iov;
2573 uio.uio_iovcnt = 1;
2574 uio.uio_resid = iov.iov_len;
2575 uio.uio_segflg = UIO_USERSPACE;
2576 uio.uio_rw = UIO_READ;
2577 uio.uio_td = td;
2578
2579 error = read_random_uio(&uio, args->flags & LINUX_GRND_NONBLOCK);
2580 if (error == 0)
2581 td->td_retval[0] = args->count - uio.uio_resid;
2582 return (error);
2583 }
2584
2585 int
2586 linux_mincore(struct thread *td, struct linux_mincore_args *args)
2587 {
2588
2589 /* Needs to be page-aligned */
2590 if (args->start & PAGE_MASK)
2591 return (EINVAL);
2592 return (kern_mincore(td, args->start, args->len, args->vec));
2593 }
Cache object: a27cd7d35c3be29ad124660a5dc5550b
|