1 /*-
2 * Copyright (c) 2002 Doug Rabson
3 * Copyright (c) 1994-1995 Søren Schmidt
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer
11 * in this position and unchanged.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include "opt_mac.h"
34
35 #include <sys/param.h>
36 #include <sys/blist.h>
37 #include <sys/fcntl.h>
38 #if defined(__i386__) || defined(__alpha__)
39 #include <sys/imgact_aout.h>
40 #endif
41 #include <sys/jail.h>
42 #include <sys/kernel.h>
43 #include <sys/limits.h>
44 #include <sys/lock.h>
45 #include <sys/mac.h>
46 #include <sys/malloc.h>
47 #include <sys/mman.h>
48 #include <sys/mount.h>
49 #include <sys/mutex.h>
50 #include <sys/namei.h>
51 #include <sys/proc.h>
52 #include <sys/reboot.h>
53 #include <sys/resourcevar.h>
54 #include <sys/signalvar.h>
55 #include <sys/stat.h>
56 #include <sys/syscallsubr.h>
57 #include <sys/sysctl.h>
58 #include <sys/sysproto.h>
59 #include <sys/systm.h>
60 #include <sys/time.h>
61 #include <sys/vmmeter.h>
62 #include <sys/vnode.h>
63 #include <sys/wait.h>
64
65 #include <vm/vm.h>
66 #include <vm/pmap.h>
67 #include <vm/vm_kern.h>
68 #include <vm/vm_map.h>
69 #include <vm/vm_extern.h>
70 #include <vm/vm_object.h>
71 #include <vm/swap_pager.h>
72
73 #include <posix4/sched.h>
74
75 #include "opt_compat.h"
76
77 #include <compat/linux/linux_sysproto.h>
78
79 #ifdef COMPAT_LINUX32
80 #include <machine/../linux32/linux.h>
81 #include <machine/../linux32/linux32_proto.h>
82 #else
83 #include <machine/../linux/linux.h>
84 #include <machine/../linux/linux_proto.h>
85 #endif
86
87 #include <compat/linux/linux_mib.h>
88 #include <compat/linux/linux_util.h>
89
90 #ifdef __i386__
91 #include <machine/cputypes.h>
92 #endif
93
/*
 * Translate a BSD signal number into the number Linux uses for it.
 * On alpha the macro is the identity.  Elsewhere, values up to
 * LINUX_SIGTBLSZ are looked up in bsd_to_linux_signal[] and anything
 * larger is passed through unchanged.  The argument is parenthesised at
 * every use so that arbitrary expressions expand safely.
 */
#ifdef __alpha__
#define	BSD_TO_LINUX_SIGNAL(sig)	(sig)
#else
#define	BSD_TO_LINUX_SIGNAL(sig)	\
	(((sig) <= LINUX_SIGTBLSZ) ? bsd_to_linux_signal[_SIG_IDX(sig)] : (sig))
#endif
100
101 #ifndef __alpha__
/*
 * Maps a Linux resource-limit index to the BSD RLIMIT_* constant used in
 * its place.  The final entry is -1 (stored in an unsigned slot); the
 * rlimit handlers in this file compare against -1 and return EINVAL for it.
 */
static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = {
	RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
	RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
	RLIMIT_MEMLOCK, -1
};
107 #endif /*!__alpha__*/
108
/*
 * Result structure filled in by linux_sysinfo() and copied out to the
 * caller as-is.
 */
struct l_sysinfo {
	l_long		uptime;		/* Seconds since boot */
	l_ulong		loads[3];	/* 1, 5, and 15 minute load averages */
#define LINUX_SYSINFO_LOADS_SCALE 65536	/* scale factor applied to loads[] */
	l_ulong		totalram;	/* Total usable main memory size */
	l_ulong		freeram;	/* Available memory size */
	l_ulong		sharedram;	/* Amount of shared memory */
	l_ulong		bufferram;	/* Memory used by buffers */
	l_ulong		totalswap;	/* Total swap space size */
	l_ulong		freeswap;	/* swap space still available */
	l_ushort	procs;		/* Number of current processes */
	l_ulong		totalbig;
	l_ulong		freebig;
	l_uint		mem_unit;
	char		_f[6];		/* Pads structure to 64 bytes */
};
125 #ifndef __alpha__
126 int
127 linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args)
128 {
129 struct l_sysinfo sysinfo;
130 vm_object_t object;
131 int i, j;
132 struct timespec ts;
133
134 /* Uptime is copied out of print_uptime() in kern_shutdown.c */
135 getnanouptime(&ts);
136 i = 0;
137 if (ts.tv_sec >= 86400) {
138 ts.tv_sec %= 86400;
139 i = 1;
140 }
141 if (i || ts.tv_sec >= 3600) {
142 ts.tv_sec %= 3600;
143 i = 1;
144 }
145 if (i || ts.tv_sec >= 60) {
146 ts.tv_sec %= 60;
147 i = 1;
148 }
149 sysinfo.uptime=ts.tv_sec;
150
151 /* Use the information from the mib to get our load averages */
152 for (i = 0; i < 3; i++)
153 sysinfo.loads[i] = averunnable.ldavg[i] *
154 LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale;
155
156 sysinfo.totalram = physmem * PAGE_SIZE;
157 sysinfo.freeram = sysinfo.totalram - cnt.v_wire_count * PAGE_SIZE;
158
159 sysinfo.sharedram = 0;
160 mtx_lock(&vm_object_list_mtx);
161 TAILQ_FOREACH(object, &vm_object_list, object_list)
162 if (object->shadow_count > 1)
163 sysinfo.sharedram += object->resident_page_count;
164 mtx_unlock(&vm_object_list_mtx);
165
166 sysinfo.sharedram *= PAGE_SIZE;
167 sysinfo.bufferram = 0;
168
169 swap_pager_status(&i, &j);
170 sysinfo.totalswap= i * PAGE_SIZE;
171 sysinfo.freeswap = (i - j) * PAGE_SIZE;
172
173 sysinfo.procs = nprocs;
174
175 /* The following are only present in newer Linux kernels. */
176 sysinfo.totalbig = 0;
177 sysinfo.freebig = 0;
178 sysinfo.mem_unit = 1;
179
180 return copyout(&sysinfo, args->info, sizeof(sysinfo));
181 }
182 #endif /*!__alpha__*/
183
184 #ifndef __alpha__
185 int
186 linux_alarm(struct thread *td, struct linux_alarm_args *args)
187 {
188 struct itimerval it, old_it;
189 int error;
190
191 #ifdef DEBUG
192 if (ldebug(alarm))
193 printf(ARGS(alarm, "%u"), args->secs);
194 #endif
195
196 if (args->secs > 100000000)
197 return (EINVAL);
198
199 it.it_value.tv_sec = (long)args->secs;
200 it.it_value.tv_usec = 0;
201 it.it_interval.tv_sec = 0;
202 it.it_interval.tv_usec = 0;
203 error = kern_setitimer(td, ITIMER_REAL, &it, &old_it);
204 if (error)
205 return (error);
206 if (timevalisset(&old_it.it_value)) {
207 if (old_it.it_value.tv_usec != 0)
208 old_it.it_value.tv_sec++;
209 td->td_retval[0] = old_it.it_value.tv_sec;
210 }
211 return (0);
212 }
213 #endif /*!__alpha__*/
214
215 int
216 linux_brk(struct thread *td, struct linux_brk_args *args)
217 {
218 struct vmspace *vm = td->td_proc->p_vmspace;
219 vm_offset_t new, old;
220 struct obreak_args /* {
221 char * nsize;
222 } */ tmp;
223
224 #ifdef DEBUG
225 if (ldebug(brk))
226 printf(ARGS(brk, "%p"), (void *)(uintptr_t)args->dsend);
227 #endif
228 old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize);
229 new = (vm_offset_t)args->dsend;
230 tmp.nsize = (char *) new;
231 if (((caddr_t)new > vm->vm_daddr) && !obreak(td, &tmp))
232 td->td_retval[0] = (long)new;
233 else
234 td->td_retval[0] = (long)old;
235
236 return 0;
237 }
238
239 #if defined(__i386__) || defined(__alpha__)
240
/*
 * Load the a.out shared library named by args->library into the calling
 * process.
 *
 * The path is looked up with namei() (leaf returned locked).  The vnode
 * is then validated: no writers, executable regular file on a mount
 * without MNT_NOEXEC, non-zero size, VEXEC access, MAC check when
 * configured.  Its first page is mapped into kernel_map so the header can
 * be inspected.  Depending on whether the file offset chosen for the
 * a.out variant is page aligned, text+data are either mapped straight
 * from the file or copied into a fresh anonymous user mapping.  Finally
 * anonymous memory is added for the BSS.
 *
 * Returns 0 on success or an errno value; every exit goes through the
 * cleanup label, which drops the vnode lock if still held and removes the
 * kernel mapping of the header page.
 */
int
linux_uselib(struct thread *td, struct linux_uselib_args *args)
{
	struct nameidata ni;
	struct vnode *vp;
	struct exec *a_out;
	struct vattr attr;
	vm_offset_t vmaddr;
	unsigned long file_offset;
	vm_offset_t buffer;
	unsigned long bss_size;
	char *library;
	int error;
	int locked;

	LCONVPATHEXIST(td, args->library, &library);

#ifdef DEBUG
	if (ldebug(uselib))
		printf(ARGS(uselib, "%s"), library);
#endif

	/* State inspected by the cleanup label; set before any goto. */
	a_out = NULL;
	locked = 0;
	vp = NULL;

	/*
	 * XXX: This code should make use of vn_open(), rather than doing
	 * all this stuff itself.
	 */
	NDINIT(&ni, LOOKUP, FOLLOW|LOCKLEAF, UIO_SYSSPACE, library, td);
	error = namei(&ni);
	LFREEPATH(library);
	if (error)
		goto cleanup;

	vp = ni.ni_vp;
	/*
	 * XXX - This looks like a bogus check. A LOCKLEAF namei should not
	 * succeed without returning a vnode.
	 */
	if (vp == NULL) {
		error = ENOEXEC;	/* ?? */
		goto cleanup;
	}
	NDFREE(&ni, NDF_ONLY_PNBUF);

	/*
	 * From here on down, we have a locked vnode that must be unlocked.
	 */
	locked++;

	/* Writable? */
	if (vp->v_writecount) {
		error = ETXTBSY;
		goto cleanup;
	}

	/* Executable? */
	error = VOP_GETATTR(vp, &attr, td->td_ucred, td);
	if (error)
		goto cleanup;

	if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
	    ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) {
		error = ENOEXEC;
		goto cleanup;
	}

	/* Sensible size? */
	if (attr.va_size == 0) {
		error = ENOEXEC;
		goto cleanup;
	}

	/* Can we access it? */
	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
	if (error)
		goto cleanup;

	/*
	 * XXX: This should use vn_open() so that it is properly authorized,
	 * and to reduce code redundancy all over the place here.
	 */
#ifdef MAC
	error = mac_check_vnode_open(td->td_ucred, vp, FREAD);
	if (error)
		goto cleanup;
#endif
	error = VOP_OPEN(vp, FREAD, td->td_ucred, td, -1);
	if (error)
		goto cleanup;

	/* Pull in executable header into kernel_map */
	error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE,
	    VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0);
	/*
	 * Lock no longer needed
	 */
	locked = 0;
	VOP_UNLOCK(vp, 0, td);

	/* The vm_mmap() result is only examined after the unlock above. */
	if (error)
		goto cleanup;

	/* Is it a Linux binary ? */
	if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
		error = ENOEXEC;
		goto cleanup;
	}

	/*
	 * While we are here, we should REALLY do some more checks
	 */

	/* Set file/virtual offset based on a.out variant. */
	switch ((int)(a_out->a_magic & 0xffff)) {
	case 0413:	/* ZMAGIC */
		file_offset = 1024;
		break;
	case 0314:	/* QMAGIC */
		file_offset = 0;
		break;
	default:
		error = ENOEXEC;
		goto cleanup;
	}

	bss_size = round_page(a_out->a_bss);

	/* Check various fields in header for validity/bounds. */
	if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
		error = ENOEXEC;
		goto cleanup;
	}

	/*
	 * text + data can't exceed file size
	 * NOTE(review): the sum is formed in the width of the header fields
	 * before the comparison; if those are 32-bit it can wrap for a
	 * hostile header -- confirm against the definition of struct exec.
	 */
	if (a_out->a_data + a_out->a_text > attr.va_size) {
		error = EFAULT;
		goto cleanup;
	}

	/*
	 * text/data/bss must not exceed limits
	 * XXX - this is not complete. it should check current usage PLUS
	 * the resources needed by this library.
	 */
	PROC_LOCK(td->td_proc);
	if (a_out->a_text > maxtsiz ||
	    a_out->a_data + bss_size > lim_cur(td->td_proc, RLIMIT_DATA)) {
		PROC_UNLOCK(td->td_proc);
		error = ENOMEM;
		goto cleanup;
	}
	PROC_UNLOCK(td->td_proc);

	mp_fixme("Unlocked vflags access.");
	/* prevent more writers */
	vp->v_vflag |= VV_TEXT;

	/*
	 * Check if file_offset page aligned. Currently we cannot handle
	 * misaligned file offsets, and so we read in the entire image
	 * (what a waste).
	 */
	if (file_offset & PAGE_MASK) {
#ifdef DEBUG
		printf("uselib: Non page aligned binary %lu\n", file_offset);
#endif
		/* Map text+data read/write/execute */

		/* a_entry is the load address and is page aligned */
		vmaddr = trunc_page(a_out->a_entry);

		/* get anon user mapping, read+write+execute */
		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
		    &vmaddr, a_out->a_text + a_out->a_data, FALSE, VM_PROT_ALL,
		    VM_PROT_ALL, 0);
		if (error)
			goto cleanup;

		/* map file into kernel_map */
		error = vm_mmap(kernel_map, &buffer,
		    round_page(a_out->a_text + a_out->a_data + file_offset),
		    VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp,
		    trunc_page(file_offset));
		if (error)
			goto cleanup;

		/* copy from kernel VM space to user space */
		error = copyout(PTRIN(buffer + file_offset),
		    (void *)vmaddr, a_out->a_text + a_out->a_data);

		/* release temporary kernel space */
		vm_map_remove(kernel_map, buffer, buffer +
		    round_page(a_out->a_text + a_out->a_data + file_offset));

		/* copyout() status is checked only after the buffer is gone. */
		if (error)
			goto cleanup;
	} else {
#ifdef DEBUG
		printf("uselib: Page aligned binary %lu\n", file_offset);
#endif
		/*
		 * for QMAGIC, a_entry is 20 bytes beyond the load address
		 * to skip the executable header
		 */
		vmaddr = trunc_page(a_out->a_entry);

		/*
		 * Map it all into the process's space as a single
		 * copy-on-write "data" segment.
		 */
		error = vm_mmap(&td->td_proc->p_vmspace->vm_map, &vmaddr,
		    a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL,
		    MAP_PRIVATE | MAP_FIXED, (caddr_t)vp, file_offset);
		if (error)
			goto cleanup;
	}
#ifdef DEBUG
	printf("mem=%08lx = %08lx %08lx\n", (long)vmaddr, ((long*)vmaddr)[0],
	    ((long*)vmaddr)[1]);
#endif
	if (bss_size != 0) {
		/* Calculate BSS start address */
		vmaddr = trunc_page(a_out->a_entry) + a_out->a_text +
		    a_out->a_data;

		/* allocate some 'anon' space */
		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
		    &vmaddr, bss_size, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0);
		if (error)
			goto cleanup;
	}

cleanup:
	/* Unlock vnode if needed */
	if (locked)
		VOP_UNLOCK(vp, 0, td);

	/* Release the kernel mapping. */
	if (a_out)
		vm_map_remove(kernel_map, (vm_offset_t)a_out,
		    (vm_offset_t)a_out + PAGE_SIZE);

	return error;
}
488
489 #endif /* __i386__ || __alpha__ */
490
491 int
492 linux_select(struct thread *td, struct linux_select_args *args)
493 {
494 l_timeval ltv;
495 struct timeval tv0, tv1, utv, *tvp;
496 int error;
497
498 #ifdef DEBUG
499 if (ldebug(select))
500 printf(ARGS(select, "%d, %p, %p, %p, %p"), args->nfds,
501 (void *)args->readfds, (void *)args->writefds,
502 (void *)args->exceptfds, (void *)args->timeout);
503 #endif
504
505 /*
506 * Store current time for computation of the amount of
507 * time left.
508 */
509 if (args->timeout) {
510 if ((error = copyin(args->timeout, <v, sizeof(ltv))))
511 goto select_out;
512 utv.tv_sec = ltv.tv_sec;
513 utv.tv_usec = ltv.tv_usec;
514 #ifdef DEBUG
515 if (ldebug(select))
516 printf(LMSG("incoming timeout (%ld/%ld)"),
517 utv.tv_sec, utv.tv_usec);
518 #endif
519
520 if (itimerfix(&utv)) {
521 /*
522 * The timeval was invalid. Convert it to something
523 * valid that will act as it does under Linux.
524 */
525 utv.tv_sec += utv.tv_usec / 1000000;
526 utv.tv_usec %= 1000000;
527 if (utv.tv_usec < 0) {
528 utv.tv_sec -= 1;
529 utv.tv_usec += 1000000;
530 }
531 if (utv.tv_sec < 0)
532 timevalclear(&utv);
533 }
534 microtime(&tv0);
535 tvp = &utv;
536 } else
537 tvp = NULL;
538
539 error = kern_select(td, args->nfds, args->readfds, args->writefds,
540 args->exceptfds, tvp);
541
542 #ifdef DEBUG
543 if (ldebug(select))
544 printf(LMSG("real select returns %d"), error);
545 #endif
546 if (error) {
547 /*
548 * See fs/select.c in the Linux kernel. Without this,
549 * Maelstrom doesn't work.
550 */
551 if (error == ERESTART)
552 error = EINTR;
553 goto select_out;
554 }
555
556 if (args->timeout) {
557 if (td->td_retval[0]) {
558 /*
559 * Compute how much time was left of the timeout,
560 * by subtracting the current time and the time
561 * before we started the call, and subtracting
562 * that result from the user-supplied value.
563 */
564 microtime(&tv1);
565 timevalsub(&tv1, &tv0);
566 timevalsub(&utv, &tv1);
567 if (utv.tv_sec < 0)
568 timevalclear(&utv);
569 } else
570 timevalclear(&utv);
571 #ifdef DEBUG
572 if (ldebug(select))
573 printf(LMSG("outgoing timeout (%ld/%ld)"),
574 utv.tv_sec, utv.tv_usec);
575 #endif
576 ltv.tv_sec = utv.tv_sec;
577 ltv.tv_usec = utv.tv_usec;
578 if ((error = copyout(<v, args->timeout, sizeof(ltv))))
579 goto select_out;
580 }
581
582 select_out:
583 #ifdef DEBUG
584 if (ldebug(select))
585 printf(LMSG("select_out -> %d"), error);
586 #endif
587 return error;
588 }
589
590 int
591 linux_mremap(struct thread *td, struct linux_mremap_args *args)
592 {
593 struct munmap_args /* {
594 void *addr;
595 size_t len;
596 } */ bsd_args;
597 int error = 0;
598
599 #ifdef DEBUG
600 if (ldebug(mremap))
601 printf(ARGS(mremap, "%p, %08lx, %08lx, %08lx"),
602 (void *)(uintptr_t)args->addr,
603 (unsigned long)args->old_len,
604 (unsigned long)args->new_len,
605 (unsigned long)args->flags);
606 #endif
607 args->new_len = round_page(args->new_len);
608 args->old_len = round_page(args->old_len);
609
610 if (args->new_len > args->old_len) {
611 td->td_retval[0] = 0;
612 return ENOMEM;
613 }
614
615 if (args->new_len < args->old_len) {
616 bsd_args.addr =
617 (caddr_t)((uintptr_t)args->addr + args->new_len);
618 bsd_args.len = args->old_len - args->new_len;
619 error = munmap(td, &bsd_args);
620 }
621
622 td->td_retval[0] = error ? 0 : (uintptr_t)args->addr;
623 return error;
624 }
625
626 #define LINUX_MS_ASYNC 0x0001
627 #define LINUX_MS_INVALIDATE 0x0002
628 #define LINUX_MS_SYNC 0x0004
629
630 int
631 linux_msync(struct thread *td, struct linux_msync_args *args)
632 {
633 struct msync_args bsd_args;
634
635 bsd_args.addr = (caddr_t)(uintptr_t)args->addr;
636 bsd_args.len = (uintptr_t)args->len;
637 bsd_args.flags = args->fl & ~LINUX_MS_SYNC;
638
639 return msync(td, &bsd_args);
640 }
641
642 #ifndef __alpha__
643 int
644 linux_time(struct thread *td, struct linux_time_args *args)
645 {
646 struct timeval tv;
647 l_time_t tm;
648 int error;
649
650 #ifdef DEBUG
651 if (ldebug(time))
652 printf(ARGS(time, "*"));
653 #endif
654
655 microtime(&tv);
656 tm = tv.tv_sec;
657 if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm))))
658 return error;
659 td->td_retval[0] = tm;
660 return 0;
661 }
662 #endif /*!__alpha__*/
663
/*
 * Buffer written by linux_times(): own and child CPU times, each
 * expressed in CLK_TCK ticks.
 */
struct l_times_argv {
	l_long	tms_utime;	/* user time of the calling process */
	l_long	tms_stime;	/* system time of the calling process */
	l_long	tms_cutime;	/* user time, taken from p_stats->p_cru */
	l_long	tms_cstime;	/* system time, taken from p_stats->p_cru */
};
670
/* Clock ticks per second as seen by Linux binaries. */
#ifdef __alpha__
#define CLK_TCK 1024	/* Linux uses 1024 on alpha */
#else
#define CLK_TCK 100	/* Linux uses 100 */
#endif

/*
 * Convert a struct timeval (passed by value, not by pointer) to CLK_TCK
 * ticks.  The argument is parenthesised so that expressions such as *p
 * expand correctly.
 */
#define CONVTCK(r)	((r).tv_sec * CLK_TCK + (r).tv_usec / (1000000 / CLK_TCK))
678
679 int
680 linux_times(struct thread *td, struct linux_times_args *args)
681 {
682 struct timeval tv;
683 struct l_times_argv tms;
684 struct rusage ru;
685 int error;
686
687 #ifdef DEBUG
688 if (ldebug(times))
689 printf(ARGS(times, "*"));
690 #endif
691
692 mtx_lock_spin(&sched_lock);
693 calcru(td->td_proc, &ru.ru_utime, &ru.ru_stime, NULL);
694 mtx_unlock_spin(&sched_lock);
695
696 tms.tms_utime = CONVTCK(ru.ru_utime);
697 tms.tms_stime = CONVTCK(ru.ru_stime);
698
699 tms.tms_cutime = CONVTCK(td->td_proc->p_stats->p_cru.ru_utime);
700 tms.tms_cstime = CONVTCK(td->td_proc->p_stats->p_cru.ru_stime);
701
702 if ((error = copyout(&tms, args->buf, sizeof(tms))))
703 return error;
704
705 microuptime(&tv);
706 td->td_retval[0] = (int)CONVTCK(tv);
707 return 0;
708 }
709
710 int
711 linux_newuname(struct thread *td, struct linux_newuname_args *args)
712 {
713 struct l_new_utsname utsname;
714 char osname[LINUX_MAX_UTSNAME];
715 char osrelease[LINUX_MAX_UTSNAME];
716 char *p;
717
718 #ifdef DEBUG
719 if (ldebug(newuname))
720 printf(ARGS(newuname, "*"));
721 #endif
722
723 linux_get_osname(td, osname);
724 linux_get_osrelease(td, osrelease);
725
726 bzero(&utsname, sizeof(utsname));
727 strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME);
728 getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME);
729 strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME);
730 strlcpy(utsname.version, version, LINUX_MAX_UTSNAME);
731 for (p = utsname.version; *p != '\0'; ++p)
732 if (*p == '\n') {
733 *p = '\0';
734 break;
735 }
736 #ifdef __i386__
737 {
738 const char *class;
739 switch (cpu_class) {
740 case CPUCLASS_686:
741 class = "i686";
742 break;
743 case CPUCLASS_586:
744 class = "i586";
745 break;
746 case CPUCLASS_486:
747 class = "i486";
748 break;
749 default:
750 class = "i386";
751 }
752 strlcpy(utsname.machine, class, LINUX_MAX_UTSNAME);
753 }
754 #elif defined(__amd64__) /* XXX: Linux can change 'personality'. */
755 #ifdef COMPAT_LINUX32
756 strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME);
757 #else
758 strlcpy(utsname.machine, "x86_64", LINUX_MAX_UTSNAME);
759 #endif /* COMPAT_LINUX32 */
760 #else /* something other than i386 or amd64 - assume we and Linux agree */
761 strlcpy(utsname.machine, machine, LINUX_MAX_UTSNAME);
762 #endif /* __i386__ */
763 strlcpy(utsname.domainname, domainname, LINUX_MAX_UTSNAME);
764
765 return (copyout(&utsname, args->buf, sizeof(utsname)));
766 }
767
768 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/* Argument block read from userland by linux_utime(); whole seconds only. */
struct l_utimbuf {
	l_time_t l_actime;	/* becomes tv[0] passed to kern_utimes() */
	l_time_t l_modtime;	/* becomes tv[1] passed to kern_utimes() */
};
773
774 int
775 linux_utime(struct thread *td, struct linux_utime_args *args)
776 {
777 struct timeval tv[2], *tvp;
778 struct l_utimbuf lut;
779 char *fname;
780 int error;
781
782 LCONVPATHEXIST(td, args->fname, &fname);
783
784 #ifdef DEBUG
785 if (ldebug(utime))
786 printf(ARGS(utime, "%s, *"), fname);
787 #endif
788
789 if (args->times) {
790 if ((error = copyin(args->times, &lut, sizeof lut))) {
791 LFREEPATH(fname);
792 return error;
793 }
794 tv[0].tv_sec = lut.l_actime;
795 tv[0].tv_usec = 0;
796 tv[1].tv_sec = lut.l_modtime;
797 tv[1].tv_usec = 0;
798 tvp = tv;
799 } else
800 tvp = NULL;
801
802 error = kern_utimes(td, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE);
803 LFREEPATH(fname);
804 return (error);
805 }
806 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
807
/* Linux wait option bit; the wait handlers translate it to WLINUXCLONE. */
#define __WCLONE 0x80000000
809
810 #ifndef __alpha__
811 int
812 linux_waitpid(struct thread *td, struct linux_waitpid_args *args)
813 {
814 int error, options, tmpstat;
815
816 #ifdef DEBUG
817 if (ldebug(waitpid))
818 printf(ARGS(waitpid, "%d, %p, %d"),
819 args->pid, (void *)args->status, args->options);
820 #endif
821
822 options = (args->options & (WNOHANG | WUNTRACED));
823 /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
824 if (args->options & __WCLONE)
825 options |= WLINUXCLONE;
826
827 error = kern_wait(td, args->pid, &tmpstat, options, NULL);
828 if (error)
829 return error;
830
831 if (args->status) {
832 tmpstat &= 0xffff;
833 if (WIFSIGNALED(tmpstat))
834 tmpstat = (tmpstat & 0xffffff80) |
835 BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
836 else if (WIFSTOPPED(tmpstat))
837 tmpstat = (tmpstat & 0xffff00ff) |
838 (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
839 return copyout(&tmpstat, args->status, sizeof(int));
840 }
841
842 return 0;
843 }
844 #endif /*!__alpha__*/
845
846 int
847 linux_wait4(struct thread *td, struct linux_wait4_args *args)
848 {
849 int error, options, tmpstat;
850 struct rusage ru;
851 struct proc *p;
852
853 #ifdef DEBUG
854 if (ldebug(wait4))
855 printf(ARGS(wait4, "%d, %p, %d, %p"),
856 args->pid, (void *)args->status, args->options,
857 (void *)args->rusage);
858 #endif
859
860 options = (args->options & (WNOHANG | WUNTRACED));
861 /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
862 if (args->options & __WCLONE)
863 options |= WLINUXCLONE;
864
865 error = kern_wait(td, args->pid, &tmpstat, options, &ru);
866 if (error)
867 return error;
868
869 p = td->td_proc;
870 PROC_LOCK(p);
871 SIGDELSET(p->p_siglist, SIGCHLD);
872 PROC_UNLOCK(p);
873
874 if (args->status) {
875 tmpstat &= 0xffff;
876 if (WIFSIGNALED(tmpstat))
877 tmpstat = (tmpstat & 0xffffff80) |
878 BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
879 else if (WIFSTOPPED(tmpstat))
880 tmpstat = (tmpstat & 0xffff00ff) |
881 (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
882 error = copyout(&tmpstat, args->status, sizeof(int));
883 }
884 if (args->rusage != NULL && error == 0)
885 error = copyout(&ru, args->rusage, sizeof(ru));
886
887 return (error);
888 }
889
890 int
891 linux_mknod(struct thread *td, struct linux_mknod_args *args)
892 {
893 char *path;
894 int error;
895
896 LCONVPATHCREAT(td, args->path, &path);
897
898 #ifdef DEBUG
899 if (ldebug(mknod))
900 printf(ARGS(mknod, "%s, %d, %d"), path, args->mode, args->dev);
901 #endif
902
903 if (args->mode & S_IFIFO)
904 error = kern_mkfifo(td, path, UIO_SYSSPACE, args->mode);
905 else
906 error = kern_mknod(td, path, UIO_SYSSPACE, args->mode,
907 args->dev);
908 LFREEPATH(path);
909 return (error);
910 }
911
912 /*
913 * UGH! This is just about the dumbest idea I've ever heard!!
914 */
915 int
916 linux_personality(struct thread *td, struct linux_personality_args *args)
917 {
918 #ifdef DEBUG
919 if (ldebug(personality))
920 printf(ARGS(personality, "%lu"), (unsigned long)args->per);
921 #endif
922 #ifndef __alpha__
923 if (args->per != 0)
924 return EINVAL;
925 #endif
926
927 /* Yes Jim, it's still a Linux... */
928 td->td_retval[0] = 0;
929 return 0;
930 }
931
/*
 * Linux-side interval timer value, built from l_timeval members; converted
 * to and from the native struct itimerval with B2L_ITIMERVAL().
 */
struct l_itimerval {
	l_timeval it_interval;
	l_timeval it_value;
};
936
/*
 * Copy the four tv_sec/tv_usec members of *lip into *bip.  Despite the
 * name the macro is used in both directions; the two structures only need
 * matching member names.  The body is wrapped in do { } while (0) so the
 * macro expands to exactly one statement and stays correct inside an
 * unbraced if/else.
 */
#define	B2L_ITIMERVAL(bip, lip)						\
	do {								\
		(bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec;	\
		(bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec; \
		(bip)->it_value.tv_sec = (lip)->it_value.tv_sec;	\
		(bip)->it_value.tv_usec = (lip)->it_value.tv_usec;	\
	} while (0)
942
943 int
944 linux_setitimer(struct thread *td, struct linux_setitimer_args *uap)
945 {
946 int error;
947 struct l_itimerval ls;
948 struct itimerval aitv, oitv;
949
950 #ifdef DEBUG
951 if (ldebug(setitimer))
952 printf(ARGS(setitimer, "%p, %p"),
953 (void *)uap->itv, (void *)uap->oitv);
954 #endif
955
956 if (uap->itv == NULL) {
957 uap->itv = uap->oitv;
958 return (linux_getitimer(td, (struct linux_getitimer_args *)uap));
959 }
960
961 error = copyin(uap->itv, &ls, sizeof(ls));
962 if (error != 0)
963 return (error);
964 B2L_ITIMERVAL(&aitv, &ls);
965 #ifdef DEBUG
966 if (ldebug(setitimer)) {
967 printf("setitimer: value: sec: %ld, usec: %ld\n",
968 aitv.it_value.tv_sec, aitv.it_value.tv_usec);
969 printf("setitimer: interval: sec: %ld, usec: %ld\n",
970 aitv.it_interval.tv_sec, aitv.it_interval.tv_usec);
971 }
972 #endif
973 error = kern_setitimer(td, uap->which, &aitv, &oitv);
974 if (error != 0 || uap->oitv == NULL)
975 return (error);
976 B2L_ITIMERVAL(&ls, &oitv);
977
978 return (copyout(&ls, uap->oitv, sizeof(ls)));
979 }
980
981 int
982 linux_getitimer(struct thread *td, struct linux_getitimer_args *uap)
983 {
984 int error;
985 struct l_itimerval ls;
986 struct itimerval aitv;
987
988 #ifdef DEBUG
989 if (ldebug(getitimer))
990 printf(ARGS(getitimer, "%p"), (void *)uap->itv);
991 #endif
992 error = kern_getitimer(td, uap->which, &aitv);
993 if (error != 0)
994 return (error);
995 B2L_ITIMERVAL(&ls, &aitv);
996 return (copyout(&ls, uap->itv, sizeof(ls)));
997 }
998
999 #ifndef __alpha__
1000 int
1001 linux_nice(struct thread *td, struct linux_nice_args *args)
1002 {
1003 struct setpriority_args bsd_args;
1004
1005 bsd_args.which = PRIO_PROCESS;
1006 bsd_args.who = 0; /* current process */
1007 bsd_args.prio = args->inc;
1008 return setpriority(td, &bsd_args);
1009 }
1010 #endif /*!__alpha__*/
1011
1012 int
1013 linux_setgroups(struct thread *td, struct linux_setgroups_args *args)
1014 {
1015 struct ucred *newcred, *oldcred;
1016 l_gid_t linux_gidset[NGROUPS];
1017 gid_t *bsd_gidset;
1018 int ngrp, error;
1019 struct proc *p;
1020
1021 ngrp = args->gidsetsize;
1022 if (ngrp < 0 || ngrp >= NGROUPS)
1023 return (EINVAL);
1024 error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t));
1025 if (error)
1026 return (error);
1027 newcred = crget();
1028 p = td->td_proc;
1029 PROC_LOCK(p);
1030 oldcred = p->p_ucred;
1031
1032 /*
1033 * cr_groups[0] holds egid. Setting the whole set from
1034 * the supplied set will cause egid to be changed too.
1035 * Keep cr_groups[0] unchanged to prevent that.
1036 */
1037
1038 if ((error = suser_cred(oldcred, SUSER_ALLOWJAIL)) != 0) {
1039 PROC_UNLOCK(p);
1040 crfree(newcred);
1041 return (error);
1042 }
1043
1044 crcopy(newcred, oldcred);
1045 if (ngrp > 0) {
1046 newcred->cr_ngroups = ngrp + 1;
1047
1048 bsd_gidset = newcred->cr_groups;
1049 ngrp--;
1050 while (ngrp >= 0) {
1051 bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
1052 ngrp--;
1053 }
1054 }
1055 else
1056 newcred->cr_ngroups = 1;
1057
1058 setsugid(p);
1059 p->p_ucred = newcred;
1060 PROC_UNLOCK(p);
1061 crfree(oldcred);
1062 return (0);
1063 }
1064
1065 int
1066 linux_getgroups(struct thread *td, struct linux_getgroups_args *args)
1067 {
1068 struct ucred *cred;
1069 l_gid_t linux_gidset[NGROUPS];
1070 gid_t *bsd_gidset;
1071 int bsd_gidsetsz, ngrp, error;
1072
1073 cred = td->td_ucred;
1074 bsd_gidset = cred->cr_groups;
1075 bsd_gidsetsz = cred->cr_ngroups - 1;
1076
1077 /*
1078 * cr_groups[0] holds egid. Returning the whole set
1079 * here will cause a duplicate. Exclude cr_groups[0]
1080 * to prevent that.
1081 */
1082
1083 if ((ngrp = args->gidsetsize) == 0) {
1084 td->td_retval[0] = bsd_gidsetsz;
1085 return (0);
1086 }
1087
1088 if (ngrp < bsd_gidsetsz)
1089 return (EINVAL);
1090
1091 ngrp = 0;
1092 while (ngrp < bsd_gidsetsz) {
1093 linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
1094 ngrp++;
1095 }
1096
1097 if ((error = copyout(linux_gidset, args->grouplist,
1098 ngrp * sizeof(l_gid_t))))
1099 return (error);
1100
1101 td->td_retval[0] = ngrp;
1102 return (0);
1103 }
1104
1105 #ifndef __alpha__
1106 int
1107 linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args)
1108 {
1109 struct rlimit bsd_rlim;
1110 struct l_rlimit rlim;
1111 u_int which;
1112 int error;
1113
1114 #ifdef DEBUG
1115 if (ldebug(setrlimit))
1116 printf(ARGS(setrlimit, "%d, %p"),
1117 args->resource, (void *)args->rlim);
1118 #endif
1119
1120 if (args->resource >= LINUX_RLIM_NLIMITS)
1121 return (EINVAL);
1122
1123 which = linux_to_bsd_resource[args->resource];
1124 if (which == -1)
1125 return (EINVAL);
1126
1127 error = copyin(args->rlim, &rlim, sizeof(rlim));
1128 if (error)
1129 return (error);
1130
1131 bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur;
1132 bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max;
1133 return (kern_setrlimit(td, which, &bsd_rlim));
1134 }
1135
1136 int
1137 linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args)
1138 {
1139 struct l_rlimit rlim;
1140 struct proc *p = td->td_proc;
1141 struct rlimit bsd_rlim;
1142 u_int which;
1143
1144 #ifdef DEBUG
1145 if (ldebug(old_getrlimit))
1146 printf(ARGS(old_getrlimit, "%d, %p"),
1147 args->resource, (void *)args->rlim);
1148 #endif
1149
1150 if (args->resource >= LINUX_RLIM_NLIMITS)
1151 return (EINVAL);
1152
1153 which = linux_to_bsd_resource[args->resource];
1154 if (which == -1)
1155 return (EINVAL);
1156
1157 PROC_LOCK(p);
1158 lim_rlimit(p, which, &bsd_rlim);
1159 PROC_UNLOCK(p);
1160
1161 #ifdef COMPAT_LINUX32
1162 rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur;
1163 if (rlim.rlim_cur == UINT_MAX)
1164 rlim.rlim_cur = INT_MAX;
1165 rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max;
1166 if (rlim.rlim_max == UINT_MAX)
1167 rlim.rlim_max = INT_MAX;
1168 #else
1169 rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur;
1170 if (rlim.rlim_cur == ULONG_MAX)
1171 rlim.rlim_cur = LONG_MAX;
1172 rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max;
1173 if (rlim.rlim_max == ULONG_MAX)
1174 rlim.rlim_max = LONG_MAX;
1175 #endif
1176 return (copyout(&rlim, args->rlim, sizeof(rlim)));
1177 }
1178
1179 int
1180 linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args)
1181 {
1182 struct l_rlimit rlim;
1183 struct proc *p = td->td_proc;
1184 struct rlimit bsd_rlim;
1185 u_int which;
1186
1187 #ifdef DEBUG
1188 if (ldebug(getrlimit))
1189 printf(ARGS(getrlimit, "%d, %p"),
1190 args->resource, (void *)args->rlim);
1191 #endif
1192
1193 if (args->resource >= LINUX_RLIM_NLIMITS)
1194 return (EINVAL);
1195
1196 which = linux_to_bsd_resource[args->resource];
1197 if (which == -1)
1198 return (EINVAL);
1199
1200 PROC_LOCK(p);
1201 lim_rlimit(p, which, &bsd_rlim);
1202 PROC_UNLOCK(p);
1203
1204 rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur;
1205 rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max;
1206 return (copyout(&rlim, args->rlim, sizeof(rlim)));
1207 }
1208 #endif /*!__alpha__*/
1209
1210 int
1211 linux_sched_setscheduler(struct thread *td,
1212 struct linux_sched_setscheduler_args *args)
1213 {
1214 struct sched_setscheduler_args bsd;
1215
1216 #ifdef DEBUG
1217 if (ldebug(sched_setscheduler))
1218 printf(ARGS(sched_setscheduler, "%d, %d, %p"),
1219 args->pid, args->policy, (const void *)args->param);
1220 #endif
1221
1222 switch (args->policy) {
1223 case LINUX_SCHED_OTHER:
1224 bsd.policy = SCHED_OTHER;
1225 break;
1226 case LINUX_SCHED_FIFO:
1227 bsd.policy = SCHED_FIFO;
1228 break;
1229 case LINUX_SCHED_RR:
1230 bsd.policy = SCHED_RR;
1231 break;
1232 default:
1233 return EINVAL;
1234 }
1235
1236 bsd.pid = args->pid;
1237 bsd.param = (struct sched_param *)args->param;
1238 return sched_setscheduler(td, &bsd);
1239 }
1240
1241 int
1242 linux_sched_getscheduler(struct thread *td,
1243 struct linux_sched_getscheduler_args *args)
1244 {
1245 struct sched_getscheduler_args bsd;
1246 int error;
1247
1248 #ifdef DEBUG
1249 if (ldebug(sched_getscheduler))
1250 printf(ARGS(sched_getscheduler, "%d"), args->pid);
1251 #endif
1252
1253 bsd.pid = args->pid;
1254 error = sched_getscheduler(td, &bsd);
1255
1256 switch (td->td_retval[0]) {
1257 case SCHED_OTHER:
1258 td->td_retval[0] = LINUX_SCHED_OTHER;
1259 break;
1260 case SCHED_FIFO:
1261 td->td_retval[0] = LINUX_SCHED_FIFO;
1262 break;
1263 case SCHED_RR:
1264 td->td_retval[0] = LINUX_SCHED_RR;
1265 break;
1266 }
1267
1268 return error;
1269 }
1270
1271 int
1272 linux_sched_get_priority_max(struct thread *td,
1273 struct linux_sched_get_priority_max_args *args)
1274 {
1275 struct sched_get_priority_max_args bsd;
1276
1277 #ifdef DEBUG
1278 if (ldebug(sched_get_priority_max))
1279 printf(ARGS(sched_get_priority_max, "%d"), args->policy);
1280 #endif
1281
1282 switch (args->policy) {
1283 case LINUX_SCHED_OTHER:
1284 bsd.policy = SCHED_OTHER;
1285 break;
1286 case LINUX_SCHED_FIFO:
1287 bsd.policy = SCHED_FIFO;
1288 break;
1289 case LINUX_SCHED_RR:
1290 bsd.policy = SCHED_RR;
1291 break;
1292 default:
1293 return EINVAL;
1294 }
1295 return sched_get_priority_max(td, &bsd);
1296 }
1297
1298 int
1299 linux_sched_get_priority_min(struct thread *td,
1300 struct linux_sched_get_priority_min_args *args)
1301 {
1302 struct sched_get_priority_min_args bsd;
1303
1304 #ifdef DEBUG
1305 if (ldebug(sched_get_priority_min))
1306 printf(ARGS(sched_get_priority_min, "%d"), args->policy);
1307 #endif
1308
1309 switch (args->policy) {
1310 case LINUX_SCHED_OTHER:
1311 bsd.policy = SCHED_OTHER;
1312 break;
1313 case LINUX_SCHED_FIFO:
1314 bsd.policy = SCHED_FIFO;
1315 break;
1316 case LINUX_SCHED_RR:
1317 bsd.policy = SCHED_RR;
1318 break;
1319 default:
1320 return EINVAL;
1321 }
1322 return sched_get_priority_min(td, &bsd);
1323 }
1324
1325 #define REBOOT_CAD_ON 0x89abcdef
1326 #define REBOOT_CAD_OFF 0
1327 #define REBOOT_HALT 0xcdef0123
1328
1329 int
1330 linux_reboot(struct thread *td, struct linux_reboot_args *args)
1331 {
1332 struct reboot_args bsd_args;
1333
1334 #ifdef DEBUG
1335 if (ldebug(reboot))
1336 printf(ARGS(reboot, "0x%x"), args->cmd);
1337 #endif
1338 if (args->cmd == REBOOT_CAD_ON || args->cmd == REBOOT_CAD_OFF)
1339 return (0);
1340 bsd_args.opt = (args->cmd == REBOOT_HALT) ? RB_HALT : 0;
1341 return (reboot(td, &bsd_args));
1342 }
1343
1344 #ifndef __alpha__
1345
1346 /*
1347 * The FreeBSD native getpid(2), getgid(2) and getuid(2) also modify
 * td->td_retval[1] when COMPAT_43 is defined. This
 * clobbers registers that are assumed to be preserved. The following
 * lightweight syscalls fix this. See also linux_getgid16() and
1351 * linux_getuid16() in linux_uid16.c.
1352 *
1353 * linux_getpid() - MP SAFE
1354 * linux_getgid() - MP SAFE
1355 * linux_getuid() - MP SAFE
1356 */
1357
1358 int
1359 linux_getpid(struct thread *td, struct linux_getpid_args *args)
1360 {
1361
1362 td->td_retval[0] = td->td_proc->p_pid;
1363 return (0);
1364 }
1365
1366 int
1367 linux_getgid(struct thread *td, struct linux_getgid_args *args)
1368 {
1369
1370 td->td_retval[0] = td->td_ucred->cr_rgid;
1371 return (0);
1372 }
1373
1374 int
1375 linux_getuid(struct thread *td, struct linux_getuid_args *args)
1376 {
1377
1378 td->td_retval[0] = td->td_ucred->cr_ruid;
1379 return (0);
1380 }
1381
1382 #endif /*!__alpha__*/
1383
1384 int
1385 linux_getsid(struct thread *td, struct linux_getsid_args *args)
1386 {
1387 struct getsid_args bsd;
1388 bsd.pid = args->pid;
1389 return getsid(td, &bsd);
1390 }
1391
/*
 * Ignores both arguments and unconditionally returns ENOSYS.
 */
int
linux_nosys(struct thread *td, struct nosys_args *ignore)
{
	return (ENOSYS);
}
Cache object: a2f811d1f022b88a05e0a83170ad5bc7
|