1 /*-
2 * Copyright (c) 2002 Doug Rabson
3 * Copyright (c) 1994-1995 Søren Schmidt
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer
11 * in this position and unchanged.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD: releng/6.0/sys/compat/linux/linux_misc.c 147816 2005-07-07 19:17:55Z jhb $");
32
33 #include "opt_mac.h"
34
35 #include <sys/param.h>
36 #include <sys/blist.h>
37 #include <sys/fcntl.h>
38 #if defined(__i386__) || defined(__alpha__)
39 #include <sys/imgact_aout.h>
40 #endif
41 #include <sys/jail.h>
42 #include <sys/kernel.h>
43 #include <sys/limits.h>
44 #include <sys/lock.h>
45 #include <sys/mac.h>
46 #include <sys/malloc.h>
47 #include <sys/mman.h>
48 #include <sys/mount.h>
49 #include <sys/mutex.h>
50 #include <sys/namei.h>
51 #include <sys/proc.h>
52 #include <sys/reboot.h>
53 #include <sys/resourcevar.h>
54 #include <sys/signalvar.h>
55 #include <sys/stat.h>
56 #include <sys/syscallsubr.h>
57 #include <sys/sysctl.h>
58 #include <sys/sysproto.h>
59 #include <sys/systm.h>
60 #include <sys/time.h>
61 #include <sys/vmmeter.h>
62 #include <sys/vnode.h>
63 #include <sys/wait.h>
64
65 #include <vm/vm.h>
66 #include <vm/pmap.h>
67 #include <vm/vm_kern.h>
68 #include <vm/vm_map.h>
69 #include <vm/vm_extern.h>
70 #include <vm/vm_object.h>
71 #include <vm/swap_pager.h>
72
73 #include <posix4/sched.h>
74
75 #include "opt_compat.h"
76
77 #include <compat/linux/linux_sysproto.h>
78
79 #ifdef COMPAT_LINUX32
80 #include <machine/../linux32/linux.h>
81 #include <machine/../linux32/linux32_proto.h>
82 #else
83 #include <machine/../linux/linux.h>
84 #include <machine/../linux/linux_proto.h>
85 #endif
86
87 #include <compat/linux/linux_mib.h>
88 #include <compat/linux/linux_util.h>
89
90 #ifdef __i386__
91 #include <machine/cputypes.h>
92 #endif
93
/*
 * Translate a BSD signal number into the value reported to Linux binaries.
 *
 * On alpha the number is passed through unchanged.  Elsewhere, numbers up
 * to LINUX_SIGTBLSZ are looked up in bsd_to_linux_signal[] and larger ones
 * are passed through.  The argument is evaluated more than once, so it
 * must not have side effects that matter when repeated.
 */
#ifdef __alpha__
#define	BSD_TO_LINUX_SIGNAL(sig)	(sig)
#else
#define	BSD_TO_LINUX_SIGNAL(sig)					\
	(((sig) <= LINUX_SIGTBLSZ) ? bsd_to_linux_signal[_SIG_IDX(sig)] : (sig))
#endif
100
101 #ifndef __alpha__
102 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = {
103 RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
104 RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
105 RLIMIT_MEMLOCK, -1
106 };
107 #endif /*!__alpha__*/
108
109 struct l_sysinfo {
110 l_long uptime; /* Seconds since boot */
111 l_ulong loads[3]; /* 1, 5, and 15 minute load averages */
112 #define LINUX_SYSINFO_LOADS_SCALE 65536
113 l_ulong totalram; /* Total usable main memory size */
114 l_ulong freeram; /* Available memory size */
115 l_ulong sharedram; /* Amount of shared memory */
116 l_ulong bufferram; /* Memory used by buffers */
117 l_ulong totalswap; /* Total swap space size */
118 l_ulong freeswap; /* swap space still available */
119 l_ushort procs; /* Number of current processes */
120 l_ulong totalbig;
121 l_ulong freebig;
122 l_uint mem_unit;
123 char _f[6]; /* Pads structure to 64 bytes */
124 };
125 #ifndef __alpha__
126 int
127 linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args)
128 {
129 struct l_sysinfo sysinfo;
130 vm_object_t object;
131 int i, j;
132 struct timespec ts;
133
134 getnanouptime(&ts);
135 if (ts.tv_nsec != 0)
136 ts.tv_sec++;
137 sysinfo.uptime = ts.tv_sec;
138
139 /* Use the information from the mib to get our load averages */
140 for (i = 0; i < 3; i++)
141 sysinfo.loads[i] = averunnable.ldavg[i] *
142 LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale;
143
144 sysinfo.totalram = physmem * PAGE_SIZE;
145 sysinfo.freeram = sysinfo.totalram - cnt.v_wire_count * PAGE_SIZE;
146
147 sysinfo.sharedram = 0;
148 mtx_lock(&vm_object_list_mtx);
149 TAILQ_FOREACH(object, &vm_object_list, object_list)
150 if (object->shadow_count > 1)
151 sysinfo.sharedram += object->resident_page_count;
152 mtx_unlock(&vm_object_list_mtx);
153
154 sysinfo.sharedram *= PAGE_SIZE;
155 sysinfo.bufferram = 0;
156
157 swap_pager_status(&i, &j);
158 sysinfo.totalswap= i * PAGE_SIZE;
159 sysinfo.freeswap = (i - j) * PAGE_SIZE;
160
161 sysinfo.procs = nprocs;
162
163 /* The following are only present in newer Linux kernels. */
164 sysinfo.totalbig = 0;
165 sysinfo.freebig = 0;
166 sysinfo.mem_unit = 1;
167
168 return copyout(&sysinfo, args->info, sizeof(sysinfo));
169 }
170 #endif /*!__alpha__*/
171
172 #ifndef __alpha__
173 int
174 linux_alarm(struct thread *td, struct linux_alarm_args *args)
175 {
176 struct itimerval it, old_it;
177 int error;
178
179 #ifdef DEBUG
180 if (ldebug(alarm))
181 printf(ARGS(alarm, "%u"), args->secs);
182 #endif
183
184 if (args->secs > 100000000)
185 return (EINVAL);
186
187 it.it_value.tv_sec = (long)args->secs;
188 it.it_value.tv_usec = 0;
189 it.it_interval.tv_sec = 0;
190 it.it_interval.tv_usec = 0;
191 error = kern_setitimer(td, ITIMER_REAL, &it, &old_it);
192 if (error)
193 return (error);
194 if (timevalisset(&old_it.it_value)) {
195 if (old_it.it_value.tv_usec != 0)
196 old_it.it_value.tv_sec++;
197 td->td_retval[0] = old_it.it_value.tv_sec;
198 }
199 return (0);
200 }
201 #endif /*!__alpha__*/
202
203 int
204 linux_brk(struct thread *td, struct linux_brk_args *args)
205 {
206 struct vmspace *vm = td->td_proc->p_vmspace;
207 vm_offset_t new, old;
208 struct obreak_args /* {
209 char * nsize;
210 } */ tmp;
211
212 #ifdef DEBUG
213 if (ldebug(brk))
214 printf(ARGS(brk, "%p"), (void *)(uintptr_t)args->dsend);
215 #endif
216 old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize);
217 new = (vm_offset_t)args->dsend;
218 tmp.nsize = (char *) new;
219 if (((caddr_t)new > vm->vm_daddr) && !obreak(td, &tmp))
220 td->td_retval[0] = (long)new;
221 else
222 td->td_retval[0] = (long)old;
223
224 return 0;
225 }
226
#if defined(__i386__) || defined(__alpha__)

/*
 * Load a Linux a.out shared library named by args->library into the
 * calling process.
 *
 * The file is looked up, checked (not open for writing, executable
 * regular file on a mount without MNT_NOEXEC, non-empty, accessible), its
 * first page is mapped into kernel_map to read the a.out header, and then
 * text+data are either copied (file offset not page aligned) or mapped
 * copy-on-write (page aligned) at trunc_page(a_entry).  Anonymous memory
 * for the bss follows directly after.
 *
 * Returns 0 on success or an errno value; the temporary header mapping is
 * released and the vnode unlocked on every path through "cleanup".
 */
int
linux_uselib(struct thread *td, struct linux_uselib_args *args)
{
	struct nameidata ni;
	struct vnode *vp;
	struct exec *a_out;
	struct vattr attr;
	vm_offset_t vmaddr;
	unsigned long file_offset;
	vm_offset_t buffer;
	unsigned long bss_size;
	char *library;
	int error;
	int locked;

	LCONVPATHEXIST(td, args->library, &library);

#ifdef DEBUG
	if (ldebug(uselib))
		printf(ARGS(uselib, "%s"), library);
#endif

	a_out = NULL;
	locked = 0;
	vp = NULL;

	/*
	 * XXX: This code should make use of vn_open(), rather than doing
	 * all this stuff itself.
	 */
	NDINIT(&ni, LOOKUP, ISOPEN|FOLLOW|LOCKLEAF, UIO_SYSSPACE, library, td);
	error = namei(&ni);
	LFREEPATH(library);
	if (error)
		goto cleanup;

	vp = ni.ni_vp;
	/*
	 * XXX - This looks like a bogus check. A LOCKLEAF namei should not
	 * succeed without returning a vnode.
	 */
	if (vp == NULL) {
		error = ENOEXEC;	/* ?? */
		goto cleanup;
	}
	NDFREE(&ni, NDF_ONLY_PNBUF);

	/*
	 * From here on down, we have a locked vnode that must be unlocked.
	 */
	locked++;

	/* Writable? */
	if (vp->v_writecount) {
		error = ETXTBSY;
		goto cleanup;
	}

	/* Executable? */
	error = VOP_GETATTR(vp, &attr, td->td_ucred, td);
	if (error)
		goto cleanup;

	if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
	    ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) {
		error = ENOEXEC;
		goto cleanup;
	}

	/* Sensible size? */
	if (attr.va_size == 0) {
		error = ENOEXEC;
		goto cleanup;
	}

	/* Can we access it? */
	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
	if (error)
		goto cleanup;

	/*
	 * XXX: This should use vn_open() so that it is properly authorized,
	 * and to reduce code redundancy all over the place here.
	 */
#ifdef MAC
	error = mac_check_vnode_open(td->td_ucred, vp, FREAD);
	if (error)
		goto cleanup;
#endif
	error = VOP_OPEN(vp, FREAD, td->td_ucred, td, -1);
	if (error)
		goto cleanup;

	/* Pull in executable header into kernel_map */
	error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE,
	    VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0);
	/*
	 * Lock no longer needed
	 */
	locked = 0;
	VOP_UNLOCK(vp, 0, td);

	if (error)
		goto cleanup;

	/* Is it a Linux binary ? */
	if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
		error = ENOEXEC;
		goto cleanup;
	}

	/*
	 * While we are here, we should REALLY do some more checks
	 */

	/* Set file/virtual offset based on a.out variant. */
	switch ((int)(a_out->a_magic & 0xffff)) {
	case 0413:	/* ZMAGIC */
		file_offset = 1024;
		break;
	case 0314:	/* QMAGIC */
		file_offset = 0;
		break;
	default:
		error = ENOEXEC;
		goto cleanup;
	}

	bss_size = round_page(a_out->a_bss);

	/* Check various fields in header for validity/bounds. */
	if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
		error = ENOEXEC;
		goto cleanup;
	}

	/*
	 * text + data can't exceed file size.  The sum is formed in 64 bits
	 * so that large header values cannot wrap around and slip past the
	 * comparison.
	 */
	if ((uint64_t)a_out->a_data + a_out->a_text > attr.va_size) {
		error = EFAULT;
		goto cleanup;
	}

	/*
	 * text/data/bss must not exceed limits
	 * XXX - this is not complete. it should check current usage PLUS
	 * the resources needed by this library.
	 *
	 * As above, data + bss is summed in 64 bits before being compared
	 * with the limit.
	 */
	PROC_LOCK(td->td_proc);
	if (a_out->a_text > maxtsiz ||
	    (uint64_t)a_out->a_data + bss_size >
	    (uint64_t)lim_cur(td->td_proc, RLIMIT_DATA)) {
		PROC_UNLOCK(td->td_proc);
		error = ENOMEM;
		goto cleanup;
	}
	PROC_UNLOCK(td->td_proc);

	mp_fixme("Unlocked vflags access.");
	/* prevent more writers */
	vp->v_vflag |= VV_TEXT;

	/*
	 * Check if file_offset page aligned. Currently we cannot handle
	 * misaligned file offsets, and so we read in the entire image
	 * (what a waste).
	 */
	if (file_offset & PAGE_MASK) {
#ifdef DEBUG
		printf("uselib: Non page aligned binary %lu\n", file_offset);
#endif
		/* Map text+data read/write/execute */

		/* a_entry is the load address and is page aligned */
		vmaddr = trunc_page(a_out->a_entry);

		/* get anon user mapping, read+write+execute */
		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
		    &vmaddr, a_out->a_text + a_out->a_data, FALSE, VM_PROT_ALL,
		    VM_PROT_ALL, 0);
		if (error)
			goto cleanup;

		/* map file into kernel_map */
		error = vm_mmap(kernel_map, &buffer,
		    round_page(a_out->a_text + a_out->a_data + file_offset),
		    VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp,
		    trunc_page(file_offset));
		if (error)
			goto cleanup;

		/* copy from kernel VM space to user space */
		error = copyout(PTRIN(buffer + file_offset),
		    (void *)vmaddr, a_out->a_text + a_out->a_data);

		/* release temporary kernel space */
		vm_map_remove(kernel_map, buffer, buffer +
		    round_page(a_out->a_text + a_out->a_data + file_offset));

		if (error)
			goto cleanup;
	} else {
#ifdef DEBUG
		printf("uselib: Page aligned binary %lu\n", file_offset);
#endif
		/*
		 * for QMAGIC, a_entry is 20 bytes beyond the load address
		 * to skip the executable header
		 */
		vmaddr = trunc_page(a_out->a_entry);

		/*
		 * Map it all into the process's space as a single
		 * copy-on-write "data" segment.
		 */
		error = vm_mmap(&td->td_proc->p_vmspace->vm_map, &vmaddr,
		    a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL,
		    MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset);
		if (error)
			goto cleanup;
	}
#ifdef DEBUG
	printf("mem=%08lx = %08lx %08lx\n", (long)vmaddr, ((long*)vmaddr)[0],
	    ((long*)vmaddr)[1]);
#endif
	if (bss_size != 0) {
		/* Calculate BSS start address */
		vmaddr = trunc_page(a_out->a_entry) + a_out->a_text +
		    a_out->a_data;

		/* allocate some 'anon' space */
		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
		    &vmaddr, bss_size, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0);
		if (error)
			goto cleanup;
	}

cleanup:
	/* Unlock vnode if needed */
	if (locked)
		VOP_UNLOCK(vp, 0, td);

	/* Release the kernel mapping. */
	if (a_out)
		vm_map_remove(kernel_map, (vm_offset_t)a_out,
		    (vm_offset_t)a_out + PAGE_SIZE);

	return error;
}

#endif	/* __i386__ || __alpha__ */
478
479 int
480 linux_select(struct thread *td, struct linux_select_args *args)
481 {
482 l_timeval ltv;
483 struct timeval tv0, tv1, utv, *tvp;
484 int error;
485
486 #ifdef DEBUG
487 if (ldebug(select))
488 printf(ARGS(select, "%d, %p, %p, %p, %p"), args->nfds,
489 (void *)args->readfds, (void *)args->writefds,
490 (void *)args->exceptfds, (void *)args->timeout);
491 #endif
492
493 /*
494 * Store current time for computation of the amount of
495 * time left.
496 */
497 if (args->timeout) {
498 if ((error = copyin(args->timeout, <v, sizeof(ltv))))
499 goto select_out;
500 utv.tv_sec = ltv.tv_sec;
501 utv.tv_usec = ltv.tv_usec;
502 #ifdef DEBUG
503 if (ldebug(select))
504 printf(LMSG("incoming timeout (%ld/%ld)"),
505 utv.tv_sec, utv.tv_usec);
506 #endif
507
508 if (itimerfix(&utv)) {
509 /*
510 * The timeval was invalid. Convert it to something
511 * valid that will act as it does under Linux.
512 */
513 utv.tv_sec += utv.tv_usec / 1000000;
514 utv.tv_usec %= 1000000;
515 if (utv.tv_usec < 0) {
516 utv.tv_sec -= 1;
517 utv.tv_usec += 1000000;
518 }
519 if (utv.tv_sec < 0)
520 timevalclear(&utv);
521 }
522 microtime(&tv0);
523 tvp = &utv;
524 } else
525 tvp = NULL;
526
527 error = kern_select(td, args->nfds, args->readfds, args->writefds,
528 args->exceptfds, tvp);
529
530 #ifdef DEBUG
531 if (ldebug(select))
532 printf(LMSG("real select returns %d"), error);
533 #endif
534 if (error) {
535 /*
536 * See fs/select.c in the Linux kernel. Without this,
537 * Maelstrom doesn't work.
538 */
539 if (error == ERESTART)
540 error = EINTR;
541 goto select_out;
542 }
543
544 if (args->timeout) {
545 if (td->td_retval[0]) {
546 /*
547 * Compute how much time was left of the timeout,
548 * by subtracting the current time and the time
549 * before we started the call, and subtracting
550 * that result from the user-supplied value.
551 */
552 microtime(&tv1);
553 timevalsub(&tv1, &tv0);
554 timevalsub(&utv, &tv1);
555 if (utv.tv_sec < 0)
556 timevalclear(&utv);
557 } else
558 timevalclear(&utv);
559 #ifdef DEBUG
560 if (ldebug(select))
561 printf(LMSG("outgoing timeout (%ld/%ld)"),
562 utv.tv_sec, utv.tv_usec);
563 #endif
564 ltv.tv_sec = utv.tv_sec;
565 ltv.tv_usec = utv.tv_usec;
566 if ((error = copyout(<v, args->timeout, sizeof(ltv))))
567 goto select_out;
568 }
569
570 select_out:
571 #ifdef DEBUG
572 if (ldebug(select))
573 printf(LMSG("select_out -> %d"), error);
574 #endif
575 return error;
576 }
577
578 int
579 linux_mremap(struct thread *td, struct linux_mremap_args *args)
580 {
581 struct munmap_args /* {
582 void *addr;
583 size_t len;
584 } */ bsd_args;
585 int error = 0;
586
587 #ifdef DEBUG
588 if (ldebug(mremap))
589 printf(ARGS(mremap, "%p, %08lx, %08lx, %08lx"),
590 (void *)(uintptr_t)args->addr,
591 (unsigned long)args->old_len,
592 (unsigned long)args->new_len,
593 (unsigned long)args->flags);
594 #endif
595 args->new_len = round_page(args->new_len);
596 args->old_len = round_page(args->old_len);
597
598 if (args->new_len > args->old_len) {
599 td->td_retval[0] = 0;
600 return ENOMEM;
601 }
602
603 if (args->new_len < args->old_len) {
604 bsd_args.addr =
605 (caddr_t)((uintptr_t)args->addr + args->new_len);
606 bsd_args.len = args->old_len - args->new_len;
607 error = munmap(td, &bsd_args);
608 }
609
610 td->td_retval[0] = error ? 0 : (uintptr_t)args->addr;
611 return error;
612 }
613
614 #define LINUX_MS_ASYNC 0x0001
615 #define LINUX_MS_INVALIDATE 0x0002
616 #define LINUX_MS_SYNC 0x0004
617
618 int
619 linux_msync(struct thread *td, struct linux_msync_args *args)
620 {
621 struct msync_args bsd_args;
622
623 bsd_args.addr = (caddr_t)(uintptr_t)args->addr;
624 bsd_args.len = (uintptr_t)args->len;
625 bsd_args.flags = args->fl & ~LINUX_MS_SYNC;
626
627 return msync(td, &bsd_args);
628 }
629
630 #ifndef __alpha__
631 int
632 linux_time(struct thread *td, struct linux_time_args *args)
633 {
634 struct timeval tv;
635 l_time_t tm;
636 int error;
637
638 #ifdef DEBUG
639 if (ldebug(time))
640 printf(ARGS(time, "*"));
641 #endif
642
643 microtime(&tv);
644 tm = tv.tv_sec;
645 if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm))))
646 return error;
647 td->td_retval[0] = tm;
648 return 0;
649 }
650 #endif /*!__alpha__*/
651
652 struct l_times_argv {
653 l_long tms_utime;
654 l_long tms_stime;
655 l_long tms_cutime;
656 l_long tms_cstime;
657 };
658
/* Clock ticks per second as seen by Linux binaries. */
#ifdef __alpha__
#define	CLK_TCK		1024	/* Linux uses 1024 on alpha */
#else
#define	CLK_TCK		100	/* Linux uses 100 */
#endif

/*
 * Convert a struct timeval (an lvalue expression, not a pointer) into
 * CLK_TCK ticks.  The argument is parenthesized so that expressions such
 * as *ptr or array[i] expand correctly; it is evaluated twice.
 */
#define	CONVTCK(r)							\
	((r).tv_sec * CLK_TCK + (r).tv_usec / (1000000 / CLK_TCK))
666
667 int
668 linux_times(struct thread *td, struct linux_times_args *args)
669 {
670 struct timeval tv, utime, stime, cutime, cstime;
671 struct l_times_argv tms;
672 struct proc *p;
673 int error;
674
675 #ifdef DEBUG
676 if (ldebug(times))
677 printf(ARGS(times, "*"));
678 #endif
679
680 p = td->td_proc;
681 PROC_LOCK(p);
682 calcru(p, &utime, &stime);
683 calccru(p, &cutime, &cstime);
684 PROC_UNLOCK(p);
685
686 tms.tms_utime = CONVTCK(utime);
687 tms.tms_stime = CONVTCK(stime);
688
689 tms.tms_cutime = CONVTCK(cutime);
690 tms.tms_cstime = CONVTCK(cstime);
691
692 if ((error = copyout(&tms, args->buf, sizeof(tms))))
693 return error;
694
695 microuptime(&tv);
696 td->td_retval[0] = (int)CONVTCK(tv);
697 return 0;
698 }
699
700 int
701 linux_newuname(struct thread *td, struct linux_newuname_args *args)
702 {
703 struct l_new_utsname utsname;
704 char osname[LINUX_MAX_UTSNAME];
705 char osrelease[LINUX_MAX_UTSNAME];
706 char *p;
707
708 #ifdef DEBUG
709 if (ldebug(newuname))
710 printf(ARGS(newuname, "*"));
711 #endif
712
713 linux_get_osname(td, osname);
714 linux_get_osrelease(td, osrelease);
715
716 bzero(&utsname, sizeof(utsname));
717 strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME);
718 getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME);
719 strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME);
720 strlcpy(utsname.version, version, LINUX_MAX_UTSNAME);
721 for (p = utsname.version; *p != '\0'; ++p)
722 if (*p == '\n') {
723 *p = '\0';
724 break;
725 }
726 #ifdef __i386__
727 {
728 const char *class;
729 switch (cpu_class) {
730 case CPUCLASS_686:
731 class = "i686";
732 break;
733 case CPUCLASS_586:
734 class = "i586";
735 break;
736 case CPUCLASS_486:
737 class = "i486";
738 break;
739 default:
740 class = "i386";
741 }
742 strlcpy(utsname.machine, class, LINUX_MAX_UTSNAME);
743 }
744 #elif defined(__amd64__) /* XXX: Linux can change 'personality'. */
745 #ifdef COMPAT_LINUX32
746 strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME);
747 #else
748 strlcpy(utsname.machine, "x86_64", LINUX_MAX_UTSNAME);
749 #endif /* COMPAT_LINUX32 */
750 #else /* something other than i386 or amd64 - assume we and Linux agree */
751 strlcpy(utsname.machine, machine, LINUX_MAX_UTSNAME);
752 #endif /* __i386__ */
753 strlcpy(utsname.domainname, domainname, LINUX_MAX_UTSNAME);
754
755 return (copyout(&utsname, args->buf, sizeof(utsname)));
756 }
757
#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/* Userland argument of linux_utime(): access and modification times. */
struct l_utimbuf {
	l_time_t l_actime;
	l_time_t l_modtime;
};

/*
 * utime(2) emulation on top of kern_utimes().
 *
 * When args->times is set, the two second-resolution timestamps are
 * copied in and passed on with zero microseconds; otherwise a NULL times
 * pointer is passed.  The converted path is freed on every exit.
 */
int
linux_utime(struct thread *td, struct linux_utime_args *args)
{
	struct timeval tv[2], *tvp;
	struct l_utimbuf lut;
	char *fname;
	int error;

	LCONVPATHEXIST(td, args->fname, &fname);

#ifdef DEBUG
	if (ldebug(utime))
		printf(ARGS(utime, "%s, *"), fname);
#endif

	tvp = NULL;
	if (args->times) {
		error = copyin(args->times, &lut, sizeof lut);
		if (error)
			goto out;
		tv[0].tv_sec = lut.l_actime;
		tv[0].tv_usec = 0;
		tv[1].tv_sec = lut.l_modtime;
		tv[1].tv_usec = 0;
		tvp = tv;
	}

	error = kern_utimes(td, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE);
out:
	LFREEPATH(fname);
	return (error);
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
797
798 #define __WCLONE 0x80000000
799
800 #ifndef __alpha__
801 int
802 linux_waitpid(struct thread *td, struct linux_waitpid_args *args)
803 {
804 int error, options, tmpstat;
805
806 #ifdef DEBUG
807 if (ldebug(waitpid))
808 printf(ARGS(waitpid, "%d, %p, %d"),
809 args->pid, (void *)args->status, args->options);
810 #endif
811
812 options = (args->options & (WNOHANG | WUNTRACED));
813 /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
814 if (args->options & __WCLONE)
815 options |= WLINUXCLONE;
816
817 error = kern_wait(td, args->pid, &tmpstat, options, NULL);
818 if (error)
819 return error;
820
821 if (args->status) {
822 tmpstat &= 0xffff;
823 if (WIFSIGNALED(tmpstat))
824 tmpstat = (tmpstat & 0xffffff80) |
825 BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
826 else if (WIFSTOPPED(tmpstat))
827 tmpstat = (tmpstat & 0xffff00ff) |
828 (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
829 return copyout(&tmpstat, args->status, sizeof(int));
830 }
831
832 return 0;
833 }
834 #endif /*!__alpha__*/
835
836 int
837 linux_wait4(struct thread *td, struct linux_wait4_args *args)
838 {
839 int error, options, tmpstat;
840 struct rusage ru, *rup;
841 struct proc *p;
842
843 #ifdef DEBUG
844 if (ldebug(wait4))
845 printf(ARGS(wait4, "%d, %p, %d, %p"),
846 args->pid, (void *)args->status, args->options,
847 (void *)args->rusage);
848 #endif
849
850 options = (args->options & (WNOHANG | WUNTRACED));
851 /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
852 if (args->options & __WCLONE)
853 options |= WLINUXCLONE;
854
855 if (args->rusage != NULL)
856 rup = &ru;
857 else
858 rup = NULL;
859 error = kern_wait(td, args->pid, &tmpstat, options, rup);
860 if (error)
861 return error;
862
863 p = td->td_proc;
864 PROC_LOCK(p);
865 SIGDELSET(p->p_siglist, SIGCHLD);
866 PROC_UNLOCK(p);
867
868 if (args->status) {
869 tmpstat &= 0xffff;
870 if (WIFSIGNALED(tmpstat))
871 tmpstat = (tmpstat & 0xffffff80) |
872 BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
873 else if (WIFSTOPPED(tmpstat))
874 tmpstat = (tmpstat & 0xffff00ff) |
875 (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
876 error = copyout(&tmpstat, args->status, sizeof(int));
877 }
878 if (args->rusage != NULL && error == 0)
879 error = copyout(&ru, args->rusage, sizeof(ru));
880
881 return (error);
882 }
883
884 int
885 linux_mknod(struct thread *td, struct linux_mknod_args *args)
886 {
887 char *path;
888 int error;
889
890 LCONVPATHCREAT(td, args->path, &path);
891
892 #ifdef DEBUG
893 if (ldebug(mknod))
894 printf(ARGS(mknod, "%s, %d, %d"), path, args->mode, args->dev);
895 #endif
896
897 if (args->mode & S_IFIFO)
898 error = kern_mkfifo(td, path, UIO_SYSSPACE, args->mode);
899 else
900 error = kern_mknod(td, path, UIO_SYSSPACE, args->mode,
901 args->dev);
902 LFREEPATH(path);
903 return (error);
904 }
905
906 /*
907 * UGH! This is just about the dumbest idea I've ever heard!!
908 */
909 int
910 linux_personality(struct thread *td, struct linux_personality_args *args)
911 {
912 #ifdef DEBUG
913 if (ldebug(personality))
914 printf(ARGS(personality, "%lu"), (unsigned long)args->per);
915 #endif
916 #ifndef __alpha__
917 if (args->per != 0)
918 return EINVAL;
919 #endif
920
921 /* Yes Jim, it's still a Linux... */
922 td->td_retval[0] = 0;
923 return 0;
924 }
925
926 struct l_itimerval {
927 l_timeval it_interval;
928 l_timeval it_value;
929 };
930
/*
 * Copy the four itimerval fields from *lip into *bip, member by member.
 * Both arguments are pointers; the macro is used in either direction
 * between struct itimerval and struct l_itimerval.
 *
 * Wrapped in do { } while (0) so that it behaves as a single statement,
 * e.g. as the body of an unbraced if.  Invoke it with a trailing
 * semicolon.
 */
#define	B2L_ITIMERVAL(bip, lip) do {					\
	(bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec;		\
	(bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec;	\
	(bip)->it_value.tv_sec = (lip)->it_value.tv_sec;		\
	(bip)->it_value.tv_usec = (lip)->it_value.tv_usec;		\
} while (0)
936
937 int
938 linux_setitimer(struct thread *td, struct linux_setitimer_args *uap)
939 {
940 int error;
941 struct l_itimerval ls;
942 struct itimerval aitv, oitv;
943
944 #ifdef DEBUG
945 if (ldebug(setitimer))
946 printf(ARGS(setitimer, "%p, %p"),
947 (void *)uap->itv, (void *)uap->oitv);
948 #endif
949
950 if (uap->itv == NULL) {
951 uap->itv = uap->oitv;
952 return (linux_getitimer(td, (struct linux_getitimer_args *)uap));
953 }
954
955 error = copyin(uap->itv, &ls, sizeof(ls));
956 if (error != 0)
957 return (error);
958 B2L_ITIMERVAL(&aitv, &ls);
959 #ifdef DEBUG
960 if (ldebug(setitimer)) {
961 printf("setitimer: value: sec: %ld, usec: %ld\n",
962 aitv.it_value.tv_sec, aitv.it_value.tv_usec);
963 printf("setitimer: interval: sec: %ld, usec: %ld\n",
964 aitv.it_interval.tv_sec, aitv.it_interval.tv_usec);
965 }
966 #endif
967 error = kern_setitimer(td, uap->which, &aitv, &oitv);
968 if (error != 0 || uap->oitv == NULL)
969 return (error);
970 B2L_ITIMERVAL(&ls, &oitv);
971
972 return (copyout(&ls, uap->oitv, sizeof(ls)));
973 }
974
975 int
976 linux_getitimer(struct thread *td, struct linux_getitimer_args *uap)
977 {
978 int error;
979 struct l_itimerval ls;
980 struct itimerval aitv;
981
982 #ifdef DEBUG
983 if (ldebug(getitimer))
984 printf(ARGS(getitimer, "%p"), (void *)uap->itv);
985 #endif
986 error = kern_getitimer(td, uap->which, &aitv);
987 if (error != 0)
988 return (error);
989 B2L_ITIMERVAL(&ls, &aitv);
990 return (copyout(&ls, uap->itv, sizeof(ls)));
991 }
992
993 #ifndef __alpha__
994 int
995 linux_nice(struct thread *td, struct linux_nice_args *args)
996 {
997 struct setpriority_args bsd_args;
998
999 bsd_args.which = PRIO_PROCESS;
1000 bsd_args.who = 0; /* current process */
1001 bsd_args.prio = args->inc;
1002 return setpriority(td, &bsd_args);
1003 }
1004 #endif /*!__alpha__*/
1005
1006 int
1007 linux_setgroups(struct thread *td, struct linux_setgroups_args *args)
1008 {
1009 struct ucred *newcred, *oldcred;
1010 l_gid_t linux_gidset[NGROUPS];
1011 gid_t *bsd_gidset;
1012 int ngrp, error;
1013 struct proc *p;
1014
1015 ngrp = args->gidsetsize;
1016 if (ngrp < 0 || ngrp >= NGROUPS)
1017 return (EINVAL);
1018 error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t));
1019 if (error)
1020 return (error);
1021 newcred = crget();
1022 p = td->td_proc;
1023 PROC_LOCK(p);
1024 oldcred = p->p_ucred;
1025
1026 /*
1027 * cr_groups[0] holds egid. Setting the whole set from
1028 * the supplied set will cause egid to be changed too.
1029 * Keep cr_groups[0] unchanged to prevent that.
1030 */
1031
1032 if ((error = suser_cred(oldcred, SUSER_ALLOWJAIL)) != 0) {
1033 PROC_UNLOCK(p);
1034 crfree(newcred);
1035 return (error);
1036 }
1037
1038 crcopy(newcred, oldcred);
1039 if (ngrp > 0) {
1040 newcred->cr_ngroups = ngrp + 1;
1041
1042 bsd_gidset = newcred->cr_groups;
1043 ngrp--;
1044 while (ngrp >= 0) {
1045 bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
1046 ngrp--;
1047 }
1048 }
1049 else
1050 newcred->cr_ngroups = 1;
1051
1052 setsugid(p);
1053 p->p_ucred = newcred;
1054 PROC_UNLOCK(p);
1055 crfree(oldcred);
1056 return (0);
1057 }
1058
1059 int
1060 linux_getgroups(struct thread *td, struct linux_getgroups_args *args)
1061 {
1062 struct ucred *cred;
1063 l_gid_t linux_gidset[NGROUPS];
1064 gid_t *bsd_gidset;
1065 int bsd_gidsetsz, ngrp, error;
1066
1067 cred = td->td_ucred;
1068 bsd_gidset = cred->cr_groups;
1069 bsd_gidsetsz = cred->cr_ngroups - 1;
1070
1071 /*
1072 * cr_groups[0] holds egid. Returning the whole set
1073 * here will cause a duplicate. Exclude cr_groups[0]
1074 * to prevent that.
1075 */
1076
1077 if ((ngrp = args->gidsetsize) == 0) {
1078 td->td_retval[0] = bsd_gidsetsz;
1079 return (0);
1080 }
1081
1082 if (ngrp < bsd_gidsetsz)
1083 return (EINVAL);
1084
1085 ngrp = 0;
1086 while (ngrp < bsd_gidsetsz) {
1087 linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
1088 ngrp++;
1089 }
1090
1091 if ((error = copyout(linux_gidset, args->grouplist,
1092 ngrp * sizeof(l_gid_t))))
1093 return (error);
1094
1095 td->td_retval[0] = ngrp;
1096 return (0);
1097 }
1098
1099 #ifndef __alpha__
1100 int
1101 linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args)
1102 {
1103 struct rlimit bsd_rlim;
1104 struct l_rlimit rlim;
1105 u_int which;
1106 int error;
1107
1108 #ifdef DEBUG
1109 if (ldebug(setrlimit))
1110 printf(ARGS(setrlimit, "%d, %p"),
1111 args->resource, (void *)args->rlim);
1112 #endif
1113
1114 if (args->resource >= LINUX_RLIM_NLIMITS)
1115 return (EINVAL);
1116
1117 which = linux_to_bsd_resource[args->resource];
1118 if (which == -1)
1119 return (EINVAL);
1120
1121 error = copyin(args->rlim, &rlim, sizeof(rlim));
1122 if (error)
1123 return (error);
1124
1125 bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur;
1126 bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max;
1127 return (kern_setrlimit(td, which, &bsd_rlim));
1128 }
1129
1130 int
1131 linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args)
1132 {
1133 struct l_rlimit rlim;
1134 struct proc *p = td->td_proc;
1135 struct rlimit bsd_rlim;
1136 u_int which;
1137
1138 #ifdef DEBUG
1139 if (ldebug(old_getrlimit))
1140 printf(ARGS(old_getrlimit, "%d, %p"),
1141 args->resource, (void *)args->rlim);
1142 #endif
1143
1144 if (args->resource >= LINUX_RLIM_NLIMITS)
1145 return (EINVAL);
1146
1147 which = linux_to_bsd_resource[args->resource];
1148 if (which == -1)
1149 return (EINVAL);
1150
1151 PROC_LOCK(p);
1152 lim_rlimit(p, which, &bsd_rlim);
1153 PROC_UNLOCK(p);
1154
1155 #ifdef COMPAT_LINUX32
1156 rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur;
1157 if (rlim.rlim_cur == UINT_MAX)
1158 rlim.rlim_cur = INT_MAX;
1159 rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max;
1160 if (rlim.rlim_max == UINT_MAX)
1161 rlim.rlim_max = INT_MAX;
1162 #else
1163 rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur;
1164 if (rlim.rlim_cur == ULONG_MAX)
1165 rlim.rlim_cur = LONG_MAX;
1166 rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max;
1167 if (rlim.rlim_max == ULONG_MAX)
1168 rlim.rlim_max = LONG_MAX;
1169 #endif
1170 return (copyout(&rlim, args->rlim, sizeof(rlim)));
1171 }
1172
1173 int
1174 linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args)
1175 {
1176 struct l_rlimit rlim;
1177 struct proc *p = td->td_proc;
1178 struct rlimit bsd_rlim;
1179 u_int which;
1180
1181 #ifdef DEBUG
1182 if (ldebug(getrlimit))
1183 printf(ARGS(getrlimit, "%d, %p"),
1184 args->resource, (void *)args->rlim);
1185 #endif
1186
1187 if (args->resource >= LINUX_RLIM_NLIMITS)
1188 return (EINVAL);
1189
1190 which = linux_to_bsd_resource[args->resource];
1191 if (which == -1)
1192 return (EINVAL);
1193
1194 PROC_LOCK(p);
1195 lim_rlimit(p, which, &bsd_rlim);
1196 PROC_UNLOCK(p);
1197
1198 rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur;
1199 rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max;
1200 return (copyout(&rlim, args->rlim, sizeof(rlim)));
1201 }
1202 #endif /*!__alpha__*/
1203
1204 int
1205 linux_sched_setscheduler(struct thread *td,
1206 struct linux_sched_setscheduler_args *args)
1207 {
1208 struct sched_setscheduler_args bsd;
1209
1210 #ifdef DEBUG
1211 if (ldebug(sched_setscheduler))
1212 printf(ARGS(sched_setscheduler, "%d, %d, %p"),
1213 args->pid, args->policy, (const void *)args->param);
1214 #endif
1215
1216 switch (args->policy) {
1217 case LINUX_SCHED_OTHER:
1218 bsd.policy = SCHED_OTHER;
1219 break;
1220 case LINUX_SCHED_FIFO:
1221 bsd.policy = SCHED_FIFO;
1222 break;
1223 case LINUX_SCHED_RR:
1224 bsd.policy = SCHED_RR;
1225 break;
1226 default:
1227 return EINVAL;
1228 }
1229
1230 bsd.pid = args->pid;
1231 bsd.param = (struct sched_param *)args->param;
1232 return sched_setscheduler(td, &bsd);
1233 }
1234
1235 int
1236 linux_sched_getscheduler(struct thread *td,
1237 struct linux_sched_getscheduler_args *args)
1238 {
1239 struct sched_getscheduler_args bsd;
1240 int error;
1241
1242 #ifdef DEBUG
1243 if (ldebug(sched_getscheduler))
1244 printf(ARGS(sched_getscheduler, "%d"), args->pid);
1245 #endif
1246
1247 bsd.pid = args->pid;
1248 error = sched_getscheduler(td, &bsd);
1249
1250 switch (td->td_retval[0]) {
1251 case SCHED_OTHER:
1252 td->td_retval[0] = LINUX_SCHED_OTHER;
1253 break;
1254 case SCHED_FIFO:
1255 td->td_retval[0] = LINUX_SCHED_FIFO;
1256 break;
1257 case SCHED_RR:
1258 td->td_retval[0] = LINUX_SCHED_RR;
1259 break;
1260 }
1261
1262 return error;
1263 }
1264
1265 int
1266 linux_sched_get_priority_max(struct thread *td,
1267 struct linux_sched_get_priority_max_args *args)
1268 {
1269 struct sched_get_priority_max_args bsd;
1270
1271 #ifdef DEBUG
1272 if (ldebug(sched_get_priority_max))
1273 printf(ARGS(sched_get_priority_max, "%d"), args->policy);
1274 #endif
1275
1276 switch (args->policy) {
1277 case LINUX_SCHED_OTHER:
1278 bsd.policy = SCHED_OTHER;
1279 break;
1280 case LINUX_SCHED_FIFO:
1281 bsd.policy = SCHED_FIFO;
1282 break;
1283 case LINUX_SCHED_RR:
1284 bsd.policy = SCHED_RR;
1285 break;
1286 default:
1287 return EINVAL;
1288 }
1289 return sched_get_priority_max(td, &bsd);
1290 }
1291
1292 int
1293 linux_sched_get_priority_min(struct thread *td,
1294 struct linux_sched_get_priority_min_args *args)
1295 {
1296 struct sched_get_priority_min_args bsd;
1297
1298 #ifdef DEBUG
1299 if (ldebug(sched_get_priority_min))
1300 printf(ARGS(sched_get_priority_min, "%d"), args->policy);
1301 #endif
1302
1303 switch (args->policy) {
1304 case LINUX_SCHED_OTHER:
1305 bsd.policy = SCHED_OTHER;
1306 break;
1307 case LINUX_SCHED_FIFO:
1308 bsd.policy = SCHED_FIFO;
1309 break;
1310 case LINUX_SCHED_RR:
1311 bsd.policy = SCHED_RR;
1312 break;
1313 default:
1314 return EINVAL;
1315 }
1316 return sched_get_priority_min(td, &bsd);
1317 }
1318
1319 #define REBOOT_CAD_ON 0x89abcdef
1320 #define REBOOT_CAD_OFF 0
1321 #define REBOOT_HALT 0xcdef0123
1322
1323 int
1324 linux_reboot(struct thread *td, struct linux_reboot_args *args)
1325 {
1326 struct reboot_args bsd_args;
1327
1328 #ifdef DEBUG
1329 if (ldebug(reboot))
1330 printf(ARGS(reboot, "0x%x"), args->cmd);
1331 #endif
1332 if (args->cmd == REBOOT_CAD_ON || args->cmd == REBOOT_CAD_OFF)
1333 return (0);
1334 bsd_args.opt = (args->cmd == REBOOT_HALT) ? RB_HALT : 0;
1335 return (reboot(td, &bsd_args));
1336 }
1337
1338 #ifndef __alpha__
1339
/*
 * The FreeBSD native getpid(2), getgid(2) and getuid(2) also modify
 * td->td_retval[1] when COMPAT_43 is defined.  This
 * clobbers registers that are assumed to be preserved.  The following
 * lightweight syscalls fix this.  See also linux_getgid16() and
 * linux_getuid16() in linux_uid16.c.
 *
 * linux_getpid() - MP SAFE
 * linux_getgid() - MP SAFE
 * linux_getuid() - MP SAFE
 */
1351
1352 int
1353 linux_getpid(struct thread *td, struct linux_getpid_args *args)
1354 {
1355
1356 td->td_retval[0] = td->td_proc->p_pid;
1357 return (0);
1358 }
1359
1360 int
1361 linux_getgid(struct thread *td, struct linux_getgid_args *args)
1362 {
1363
1364 td->td_retval[0] = td->td_ucred->cr_rgid;
1365 return (0);
1366 }
1367
1368 int
1369 linux_getuid(struct thread *td, struct linux_getuid_args *args)
1370 {
1371
1372 td->td_retval[0] = td->td_ucred->cr_ruid;
1373 return (0);
1374 }
1375
1376 #endif /*!__alpha__*/
1377
1378 int
1379 linux_getsid(struct thread *td, struct linux_getsid_args *args)
1380 {
1381 struct getsid_args bsd;
1382 bsd.pid = args->pid;
1383 return getsid(td, &bsd);
1384 }
1385
1386 int
1387 linux_nosys(struct thread *td, struct nosys_args *ignore)
1388 {
1389
1390 return (ENOSYS);
1391 }
1392
1393 int
1394 linux_getpriority(struct thread *td, struct linux_getpriority_args *args)
1395 {
1396 struct getpriority_args bsd_args;
1397 int error;
1398
1399 bsd_args.which = args->which;
1400 bsd_args.who = args->who;
1401 error = getpriority(td, &bsd_args);
1402 td->td_retval[0] = 20 - td->td_retval[0];
1403 return error;
1404 }
Cache object: d6f03cd37d2e36298cbdd95565a584fb
|