1 /*-
2 * Copyright (c) 1994-1995 Søren Schmidt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software withough specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * $FreeBSD$
29 */
30
31 #include "opt_compat.h"
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/sysproto.h>
36 #include <sys/kernel.h>
37 #include <sys/mman.h>
38 #include <sys/proc.h>
39 #include <sys/fcntl.h>
40 #include <sys/imgact_aout.h>
41 #include <sys/mount.h>
42 #include <sys/namei.h>
43 #include <sys/resourcevar.h>
44 #include <sys/stat.h>
45 #include <sys/sysctl.h>
46 #include <sys/unistd.h>
47 #include <sys/vnode.h>
48 #include <sys/wait.h>
49 #include <sys/time.h>
50
51 #include <vm/vm.h>
52 #include <vm/pmap.h>
53 #include <vm/vm_kern.h>
54 #include <vm/vm_prot.h>
55 #include <vm/vm_map.h>
56 #include <vm/vm_extern.h>
57
58 #include <machine/frame.h>
59 #include <machine/psl.h>
60
61 #include <i386/linux/linux.h>
62 #include <i386/linux/linux_proto.h>
63 #include <i386/linux/linux_util.h>
64
65 #include <posix4/sched.h>
66
/*
 * Translate a BSD signal number to its Linux equivalent through the
 * bsd_to_linux_signal[] table.  Values of NSIG or above are passed
 * through unchanged.  The argument is evaluated more than once, so it
 * must be free of side effects.
 */
#define BSD_TO_LINUX_SIGNAL(sig)	\
	((sig) < NSIG ? bsd_to_linux_signal[(sig)] : (sig))
69
70 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = {
71 RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
72 RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
73 RLIMIT_MEMLOCK, -1
74 };
75
76 int
77 linux_alarm(struct proc *p, struct linux_alarm_args *args)
78 {
79 struct itimerval it, old_it;
80 struct timeval tv;
81 int s;
82
83 #ifdef DEBUG
84 printf("Linux-emul(%ld): alarm(%u)\n", (long)p->p_pid, args->secs);
85 #endif
86 if (args->secs > 100000000)
87 return EINVAL;
88 it.it_value.tv_sec = (long)args->secs;
89 it.it_value.tv_usec = 0;
90 it.it_interval.tv_sec = 0;
91 it.it_interval.tv_usec = 0;
92 s = splsoftclock();
93 old_it = p->p_realtimer;
94 getmicrouptime(&tv);
95 if (timevalisset(&old_it.it_value))
96 untimeout(realitexpire, (caddr_t)p, p->p_ithandle);
97 if (it.it_value.tv_sec != 0) {
98 p->p_ithandle = timeout(realitexpire, (caddr_t)p, tvtohz(&it.it_value));
99 timevaladd(&it.it_value, &tv);
100 }
101 p->p_realtimer = it;
102 splx(s);
103 if (timevalcmp(&old_it.it_value, &tv, >)) {
104 timevalsub(&old_it.it_value, &tv);
105 if (old_it.it_value.tv_usec != 0)
106 old_it.it_value.tv_sec++;
107 p->p_retval[0] = old_it.it_value.tv_sec;
108 }
109 return 0;
110 }
111
112 int
113 linux_brk(struct proc *p, struct linux_brk_args *args)
114 {
115 #if 0
116 struct vmspace *vm = p->p_vmspace;
117 vm_offset_t new, old;
118 int error;
119
120 if ((vm_offset_t)args->dsend < (vm_offset_t)vm->vm_daddr)
121 return EINVAL;
122 if (((caddr_t)args->dsend - (caddr_t)vm->vm_daddr)
123 > p->p_rlimit[RLIMIT_DATA].rlim_cur)
124 return ENOMEM;
125
126 old = round_page((vm_offset_t)vm->vm_daddr) + ctob(vm->vm_dsize);
127 new = round_page((vm_offset_t)args->dsend);
128 p->p_retval[0] = old;
129 if ((new-old) > 0) {
130 if (swap_pager_full)
131 return ENOMEM;
132 error = vm_map_find(&vm->vm_map, NULL, 0, &old, (new-old), FALSE,
133 VM_PROT_ALL, VM_PROT_ALL, 0);
134 if (error)
135 return error;
136 vm->vm_dsize += btoc((new-old));
137 p->p_retval[0] = (int)(vm->vm_daddr + ctob(vm->vm_dsize));
138 }
139 return 0;
140 #else
141 struct vmspace *vm = p->p_vmspace;
142 vm_offset_t new, old;
143 struct obreak_args /* {
144 char * nsize;
145 } */ tmp;
146
147 #ifdef DEBUG
148 printf("Linux-emul(%ld): brk(%p)\n", (long)p->p_pid, (void *)args->dsend);
149 #endif
150 old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize);
151 new = (vm_offset_t)args->dsend;
152 tmp.nsize = (char *) new;
153 if (((caddr_t)new > vm->vm_daddr) && !obreak(p, &tmp))
154 p->p_retval[0] = (int)new;
155 else
156 p->p_retval[0] = (int)old;
157
158 return 0;
159 #endif
160 }
161
162 int
163 linux_uselib(struct proc *p, struct linux_uselib_args *args)
164 {
165 struct nameidata ni;
166 struct vnode *vp;
167 struct exec *a_out;
168 struct vattr attr;
169 vm_offset_t vmaddr;
170 unsigned long file_offset;
171 vm_offset_t buffer;
172 unsigned long bss_size;
173 int error;
174 caddr_t sg;
175 int locked;
176
177 sg = stackgap_init();
178 CHECKALTEXIST(p, &sg, args->library);
179
180 #ifdef DEBUG
181 printf("Linux-emul(%d): uselib(%s)\n", p->p_pid, args->library);
182 #endif
183
184 a_out = NULL;
185 locked = 0;
186 vp = NULL;
187
188 NDINIT(&ni, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, args->library, p);
189 if (error = namei(&ni))
190 goto cleanup;
191
192 vp = ni.ni_vp;
193 if (vp == NULL) {
194 error = ENOEXEC; /* ?? */
195 goto cleanup;
196 }
197
198 /*
199 * From here on down, we have a locked vnode that must be unlocked.
200 */
201 locked++;
202
203 /*
204 * Writable?
205 */
206 if (vp->v_writecount) {
207 error = ETXTBSY;
208 goto cleanup;
209 }
210
211 /*
212 * Executable?
213 */
214 if (error = VOP_GETATTR(vp, &attr, p->p_ucred, p))
215 goto cleanup;
216
217 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
218 ((attr.va_mode & 0111) == 0) ||
219 (attr.va_type != VREG)) {
220 error = ENOEXEC;
221 goto cleanup;
222 }
223
224 /*
225 * Sensible size?
226 */
227 if (attr.va_size == 0) {
228 error = ENOEXEC;
229 goto cleanup;
230 }
231
232 /*
233 * Can we access it?
234 */
235 if (error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p))
236 goto cleanup;
237
238 if (error = VOP_OPEN(vp, FREAD, p->p_ucred, p))
239 goto cleanup;
240
241 /*
242 * Lock no longer needed
243 */
244 VOP_UNLOCK(vp, 0, p);
245 locked = 0;
246
247 /*
248 * Pull in executable header into kernel_map
249 */
250 error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE,
251 VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0);
252 if (error)
253 goto cleanup;
254
255 /*
256 * Is it a Linux binary ?
257 */
258 if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
259 error = ENOEXEC;
260 goto cleanup;
261 }
262
263 /* While we are here, we should REALLY do some more checks */
264
265 /*
266 * Set file/virtual offset based on a.out variant.
267 */
268 switch ((int)(a_out->a_magic & 0xffff)) {
269 case 0413: /* ZMAGIC */
270 file_offset = 1024;
271 break;
272 case 0314: /* QMAGIC */
273 file_offset = 0;
274 break;
275 default:
276 error = ENOEXEC;
277 goto cleanup;
278 }
279
280 bss_size = round_page(a_out->a_bss);
281
282 /*
283 * Check various fields in header for validity/bounds.
284 */
285 if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
286 error = ENOEXEC;
287 goto cleanup;
288 }
289
290 /* text + data can't exceed file size */
291 if (a_out->a_data + a_out->a_text > attr.va_size) {
292 error = EFAULT;
293 goto cleanup;
294 }
295
296 /*
297 * text/data/bss must not exceed limits
298 * XXX: this is not complete. it should check current usage PLUS
299 * the resources needed by this library.
300 */
301 if (a_out->a_text > MAXTSIZ ||
302 a_out->a_data + bss_size > p->p_rlimit[RLIMIT_DATA].rlim_cur) {
303 error = ENOMEM;
304 goto cleanup;
305 }
306
307 /*
308 * prevent more writers
309 */
310 vp->v_flag |= VTEXT;
311
312 /*
313 * Check if file_offset page aligned,.
314 * Currently we cannot handle misalinged file offsets,
315 * and so we read in the entire image (what a waste).
316 */
317 if (file_offset & PAGE_MASK) {
318 #ifdef DEBUG
319 printf("uselib: Non page aligned binary %lu\n", file_offset);
320 #endif
321 /*
322 * Map text+data read/write/execute
323 */
324
325 /* a_entry is the load address and is page aligned */
326 vmaddr = trunc_page(a_out->a_entry);
327
328 /* get anon user mapping, read+write+execute */
329 error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
330 a_out->a_text + a_out->a_data, FALSE,
331 VM_PROT_ALL, VM_PROT_ALL, 0);
332 if (error)
333 goto cleanup;
334
335 /* map file into kernel_map */
336 error = vm_mmap(kernel_map, &buffer,
337 round_page(a_out->a_text + a_out->a_data + file_offset),
338 VM_PROT_READ, VM_PROT_READ, 0,
339 (caddr_t)vp, trunc_page(file_offset));
340 if (error)
341 goto cleanup;
342
343 /* copy from kernel VM space to user space */
344 error = copyout((caddr_t)(void *)(uintptr_t)(buffer + file_offset),
345 (caddr_t)vmaddr, a_out->a_text + a_out->a_data);
346
347 /* release temporary kernel space */
348 vm_map_remove(kernel_map, buffer,
349 buffer + round_page(a_out->a_text + a_out->a_data + file_offset));
350
351 if (error)
352 goto cleanup;
353 }
354 else {
355 #ifdef DEBUG
356 printf("uselib: Page aligned binary %lu\n", file_offset);
357 #endif
358 /*
359 * for QMAGIC, a_entry is 20 bytes beyond the load address
360 * to skip the executable header
361 */
362 vmaddr = trunc_page(a_out->a_entry);
363
364 /*
365 * Map it all into the process's space as a single copy-on-write
366 * "data" segment.
367 */
368 error = vm_mmap(&p->p_vmspace->vm_map, &vmaddr,
369 a_out->a_text + a_out->a_data,
370 VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED,
371 (caddr_t)vp, file_offset);
372 if (error)
373 goto cleanup;
374 }
375 #ifdef DEBUG
376 printf("mem=%08x = %08x %08x\n", vmaddr, ((int*)vmaddr)[0], ((int*)vmaddr)[1]);
377 #endif
378 if (bss_size != 0) {
379 /*
380 * Calculate BSS start address
381 */
382 vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data;
383
384 /*
385 * allocate some 'anon' space
386 */
387 error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
388 bss_size, FALSE,
389 VM_PROT_ALL, VM_PROT_ALL, 0);
390 if (error)
391 goto cleanup;
392 }
393
394 cleanup:
395 /*
396 * Unlock vnode if needed
397 */
398 if (locked)
399 VOP_UNLOCK(vp, 0, p);
400
401 /*
402 * Release the kernel mapping.
403 */
404 if (a_out)
405 vm_map_remove(kernel_map, (vm_offset_t)a_out, (vm_offset_t)a_out + PAGE_SIZE);
406
407 return error;
408 }
409
410 /* XXX move */
411 struct linux_select_argv {
412 int nfds;
413 fd_set *readfds;
414 fd_set *writefds;
415 fd_set *exceptfds;
416 struct timeval *timeout;
417 };
418
419 int
420 linux_select(struct proc *p, struct linux_select_args *args)
421 {
422 struct linux_select_argv linux_args;
423 struct linux_newselect_args newsel;
424 int error;
425
426 #ifdef SELECT_DEBUG
427 printf("Linux-emul(%d): select(%x)\n",
428 p->p_pid, args->ptr);
429 #endif
430 if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args,
431 sizeof(linux_args))))
432 return error;
433
434 newsel.nfds = linux_args.nfds;
435 newsel.readfds = linux_args.readfds;
436 newsel.writefds = linux_args.writefds;
437 newsel.exceptfds = linux_args.exceptfds;
438 newsel.timeout = linux_args.timeout;
439
440 return linux_newselect(p, &newsel);
441 }
442
443 int
444 linux_newselect(struct proc *p, struct linux_newselect_args *args)
445 {
446 struct select_args bsa;
447 struct timeval tv0, tv1, utv, *tvp;
448 caddr_t sg;
449 int error;
450
451 #ifdef DEBUG
452 printf("Linux-emul(%ld): newselect(%d, %p, %p, %p, %p)\n",
453 (long)p->p_pid, args->nfds, (void *)args->readfds,
454 (void *)args->writefds, (void *)args->exceptfds,
455 (void *)args->timeout);
456 #endif
457 error = 0;
458 bsa.nd = args->nfds;
459 bsa.in = args->readfds;
460 bsa.ou = args->writefds;
461 bsa.ex = args->exceptfds;
462 bsa.tv = args->timeout;
463
464 /*
465 * Store current time for computation of the amount of
466 * time left.
467 */
468 if (args->timeout) {
469 if ((error = copyin(args->timeout, &utv, sizeof(utv))))
470 goto select_out;
471 #ifdef DEBUG
472 printf("Linux-emul(%ld): incoming timeout (%ld/%ld)\n",
473 (long)p->p_pid, utv.tv_sec, utv.tv_usec);
474 #endif
475 if (itimerfix(&utv)) {
476 /*
477 * The timeval was invalid. Convert it to something
478 * valid that will act as it does under Linux.
479 */
480 sg = stackgap_init();
481 tvp = stackgap_alloc(&sg, sizeof(utv));
482 utv.tv_sec += utv.tv_usec / 1000000;
483 utv.tv_usec %= 1000000;
484 if (utv.tv_usec < 0) {
485 utv.tv_sec -= 1;
486 utv.tv_usec += 1000000;
487 }
488 if (utv.tv_sec < 0)
489 timevalclear(&utv);
490 if ((error = copyout(&utv, tvp, sizeof(utv))))
491 goto select_out;
492 bsa.tv = tvp;
493 }
494 microtime(&tv0);
495 }
496
497 error = select(p, &bsa);
498 #ifdef DEBUG
499 printf("Linux-emul(%d): real select returns %d\n",
500 p->p_pid, error);
501 #endif
502
503 if (error) {
504 /*
505 * See fs/select.c in the Linux kernel. Without this,
506 * Maelstrom doesn't work.
507 */
508 if (error == ERESTART)
509 error = EINTR;
510 goto select_out;
511 }
512
513 if (args->timeout) {
514 if (p->p_retval[0]) {
515 /*
516 * Compute how much time was left of the timeout,
517 * by subtracting the current time and the time
518 * before we started the call, and subtracting
519 * that result from the user-supplied value.
520 */
521 microtime(&tv1);
522 timevalsub(&tv1, &tv0);
523 timevalsub(&utv, &tv1);
524 if (utv.tv_sec < 0)
525 timevalclear(&utv);
526 } else
527 timevalclear(&utv);
528 #ifdef DEBUG
529 printf("Linux-emul(%ld): outgoing timeout (%ld/%ld)\n",
530 (long)p->p_pid, utv.tv_sec, utv.tv_usec);
531 #endif
532 if ((error = copyout(&utv, args->timeout, sizeof(utv))))
533 goto select_out;
534 }
535
536 select_out:
537 #ifdef DEBUG
538 printf("Linux-emul(%d): newselect_out -> %d\n",
539 p->p_pid, error);
540 #endif
541 return error;
542 }
543
544 int
545 linux_getpgid(struct proc *p, struct linux_getpgid_args *args)
546 {
547 struct proc *curproc;
548
549 #ifdef DEBUG
550 printf("Linux-emul(%d): getpgid(%d)\n", p->p_pid, args->pid);
551 #endif
552 if (args->pid != p->p_pid) {
553 if (!(curproc = pfind(args->pid)))
554 return ESRCH;
555 }
556 else
557 curproc = p;
558 p->p_retval[0] = curproc->p_pgid;
559 return 0;
560 }
561
562 int
563 linux_fork(struct proc *p, struct linux_fork_args *args)
564 {
565 int error;
566
567 #ifdef DEBUG
568 printf("Linux-emul(%d): fork()\n", p->p_pid);
569 #endif
570 if ((error = fork(p, (struct fork_args *)args)) != 0)
571 return error;
572 if (p->p_retval[1] == 1)
573 p->p_retval[0] = 0;
574 return 0;
575 }
576
577 int
578 linux_vfork(p, args)
579 struct proc *p;
580 struct linux_vfork_args *args;
581 {
582 int error;
583
584 #ifdef DEBUG
585 printf("Linux-emul(%ld): vfork()\n", (long)p->p_pid);
586 #endif
587
588 if ((error = vfork(p, (struct vfork_args *)args)) != 0)
589 return error;
590 /* Are we the child? */
591 if (p->p_retval[1] == 1)
592 p->p_retval[0] = 0;
593 return 0;
594 }
595
596 #define CLONE_VM 0x100
597 #define CLONE_FS 0x200
598 #define CLONE_FILES 0x400
599 #define CLONE_SIGHAND 0x800
600 #define CLONE_PID 0x1000
601
602 int
603 linux_clone(struct proc *p, struct linux_clone_args *args)
604 {
605 int error, ff = RFPROC;
606 struct proc *p2;
607 int exit_signal;
608 vm_offset_t start;
609 struct rfork_args rf_args;
610
611 #ifdef SMP
612 printf("linux_clone(%d): does not work with SMP yet\n", p->p_pid);
613 return (EOPNOTSUPP);
614 #endif
615 #ifdef DEBUG
616 if (args->flags & CLONE_PID)
617 printf("linux_clone(%d): CLONE_PID not yet supported\n", p->p_pid);
618 printf ("linux_clone(%d): invoked with flags %x and stack %x\n", p->p_pid,
619 (unsigned int)args->flags, (unsigned int)args->stack);
620 #endif
621
622 if (!args->stack)
623 return (EINVAL);
624
625 exit_signal = args->flags & 0x000000ff;
626 if (exit_signal >= LINUX_NSIG)
627 return EINVAL;
628 exit_signal = linux_to_bsd_signal[exit_signal];
629
630 /* RFTHREAD probably not necessary here, but it shouldn't hurt either */
631 ff |= RFTHREAD;
632
633 if (args->flags & CLONE_VM)
634 ff |= RFMEM;
635 if (args->flags & CLONE_SIGHAND)
636 ff |= RFSIGSHARE;
637 if (!(args->flags & CLONE_FILES))
638 ff |= RFFDG;
639
640 error = 0;
641 start = 0;
642
643 rf_args.flags = ff;
644 if ((error = rfork(p, &rf_args)) != 0)
645 return error;
646
647 p2 = pfind(p->p_retval[0]);
648 if (p2 == 0)
649 return ESRCH;
650
651 p2->p_sigparent = exit_signal;
652 p2->p_md.md_regs->tf_esp = (unsigned int)args->stack;
653
654 #ifdef DEBUG
655 printf ("linux_clone(%d): successful rfork to %d\n", p->p_pid, p2->p_pid);
656 #endif
657 return 0;
658 }
659
660 /* XXX move */
661 struct linux_mmap_argv {
662 linux_caddr_t addr;
663 int len;
664 int prot;
665 int flags;
666 int fd;
667 int pos;
668 };
669
670 #define STACK_SIZE (2 * 1024 * 1024)
671 #define GUARD_SIZE (4 * PAGE_SIZE)
672 int
673 linux_mmap(struct proc *p, struct linux_mmap_args *args)
674 {
675 struct mmap_args /* {
676 caddr_t addr;
677 size_t len;
678 int prot;
679 int flags;
680 int fd;
681 long pad;
682 off_t pos;
683 } */ bsd_args;
684 int error;
685 struct linux_mmap_argv linux_args;
686
687 if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args,
688 sizeof(linux_args))))
689 return error;
690 #ifdef DEBUG
691 printf("Linux-emul(%ld): mmap(%p, %d, %d, %08x, %d, %d)\n",
692 (long)p->p_pid, (void *)linux_args.addr, linux_args.len,
693 linux_args.prot, linux_args.flags, linux_args.fd, linux_args.pos);
694 #endif
695 bsd_args.flags = 0;
696 if (linux_args.flags & LINUX_MAP_SHARED)
697 bsd_args.flags |= MAP_SHARED;
698 if (linux_args.flags & LINUX_MAP_PRIVATE)
699 bsd_args.flags |= MAP_PRIVATE;
700 if (linux_args.flags & LINUX_MAP_FIXED)
701 bsd_args.flags |= MAP_FIXED;
702 if (linux_args.flags & LINUX_MAP_ANON)
703 bsd_args.flags |= MAP_ANON;
704
705 #ifndef VM_STACK
706 /* Linux Threads will map into the proc stack space, unless
707 * we prevent it. This causes problems if we're not using
708 * our VM_STACK options.
709 */
710 if ((unsigned int)linux_args.addr + linux_args.len > (USRSTACK - MAXSSIZ))
711 return (EINVAL);
712 #endif
713
714 if (linux_args.flags & LINUX_MAP_GROWSDOWN) {
715
716 #ifdef VM_STACK
717 bsd_args.flags |= MAP_STACK;
718 #endif
719
720 /* The linux MAP_GROWSDOWN option does not limit auto
721 * growth of the region. Linux mmap with this option
722 * takes as addr the inital BOS, and as len, the initial
723 * region size. It can then grow down from addr without
724 * limit. However, linux threads has an implicit internal
725 * limit to stack size of STACK_SIZE. Its just not
726 * enforced explicitly in linux. But, here we impose
727 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
728 * region, since we can do this with our mmap.
729 *
730 * Our mmap with MAP_STACK takes addr as the maximum
731 * downsize limit on BOS, and as len the max size of
732 * the region. It them maps the top SGROWSIZ bytes,
733 * and autgrows the region down, up to the limit
734 * in addr.
735 *
736 * If we don't use the MAP_STACK option, the effect
737 * of this code is to allocate a stack region of a
738 * fixed size of (STACK_SIZE - GUARD_SIZE).
739 */
740
741 /* This gives us TOS */
742 bsd_args.addr = linux_args.addr + linux_args.len;
743
744 /* This gives us our maximum stack size */
745 if (linux_args.len > STACK_SIZE - GUARD_SIZE)
746 bsd_args.len = linux_args.len;
747 else
748 bsd_args.len = STACK_SIZE - GUARD_SIZE;
749
750 /* This gives us a new BOS. If we're using VM_STACK, then
751 * mmap will just map the top SGROWSIZ bytes, and let
752 * the stack grow down to the limit at BOS. If we're
753 * not using VM_STACK we map the full stack, since we
754 * don't have a way to autogrow it.
755 */
756 bsd_args.addr -= bsd_args.len;
757
758 } else {
759 bsd_args.addr = linux_args.addr;
760 bsd_args.len = linux_args.len;
761 }
762
763 bsd_args.prot = linux_args.prot | PROT_READ; /* always required */
764 bsd_args.fd = linux_args.fd;
765 bsd_args.pos = linux_args.pos;
766 bsd_args.pad = 0;
767 return mmap(p, &bsd_args);
768 }
769
770 int
771 linux_mremap(struct proc *p, struct linux_mremap_args *args)
772 {
773 struct munmap_args /* {
774 void *addr;
775 size_t len;
776 } */ bsd_args;
777 int error = 0;
778
779 #ifdef DEBUG
780 printf("Linux-emul(%ld): mremap(%p, %08x, %08x, %08x)\n",
781 (long)p->p_pid, (void *)args->addr, args->old_len, args->new_len,
782 args->flags);
783 #endif
784 args->new_len = round_page(args->new_len);
785 args->old_len = round_page(args->old_len);
786
787 if (args->new_len > args->old_len) {
788 p->p_retval[0] = 0;
789 return ENOMEM;
790 }
791
792 if (args->new_len < args->old_len) {
793 bsd_args.addr = args->addr + args->new_len;
794 bsd_args.len = args->old_len - args->new_len;
795 error = munmap(p, &bsd_args);
796 }
797
798 p->p_retval[0] = error ? 0 : (int)args->addr;
799 return error;
800 }
801
802 int
803 linux_msync(struct proc *p, struct linux_msync_args *args)
804 {
805 struct msync_args bsd_args;
806
807 bsd_args.addr = args->addr;
808 bsd_args.len = args->len;
809 bsd_args.flags = 0; /* XXX ignore */
810
811 return msync(p, &bsd_args);
812 }
813
814 int
815 linux_pipe(struct proc *p, struct linux_pipe_args *args)
816 {
817 int error;
818 int reg_edx;
819
820 #ifdef DEBUG
821 printf("Linux-emul(%d): pipe(*)\n", p->p_pid);
822 #endif
823 reg_edx = p->p_retval[1];
824 if (error = pipe(p, 0)) {
825 p->p_retval[1] = reg_edx;
826 return error;
827 }
828
829 if (error = copyout(p->p_retval, args->pipefds, 2*sizeof(int))) {
830 p->p_retval[1] = reg_edx;
831 return error;
832 }
833
834 p->p_retval[1] = reg_edx;
835 p->p_retval[0] = 0;
836 return 0;
837 }
838
839 int
840 linux_time(struct proc *p, struct linux_time_args *args)
841 {
842 struct timeval tv;
843 linux_time_t tm;
844 int error;
845
846 #ifdef DEBUG
847 printf("Linux-emul(%d): time(*)\n", p->p_pid);
848 #endif
849 microtime(&tv);
850 tm = tv.tv_sec;
851 if (args->tm && (error = copyout(&tm, args->tm, sizeof(linux_time_t))))
852 return error;
853 p->p_retval[0] = tm;
854 return 0;
855 }
856
857 struct linux_times_argv {
858 long tms_utime;
859 long tms_stime;
860 long tms_cutime;
861 long tms_cstime;
862 };
863
864 #define CLK_TCK 100 /* Linux uses 100 */
865 #define CONVTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK))
866
867 int
868 linux_times(struct proc *p, struct linux_times_args *args)
869 {
870 struct timeval tv;
871 struct linux_times_argv tms;
872 struct rusage ru;
873 int error;
874
875 #ifdef DEBUG
876 printf("Linux-emul(%d): times(*)\n", p->p_pid);
877 #endif
878 calcru(p, &ru.ru_utime, &ru.ru_stime, NULL);
879
880 tms.tms_utime = CONVTCK(ru.ru_utime);
881 tms.tms_stime = CONVTCK(ru.ru_stime);
882
883 tms.tms_cutime = CONVTCK(p->p_stats->p_cru.ru_utime);
884 tms.tms_cstime = CONVTCK(p->p_stats->p_cru.ru_stime);
885
886 if ((error = copyout((caddr_t)&tms, (caddr_t)args->buf,
887 sizeof(struct linux_times_argv))))
888 return error;
889
890 microuptime(&tv);
891 p->p_retval[0] = (int)CONVTCK(tv);
892 return 0;
893 }
894
895 /* XXX move */
896 struct linux_newuname_t {
897 char sysname[65];
898 char nodename[65];
899 char release[65];
900 char version[65];
901 char machine[65];
902 char domainname[65];
903 };
904
905 int
906 linux_newuname(struct proc *p, struct linux_newuname_args *args)
907 {
908 struct linux_newuname_t linux_newuname;
909
910 #ifdef DEBUG
911 printf("Linux-emul(%d): newuname(*)\n", p->p_pid);
912 #endif
913 bzero(&linux_newuname, sizeof(struct linux_newuname_t));
914 strncpy(linux_newuname.sysname, "Linux",
915 sizeof(linux_newuname.sysname) - 1);
916 strncpy(linux_newuname.nodename, hostname,
917 sizeof(linux_newuname.nodename) - 1);
918 strncpy(linux_newuname.release, "2.0.36",
919 sizeof(linux_newuname.release) - 1);
920 strncpy(linux_newuname.version, version,
921 sizeof(linux_newuname.version) - 1);
922 strncpy(linux_newuname.machine, machine,
923 sizeof(linux_newuname.machine) - 1);
924 strncpy(linux_newuname.domainname, domainname,
925 sizeof(linux_newuname.domainname) - 1);
926 return (copyout((caddr_t)&linux_newuname, (caddr_t)args->buf,
927 sizeof(struct linux_newuname_t)));
928 }
929
930 struct linux_utimbuf {
931 linux_time_t l_actime;
932 linux_time_t l_modtime;
933 };
934
935 int
936 linux_utime(struct proc *p, struct linux_utime_args *args)
937 {
938 struct utimes_args /* {
939 char *path;
940 struct timeval *tptr;
941 } */ bsdutimes;
942 struct timeval tv[2], *tvp;
943 struct linux_utimbuf lut;
944 int error;
945 caddr_t sg;
946
947 sg = stackgap_init();
948 CHECKALTEXIST(p, &sg, args->fname);
949
950 #ifdef DEBUG
951 printf("Linux-emul(%d): utime(%s, *)\n", p->p_pid, args->fname);
952 #endif
953 if (args->times) {
954 if ((error = copyin(args->times, &lut, sizeof lut)))
955 return error;
956 tv[0].tv_sec = lut.l_actime;
957 tv[0].tv_usec = 0;
958 tv[1].tv_sec = lut.l_modtime;
959 tv[1].tv_usec = 0;
960 /* so that utimes can copyin */
961 tvp = (struct timeval *)stackgap_alloc(&sg, sizeof(tv));
962 if (tvp == NULL)
963 return (ENAMETOOLONG);
964 if ((error = copyout(tv, tvp, sizeof(tv))))
965 return error;
966 bsdutimes.tptr = tvp;
967 } else
968 bsdutimes.tptr = NULL;
969
970 bsdutimes.path = args->fname;
971 return utimes(p, &bsdutimes);
972 }
973
974 #define __WCLONE 0x80000000
975
976 int
977 linux_waitpid(struct proc *p, struct linux_waitpid_args *args)
978 {
979 struct wait_args /* {
980 int pid;
981 int *status;
982 int options;
983 struct rusage *rusage;
984 } */ tmp;
985 int error, tmpstat;
986
987 #ifdef DEBUG
988 printf("Linux-emul(%ld): waitpid(%d, %p, %d)\n",
989 (long)p->p_pid, args->pid, (void *)args->status, args->options);
990 #endif
991 tmp.pid = args->pid;
992 tmp.status = args->status;
993 tmp.options = (args->options & (WNOHANG | WUNTRACED));
994 /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
995 if (args->options & __WCLONE)
996 tmp.options |= WLINUXCLONE;
997 tmp.rusage = NULL;
998
999 if ((error = wait4(p, &tmp)) != 0)
1000 return error;
1001
1002 if (args->status) {
1003 if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
1004 return error;
1005 tmpstat &= 0xffff;
1006 if (WIFSIGNALED(tmpstat))
1007 tmpstat = (tmpstat & 0xffffff80) |
1008 BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
1009 else if (WIFSTOPPED(tmpstat))
1010 tmpstat = (tmpstat & 0xffff00ff) |
1011 (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
1012 return copyout(&tmpstat, args->status, sizeof(int));
1013 } else
1014 return 0;
1015 }
1016
1017 int
1018 linux_wait4(struct proc *p, struct linux_wait4_args *args)
1019 {
1020 struct wait_args /* {
1021 int pid;
1022 int *status;
1023 int options;
1024 struct rusage *rusage;
1025 } */ tmp;
1026 int error, tmpstat;
1027
1028 #ifdef DEBUG
1029 printf("Linux-emul(%ld): wait4(%d, %p, %d, %p)\n",
1030 (long)p->p_pid, args->pid, (void *)args->status, args->options,
1031 (void *)args->rusage);
1032 #endif
1033 tmp.pid = args->pid;
1034 tmp.status = args->status;
1035 tmp.options = (args->options & (WNOHANG | WUNTRACED));
1036 /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
1037 if (args->options & __WCLONE)
1038 tmp.options |= WLINUXCLONE;
1039 tmp.rusage = args->rusage;
1040
1041 if ((error = wait4(p, &tmp)) != 0)
1042 return error;
1043
1044 p->p_siglist &= ~sigmask(SIGCHLD);
1045
1046 if (args->status) {
1047 if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0)
1048 return error;
1049 tmpstat &= 0xffff;
1050 if (WIFSIGNALED(tmpstat))
1051 tmpstat = (tmpstat & 0xffffff80) |
1052 BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
1053 else if (WIFSTOPPED(tmpstat))
1054 tmpstat = (tmpstat & 0xffff00ff) |
1055 (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
1056 return copyout(&tmpstat, args->status, sizeof(int));
1057 } else
1058 return 0;
1059 }
1060
1061 int
1062 linux_mknod(struct proc *p, struct linux_mknod_args *args)
1063 {
1064 caddr_t sg;
1065 struct mknod_args bsd_mknod;
1066 struct mkfifo_args bsd_mkfifo;
1067
1068 sg = stackgap_init();
1069
1070 CHECKALTCREAT(p, &sg, args->path);
1071
1072 #ifdef DEBUG
1073 printf("Linux-emul(%d): mknod(%s, %d, %d)\n",
1074 p->p_pid, args->path, args->mode, args->dev);
1075 #endif
1076
1077 if (args->mode & S_IFIFO) {
1078 bsd_mkfifo.path = args->path;
1079 bsd_mkfifo.mode = args->mode;
1080 return mkfifo(p, &bsd_mkfifo);
1081 } else {
1082 bsd_mknod.path = args->path;
1083 bsd_mknod.mode = args->mode;
1084 bsd_mknod.dev = args->dev;
1085 return mknod(p, &bsd_mknod);
1086 }
1087 }
1088
1089 /*
1090 * UGH! This is just about the dumbest idea I've ever heard!!
1091 */
1092 int
1093 linux_personality(struct proc *p, struct linux_personality_args *args)
1094 {
1095 #ifdef DEBUG
1096 printf("Linux-emul(%d): personality(%d)\n",
1097 p->p_pid, args->per);
1098 #endif
1099 if (args->per != 0)
1100 return EINVAL;
1101
1102 /* Yes Jim, it's still a Linux... */
1103 p->p_retval[0] = 0;
1104 return 0;
1105 }
1106
1107 /*
1108 * Wrappers for get/setitimer for debugging..
1109 */
1110 int
1111 linux_setitimer(struct proc *p, struct linux_setitimer_args *args)
1112 {
1113 struct setitimer_args bsa;
1114 struct itimerval foo;
1115 int error;
1116
1117 #ifdef DEBUG
1118 printf("Linux-emul(%ld): setitimer(%p, %p)\n",
1119 (long)p->p_pid, (void *)args->itv, (void *)args->oitv);
1120 #endif
1121 bsa.which = args->which;
1122 bsa.itv = args->itv;
1123 bsa.oitv = args->oitv;
1124 if (args->itv) {
1125 if ((error = copyin((caddr_t)args->itv, (caddr_t)&foo,
1126 sizeof(foo))))
1127 return error;
1128 #ifdef DEBUG
1129 printf("setitimer: value: sec: %ld, usec: %ld\n",
1130 foo.it_value.tv_sec, foo.it_value.tv_usec);
1131 printf("setitimer: interval: sec: %ld, usec: %ld\n",
1132 foo.it_interval.tv_sec, foo.it_interval.tv_usec);
1133 #endif
1134 }
1135 return setitimer(p, &bsa);
1136 }
1137
1138 int
1139 linux_getitimer(struct proc *p, struct linux_getitimer_args *args)
1140 {
1141 struct getitimer_args bsa;
1142 #ifdef DEBUG
1143 printf("Linux-emul(%ld): getitimer(%p)\n",
1144 (long)p->p_pid, (void *)args->itv);
1145 #endif
1146 bsa.which = args->which;
1147 bsa.itv = args->itv;
1148 return getitimer(p, &bsa);
1149 }
1150
1151 int
1152 linux_iopl(struct proc *p, struct linux_iopl_args *args)
1153 {
1154 int error;
1155
1156 error = suser(p->p_ucred, &p->p_acflag);
1157 if (error != 0)
1158 return error;
1159 if (securelevel > 0)
1160 return EPERM;
1161 p->p_md.md_regs->tf_eflags |= PSL_IOPL;
1162 return 0;
1163 }
1164
1165 int
1166 linux_nice(struct proc *p, struct linux_nice_args *args)
1167 {
1168 struct setpriority_args bsd_args;
1169
1170 bsd_args.which = PRIO_PROCESS;
1171 bsd_args.who = 0; /* current process */
1172 bsd_args.prio = args->inc;
1173 return setpriority(p, &bsd_args);
1174 }
1175
1176 int
1177 linux_setgroups(p, uap)
1178 struct proc *p;
1179 struct linux_setgroups_args *uap;
1180 {
1181 struct pcred *pc;
1182 linux_gid_t linux_gidset[NGROUPS];
1183 gid_t *bsd_gidset;
1184 int ngrp, error;
1185
1186 pc = p->p_cred;
1187 ngrp = uap->gidsetsize;
1188
1189 /*
1190 * cr_groups[0] holds egid. Setting the whole set from
1191 * the supplied set will cause egid to be changed too.
1192 * Keep cr_groups[0] unchanged to prevent that.
1193 */
1194
1195 if ((error = suser(pc->pc_ucred, &p->p_acflag)))
1196 return (error);
1197
1198 if (ngrp >= NGROUPS)
1199 return (EINVAL);
1200
1201 pc->pc_ucred = crcopy(pc->pc_ucred);
1202 if (ngrp > 0) {
1203 error = copyin((caddr_t)uap->gidset, (caddr_t)linux_gidset,
1204 ngrp * sizeof(linux_gid_t));
1205 if (error)
1206 return (error);
1207
1208 pc->pc_ucred->cr_ngroups = ngrp + 1;
1209
1210 bsd_gidset = pc->pc_ucred->cr_groups;
1211 ngrp--;
1212 while (ngrp >= 0) {
1213 bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
1214 ngrp--;
1215 }
1216 }
1217 else
1218 pc->pc_ucred->cr_ngroups = 1;
1219
1220 setsugid(p);
1221 return (0);
1222 }
1223
1224 int
1225 linux_getgroups(p, uap)
1226 struct proc *p;
1227 struct linux_getgroups_args *uap;
1228 {
1229 struct pcred *pc;
1230 linux_gid_t linux_gidset[NGROUPS];
1231 gid_t *bsd_gidset;
1232 int bsd_gidsetsz, ngrp, error;
1233
1234 pc = p->p_cred;
1235 bsd_gidset = pc->pc_ucred->cr_groups;
1236 bsd_gidsetsz = pc->pc_ucred->cr_ngroups - 1;
1237
1238 /*
1239 * cr_groups[0] holds egid. Returning the whole set
1240 * here will cause a duplicate. Exclude cr_groups[0]
1241 * to prevent that.
1242 */
1243
1244 if ((ngrp = uap->gidsetsize) == 0) {
1245 p->p_retval[0] = bsd_gidsetsz;
1246 return (0);
1247 }
1248
1249 if (ngrp < bsd_gidsetsz)
1250 return (EINVAL);
1251
1252 ngrp = 0;
1253 while (ngrp < bsd_gidsetsz) {
1254 linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
1255 ngrp++;
1256 }
1257
1258 if ((error = copyout((caddr_t)linux_gidset, (caddr_t)uap->gidset,
1259 ngrp * sizeof(linux_gid_t))))
1260 return (error);
1261
1262 p->p_retval[0] = ngrp;
1263 return (0);
1264 }
1265
1266 int
1267 linux_setrlimit(p, uap)
1268 struct proc *p;
1269 struct linux_setrlimit_args *uap;
1270 {
1271 struct osetrlimit_args bsd;
1272
1273 #ifdef DEBUG
1274 printf("Linux-emul(%ld): setrlimit(%d, %p)\n", (long)p->p_pid,
1275 uap->resource, (void *)uap->rlim);
1276 #endif
1277
1278 if (uap->resource >= LINUX_RLIM_NLIMITS)
1279 return EINVAL;
1280
1281 bsd.which = linux_to_bsd_resource[uap->resource];
1282
1283 if (bsd.which == -1)
1284 return EINVAL;
1285
1286 bsd.rlp = uap->rlim;
1287 return osetrlimit(p, &bsd);
1288 }
1289
1290 int
1291 linux_getrlimit(p, uap)
1292 struct proc *p;
1293 struct linux_getrlimit_args *uap;
1294 {
1295 struct ogetrlimit_args bsd;
1296
1297 #ifdef DEBUG
1298 printf("Linux-emul(%ld): getrlimit(%d, %p)\n", (long)p->p_pid,
1299 uap->resource, (void *)uap->rlim);
1300 #endif
1301
1302 if (uap->resource >= LINUX_RLIM_NLIMITS)
1303 return EINVAL;
1304
1305 bsd.which = linux_to_bsd_resource[uap->resource];
1306
1307 if (bsd.which == -1)
1308 return EINVAL;
1309
1310 bsd.rlp = uap->rlim;
1311 return ogetrlimit(p, &bsd);
1312 }
1313
1314 int
1315 linux_sched_setscheduler(p, uap)
1316 struct proc *p;
1317 struct linux_sched_setscheduler_args *uap;
1318 {
1319 struct sched_setscheduler_args bsd;
1320
1321 #ifdef DEBUG
1322 printf("Linux-emul(%ld): sched_setscheduler(%d, %d, %p)\n",
1323 (long)p->p_pid, uap->pid, uap->policy, (void *)uap->param);
1324 #endif
1325
1326 switch (uap->policy) {
1327 case LINUX_SCHED_OTHER:
1328 bsd.policy = SCHED_OTHER;
1329 break;
1330 case LINUX_SCHED_FIFO:
1331 bsd.policy = SCHED_FIFO;
1332 break;
1333 case LINUX_SCHED_RR:
1334 bsd.policy = SCHED_RR;
1335 break;
1336 default:
1337 return EINVAL;
1338 }
1339
1340 bsd.pid = uap->pid;
1341 bsd.param = uap->param;
1342 return sched_setscheduler(p, &bsd);
1343 }
1344
1345 int
1346 linux_sched_getscheduler(p, uap)
1347 struct proc *p;
1348 struct linux_sched_getscheduler_args *uap;
1349 {
1350 struct sched_getscheduler_args bsd;
1351 int error;
1352
1353 #ifdef DEBUG
1354 printf("Linux-emul(%ld): sched_getscheduler(%d)\n",
1355 (long)p->p_pid, uap->pid);
1356 #endif
1357
1358
1359 bsd.pid = uap->pid;
1360 error = sched_getscheduler(p, &bsd);
1361
1362 switch (p->p_retval[0]) {
1363 case SCHED_OTHER:
1364 p->p_retval[0] = LINUX_SCHED_OTHER;
1365 break;
1366 case SCHED_FIFO:
1367 p->p_retval[0] = LINUX_SCHED_FIFO;
1368 break;
1369 case SCHED_RR:
1370 p->p_retval[0] = LINUX_SCHED_RR;
1371 break;
1372 }
1373
1374 return error;
1375 }
/* Cache object: 15b435efbf33cd144bfb92c2f5923a9a */