1 /*-
2 * Copyright (c) 2004 Tim J. Robbins
3 * Copyright (c) 2002 Doug Rabson
4 * Copyright (c) 2000 Marcel Moolenaar
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer
12 * in this position and unchanged.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. The name of the author may not be used to endorse or promote products
17 * derived from this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33
34 #include <sys/param.h>
35 #include <sys/kernel.h>
36 #include <sys/systm.h>
37 #include <sys/file.h>
38 #include <sys/fcntl.h>
39 #include <sys/clock.h>
40 #include <sys/imgact.h>
41 #include <sys/limits.h>
42 #include <sys/lock.h>
43 #include <sys/malloc.h>
44 #include <sys/mman.h>
45 #include <sys/mutex.h>
46 #include <sys/priv.h>
47 #include <sys/proc.h>
48 #include <sys/resource.h>
49 #include <sys/resourcevar.h>
50 #include <sys/sched.h>
51 #include <sys/syscallsubr.h>
52 #include <sys/sysproto.h>
53 #include <sys/unistd.h>
54 #include <sys/wait.h>
55
56 #include <machine/frame.h>
57 #include <machine/pcb.h>
58 #include <machine/psl.h>
59 #include <machine/segments.h>
60 #include <machine/specialreg.h>
61
62 #include <vm/vm.h>
63 #include <vm/pmap.h>
64 #include <vm/vm_map.h>
65
66 #include <compat/freebsd32/freebsd32_util.h>
67 #include <amd64/linux32/linux.h>
68 #include <amd64/linux32/linux32_proto.h>
69 #include <compat/linux/linux_ipc.h>
70 #include <compat/linux/linux_misc.h>
71 #include <compat/linux/linux_signal.h>
72 #include <compat/linux/linux_util.h>
73 #include <compat/linux/linux_emul.h>
74
75 struct l_old_select_argv {
76 l_int nfds;
77 l_uintptr_t readfds;
78 l_uintptr_t writefds;
79 l_uintptr_t exceptfds;
80 l_uintptr_t timeout;
81 } __packed;
82
83 int
84 linux_to_bsd_sigaltstack(int lsa)
85 {
86 int bsa = 0;
87
88 if (lsa & LINUX_SS_DISABLE)
89 bsa |= SS_DISABLE;
90 if (lsa & LINUX_SS_ONSTACK)
91 bsa |= SS_ONSTACK;
92 return (bsa);
93 }
94
95 static int linux_mmap_common(struct thread *td, l_uintptr_t addr,
96 l_size_t len, l_int prot, l_int flags, l_int fd,
97 l_loff_t pos);
98
99 int
100 bsd_to_linux_sigaltstack(int bsa)
101 {
102 int lsa = 0;
103
104 if (bsa & SS_DISABLE)
105 lsa |= LINUX_SS_DISABLE;
106 if (bsa & SS_ONSTACK)
107 lsa |= LINUX_SS_ONSTACK;
108 return (lsa);
109 }
110
111 static void
112 bsd_to_linux_rusage(struct rusage *ru, struct l_rusage *lru)
113 {
114
115 lru->ru_utime.tv_sec = ru->ru_utime.tv_sec;
116 lru->ru_utime.tv_usec = ru->ru_utime.tv_usec;
117 lru->ru_stime.tv_sec = ru->ru_stime.tv_sec;
118 lru->ru_stime.tv_usec = ru->ru_stime.tv_usec;
119 lru->ru_maxrss = ru->ru_maxrss;
120 lru->ru_ixrss = ru->ru_ixrss;
121 lru->ru_idrss = ru->ru_idrss;
122 lru->ru_isrss = ru->ru_isrss;
123 lru->ru_minflt = ru->ru_minflt;
124 lru->ru_majflt = ru->ru_majflt;
125 lru->ru_nswap = ru->ru_nswap;
126 lru->ru_inblock = ru->ru_inblock;
127 lru->ru_oublock = ru->ru_oublock;
128 lru->ru_msgsnd = ru->ru_msgsnd;
129 lru->ru_msgrcv = ru->ru_msgrcv;
130 lru->ru_nsignals = ru->ru_nsignals;
131 lru->ru_nvcsw = ru->ru_nvcsw;
132 lru->ru_nivcsw = ru->ru_nivcsw;
133 }
134
135 int
136 linux_execve(struct thread *td, struct linux_execve_args *args)
137 {
138 struct image_args eargs;
139 char *path;
140 int error;
141
142 LCONVPATHEXIST(td, args->path, &path);
143
144 #ifdef DEBUG
145 if (ldebug(execve))
146 printf(ARGS(execve, "%s"), path);
147 #endif
148
149 error = freebsd32_exec_copyin_args(&eargs, path, UIO_SYSSPACE,
150 args->argp, args->envp);
151 free(path, M_TEMP);
152 if (error == 0)
153 error = kern_execve(td, &eargs, NULL);
154 if (error == 0)
155 /* Linux process can execute FreeBSD one, do not attempt
156 * to create emuldata for such process using
157 * linux_proc_init, this leads to a panic on KASSERT
158 * because such process has p->p_emuldata == NULL.
159 */
160 if (SV_PROC_ABI(td->td_proc) == SV_ABI_LINUX)
161 error = linux_proc_init(td, 0, 0);
162 return (error);
163 }
164
165 CTASSERT(sizeof(struct l_iovec32) == 8);
166
167 static int
168 linux32_copyinuio(struct l_iovec32 *iovp, l_ulong iovcnt, struct uio **uiop)
169 {
170 struct l_iovec32 iov32;
171 struct iovec *iov;
172 struct uio *uio;
173 uint32_t iovlen;
174 int error, i;
175
176 *uiop = NULL;
177 if (iovcnt > UIO_MAXIOV)
178 return (EINVAL);
179 iovlen = iovcnt * sizeof(struct iovec);
180 uio = malloc(iovlen + sizeof(*uio), M_IOV, M_WAITOK);
181 iov = (struct iovec *)(uio + 1);
182 for (i = 0; i < iovcnt; i++) {
183 error = copyin(&iovp[i], &iov32, sizeof(struct l_iovec32));
184 if (error) {
185 free(uio, M_IOV);
186 return (error);
187 }
188 iov[i].iov_base = PTRIN(iov32.iov_base);
189 iov[i].iov_len = iov32.iov_len;
190 }
191 uio->uio_iov = iov;
192 uio->uio_iovcnt = iovcnt;
193 uio->uio_segflg = UIO_USERSPACE;
194 uio->uio_offset = -1;
195 uio->uio_resid = 0;
196 for (i = 0; i < iovcnt; i++) {
197 if (iov->iov_len > INT_MAX - uio->uio_resid) {
198 free(uio, M_IOV);
199 return (EINVAL);
200 }
201 uio->uio_resid += iov->iov_len;
202 iov++;
203 }
204 *uiop = uio;
205 return (0);
206 }
207
208 int
209 linux32_copyiniov(struct l_iovec32 *iovp32, l_ulong iovcnt, struct iovec **iovp,
210 int error)
211 {
212 struct l_iovec32 iov32;
213 struct iovec *iov;
214 uint32_t iovlen;
215 int i;
216
217 *iovp = NULL;
218 if (iovcnt > UIO_MAXIOV)
219 return (error);
220 iovlen = iovcnt * sizeof(struct iovec);
221 iov = malloc(iovlen, M_IOV, M_WAITOK);
222 for (i = 0; i < iovcnt; i++) {
223 error = copyin(&iovp32[i], &iov32, sizeof(struct l_iovec32));
224 if (error) {
225 free(iov, M_IOV);
226 return (error);
227 }
228 iov[i].iov_base = PTRIN(iov32.iov_base);
229 iov[i].iov_len = iov32.iov_len;
230 }
231 *iovp = iov;
232 return(0);
233
234 }
235
236 int
237 linux_readv(struct thread *td, struct linux_readv_args *uap)
238 {
239 struct uio *auio;
240 int error;
241
242 error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio);
243 if (error)
244 return (error);
245 error = kern_readv(td, uap->fd, auio);
246 free(auio, M_IOV);
247 return (error);
248 }
249
250 int
251 linux_writev(struct thread *td, struct linux_writev_args *uap)
252 {
253 struct uio *auio;
254 int error;
255
256 error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio);
257 if (error)
258 return (error);
259 error = kern_writev(td, uap->fd, auio);
260 free(auio, M_IOV);
261 return (error);
262 }
263
264 struct l_ipc_kludge {
265 l_uintptr_t msgp;
266 l_long msgtyp;
267 } __packed;
268
269 int
270 linux_ipc(struct thread *td, struct linux_ipc_args *args)
271 {
272
273 switch (args->what & 0xFFFF) {
274 case LINUX_SEMOP: {
275 struct linux_semop_args a;
276
277 a.semid = args->arg1;
278 a.tsops = args->ptr;
279 a.nsops = args->arg2;
280 return (linux_semop(td, &a));
281 }
282 case LINUX_SEMGET: {
283 struct linux_semget_args a;
284
285 a.key = args->arg1;
286 a.nsems = args->arg2;
287 a.semflg = args->arg3;
288 return (linux_semget(td, &a));
289 }
290 case LINUX_SEMCTL: {
291 struct linux_semctl_args a;
292 int error;
293
294 a.semid = args->arg1;
295 a.semnum = args->arg2;
296 a.cmd = args->arg3;
297 error = copyin(args->ptr, &a.arg, sizeof(a.arg));
298 if (error)
299 return (error);
300 return (linux_semctl(td, &a));
301 }
302 case LINUX_MSGSND: {
303 struct linux_msgsnd_args a;
304
305 a.msqid = args->arg1;
306 a.msgp = args->ptr;
307 a.msgsz = args->arg2;
308 a.msgflg = args->arg3;
309 return (linux_msgsnd(td, &a));
310 }
311 case LINUX_MSGRCV: {
312 struct linux_msgrcv_args a;
313
314 a.msqid = args->arg1;
315 a.msgsz = args->arg2;
316 a.msgflg = args->arg3;
317 if ((args->what >> 16) == 0) {
318 struct l_ipc_kludge tmp;
319 int error;
320
321 if (args->ptr == 0)
322 return (EINVAL);
323 error = copyin(args->ptr, &tmp, sizeof(tmp));
324 if (error)
325 return (error);
326 a.msgp = PTRIN(tmp.msgp);
327 a.msgtyp = tmp.msgtyp;
328 } else {
329 a.msgp = args->ptr;
330 a.msgtyp = args->arg5;
331 }
332 return (linux_msgrcv(td, &a));
333 }
334 case LINUX_MSGGET: {
335 struct linux_msgget_args a;
336
337 a.key = args->arg1;
338 a.msgflg = args->arg2;
339 return (linux_msgget(td, &a));
340 }
341 case LINUX_MSGCTL: {
342 struct linux_msgctl_args a;
343
344 a.msqid = args->arg1;
345 a.cmd = args->arg2;
346 a.buf = args->ptr;
347 return (linux_msgctl(td, &a));
348 }
349 case LINUX_SHMAT: {
350 struct linux_shmat_args a;
351
352 a.shmid = args->arg1;
353 a.shmaddr = args->ptr;
354 a.shmflg = args->arg2;
355 a.raddr = PTRIN((l_uint)args->arg3);
356 return (linux_shmat(td, &a));
357 }
358 case LINUX_SHMDT: {
359 struct linux_shmdt_args a;
360
361 a.shmaddr = args->ptr;
362 return (linux_shmdt(td, &a));
363 }
364 case LINUX_SHMGET: {
365 struct linux_shmget_args a;
366
367 a.key = args->arg1;
368 a.size = args->arg2;
369 a.shmflg = args->arg3;
370 return (linux_shmget(td, &a));
371 }
372 case LINUX_SHMCTL: {
373 struct linux_shmctl_args a;
374
375 a.shmid = args->arg1;
376 a.cmd = args->arg2;
377 a.buf = args->ptr;
378 return (linux_shmctl(td, &a));
379 }
380 default:
381 break;
382 }
383
384 return (EINVAL);
385 }
386
387 int
388 linux_old_select(struct thread *td, struct linux_old_select_args *args)
389 {
390 struct l_old_select_argv linux_args;
391 struct linux_select_args newsel;
392 int error;
393
394 #ifdef DEBUG
395 if (ldebug(old_select))
396 printf(ARGS(old_select, "%p"), args->ptr);
397 #endif
398
399 error = copyin(args->ptr, &linux_args, sizeof(linux_args));
400 if (error)
401 return (error);
402
403 newsel.nfds = linux_args.nfds;
404 newsel.readfds = PTRIN(linux_args.readfds);
405 newsel.writefds = PTRIN(linux_args.writefds);
406 newsel.exceptfds = PTRIN(linux_args.exceptfds);
407 newsel.timeout = PTRIN(linux_args.timeout);
408 return (linux_select(td, &newsel));
409 }
410
411 int
412 linux_set_cloned_tls(struct thread *td, void *desc)
413 {
414 struct user_segment_descriptor sd;
415 struct l_user_desc info;
416 struct pcb *pcb;
417 int error;
418 int a[2];
419
420 error = copyin(desc, &info, sizeof(struct l_user_desc));
421 if (error) {
422 printf(LMSG("copyin failed!"));
423 } else {
424 /* We might copy out the entry_number as GUGS32_SEL. */
425 info.entry_number = GUGS32_SEL;
426 error = copyout(&info, desc, sizeof(struct l_user_desc));
427 if (error)
428 printf(LMSG("copyout failed!"));
429
430 a[0] = LINUX_LDT_entry_a(&info);
431 a[1] = LINUX_LDT_entry_b(&info);
432
433 memcpy(&sd, &a, sizeof(a));
434 #ifdef DEBUG
435 if (ldebug(clone))
436 printf("Segment created in clone with "
437 "CLONE_SETTLS: lobase: %x, hibase: %x, "
438 "lolimit: %x, hilimit: %x, type: %i, "
439 "dpl: %i, p: %i, xx: %i, long: %i, "
440 "def32: %i, gran: %i\n", sd.sd_lobase,
441 sd.sd_hibase, sd.sd_lolimit, sd.sd_hilimit,
442 sd.sd_type, sd.sd_dpl, sd.sd_p, sd.sd_xx,
443 sd.sd_long, sd.sd_def32, sd.sd_gran);
444 #endif
445 pcb = td->td_pcb;
446 pcb->pcb_gsbase = (register_t)info.base_addr;
447 /* XXXKIB pcb->pcb_gs32sd = sd; */
448 td->td_frame->tf_gs = GSEL(GUGS32_SEL, SEL_UPL);
449 set_pcb_flags(pcb, PCB_GS32BIT | PCB_32BIT);
450 }
451
452 return (error);
453 }
454
455 int
456 linux_set_upcall_kse(struct thread *td, register_t stack)
457 {
458
459 td->td_frame->tf_rsp = stack;
460
461 return (0);
462 }
463
464 #define STACK_SIZE (2 * 1024 * 1024)
465 #define GUARD_SIZE (4 * PAGE_SIZE)
466
467 int
468 linux_mmap2(struct thread *td, struct linux_mmap2_args *args)
469 {
470
471 #ifdef DEBUG
472 if (ldebug(mmap2))
473 printf(ARGS(mmap2, "0x%08x, %d, %d, 0x%08x, %d, %d"),
474 args->addr, args->len, args->prot,
475 args->flags, args->fd, args->pgoff);
476 #endif
477
478 return (linux_mmap_common(td, PTROUT(args->addr), args->len, args->prot,
479 args->flags, args->fd, (uint64_t)(uint32_t)args->pgoff *
480 PAGE_SIZE));
481 }
482
483 int
484 linux_mmap(struct thread *td, struct linux_mmap_args *args)
485 {
486 int error;
487 struct l_mmap_argv linux_args;
488
489 error = copyin(args->ptr, &linux_args, sizeof(linux_args));
490 if (error)
491 return (error);
492
493 #ifdef DEBUG
494 if (ldebug(mmap))
495 printf(ARGS(mmap, "0x%08x, %d, %d, 0x%08x, %d, %d"),
496 linux_args.addr, linux_args.len, linux_args.prot,
497 linux_args.flags, linux_args.fd, linux_args.pgoff);
498 #endif
499
500 return (linux_mmap_common(td, linux_args.addr, linux_args.len,
501 linux_args.prot, linux_args.flags, linux_args.fd,
502 (uint32_t)linux_args.pgoff));
503 }
504
505 static int
506 linux_mmap_common(struct thread *td, l_uintptr_t addr, l_size_t len, l_int prot,
507 l_int flags, l_int fd, l_loff_t pos)
508 {
509 struct proc *p = td->td_proc;
510 struct mmap_args /* {
511 caddr_t addr;
512 size_t len;
513 int prot;
514 int flags;
515 int fd;
516 long pad;
517 off_t pos;
518 } */ bsd_args;
519 int error;
520 struct file *fp;
521
522 error = 0;
523 bsd_args.flags = 0;
524 fp = NULL;
525
526 /*
527 * Linux mmap(2):
528 * You must specify exactly one of MAP_SHARED and MAP_PRIVATE
529 */
530 if (!((flags & LINUX_MAP_SHARED) ^ (flags & LINUX_MAP_PRIVATE)))
531 return (EINVAL);
532
533 if (flags & LINUX_MAP_SHARED)
534 bsd_args.flags |= MAP_SHARED;
535 if (flags & LINUX_MAP_PRIVATE)
536 bsd_args.flags |= MAP_PRIVATE;
537 if (flags & LINUX_MAP_FIXED)
538 bsd_args.flags |= MAP_FIXED;
539 if (flags & LINUX_MAP_ANON) {
540 /* Enforce pos to be on page boundary, then ignore. */
541 if ((pos & PAGE_MASK) != 0)
542 return (EINVAL);
543 pos = 0;
544 bsd_args.flags |= MAP_ANON;
545 } else
546 bsd_args.flags |= MAP_NOSYNC;
547 if (flags & LINUX_MAP_GROWSDOWN)
548 bsd_args.flags |= MAP_STACK;
549
550 /*
551 * PROT_READ, PROT_WRITE, or PROT_EXEC implies PROT_READ and PROT_EXEC
552 * on Linux/i386. We do this to ensure maximum compatibility.
553 * Linux/ia64 does the same in i386 emulation mode.
554 */
555 bsd_args.prot = prot;
556 if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC))
557 bsd_args.prot |= PROT_READ | PROT_EXEC;
558
559 /* Linux does not check file descriptor when MAP_ANONYMOUS is set. */
560 bsd_args.fd = (bsd_args.flags & MAP_ANON) ? -1 : fd;
561 if (bsd_args.fd != -1) {
562 /*
563 * Linux follows Solaris mmap(2) description:
564 * The file descriptor fildes is opened with
565 * read permission, regardless of the
566 * protection options specified.
567 */
568
569 if ((error = fget(td, bsd_args.fd, &fp)) != 0)
570 return (error);
571 if (fp->f_type != DTYPE_VNODE) {
572 fdrop(fp, td);
573 return (EINVAL);
574 }
575
576 /* Linux mmap() just fails for O_WRONLY files */
577 if (!(fp->f_flag & FREAD)) {
578 fdrop(fp, td);
579 return (EACCES);
580 }
581
582 fdrop(fp, td);
583 }
584
585 if (flags & LINUX_MAP_GROWSDOWN) {
586 /*
587 * The Linux MAP_GROWSDOWN option does not limit auto
588 * growth of the region. Linux mmap with this option
589 * takes as addr the inital BOS, and as len, the initial
590 * region size. It can then grow down from addr without
591 * limit. However, Linux threads has an implicit internal
592 * limit to stack size of STACK_SIZE. Its just not
593 * enforced explicitly in Linux. But, here we impose
594 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
595 * region, since we can do this with our mmap.
596 *
597 * Our mmap with MAP_STACK takes addr as the maximum
598 * downsize limit on BOS, and as len the max size of
599 * the region. It then maps the top SGROWSIZ bytes,
600 * and auto grows the region down, up to the limit
601 * in addr.
602 *
603 * If we don't use the MAP_STACK option, the effect
604 * of this code is to allocate a stack region of a
605 * fixed size of (STACK_SIZE - GUARD_SIZE).
606 */
607
608 if ((caddr_t)PTRIN(addr) + len > p->p_vmspace->vm_maxsaddr) {
609 /*
610 * Some Linux apps will attempt to mmap
611 * thread stacks near the top of their
612 * address space. If their TOS is greater
613 * than vm_maxsaddr, vm_map_growstack()
614 * will confuse the thread stack with the
615 * process stack and deliver a SEGV if they
616 * attempt to grow the thread stack past their
617 * current stacksize rlimit. To avoid this,
618 * adjust vm_maxsaddr upwards to reflect
619 * the current stacksize rlimit rather
620 * than the maximum possible stacksize.
621 * It would be better to adjust the
622 * mmap'ed region, but some apps do not check
623 * mmap's return value.
624 */
625 PROC_LOCK(p);
626 p->p_vmspace->vm_maxsaddr = (char *)LINUX32_USRSTACK -
627 lim_cur(p, RLIMIT_STACK);
628 PROC_UNLOCK(p);
629 }
630
631 /*
632 * This gives us our maximum stack size and a new BOS.
633 * If we're using VM_STACK, then mmap will just map
634 * the top SGROWSIZ bytes, and let the stack grow down
635 * to the limit at BOS. If we're not using VM_STACK
636 * we map the full stack, since we don't have a way
637 * to autogrow it.
638 */
639 if (len > STACK_SIZE - GUARD_SIZE) {
640 bsd_args.addr = (caddr_t)PTRIN(addr);
641 bsd_args.len = len;
642 } else {
643 bsd_args.addr = (caddr_t)PTRIN(addr) -
644 (STACK_SIZE - GUARD_SIZE - len);
645 bsd_args.len = STACK_SIZE - GUARD_SIZE;
646 }
647 } else {
648 bsd_args.addr = (caddr_t)PTRIN(addr);
649 bsd_args.len = len;
650 }
651 bsd_args.pos = pos;
652
653 #ifdef DEBUG
654 if (ldebug(mmap))
655 printf("-> %s(%p, %d, %d, 0x%08x, %d, 0x%x)\n",
656 __func__,
657 (void *)bsd_args.addr, (int)bsd_args.len, bsd_args.prot,
658 bsd_args.flags, bsd_args.fd, (int)bsd_args.pos);
659 #endif
660 error = mmap(td, &bsd_args);
661 #ifdef DEBUG
662 if (ldebug(mmap))
663 printf("-> %s() return: 0x%x (0x%08x)\n",
664 __func__, error, (u_int)td->td_retval[0]);
665 #endif
666 return (error);
667 }
668
669 int
670 linux_mprotect(struct thread *td, struct linux_mprotect_args *uap)
671 {
672 struct mprotect_args bsd_args;
673
674 bsd_args.addr = uap->addr;
675 bsd_args.len = uap->len;
676 bsd_args.prot = uap->prot;
677 if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC))
678 bsd_args.prot |= PROT_READ | PROT_EXEC;
679 return (mprotect(td, &bsd_args));
680 }
681
682 int
683 linux_iopl(struct thread *td, struct linux_iopl_args *args)
684 {
685 int error;
686
687 if (args->level < 0 || args->level > 3)
688 return (EINVAL);
689 if ((error = priv_check(td, PRIV_IO)) != 0)
690 return (error);
691 if ((error = securelevel_gt(td->td_ucred, 0)) != 0)
692 return (error);
693 td->td_frame->tf_rflags = (td->td_frame->tf_rflags & ~PSL_IOPL) |
694 (args->level * (PSL_IOPL / 3));
695
696 return (0);
697 }
698
699 int
700 linux_pipe(struct thread *td, struct linux_pipe_args *args)
701 {
702 int error;
703 int fildes[2];
704
705 #ifdef DEBUG
706 if (ldebug(pipe))
707 printf(ARGS(pipe, "*"));
708 #endif
709
710 error = kern_pipe(td, fildes);
711 if (error)
712 return (error);
713
714 /* XXX: Close descriptors on error. */
715 return (copyout(fildes, args->pipefds, sizeof fildes));
716 }
717
718 int
719 linux_sigaction(struct thread *td, struct linux_sigaction_args *args)
720 {
721 l_osigaction_t osa;
722 l_sigaction_t act, oact;
723 int error;
724
725 #ifdef DEBUG
726 if (ldebug(sigaction))
727 printf(ARGS(sigaction, "%d, %p, %p"),
728 args->sig, (void *)args->nsa, (void *)args->osa);
729 #endif
730
731 if (args->nsa != NULL) {
732 error = copyin(args->nsa, &osa, sizeof(l_osigaction_t));
733 if (error)
734 return (error);
735 act.lsa_handler = osa.lsa_handler;
736 act.lsa_flags = osa.lsa_flags;
737 act.lsa_restorer = osa.lsa_restorer;
738 LINUX_SIGEMPTYSET(act.lsa_mask);
739 act.lsa_mask.__bits[0] = osa.lsa_mask;
740 }
741
742 error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL,
743 args->osa ? &oact : NULL);
744
745 if (args->osa != NULL && !error) {
746 osa.lsa_handler = oact.lsa_handler;
747 osa.lsa_flags = oact.lsa_flags;
748 osa.lsa_restorer = oact.lsa_restorer;
749 osa.lsa_mask = oact.lsa_mask.__bits[0];
750 error = copyout(&osa, args->osa, sizeof(l_osigaction_t));
751 }
752
753 return (error);
754 }
755
756 /*
757 * Linux has two extra args, restart and oldmask. We don't use these,
758 * but it seems that "restart" is actually a context pointer that
759 * enables the signal to happen with a different register set.
760 */
761 int
762 linux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args)
763 {
764 sigset_t sigmask;
765 l_sigset_t mask;
766
767 #ifdef DEBUG
768 if (ldebug(sigsuspend))
769 printf(ARGS(sigsuspend, "%08lx"), (unsigned long)args->mask);
770 #endif
771
772 LINUX_SIGEMPTYSET(mask);
773 mask.__bits[0] = args->mask;
774 linux_to_bsd_sigset(&mask, &sigmask);
775 return (kern_sigsuspend(td, sigmask));
776 }
777
778 int
779 linux_rt_sigsuspend(struct thread *td, struct linux_rt_sigsuspend_args *uap)
780 {
781 l_sigset_t lmask;
782 sigset_t sigmask;
783 int error;
784
785 #ifdef DEBUG
786 if (ldebug(rt_sigsuspend))
787 printf(ARGS(rt_sigsuspend, "%p, %d"),
788 (void *)uap->newset, uap->sigsetsize);
789 #endif
790
791 if (uap->sigsetsize != sizeof(l_sigset_t))
792 return (EINVAL);
793
794 error = copyin(uap->newset, &lmask, sizeof(l_sigset_t));
795 if (error)
796 return (error);
797
798 linux_to_bsd_sigset(&lmask, &sigmask);
799 return (kern_sigsuspend(td, sigmask));
800 }
801
802 int
803 linux_pause(struct thread *td, struct linux_pause_args *args)
804 {
805 struct proc *p = td->td_proc;
806 sigset_t sigmask;
807
808 #ifdef DEBUG
809 if (ldebug(pause))
810 printf(ARGS(pause, ""));
811 #endif
812
813 PROC_LOCK(p);
814 sigmask = td->td_sigmask;
815 PROC_UNLOCK(p);
816 return (kern_sigsuspend(td, sigmask));
817 }
818
819 int
820 linux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap)
821 {
822 stack_t ss, oss;
823 l_stack_t lss;
824 int error;
825
826 #ifdef DEBUG
827 if (ldebug(sigaltstack))
828 printf(ARGS(sigaltstack, "%p, %p"), uap->uss, uap->uoss);
829 #endif
830
831 if (uap->uss != NULL) {
832 error = copyin(uap->uss, &lss, sizeof(l_stack_t));
833 if (error)
834 return (error);
835
836 ss.ss_sp = PTRIN(lss.ss_sp);
837 ss.ss_size = lss.ss_size;
838 ss.ss_flags = linux_to_bsd_sigaltstack(lss.ss_flags);
839 }
840 error = kern_sigaltstack(td, (uap->uss != NULL) ? &ss : NULL,
841 (uap->uoss != NULL) ? &oss : NULL);
842 if (!error && uap->uoss != NULL) {
843 lss.ss_sp = PTROUT(oss.ss_sp);
844 lss.ss_size = oss.ss_size;
845 lss.ss_flags = bsd_to_linux_sigaltstack(oss.ss_flags);
846 error = copyout(&lss, uap->uoss, sizeof(l_stack_t));
847 }
848
849 return (error);
850 }
851
852 int
853 linux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args)
854 {
855 struct ftruncate_args sa;
856
857 #ifdef DEBUG
858 if (ldebug(ftruncate64))
859 printf(ARGS(ftruncate64, "%u, %jd"), args->fd,
860 (intmax_t)args->length);
861 #endif
862
863 sa.fd = args->fd;
864 sa.length = args->length;
865 return ftruncate(td, &sa);
866 }
867
868 int
869 linux_gettimeofday(struct thread *td, struct linux_gettimeofday_args *uap)
870 {
871 struct timeval atv;
872 l_timeval atv32;
873 struct timezone rtz;
874 int error = 0;
875
876 if (uap->tp) {
877 microtime(&atv);
878 atv32.tv_sec = atv.tv_sec;
879 atv32.tv_usec = atv.tv_usec;
880 error = copyout(&atv32, uap->tp, sizeof(atv32));
881 }
882 if (error == 0 && uap->tzp != NULL) {
883 rtz.tz_minuteswest = tz_minuteswest;
884 rtz.tz_dsttime = tz_dsttime;
885 error = copyout(&rtz, uap->tzp, sizeof(rtz));
886 }
887 return (error);
888 }
889
890 int
891 linux_settimeofday(struct thread *td, struct linux_settimeofday_args *uap)
892 {
893 l_timeval atv32;
894 struct timeval atv, *tvp;
895 struct timezone atz, *tzp;
896 int error;
897
898 if (uap->tp) {
899 error = copyin(uap->tp, &atv32, sizeof(atv32));
900 if (error)
901 return (error);
902 atv.tv_sec = atv32.tv_sec;
903 atv.tv_usec = atv32.tv_usec;
904 tvp = &atv;
905 } else
906 tvp = NULL;
907 if (uap->tzp) {
908 error = copyin(uap->tzp, &atz, sizeof(atz));
909 if (error)
910 return (error);
911 tzp = &atz;
912 } else
913 tzp = NULL;
914 return (kern_settimeofday(td, tvp, tzp));
915 }
916
917 int
918 linux_getrusage(struct thread *td, struct linux_getrusage_args *uap)
919 {
920 struct l_rusage s32;
921 struct rusage s;
922 int error;
923
924 error = kern_getrusage(td, uap->who, &s);
925 if (error != 0)
926 return (error);
927 if (uap->rusage != NULL) {
928 bsd_to_linux_rusage(&s, &s32);
929 error = copyout(&s32, uap->rusage, sizeof(s32));
930 }
931 return (error);
932 }
933
934 int
935 linux_sched_rr_get_interval(struct thread *td,
936 struct linux_sched_rr_get_interval_args *uap)
937 {
938 struct timespec ts;
939 struct l_timespec ts32;
940 int error;
941
942 error = kern_sched_rr_get_interval(td, uap->pid, &ts);
943 if (error != 0)
944 return (error);
945 ts32.tv_sec = ts.tv_sec;
946 ts32.tv_nsec = ts.tv_nsec;
947 return (copyout(&ts32, uap->interval, sizeof(ts32)));
948 }
949
950 int
951 linux_set_thread_area(struct thread *td,
952 struct linux_set_thread_area_args *args)
953 {
954 struct l_user_desc info;
955 struct user_segment_descriptor sd;
956 struct pcb *pcb;
957 int a[2];
958 int error;
959
960 error = copyin(args->desc, &info, sizeof(struct l_user_desc));
961 if (error)
962 return (error);
963
964 #ifdef DEBUG
965 if (ldebug(set_thread_area))
966 printf(ARGS(set_thread_area, "%i, %x, %x, %i, %i, %i, "
967 "%i, %i, %i"), info.entry_number, info.base_addr,
968 info.limit, info.seg_32bit, info.contents,
969 info.read_exec_only, info.limit_in_pages,
970 info.seg_not_present, info.useable);
971 #endif
972
973 /*
974 * Semantics of Linux version: every thread in the system has array
975 * of three TLS descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown.
976 * This syscall loads one of the selected TLS decriptors with a value
977 * and also loads GDT descriptors 6, 7 and 8 with the content of
978 * the per-thread descriptors.
979 *
980 * Semantics of FreeBSD version: I think we can ignore that Linux has
981 * three per-thread descriptors and use just the first one.
982 * The tls_array[] is used only in [gs]et_thread_area() syscalls and
983 * for loading the GDT descriptors. We use just one GDT descriptor
984 * for TLS, so we will load just one.
985 *
986 * XXX: This doesn't work when a user space process tries to use more
987 * than one TLS segment. Comment in the Linux source says wine might
988 * do this.
989 */
990
991 /*
992 * GLIBC reads current %gs and call set_thread_area() with it.
993 * We should let GUDATA_SEL and GUGS32_SEL proceed as well because
994 * we use these segments.
995 */
996 switch (info.entry_number) {
997 case GUGS32_SEL:
998 case GUDATA_SEL:
999 case 6:
1000 case -1:
1001 info.entry_number = GUGS32_SEL;
1002 break;
1003 default:
1004 return (EINVAL);
1005 }
1006
1007 /*
1008 * We have to copy out the GDT entry we use.
1009 *
1010 * XXX: What if a user space program does not check the return value
1011 * and tries to use 6, 7 or 8?
1012 */
1013 error = copyout(&info, args->desc, sizeof(struct l_user_desc));
1014 if (error)
1015 return (error);
1016
1017 if (LINUX_LDT_empty(&info)) {
1018 a[0] = 0;
1019 a[1] = 0;
1020 } else {
1021 a[0] = LINUX_LDT_entry_a(&info);
1022 a[1] = LINUX_LDT_entry_b(&info);
1023 }
1024
1025 memcpy(&sd, &a, sizeof(a));
1026 #ifdef DEBUG
1027 if (ldebug(set_thread_area))
1028 printf("Segment created in set_thread_area: "
1029 "lobase: %x, hibase: %x, lolimit: %x, hilimit: %x, "
1030 "type: %i, dpl: %i, p: %i, xx: %i, long: %i, "
1031 "def32: %i, gran: %i\n",
1032 sd.sd_lobase,
1033 sd.sd_hibase,
1034 sd.sd_lolimit,
1035 sd.sd_hilimit,
1036 sd.sd_type,
1037 sd.sd_dpl,
1038 sd.sd_p,
1039 sd.sd_xx,
1040 sd.sd_long,
1041 sd.sd_def32,
1042 sd.sd_gran);
1043 #endif
1044
1045 pcb = td->td_pcb;
1046 pcb->pcb_gsbase = (register_t)info.base_addr;
1047 set_pcb_flags(pcb, PCB_32BIT | PCB_GS32BIT);
1048 update_gdt_gsbase(td, info.base_addr);
1049
1050 return (0);
1051 }
1052
1053 int
1054 linux_wait4(struct thread *td, struct linux_wait4_args *args)
1055 {
1056 int error, options;
1057 struct rusage ru, *rup;
1058 struct l_rusage lru;
1059
1060 #ifdef DEBUG
1061 if (ldebug(wait4))
1062 printf(ARGS(wait4, "%d, %p, %d, %p"),
1063 args->pid, (void *)args->status, args->options,
1064 (void *)args->rusage);
1065 #endif
1066
1067 options = (args->options & (WNOHANG | WUNTRACED));
1068 /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
1069 if (args->options & __WCLONE)
1070 options |= WLINUXCLONE;
1071
1072 if (args->rusage != NULL)
1073 rup = &ru;
1074 else
1075 rup = NULL;
1076 error = linux_common_wait(td, args->pid, args->status, options, rup);
1077 if (error)
1078 return (error);
1079 if (args->rusage != NULL) {
1080 bsd_to_linux_rusage(rup, &lru);
1081 error = copyout(&lru, args->rusage, sizeof(lru));
1082 }
1083
1084 return (error);
1085 }
Cache object: 787465a30bfc29533895a47661153800
|