1 /*-
2 * Copyright (c) 2004 Tim J. Robbins
3 * Copyright (c) 2002 Doug Rabson
4 * Copyright (c) 2000 Marcel Moolenaar
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer
12 * in this position and unchanged.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. The name of the author may not be used to endorse or promote products
17 * derived from this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD: releng/10.2/sys/amd64/linux32/linux32_machdep.c 283359 2015-05-24 07:32:02Z kib $");
33
34 #include <sys/param.h>
35 #include <sys/kernel.h>
36 #include <sys/systm.h>
37 #include <sys/capsicum.h>
38 #include <sys/file.h>
39 #include <sys/fcntl.h>
40 #include <sys/clock.h>
41 #include <sys/imgact.h>
42 #include <sys/limits.h>
43 #include <sys/lock.h>
44 #include <sys/malloc.h>
45 #include <sys/mman.h>
46 #include <sys/mutex.h>
47 #include <sys/priv.h>
48 #include <sys/proc.h>
49 #include <sys/resource.h>
50 #include <sys/resourcevar.h>
51 #include <sys/sched.h>
52 #include <sys/syscallsubr.h>
53 #include <sys/sysproto.h>
54 #include <sys/unistd.h>
55 #include <sys/wait.h>
56
57 #include <machine/frame.h>
58 #include <machine/pcb.h>
59 #include <machine/psl.h>
60 #include <machine/segments.h>
61 #include <machine/specialreg.h>
62
63 #include <vm/vm.h>
64 #include <vm/pmap.h>
65 #include <vm/vm_map.h>
66
67 #include <compat/freebsd32/freebsd32_util.h>
68 #include <amd64/linux32/linux.h>
69 #include <amd64/linux32/linux32_proto.h>
70 #include <compat/linux/linux_ipc.h>
71 #include <compat/linux/linux_misc.h>
72 #include <compat/linux/linux_signal.h>
73 #include <compat/linux/linux_util.h>
74 #include <compat/linux/linux_emul.h>
75
76 struct l_old_select_argv {
77 l_int nfds;
78 l_uintptr_t readfds;
79 l_uintptr_t writefds;
80 l_uintptr_t exceptfds;
81 l_uintptr_t timeout;
82 } __packed;
83
84 int
85 linux_to_bsd_sigaltstack(int lsa)
86 {
87 int bsa = 0;
88
89 if (lsa & LINUX_SS_DISABLE)
90 bsa |= SS_DISABLE;
91 if (lsa & LINUX_SS_ONSTACK)
92 bsa |= SS_ONSTACK;
93 return (bsa);
94 }
95
96 static int linux_mmap_common(struct thread *td, l_uintptr_t addr,
97 l_size_t len, l_int prot, l_int flags, l_int fd,
98 l_loff_t pos);
99
100 int
101 bsd_to_linux_sigaltstack(int bsa)
102 {
103 int lsa = 0;
104
105 if (bsa & SS_DISABLE)
106 lsa |= LINUX_SS_DISABLE;
107 if (bsa & SS_ONSTACK)
108 lsa |= LINUX_SS_ONSTACK;
109 return (lsa);
110 }
111
112 static void
113 bsd_to_linux_rusage(struct rusage *ru, struct l_rusage *lru)
114 {
115
116 lru->ru_utime.tv_sec = ru->ru_utime.tv_sec;
117 lru->ru_utime.tv_usec = ru->ru_utime.tv_usec;
118 lru->ru_stime.tv_sec = ru->ru_stime.tv_sec;
119 lru->ru_stime.tv_usec = ru->ru_stime.tv_usec;
120 lru->ru_maxrss = ru->ru_maxrss;
121 lru->ru_ixrss = ru->ru_ixrss;
122 lru->ru_idrss = ru->ru_idrss;
123 lru->ru_isrss = ru->ru_isrss;
124 lru->ru_minflt = ru->ru_minflt;
125 lru->ru_majflt = ru->ru_majflt;
126 lru->ru_nswap = ru->ru_nswap;
127 lru->ru_inblock = ru->ru_inblock;
128 lru->ru_oublock = ru->ru_oublock;
129 lru->ru_msgsnd = ru->ru_msgsnd;
130 lru->ru_msgrcv = ru->ru_msgrcv;
131 lru->ru_nsignals = ru->ru_nsignals;
132 lru->ru_nvcsw = ru->ru_nvcsw;
133 lru->ru_nivcsw = ru->ru_nivcsw;
134 }
135
136 int
137 linux_execve(struct thread *td, struct linux_execve_args *args)
138 {
139 struct image_args eargs;
140 struct vmspace *oldvmspace;
141 char *path;
142 int error;
143
144 LCONVPATHEXIST(td, args->path, &path);
145
146 #ifdef DEBUG
147 if (ldebug(execve))
148 printf(ARGS(execve, "%s"), path);
149 #endif
150
151 error = pre_execve(td, &oldvmspace);
152 if (error != 0) {
153 free(path, M_TEMP);
154 return (error);
155 }
156 error = freebsd32_exec_copyin_args(&eargs, path, UIO_SYSSPACE,
157 args->argp, args->envp);
158 free(path, M_TEMP);
159 if (error == 0)
160 error = kern_execve(td, &eargs, NULL);
161 if (error == 0) {
162 /* Linux process can execute FreeBSD one, do not attempt
163 * to create emuldata for such process using
164 * linux_proc_init, this leads to a panic on KASSERT
165 * because such process has p->p_emuldata == NULL.
166 */
167 if (SV_PROC_ABI(td->td_proc) == SV_ABI_LINUX)
168 error = linux_proc_init(td, 0, 0);
169 }
170 post_execve(td, error, oldvmspace);
171 return (error);
172 }
173
174 CTASSERT(sizeof(struct l_iovec32) == 8);
175
176 static int
177 linux32_copyinuio(struct l_iovec32 *iovp, l_ulong iovcnt, struct uio **uiop)
178 {
179 struct l_iovec32 iov32;
180 struct iovec *iov;
181 struct uio *uio;
182 uint32_t iovlen;
183 int error, i;
184
185 *uiop = NULL;
186 if (iovcnt > UIO_MAXIOV)
187 return (EINVAL);
188 iovlen = iovcnt * sizeof(struct iovec);
189 uio = malloc(iovlen + sizeof(*uio), M_IOV, M_WAITOK);
190 iov = (struct iovec *)(uio + 1);
191 for (i = 0; i < iovcnt; i++) {
192 error = copyin(&iovp[i], &iov32, sizeof(struct l_iovec32));
193 if (error) {
194 free(uio, M_IOV);
195 return (error);
196 }
197 iov[i].iov_base = PTRIN(iov32.iov_base);
198 iov[i].iov_len = iov32.iov_len;
199 }
200 uio->uio_iov = iov;
201 uio->uio_iovcnt = iovcnt;
202 uio->uio_segflg = UIO_USERSPACE;
203 uio->uio_offset = -1;
204 uio->uio_resid = 0;
205 for (i = 0; i < iovcnt; i++) {
206 if (iov->iov_len > INT_MAX - uio->uio_resid) {
207 free(uio, M_IOV);
208 return (EINVAL);
209 }
210 uio->uio_resid += iov->iov_len;
211 iov++;
212 }
213 *uiop = uio;
214 return (0);
215 }
216
217 int
218 linux32_copyiniov(struct l_iovec32 *iovp32, l_ulong iovcnt, struct iovec **iovp,
219 int error)
220 {
221 struct l_iovec32 iov32;
222 struct iovec *iov;
223 uint32_t iovlen;
224 int i;
225
226 *iovp = NULL;
227 if (iovcnt > UIO_MAXIOV)
228 return (error);
229 iovlen = iovcnt * sizeof(struct iovec);
230 iov = malloc(iovlen, M_IOV, M_WAITOK);
231 for (i = 0; i < iovcnt; i++) {
232 error = copyin(&iovp32[i], &iov32, sizeof(struct l_iovec32));
233 if (error) {
234 free(iov, M_IOV);
235 return (error);
236 }
237 iov[i].iov_base = PTRIN(iov32.iov_base);
238 iov[i].iov_len = iov32.iov_len;
239 }
240 *iovp = iov;
241 return(0);
242
243 }
244
245 int
246 linux_readv(struct thread *td, struct linux_readv_args *uap)
247 {
248 struct uio *auio;
249 int error;
250
251 error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio);
252 if (error)
253 return (error);
254 error = kern_readv(td, uap->fd, auio);
255 free(auio, M_IOV);
256 return (error);
257 }
258
259 int
260 linux_writev(struct thread *td, struct linux_writev_args *uap)
261 {
262 struct uio *auio;
263 int error;
264
265 error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio);
266 if (error)
267 return (error);
268 error = kern_writev(td, uap->fd, auio);
269 free(auio, M_IOV);
270 return (error);
271 }
272
273 struct l_ipc_kludge {
274 l_uintptr_t msgp;
275 l_long msgtyp;
276 } __packed;
277
278 int
279 linux_ipc(struct thread *td, struct linux_ipc_args *args)
280 {
281
282 switch (args->what & 0xFFFF) {
283 case LINUX_SEMOP: {
284 struct linux_semop_args a;
285
286 a.semid = args->arg1;
287 a.tsops = args->ptr;
288 a.nsops = args->arg2;
289 return (linux_semop(td, &a));
290 }
291 case LINUX_SEMGET: {
292 struct linux_semget_args a;
293
294 a.key = args->arg1;
295 a.nsems = args->arg2;
296 a.semflg = args->arg3;
297 return (linux_semget(td, &a));
298 }
299 case LINUX_SEMCTL: {
300 struct linux_semctl_args a;
301 int error;
302
303 a.semid = args->arg1;
304 a.semnum = args->arg2;
305 a.cmd = args->arg3;
306 error = copyin(args->ptr, &a.arg, sizeof(a.arg));
307 if (error)
308 return (error);
309 return (linux_semctl(td, &a));
310 }
311 case LINUX_MSGSND: {
312 struct linux_msgsnd_args a;
313
314 a.msqid = args->arg1;
315 a.msgp = args->ptr;
316 a.msgsz = args->arg2;
317 a.msgflg = args->arg3;
318 return (linux_msgsnd(td, &a));
319 }
320 case LINUX_MSGRCV: {
321 struct linux_msgrcv_args a;
322
323 a.msqid = args->arg1;
324 a.msgsz = args->arg2;
325 a.msgflg = args->arg3;
326 if ((args->what >> 16) == 0) {
327 struct l_ipc_kludge tmp;
328 int error;
329
330 if (args->ptr == 0)
331 return (EINVAL);
332 error = copyin(args->ptr, &tmp, sizeof(tmp));
333 if (error)
334 return (error);
335 a.msgp = PTRIN(tmp.msgp);
336 a.msgtyp = tmp.msgtyp;
337 } else {
338 a.msgp = args->ptr;
339 a.msgtyp = args->arg5;
340 }
341 return (linux_msgrcv(td, &a));
342 }
343 case LINUX_MSGGET: {
344 struct linux_msgget_args a;
345
346 a.key = args->arg1;
347 a.msgflg = args->arg2;
348 return (linux_msgget(td, &a));
349 }
350 case LINUX_MSGCTL: {
351 struct linux_msgctl_args a;
352
353 a.msqid = args->arg1;
354 a.cmd = args->arg2;
355 a.buf = args->ptr;
356 return (linux_msgctl(td, &a));
357 }
358 case LINUX_SHMAT: {
359 struct linux_shmat_args a;
360
361 a.shmid = args->arg1;
362 a.shmaddr = args->ptr;
363 a.shmflg = args->arg2;
364 a.raddr = PTRIN((l_uint)args->arg3);
365 return (linux_shmat(td, &a));
366 }
367 case LINUX_SHMDT: {
368 struct linux_shmdt_args a;
369
370 a.shmaddr = args->ptr;
371 return (linux_shmdt(td, &a));
372 }
373 case LINUX_SHMGET: {
374 struct linux_shmget_args a;
375
376 a.key = args->arg1;
377 a.size = args->arg2;
378 a.shmflg = args->arg3;
379 return (linux_shmget(td, &a));
380 }
381 case LINUX_SHMCTL: {
382 struct linux_shmctl_args a;
383
384 a.shmid = args->arg1;
385 a.cmd = args->arg2;
386 a.buf = args->ptr;
387 return (linux_shmctl(td, &a));
388 }
389 default:
390 break;
391 }
392
393 return (EINVAL);
394 }
395
396 int
397 linux_old_select(struct thread *td, struct linux_old_select_args *args)
398 {
399 struct l_old_select_argv linux_args;
400 struct linux_select_args newsel;
401 int error;
402
403 #ifdef DEBUG
404 if (ldebug(old_select))
405 printf(ARGS(old_select, "%p"), args->ptr);
406 #endif
407
408 error = copyin(args->ptr, &linux_args, sizeof(linux_args));
409 if (error)
410 return (error);
411
412 newsel.nfds = linux_args.nfds;
413 newsel.readfds = PTRIN(linux_args.readfds);
414 newsel.writefds = PTRIN(linux_args.writefds);
415 newsel.exceptfds = PTRIN(linux_args.exceptfds);
416 newsel.timeout = PTRIN(linux_args.timeout);
417 return (linux_select(td, &newsel));
418 }
419
420 int
421 linux_set_cloned_tls(struct thread *td, void *desc)
422 {
423 struct user_segment_descriptor sd;
424 struct l_user_desc info;
425 struct pcb *pcb;
426 int error;
427 int a[2];
428
429 error = copyin(desc, &info, sizeof(struct l_user_desc));
430 if (error) {
431 printf(LMSG("copyin failed!"));
432 } else {
433 /* We might copy out the entry_number as GUGS32_SEL. */
434 info.entry_number = GUGS32_SEL;
435 error = copyout(&info, desc, sizeof(struct l_user_desc));
436 if (error)
437 printf(LMSG("copyout failed!"));
438
439 a[0] = LINUX_LDT_entry_a(&info);
440 a[1] = LINUX_LDT_entry_b(&info);
441
442 memcpy(&sd, &a, sizeof(a));
443 #ifdef DEBUG
444 if (ldebug(clone))
445 printf("Segment created in clone with "
446 "CLONE_SETTLS: lobase: %x, hibase: %x, "
447 "lolimit: %x, hilimit: %x, type: %i, "
448 "dpl: %i, p: %i, xx: %i, long: %i, "
449 "def32: %i, gran: %i\n", sd.sd_lobase,
450 sd.sd_hibase, sd.sd_lolimit, sd.sd_hilimit,
451 sd.sd_type, sd.sd_dpl, sd.sd_p, sd.sd_xx,
452 sd.sd_long, sd.sd_def32, sd.sd_gran);
453 #endif
454 pcb = td->td_pcb;
455 pcb->pcb_gsbase = (register_t)info.base_addr;
456 /* XXXKIB pcb->pcb_gs32sd = sd; */
457 td->td_frame->tf_gs = GSEL(GUGS32_SEL, SEL_UPL);
458 set_pcb_flags(pcb, PCB_32BIT);
459 }
460
461 return (error);
462 }
463
464 int
465 linux_set_upcall_kse(struct thread *td, register_t stack)
466 {
467
468 td->td_frame->tf_rsp = stack;
469
470 return (0);
471 }
472
/*
 * Sizing used by linux_mmap_common() for LINUX_MAP_GROWSDOWN mappings:
 * requests no larger than (STACK_SIZE - GUARD_SIZE) are widened to exactly
 * that size.
 */
#define	STACK_SIZE	(2 * 1024 * 1024)
#define	GUARD_SIZE	(4 * PAGE_SIZE)
475
476 int
477 linux_mmap2(struct thread *td, struct linux_mmap2_args *args)
478 {
479
480 #ifdef DEBUG
481 if (ldebug(mmap2))
482 printf(ARGS(mmap2, "0x%08x, %d, %d, 0x%08x, %d, %d"),
483 args->addr, args->len, args->prot,
484 args->flags, args->fd, args->pgoff);
485 #endif
486
487 return (linux_mmap_common(td, PTROUT(args->addr), args->len, args->prot,
488 args->flags, args->fd, (uint64_t)(uint32_t)args->pgoff *
489 PAGE_SIZE));
490 }
491
492 int
493 linux_mmap(struct thread *td, struct linux_mmap_args *args)
494 {
495 int error;
496 struct l_mmap_argv linux_args;
497
498 error = copyin(args->ptr, &linux_args, sizeof(linux_args));
499 if (error)
500 return (error);
501
502 #ifdef DEBUG
503 if (ldebug(mmap))
504 printf(ARGS(mmap, "0x%08x, %d, %d, 0x%08x, %d, %d"),
505 linux_args.addr, linux_args.len, linux_args.prot,
506 linux_args.flags, linux_args.fd, linux_args.pgoff);
507 #endif
508
509 return (linux_mmap_common(td, linux_args.addr, linux_args.len,
510 linux_args.prot, linux_args.flags, linux_args.fd,
511 (uint32_t)linux_args.pgoff));
512 }
513
514 static int
515 linux_mmap_common(struct thread *td, l_uintptr_t addr, l_size_t len, l_int prot,
516 l_int flags, l_int fd, l_loff_t pos)
517 {
518 struct proc *p = td->td_proc;
519 struct mmap_args /* {
520 caddr_t addr;
521 size_t len;
522 int prot;
523 int flags;
524 int fd;
525 long pad;
526 off_t pos;
527 } */ bsd_args;
528 int error;
529 struct file *fp;
530 cap_rights_t rights;
531
532 error = 0;
533 bsd_args.flags = 0;
534 fp = NULL;
535
536 /*
537 * Linux mmap(2):
538 * You must specify exactly one of MAP_SHARED and MAP_PRIVATE
539 */
540 if (!((flags & LINUX_MAP_SHARED) ^ (flags & LINUX_MAP_PRIVATE)))
541 return (EINVAL);
542
543 if (flags & LINUX_MAP_SHARED)
544 bsd_args.flags |= MAP_SHARED;
545 if (flags & LINUX_MAP_PRIVATE)
546 bsd_args.flags |= MAP_PRIVATE;
547 if (flags & LINUX_MAP_FIXED)
548 bsd_args.flags |= MAP_FIXED;
549 if (flags & LINUX_MAP_ANON) {
550 /* Enforce pos to be on page boundary, then ignore. */
551 if ((pos & PAGE_MASK) != 0)
552 return (EINVAL);
553 pos = 0;
554 bsd_args.flags |= MAP_ANON;
555 } else
556 bsd_args.flags |= MAP_NOSYNC;
557 if (flags & LINUX_MAP_GROWSDOWN)
558 bsd_args.flags |= MAP_STACK;
559
560 /*
561 * PROT_READ, PROT_WRITE, or PROT_EXEC implies PROT_READ and PROT_EXEC
562 * on Linux/i386. We do this to ensure maximum compatibility.
563 * Linux/ia64 does the same in i386 emulation mode.
564 */
565 bsd_args.prot = prot;
566 if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC))
567 bsd_args.prot |= PROT_READ | PROT_EXEC;
568
569 /* Linux does not check file descriptor when MAP_ANONYMOUS is set. */
570 bsd_args.fd = (bsd_args.flags & MAP_ANON) ? -1 : fd;
571 if (bsd_args.fd != -1) {
572 /*
573 * Linux follows Solaris mmap(2) description:
574 * The file descriptor fildes is opened with
575 * read permission, regardless of the
576 * protection options specified.
577 */
578
579 error = fget(td, bsd_args.fd,
580 cap_rights_init(&rights, CAP_MMAP), &fp);
581 if (error != 0)
582 return (error);
583 if (fp->f_type != DTYPE_VNODE) {
584 fdrop(fp, td);
585 return (EINVAL);
586 }
587
588 /* Linux mmap() just fails for O_WRONLY files */
589 if (!(fp->f_flag & FREAD)) {
590 fdrop(fp, td);
591 return (EACCES);
592 }
593
594 fdrop(fp, td);
595 }
596
597 if (flags & LINUX_MAP_GROWSDOWN) {
598 /*
599 * The Linux MAP_GROWSDOWN option does not limit auto
600 * growth of the region. Linux mmap with this option
601 * takes as addr the inital BOS, and as len, the initial
602 * region size. It can then grow down from addr without
603 * limit. However, Linux threads has an implicit internal
604 * limit to stack size of STACK_SIZE. Its just not
605 * enforced explicitly in Linux. But, here we impose
606 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
607 * region, since we can do this with our mmap.
608 *
609 * Our mmap with MAP_STACK takes addr as the maximum
610 * downsize limit on BOS, and as len the max size of
611 * the region. It then maps the top SGROWSIZ bytes,
612 * and auto grows the region down, up to the limit
613 * in addr.
614 *
615 * If we don't use the MAP_STACK option, the effect
616 * of this code is to allocate a stack region of a
617 * fixed size of (STACK_SIZE - GUARD_SIZE).
618 */
619
620 if ((caddr_t)PTRIN(addr) + len > p->p_vmspace->vm_maxsaddr) {
621 /*
622 * Some Linux apps will attempt to mmap
623 * thread stacks near the top of their
624 * address space. If their TOS is greater
625 * than vm_maxsaddr, vm_map_growstack()
626 * will confuse the thread stack with the
627 * process stack and deliver a SEGV if they
628 * attempt to grow the thread stack past their
629 * current stacksize rlimit. To avoid this,
630 * adjust vm_maxsaddr upwards to reflect
631 * the current stacksize rlimit rather
632 * than the maximum possible stacksize.
633 * It would be better to adjust the
634 * mmap'ed region, but some apps do not check
635 * mmap's return value.
636 */
637 PROC_LOCK(p);
638 p->p_vmspace->vm_maxsaddr = (char *)LINUX32_USRSTACK -
639 lim_cur(p, RLIMIT_STACK);
640 PROC_UNLOCK(p);
641 }
642
643 /*
644 * This gives us our maximum stack size and a new BOS.
645 * If we're using VM_STACK, then mmap will just map
646 * the top SGROWSIZ bytes, and let the stack grow down
647 * to the limit at BOS. If we're not using VM_STACK
648 * we map the full stack, since we don't have a way
649 * to autogrow it.
650 */
651 if (len > STACK_SIZE - GUARD_SIZE) {
652 bsd_args.addr = (caddr_t)PTRIN(addr);
653 bsd_args.len = len;
654 } else {
655 bsd_args.addr = (caddr_t)PTRIN(addr) -
656 (STACK_SIZE - GUARD_SIZE - len);
657 bsd_args.len = STACK_SIZE - GUARD_SIZE;
658 }
659 } else {
660 bsd_args.addr = (caddr_t)PTRIN(addr);
661 bsd_args.len = len;
662 }
663 bsd_args.pos = pos;
664
665 #ifdef DEBUG
666 if (ldebug(mmap))
667 printf("-> %s(%p, %d, %d, 0x%08x, %d, 0x%x)\n",
668 __func__,
669 (void *)bsd_args.addr, (int)bsd_args.len, bsd_args.prot,
670 bsd_args.flags, bsd_args.fd, (int)bsd_args.pos);
671 #endif
672 error = sys_mmap(td, &bsd_args);
673 #ifdef DEBUG
674 if (ldebug(mmap))
675 printf("-> %s() return: 0x%x (0x%08x)\n",
676 __func__, error, (u_int)td->td_retval[0]);
677 #endif
678 return (error);
679 }
680
681 int
682 linux_mprotect(struct thread *td, struct linux_mprotect_args *uap)
683 {
684 struct mprotect_args bsd_args;
685
686 bsd_args.addr = uap->addr;
687 bsd_args.len = uap->len;
688 bsd_args.prot = uap->prot;
689 if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC))
690 bsd_args.prot |= PROT_READ | PROT_EXEC;
691 return (sys_mprotect(td, &bsd_args));
692 }
693
694 int
695 linux_iopl(struct thread *td, struct linux_iopl_args *args)
696 {
697 int error;
698
699 if (args->level < 0 || args->level > 3)
700 return (EINVAL);
701 if ((error = priv_check(td, PRIV_IO)) != 0)
702 return (error);
703 if ((error = securelevel_gt(td->td_ucred, 0)) != 0)
704 return (error);
705 td->td_frame->tf_rflags = (td->td_frame->tf_rflags & ~PSL_IOPL) |
706 (args->level * (PSL_IOPL / 3));
707
708 return (0);
709 }
710
711 int
712 linux_sigaction(struct thread *td, struct linux_sigaction_args *args)
713 {
714 l_osigaction_t osa;
715 l_sigaction_t act, oact;
716 int error;
717
718 #ifdef DEBUG
719 if (ldebug(sigaction))
720 printf(ARGS(sigaction, "%d, %p, %p"),
721 args->sig, (void *)args->nsa, (void *)args->osa);
722 #endif
723
724 if (args->nsa != NULL) {
725 error = copyin(args->nsa, &osa, sizeof(l_osigaction_t));
726 if (error)
727 return (error);
728 act.lsa_handler = osa.lsa_handler;
729 act.lsa_flags = osa.lsa_flags;
730 act.lsa_restorer = osa.lsa_restorer;
731 LINUX_SIGEMPTYSET(act.lsa_mask);
732 act.lsa_mask.__bits[0] = osa.lsa_mask;
733 }
734
735 error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL,
736 args->osa ? &oact : NULL);
737
738 if (args->osa != NULL && !error) {
739 osa.lsa_handler = oact.lsa_handler;
740 osa.lsa_flags = oact.lsa_flags;
741 osa.lsa_restorer = oact.lsa_restorer;
742 osa.lsa_mask = oact.lsa_mask.__bits[0];
743 error = copyout(&osa, args->osa, sizeof(l_osigaction_t));
744 }
745
746 return (error);
747 }
748
749 /*
750 * Linux has two extra args, restart and oldmask. We don't use these,
751 * but it seems that "restart" is actually a context pointer that
752 * enables the signal to happen with a different register set.
753 */
754 int
755 linux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args)
756 {
757 sigset_t sigmask;
758 l_sigset_t mask;
759
760 #ifdef DEBUG
761 if (ldebug(sigsuspend))
762 printf(ARGS(sigsuspend, "%08lx"), (unsigned long)args->mask);
763 #endif
764
765 LINUX_SIGEMPTYSET(mask);
766 mask.__bits[0] = args->mask;
767 linux_to_bsd_sigset(&mask, &sigmask);
768 return (kern_sigsuspend(td, sigmask));
769 }
770
771 int
772 linux_rt_sigsuspend(struct thread *td, struct linux_rt_sigsuspend_args *uap)
773 {
774 l_sigset_t lmask;
775 sigset_t sigmask;
776 int error;
777
778 #ifdef DEBUG
779 if (ldebug(rt_sigsuspend))
780 printf(ARGS(rt_sigsuspend, "%p, %d"),
781 (void *)uap->newset, uap->sigsetsize);
782 #endif
783
784 if (uap->sigsetsize != sizeof(l_sigset_t))
785 return (EINVAL);
786
787 error = copyin(uap->newset, &lmask, sizeof(l_sigset_t));
788 if (error)
789 return (error);
790
791 linux_to_bsd_sigset(&lmask, &sigmask);
792 return (kern_sigsuspend(td, sigmask));
793 }
794
795 int
796 linux_pause(struct thread *td, struct linux_pause_args *args)
797 {
798 struct proc *p = td->td_proc;
799 sigset_t sigmask;
800
801 #ifdef DEBUG
802 if (ldebug(pause))
803 printf(ARGS(pause, ""));
804 #endif
805
806 PROC_LOCK(p);
807 sigmask = td->td_sigmask;
808 PROC_UNLOCK(p);
809 return (kern_sigsuspend(td, sigmask));
810 }
811
812 int
813 linux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap)
814 {
815 stack_t ss, oss;
816 l_stack_t lss;
817 int error;
818
819 #ifdef DEBUG
820 if (ldebug(sigaltstack))
821 printf(ARGS(sigaltstack, "%p, %p"), uap->uss, uap->uoss);
822 #endif
823
824 if (uap->uss != NULL) {
825 error = copyin(uap->uss, &lss, sizeof(l_stack_t));
826 if (error)
827 return (error);
828
829 ss.ss_sp = PTRIN(lss.ss_sp);
830 ss.ss_size = lss.ss_size;
831 ss.ss_flags = linux_to_bsd_sigaltstack(lss.ss_flags);
832 }
833 error = kern_sigaltstack(td, (uap->uss != NULL) ? &ss : NULL,
834 (uap->uoss != NULL) ? &oss : NULL);
835 if (!error && uap->uoss != NULL) {
836 lss.ss_sp = PTROUT(oss.ss_sp);
837 lss.ss_size = oss.ss_size;
838 lss.ss_flags = bsd_to_linux_sigaltstack(oss.ss_flags);
839 error = copyout(&lss, uap->uoss, sizeof(l_stack_t));
840 }
841
842 return (error);
843 }
844
845 int
846 linux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args)
847 {
848 struct ftruncate_args sa;
849
850 #ifdef DEBUG
851 if (ldebug(ftruncate64))
852 printf(ARGS(ftruncate64, "%u, %jd"), args->fd,
853 (intmax_t)args->length);
854 #endif
855
856 sa.fd = args->fd;
857 sa.length = args->length;
858 return sys_ftruncate(td, &sa);
859 }
860
861 int
862 linux_gettimeofday(struct thread *td, struct linux_gettimeofday_args *uap)
863 {
864 struct timeval atv;
865 l_timeval atv32;
866 struct timezone rtz;
867 int error = 0;
868
869 if (uap->tp) {
870 microtime(&atv);
871 atv32.tv_sec = atv.tv_sec;
872 atv32.tv_usec = atv.tv_usec;
873 error = copyout(&atv32, uap->tp, sizeof(atv32));
874 }
875 if (error == 0 && uap->tzp != NULL) {
876 rtz.tz_minuteswest = tz_minuteswest;
877 rtz.tz_dsttime = tz_dsttime;
878 error = copyout(&rtz, uap->tzp, sizeof(rtz));
879 }
880 return (error);
881 }
882
883 int
884 linux_settimeofday(struct thread *td, struct linux_settimeofday_args *uap)
885 {
886 l_timeval atv32;
887 struct timeval atv, *tvp;
888 struct timezone atz, *tzp;
889 int error;
890
891 if (uap->tp) {
892 error = copyin(uap->tp, &atv32, sizeof(atv32));
893 if (error)
894 return (error);
895 atv.tv_sec = atv32.tv_sec;
896 atv.tv_usec = atv32.tv_usec;
897 tvp = &atv;
898 } else
899 tvp = NULL;
900 if (uap->tzp) {
901 error = copyin(uap->tzp, &atz, sizeof(atz));
902 if (error)
903 return (error);
904 tzp = &atz;
905 } else
906 tzp = NULL;
907 return (kern_settimeofday(td, tvp, tzp));
908 }
909
910 int
911 linux_getrusage(struct thread *td, struct linux_getrusage_args *uap)
912 {
913 struct l_rusage s32;
914 struct rusage s;
915 int error;
916
917 error = kern_getrusage(td, uap->who, &s);
918 if (error != 0)
919 return (error);
920 if (uap->rusage != NULL) {
921 bsd_to_linux_rusage(&s, &s32);
922 error = copyout(&s32, uap->rusage, sizeof(s32));
923 }
924 return (error);
925 }
926
927 int
928 linux_sched_rr_get_interval(struct thread *td,
929 struct linux_sched_rr_get_interval_args *uap)
930 {
931 struct timespec ts;
932 struct l_timespec ts32;
933 int error;
934
935 error = kern_sched_rr_get_interval(td, uap->pid, &ts);
936 if (error != 0)
937 return (error);
938 ts32.tv_sec = ts.tv_sec;
939 ts32.tv_nsec = ts.tv_nsec;
940 return (copyout(&ts32, uap->interval, sizeof(ts32)));
941 }
942
943 int
944 linux_set_thread_area(struct thread *td,
945 struct linux_set_thread_area_args *args)
946 {
947 struct l_user_desc info;
948 struct user_segment_descriptor sd;
949 struct pcb *pcb;
950 int a[2];
951 int error;
952
953 error = copyin(args->desc, &info, sizeof(struct l_user_desc));
954 if (error)
955 return (error);
956
957 #ifdef DEBUG
958 if (ldebug(set_thread_area))
959 printf(ARGS(set_thread_area, "%i, %x, %x, %i, %i, %i, "
960 "%i, %i, %i"), info.entry_number, info.base_addr,
961 info.limit, info.seg_32bit, info.contents,
962 info.read_exec_only, info.limit_in_pages,
963 info.seg_not_present, info.useable);
964 #endif
965
966 /*
967 * Semantics of Linux version: every thread in the system has array
968 * of three TLS descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown.
969 * This syscall loads one of the selected TLS decriptors with a value
970 * and also loads GDT descriptors 6, 7 and 8 with the content of
971 * the per-thread descriptors.
972 *
973 * Semantics of FreeBSD version: I think we can ignore that Linux has
974 * three per-thread descriptors and use just the first one.
975 * The tls_array[] is used only in [gs]et_thread_area() syscalls and
976 * for loading the GDT descriptors. We use just one GDT descriptor
977 * for TLS, so we will load just one.
978 *
979 * XXX: This doesn't work when a user space process tries to use more
980 * than one TLS segment. Comment in the Linux source says wine might
981 * do this.
982 */
983
984 /*
985 * GLIBC reads current %gs and call set_thread_area() with it.
986 * We should let GUDATA_SEL and GUGS32_SEL proceed as well because
987 * we use these segments.
988 */
989 switch (info.entry_number) {
990 case GUGS32_SEL:
991 case GUDATA_SEL:
992 case 6:
993 case -1:
994 info.entry_number = GUGS32_SEL;
995 break;
996 default:
997 return (EINVAL);
998 }
999
1000 /*
1001 * We have to copy out the GDT entry we use.
1002 *
1003 * XXX: What if a user space program does not check the return value
1004 * and tries to use 6, 7 or 8?
1005 */
1006 error = copyout(&info, args->desc, sizeof(struct l_user_desc));
1007 if (error)
1008 return (error);
1009
1010 if (LINUX_LDT_empty(&info)) {
1011 a[0] = 0;
1012 a[1] = 0;
1013 } else {
1014 a[0] = LINUX_LDT_entry_a(&info);
1015 a[1] = LINUX_LDT_entry_b(&info);
1016 }
1017
1018 memcpy(&sd, &a, sizeof(a));
1019 #ifdef DEBUG
1020 if (ldebug(set_thread_area))
1021 printf("Segment created in set_thread_area: "
1022 "lobase: %x, hibase: %x, lolimit: %x, hilimit: %x, "
1023 "type: %i, dpl: %i, p: %i, xx: %i, long: %i, "
1024 "def32: %i, gran: %i\n",
1025 sd.sd_lobase,
1026 sd.sd_hibase,
1027 sd.sd_lolimit,
1028 sd.sd_hilimit,
1029 sd.sd_type,
1030 sd.sd_dpl,
1031 sd.sd_p,
1032 sd.sd_xx,
1033 sd.sd_long,
1034 sd.sd_def32,
1035 sd.sd_gran);
1036 #endif
1037
1038 pcb = td->td_pcb;
1039 pcb->pcb_gsbase = (register_t)info.base_addr;
1040 set_pcb_flags(pcb, PCB_32BIT);
1041 update_gdt_gsbase(td, info.base_addr);
1042
1043 return (0);
1044 }
1045
1046 int
1047 linux_wait4(struct thread *td, struct linux_wait4_args *args)
1048 {
1049 int error, options;
1050 struct rusage ru, *rup;
1051 struct l_rusage lru;
1052
1053 #ifdef DEBUG
1054 if (ldebug(wait4))
1055 printf(ARGS(wait4, "%d, %p, %d, %p"),
1056 args->pid, (void *)args->status, args->options,
1057 (void *)args->rusage);
1058 #endif
1059
1060 options = (args->options & (WNOHANG | WUNTRACED));
1061 /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
1062 if (args->options & __WCLONE)
1063 options |= WLINUXCLONE;
1064
1065 if (args->rusage != NULL)
1066 rup = &ru;
1067 else
1068 rup = NULL;
1069 error = linux_common_wait(td, args->pid, args->status, options, rup);
1070 if (error)
1071 return (error);
1072 if (args->rusage != NULL) {
1073 bsd_to_linux_rusage(rup, &lru);
1074 error = copyout(&lru, args->rusage, sizeof(lru));
1075 }
1076
1077 return (error);
1078 }
Cache object: b41292d5e4a81bc06b66c0889d9bc5f6
|