1 /*-
2 * Copyright (c) 2004 Tim J. Robbins
3 * Copyright (c) 2002 Doug Rabson
4 * Copyright (c) 2000 Marcel Moolenaar
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer
12 * in this position and unchanged.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. The name of the author may not be used to endorse or promote products
17 * derived from this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD: releng/10.3/sys/amd64/linux32/linux32_machdep.c 293600 2016-01-09 18:07:48Z dchagin $");
33
34 #include "opt_compat.h"
35
36 #include <sys/param.h>
37 #include <sys/kernel.h>
38 #include <sys/systm.h>
39 #include <sys/capsicum.h>
40 #include <sys/file.h>
41 #include <sys/fcntl.h>
42 #include <sys/clock.h>
43 #include <sys/imgact.h>
44 #include <sys/limits.h>
45 #include <sys/lock.h>
46 #include <sys/malloc.h>
47 #include <sys/mman.h>
48 #include <sys/mutex.h>
49 #include <sys/priv.h>
50 #include <sys/proc.h>
51 #include <sys/resource.h>
52 #include <sys/resourcevar.h>
53 #include <sys/syscallsubr.h>
54 #include <sys/sysproto.h>
55 #include <sys/unistd.h>
56 #include <sys/wait.h>
57
58 #include <machine/frame.h>
59 #include <machine/pcb.h>
60 #include <machine/psl.h>
61 #include <machine/segments.h>
62 #include <machine/specialreg.h>
63
64 #include <vm/vm.h>
65 #include <vm/pmap.h>
66 #include <vm/vm_map.h>
67
68 #include <compat/freebsd32/freebsd32_util.h>
69 #include <amd64/linux32/linux.h>
70 #include <amd64/linux32/linux32_proto.h>
71 #include <compat/linux/linux_ipc.h>
72 #include <compat/linux/linux_misc.h>
73 #include <compat/linux/linux_signal.h>
74 #include <compat/linux/linux_util.h>
75 #include <compat/linux/linux_emul.h>
76
/* Copies each field of a native struct rusage into a Linux l_rusage. */
static void bsd_to_linux_rusage(struct rusage *ru, struct l_rusage *lru);

/*
 * Argument block that linux_old_select() copies in from user space: the
 * select arguments packed into one structure.  The fd-set and timeout
 * members are l_uintptr_t values that the consumer converts with PTRIN().
 */
struct l_old_select_argv {
	l_int nfds;
	l_uintptr_t readfds;
	l_uintptr_t writefds;
	l_uintptr_t exceptfds;
	l_uintptr_t timeout;
} __packed;

/* Shared back end used by both linux_mmap() and linux_mmap2(). */
static int linux_mmap_common(struct thread *td, l_uintptr_t addr,
    l_size_t len, l_int prot, l_int flags, l_int fd,
    l_loff_t pos);
90
91 static void
92 bsd_to_linux_rusage(struct rusage *ru, struct l_rusage *lru)
93 {
94
95 lru->ru_utime.tv_sec = ru->ru_utime.tv_sec;
96 lru->ru_utime.tv_usec = ru->ru_utime.tv_usec;
97 lru->ru_stime.tv_sec = ru->ru_stime.tv_sec;
98 lru->ru_stime.tv_usec = ru->ru_stime.tv_usec;
99 lru->ru_maxrss = ru->ru_maxrss;
100 lru->ru_ixrss = ru->ru_ixrss;
101 lru->ru_idrss = ru->ru_idrss;
102 lru->ru_isrss = ru->ru_isrss;
103 lru->ru_minflt = ru->ru_minflt;
104 lru->ru_majflt = ru->ru_majflt;
105 lru->ru_nswap = ru->ru_nswap;
106 lru->ru_inblock = ru->ru_inblock;
107 lru->ru_oublock = ru->ru_oublock;
108 lru->ru_msgsnd = ru->ru_msgsnd;
109 lru->ru_msgrcv = ru->ru_msgrcv;
110 lru->ru_nsignals = ru->ru_nsignals;
111 lru->ru_nvcsw = ru->ru_nvcsw;
112 lru->ru_nivcsw = ru->ru_nivcsw;
113 }
114
115 int
116 linux_copyout_rusage(struct rusage *ru, void *uaddr)
117 {
118 struct l_rusage lru;
119
120 bsd_to_linux_rusage(ru, &lru);
121
122 return (copyout(&lru, uaddr, sizeof(struct l_rusage)));
123 }
124
125 int
126 linux_execve(struct thread *td, struct linux_execve_args *args)
127 {
128 struct image_args eargs;
129 char *path;
130 int error;
131
132 LCONVPATHEXIST(td, args->path, &path);
133
134 #ifdef DEBUG
135 if (ldebug(execve))
136 printf(ARGS(execve, "%s"), path);
137 #endif
138
139 error = freebsd32_exec_copyin_args(&eargs, path, UIO_SYSSPACE,
140 args->argp, args->envp);
141 free(path, M_TEMP);
142 if (error == 0)
143 error = linux_common_execve(td, &eargs);
144 return (error);
145 }
146
/*
 * Compile-time check that struct l_iovec32 is exactly 8 bytes; the
 * copyin loops below read user memory in units of that size.
 */
CTASSERT(sizeof(struct l_iovec32) == 8);
148
149 static int
150 linux32_copyinuio(struct l_iovec32 *iovp, l_ulong iovcnt, struct uio **uiop)
151 {
152 struct l_iovec32 iov32;
153 struct iovec *iov;
154 struct uio *uio;
155 uint32_t iovlen;
156 int error, i;
157
158 *uiop = NULL;
159 if (iovcnt > UIO_MAXIOV)
160 return (EINVAL);
161 iovlen = iovcnt * sizeof(struct iovec);
162 uio = malloc(iovlen + sizeof(*uio), M_IOV, M_WAITOK);
163 iov = (struct iovec *)(uio + 1);
164 for (i = 0; i < iovcnt; i++) {
165 error = copyin(&iovp[i], &iov32, sizeof(struct l_iovec32));
166 if (error) {
167 free(uio, M_IOV);
168 return (error);
169 }
170 iov[i].iov_base = PTRIN(iov32.iov_base);
171 iov[i].iov_len = iov32.iov_len;
172 }
173 uio->uio_iov = iov;
174 uio->uio_iovcnt = iovcnt;
175 uio->uio_segflg = UIO_USERSPACE;
176 uio->uio_offset = -1;
177 uio->uio_resid = 0;
178 for (i = 0; i < iovcnt; i++) {
179 if (iov->iov_len > INT_MAX - uio->uio_resid) {
180 free(uio, M_IOV);
181 return (EINVAL);
182 }
183 uio->uio_resid += iov->iov_len;
184 iov++;
185 }
186 *uiop = uio;
187 return (0);
188 }
189
190 int
191 linux32_copyiniov(struct l_iovec32 *iovp32, l_ulong iovcnt, struct iovec **iovp,
192 int error)
193 {
194 struct l_iovec32 iov32;
195 struct iovec *iov;
196 uint32_t iovlen;
197 int i;
198
199 *iovp = NULL;
200 if (iovcnt > UIO_MAXIOV)
201 return (error);
202 iovlen = iovcnt * sizeof(struct iovec);
203 iov = malloc(iovlen, M_IOV, M_WAITOK);
204 for (i = 0; i < iovcnt; i++) {
205 error = copyin(&iovp32[i], &iov32, sizeof(struct l_iovec32));
206 if (error) {
207 free(iov, M_IOV);
208 return (error);
209 }
210 iov[i].iov_base = PTRIN(iov32.iov_base);
211 iov[i].iov_len = iov32.iov_len;
212 }
213 *iovp = iov;
214 return(0);
215
216 }
217
218 int
219 linux_readv(struct thread *td, struct linux_readv_args *uap)
220 {
221 struct uio *auio;
222 int error;
223
224 error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio);
225 if (error)
226 return (error);
227 error = kern_readv(td, uap->fd, auio);
228 free(auio, M_IOV);
229 return (error);
230 }
231
232 int
233 linux_writev(struct thread *td, struct linux_writev_args *uap)
234 {
235 struct uio *auio;
236 int error;
237
238 error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio);
239 if (error)
240 return (error);
241 error = kern_writev(td, uap->fd, auio);
242 free(auio, M_IOV);
243 return (error);
244 }
245
/*
 * Indirect argument block read by linux_ipc() for LINUX_MSGRCV when the
 * upper 16 bits of `what' are zero: it carries the user message pointer
 * and the message type.
 */
struct l_ipc_kludge {
	l_uintptr_t msgp;
	l_long msgtyp;
} __packed;
250
251 int
252 linux_ipc(struct thread *td, struct linux_ipc_args *args)
253 {
254
255 switch (args->what & 0xFFFF) {
256 case LINUX_SEMOP: {
257 struct linux_semop_args a;
258
259 a.semid = args->arg1;
260 a.tsops = args->ptr;
261 a.nsops = args->arg2;
262 return (linux_semop(td, &a));
263 }
264 case LINUX_SEMGET: {
265 struct linux_semget_args a;
266
267 a.key = args->arg1;
268 a.nsems = args->arg2;
269 a.semflg = args->arg3;
270 return (linux_semget(td, &a));
271 }
272 case LINUX_SEMCTL: {
273 struct linux_semctl_args a;
274 int error;
275
276 a.semid = args->arg1;
277 a.semnum = args->arg2;
278 a.cmd = args->arg3;
279 error = copyin(args->ptr, &a.arg, sizeof(a.arg));
280 if (error)
281 return (error);
282 return (linux_semctl(td, &a));
283 }
284 case LINUX_MSGSND: {
285 struct linux_msgsnd_args a;
286
287 a.msqid = args->arg1;
288 a.msgp = args->ptr;
289 a.msgsz = args->arg2;
290 a.msgflg = args->arg3;
291 return (linux_msgsnd(td, &a));
292 }
293 case LINUX_MSGRCV: {
294 struct linux_msgrcv_args a;
295
296 a.msqid = args->arg1;
297 a.msgsz = args->arg2;
298 a.msgflg = args->arg3;
299 if ((args->what >> 16) == 0) {
300 struct l_ipc_kludge tmp;
301 int error;
302
303 if (args->ptr == 0)
304 return (EINVAL);
305 error = copyin(args->ptr, &tmp, sizeof(tmp));
306 if (error)
307 return (error);
308 a.msgp = PTRIN(tmp.msgp);
309 a.msgtyp = tmp.msgtyp;
310 } else {
311 a.msgp = args->ptr;
312 a.msgtyp = args->arg5;
313 }
314 return (linux_msgrcv(td, &a));
315 }
316 case LINUX_MSGGET: {
317 struct linux_msgget_args a;
318
319 a.key = args->arg1;
320 a.msgflg = args->arg2;
321 return (linux_msgget(td, &a));
322 }
323 case LINUX_MSGCTL: {
324 struct linux_msgctl_args a;
325
326 a.msqid = args->arg1;
327 a.cmd = args->arg2;
328 a.buf = args->ptr;
329 return (linux_msgctl(td, &a));
330 }
331 case LINUX_SHMAT: {
332 struct linux_shmat_args a;
333
334 a.shmid = args->arg1;
335 a.shmaddr = args->ptr;
336 a.shmflg = args->arg2;
337 a.raddr = PTRIN((l_uint)args->arg3);
338 return (linux_shmat(td, &a));
339 }
340 case LINUX_SHMDT: {
341 struct linux_shmdt_args a;
342
343 a.shmaddr = args->ptr;
344 return (linux_shmdt(td, &a));
345 }
346 case LINUX_SHMGET: {
347 struct linux_shmget_args a;
348
349 a.key = args->arg1;
350 a.size = args->arg2;
351 a.shmflg = args->arg3;
352 return (linux_shmget(td, &a));
353 }
354 case LINUX_SHMCTL: {
355 struct linux_shmctl_args a;
356
357 a.shmid = args->arg1;
358 a.cmd = args->arg2;
359 a.buf = args->ptr;
360 return (linux_shmctl(td, &a));
361 }
362 default:
363 break;
364 }
365
366 return (EINVAL);
367 }
368
369 int
370 linux_old_select(struct thread *td, struct linux_old_select_args *args)
371 {
372 struct l_old_select_argv linux_args;
373 struct linux_select_args newsel;
374 int error;
375
376 #ifdef DEBUG
377 if (ldebug(old_select))
378 printf(ARGS(old_select, "%p"), args->ptr);
379 #endif
380
381 error = copyin(args->ptr, &linux_args, sizeof(linux_args));
382 if (error)
383 return (error);
384
385 newsel.nfds = linux_args.nfds;
386 newsel.readfds = PTRIN(linux_args.readfds);
387 newsel.writefds = PTRIN(linux_args.writefds);
388 newsel.exceptfds = PTRIN(linux_args.exceptfds);
389 newsel.timeout = PTRIN(linux_args.timeout);
390 return (linux_select(td, &newsel));
391 }
392
393 int
394 linux_set_cloned_tls(struct thread *td, void *desc)
395 {
396 struct user_segment_descriptor sd;
397 struct l_user_desc info;
398 struct pcb *pcb;
399 int error;
400 int a[2];
401
402 error = copyin(desc, &info, sizeof(struct l_user_desc));
403 if (error) {
404 printf(LMSG("copyin failed!"));
405 } else {
406 /* We might copy out the entry_number as GUGS32_SEL. */
407 info.entry_number = GUGS32_SEL;
408 error = copyout(&info, desc, sizeof(struct l_user_desc));
409 if (error)
410 printf(LMSG("copyout failed!"));
411
412 a[0] = LINUX_LDT_entry_a(&info);
413 a[1] = LINUX_LDT_entry_b(&info);
414
415 memcpy(&sd, &a, sizeof(a));
416 #ifdef DEBUG
417 if (ldebug(clone))
418 printf("Segment created in clone with "
419 "CLONE_SETTLS: lobase: %x, hibase: %x, "
420 "lolimit: %x, hilimit: %x, type: %i, "
421 "dpl: %i, p: %i, xx: %i, long: %i, "
422 "def32: %i, gran: %i\n", sd.sd_lobase,
423 sd.sd_hibase, sd.sd_lolimit, sd.sd_hilimit,
424 sd.sd_type, sd.sd_dpl, sd.sd_p, sd.sd_xx,
425 sd.sd_long, sd.sd_def32, sd.sd_gran);
426 #endif
427 pcb = td->td_pcb;
428 pcb->pcb_gsbase = (register_t)info.base_addr;
429 td->td_frame->tf_gs = GSEL(GUGS32_SEL, SEL_UPL);
430 set_pcb_flags(pcb, PCB_32BIT);
431 }
432
433 return (error);
434 }
435
436 int
437 linux_set_upcall_kse(struct thread *td, register_t stack)
438 {
439
440 if (stack)
441 td->td_frame->tf_rsp = stack;
442
443 /*
444 * The newly created Linux thread returns
445 * to the user space by the same path that a parent do.
446 */
447 td->td_frame->tf_rax = 0;
448 return (0);
449 }
450
/*
 * Sizes used by linux_mmap_common() for LINUX_MAP_GROWSDOWN mappings:
 * a request no larger than (STACK_SIZE - GUARD_SIZE) is widened to
 * exactly that many bytes.
 */
#define STACK_SIZE (2 * 1024 * 1024)
#define GUARD_SIZE (4 * PAGE_SIZE)
453
454 int
455 linux_mmap2(struct thread *td, struct linux_mmap2_args *args)
456 {
457
458 #ifdef DEBUG
459 if (ldebug(mmap2))
460 printf(ARGS(mmap2, "0x%08x, %d, %d, 0x%08x, %d, %d"),
461 args->addr, args->len, args->prot,
462 args->flags, args->fd, args->pgoff);
463 #endif
464
465 return (linux_mmap_common(td, PTROUT(args->addr), args->len, args->prot,
466 args->flags, args->fd, (uint64_t)(uint32_t)args->pgoff *
467 PAGE_SIZE));
468 }
469
470 int
471 linux_mmap(struct thread *td, struct linux_mmap_args *args)
472 {
473 int error;
474 struct l_mmap_argv linux_args;
475
476 error = copyin(args->ptr, &linux_args, sizeof(linux_args));
477 if (error)
478 return (error);
479
480 #ifdef DEBUG
481 if (ldebug(mmap))
482 printf(ARGS(mmap, "0x%08x, %d, %d, 0x%08x, %d, %d"),
483 linux_args.addr, linux_args.len, linux_args.prot,
484 linux_args.flags, linux_args.fd, linux_args.pgoff);
485 #endif
486
487 return (linux_mmap_common(td, linux_args.addr, linux_args.len,
488 linux_args.prot, linux_args.flags, linux_args.fd,
489 (uint32_t)linux_args.pgoff));
490 }
491
492 static int
493 linux_mmap_common(struct thread *td, l_uintptr_t addr, l_size_t len, l_int prot,
494 l_int flags, l_int fd, l_loff_t pos)
495 {
496 struct proc *p = td->td_proc;
497 struct mmap_args /* {
498 caddr_t addr;
499 size_t len;
500 int prot;
501 int flags;
502 int fd;
503 long pad;
504 off_t pos;
505 } */ bsd_args;
506 int error;
507 struct file *fp;
508 cap_rights_t rights;
509
510 error = 0;
511 bsd_args.flags = 0;
512 fp = NULL;
513
514 /*
515 * Linux mmap(2):
516 * You must specify exactly one of MAP_SHARED and MAP_PRIVATE
517 */
518 if (!((flags & LINUX_MAP_SHARED) ^ (flags & LINUX_MAP_PRIVATE)))
519 return (EINVAL);
520
521 if (flags & LINUX_MAP_SHARED)
522 bsd_args.flags |= MAP_SHARED;
523 if (flags & LINUX_MAP_PRIVATE)
524 bsd_args.flags |= MAP_PRIVATE;
525 if (flags & LINUX_MAP_FIXED)
526 bsd_args.flags |= MAP_FIXED;
527 if (flags & LINUX_MAP_ANON) {
528 /* Enforce pos to be on page boundary, then ignore. */
529 if ((pos & PAGE_MASK) != 0)
530 return (EINVAL);
531 pos = 0;
532 bsd_args.flags |= MAP_ANON;
533 } else
534 bsd_args.flags |= MAP_NOSYNC;
535 if (flags & LINUX_MAP_GROWSDOWN)
536 bsd_args.flags |= MAP_STACK;
537
538 /*
539 * PROT_READ, PROT_WRITE, or PROT_EXEC implies PROT_READ and PROT_EXEC
540 * on Linux/i386. We do this to ensure maximum compatibility.
541 * Linux/ia64 does the same in i386 emulation mode.
542 */
543 bsd_args.prot = prot;
544 if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC))
545 bsd_args.prot |= PROT_READ | PROT_EXEC;
546
547 /* Linux does not check file descriptor when MAP_ANONYMOUS is set. */
548 bsd_args.fd = (bsd_args.flags & MAP_ANON) ? -1 : fd;
549 if (bsd_args.fd != -1) {
550 /*
551 * Linux follows Solaris mmap(2) description:
552 * The file descriptor fildes is opened with
553 * read permission, regardless of the
554 * protection options specified.
555 */
556
557 error = fget(td, bsd_args.fd,
558 cap_rights_init(&rights, CAP_MMAP), &fp);
559 if (error != 0)
560 return (error);
561 if (fp->f_type != DTYPE_VNODE) {
562 fdrop(fp, td);
563 return (EINVAL);
564 }
565
566 /* Linux mmap() just fails for O_WRONLY files */
567 if (!(fp->f_flag & FREAD)) {
568 fdrop(fp, td);
569 return (EACCES);
570 }
571
572 fdrop(fp, td);
573 }
574
575 if (flags & LINUX_MAP_GROWSDOWN) {
576 /*
577 * The Linux MAP_GROWSDOWN option does not limit auto
578 * growth of the region. Linux mmap with this option
579 * takes as addr the inital BOS, and as len, the initial
580 * region size. It can then grow down from addr without
581 * limit. However, Linux threads has an implicit internal
582 * limit to stack size of STACK_SIZE. Its just not
583 * enforced explicitly in Linux. But, here we impose
584 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
585 * region, since we can do this with our mmap.
586 *
587 * Our mmap with MAP_STACK takes addr as the maximum
588 * downsize limit on BOS, and as len the max size of
589 * the region. It then maps the top SGROWSIZ bytes,
590 * and auto grows the region down, up to the limit
591 * in addr.
592 *
593 * If we don't use the MAP_STACK option, the effect
594 * of this code is to allocate a stack region of a
595 * fixed size of (STACK_SIZE - GUARD_SIZE).
596 */
597
598 if ((caddr_t)PTRIN(addr) + len > p->p_vmspace->vm_maxsaddr) {
599 /*
600 * Some Linux apps will attempt to mmap
601 * thread stacks near the top of their
602 * address space. If their TOS is greater
603 * than vm_maxsaddr, vm_map_growstack()
604 * will confuse the thread stack with the
605 * process stack and deliver a SEGV if they
606 * attempt to grow the thread stack past their
607 * current stacksize rlimit. To avoid this,
608 * adjust vm_maxsaddr upwards to reflect
609 * the current stacksize rlimit rather
610 * than the maximum possible stacksize.
611 * It would be better to adjust the
612 * mmap'ed region, but some apps do not check
613 * mmap's return value.
614 */
615 PROC_LOCK(p);
616 p->p_vmspace->vm_maxsaddr = (char *)LINUX32_USRSTACK -
617 lim_cur(p, RLIMIT_STACK);
618 PROC_UNLOCK(p);
619 }
620
621 /*
622 * This gives us our maximum stack size and a new BOS.
623 * If we're using VM_STACK, then mmap will just map
624 * the top SGROWSIZ bytes, and let the stack grow down
625 * to the limit at BOS. If we're not using VM_STACK
626 * we map the full stack, since we don't have a way
627 * to autogrow it.
628 */
629 if (len > STACK_SIZE - GUARD_SIZE) {
630 bsd_args.addr = (caddr_t)PTRIN(addr);
631 bsd_args.len = len;
632 } else {
633 bsd_args.addr = (caddr_t)PTRIN(addr) -
634 (STACK_SIZE - GUARD_SIZE - len);
635 bsd_args.len = STACK_SIZE - GUARD_SIZE;
636 }
637 } else {
638 bsd_args.addr = (caddr_t)PTRIN(addr);
639 bsd_args.len = len;
640 }
641 bsd_args.pos = pos;
642
643 #ifdef DEBUG
644 if (ldebug(mmap))
645 printf("-> %s(%p, %d, %d, 0x%08x, %d, 0x%x)\n",
646 __func__,
647 (void *)bsd_args.addr, (int)bsd_args.len, bsd_args.prot,
648 bsd_args.flags, bsd_args.fd, (int)bsd_args.pos);
649 #endif
650 error = sys_mmap(td, &bsd_args);
651 #ifdef DEBUG
652 if (ldebug(mmap))
653 printf("-> %s() return: 0x%x (0x%08x)\n",
654 __func__, error, (u_int)td->td_retval[0]);
655 #endif
656 return (error);
657 }
658
659 int
660 linux_mprotect(struct thread *td, struct linux_mprotect_args *uap)
661 {
662 struct mprotect_args bsd_args;
663
664 bsd_args.addr = uap->addr;
665 bsd_args.len = uap->len;
666 bsd_args.prot = uap->prot;
667 if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC))
668 bsd_args.prot |= PROT_READ | PROT_EXEC;
669 return (sys_mprotect(td, &bsd_args));
670 }
671
672 int
673 linux_iopl(struct thread *td, struct linux_iopl_args *args)
674 {
675 int error;
676
677 if (args->level < 0 || args->level > 3)
678 return (EINVAL);
679 if ((error = priv_check(td, PRIV_IO)) != 0)
680 return (error);
681 if ((error = securelevel_gt(td->td_ucred, 0)) != 0)
682 return (error);
683 td->td_frame->tf_rflags = (td->td_frame->tf_rflags & ~PSL_IOPL) |
684 (args->level * (PSL_IOPL / 3));
685
686 return (0);
687 }
688
689 int
690 linux_sigaction(struct thread *td, struct linux_sigaction_args *args)
691 {
692 l_osigaction_t osa;
693 l_sigaction_t act, oact;
694 int error;
695
696 #ifdef DEBUG
697 if (ldebug(sigaction))
698 printf(ARGS(sigaction, "%d, %p, %p"),
699 args->sig, (void *)args->nsa, (void *)args->osa);
700 #endif
701
702 if (args->nsa != NULL) {
703 error = copyin(args->nsa, &osa, sizeof(l_osigaction_t));
704 if (error)
705 return (error);
706 act.lsa_handler = osa.lsa_handler;
707 act.lsa_flags = osa.lsa_flags;
708 act.lsa_restorer = osa.lsa_restorer;
709 LINUX_SIGEMPTYSET(act.lsa_mask);
710 act.lsa_mask.__mask = osa.lsa_mask;
711 }
712
713 error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL,
714 args->osa ? &oact : NULL);
715
716 if (args->osa != NULL && !error) {
717 osa.lsa_handler = oact.lsa_handler;
718 osa.lsa_flags = oact.lsa_flags;
719 osa.lsa_restorer = oact.lsa_restorer;
720 osa.lsa_mask = oact.lsa_mask.__mask;
721 error = copyout(&osa, args->osa, sizeof(l_osigaction_t));
722 }
723
724 return (error);
725 }
726
727 /*
728 * Linux has two extra args, restart and oldmask. We don't use these,
729 * but it seems that "restart" is actually a context pointer that
730 * enables the signal to happen with a different register set.
731 */
732 int
733 linux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args)
734 {
735 sigset_t sigmask;
736 l_sigset_t mask;
737
738 #ifdef DEBUG
739 if (ldebug(sigsuspend))
740 printf(ARGS(sigsuspend, "%08lx"), (unsigned long)args->mask);
741 #endif
742
743 LINUX_SIGEMPTYSET(mask);
744 mask.__mask = args->mask;
745 linux_to_bsd_sigset(&mask, &sigmask);
746 return (kern_sigsuspend(td, sigmask));
747 }
748
749 int
750 linux_rt_sigsuspend(struct thread *td, struct linux_rt_sigsuspend_args *uap)
751 {
752 l_sigset_t lmask;
753 sigset_t sigmask;
754 int error;
755
756 #ifdef DEBUG
757 if (ldebug(rt_sigsuspend))
758 printf(ARGS(rt_sigsuspend, "%p, %d"),
759 (void *)uap->newset, uap->sigsetsize);
760 #endif
761
762 if (uap->sigsetsize != sizeof(l_sigset_t))
763 return (EINVAL);
764
765 error = copyin(uap->newset, &lmask, sizeof(l_sigset_t));
766 if (error)
767 return (error);
768
769 linux_to_bsd_sigset(&lmask, &sigmask);
770 return (kern_sigsuspend(td, sigmask));
771 }
772
773 int
774 linux_pause(struct thread *td, struct linux_pause_args *args)
775 {
776 struct proc *p = td->td_proc;
777 sigset_t sigmask;
778
779 #ifdef DEBUG
780 if (ldebug(pause))
781 printf(ARGS(pause, ""));
782 #endif
783
784 PROC_LOCK(p);
785 sigmask = td->td_sigmask;
786 PROC_UNLOCK(p);
787 return (kern_sigsuspend(td, sigmask));
788 }
789
790 int
791 linux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap)
792 {
793 stack_t ss, oss;
794 l_stack_t lss;
795 int error;
796
797 #ifdef DEBUG
798 if (ldebug(sigaltstack))
799 printf(ARGS(sigaltstack, "%p, %p"), uap->uss, uap->uoss);
800 #endif
801
802 if (uap->uss != NULL) {
803 error = copyin(uap->uss, &lss, sizeof(l_stack_t));
804 if (error)
805 return (error);
806
807 ss.ss_sp = PTRIN(lss.ss_sp);
808 ss.ss_size = lss.ss_size;
809 ss.ss_flags = linux_to_bsd_sigaltstack(lss.ss_flags);
810 }
811 error = kern_sigaltstack(td, (uap->uss != NULL) ? &ss : NULL,
812 (uap->uoss != NULL) ? &oss : NULL);
813 if (!error && uap->uoss != NULL) {
814 lss.ss_sp = PTROUT(oss.ss_sp);
815 lss.ss_size = oss.ss_size;
816 lss.ss_flags = bsd_to_linux_sigaltstack(oss.ss_flags);
817 error = copyout(&lss, uap->uoss, sizeof(l_stack_t));
818 }
819
820 return (error);
821 }
822
823 int
824 linux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args)
825 {
826 struct ftruncate_args sa;
827
828 #ifdef DEBUG
829 if (ldebug(ftruncate64))
830 printf(ARGS(ftruncate64, "%u, %jd"), args->fd,
831 (intmax_t)args->length);
832 #endif
833
834 sa.fd = args->fd;
835 sa.length = args->length;
836 return sys_ftruncate(td, &sa);
837 }
838
839 int
840 linux_gettimeofday(struct thread *td, struct linux_gettimeofday_args *uap)
841 {
842 struct timeval atv;
843 l_timeval atv32;
844 struct timezone rtz;
845 int error = 0;
846
847 if (uap->tp) {
848 microtime(&atv);
849 atv32.tv_sec = atv.tv_sec;
850 atv32.tv_usec = atv.tv_usec;
851 error = copyout(&atv32, uap->tp, sizeof(atv32));
852 }
853 if (error == 0 && uap->tzp != NULL) {
854 rtz.tz_minuteswest = tz_minuteswest;
855 rtz.tz_dsttime = tz_dsttime;
856 error = copyout(&rtz, uap->tzp, sizeof(rtz));
857 }
858 return (error);
859 }
860
861 int
862 linux_settimeofday(struct thread *td, struct linux_settimeofday_args *uap)
863 {
864 l_timeval atv32;
865 struct timeval atv, *tvp;
866 struct timezone atz, *tzp;
867 int error;
868
869 if (uap->tp) {
870 error = copyin(uap->tp, &atv32, sizeof(atv32));
871 if (error)
872 return (error);
873 atv.tv_sec = atv32.tv_sec;
874 atv.tv_usec = atv32.tv_usec;
875 tvp = &atv;
876 } else
877 tvp = NULL;
878 if (uap->tzp) {
879 error = copyin(uap->tzp, &atz, sizeof(atz));
880 if (error)
881 return (error);
882 tzp = &atz;
883 } else
884 tzp = NULL;
885 return (kern_settimeofday(td, tvp, tzp));
886 }
887
888 int
889 linux_getrusage(struct thread *td, struct linux_getrusage_args *uap)
890 {
891 struct rusage s;
892 int error;
893
894 error = kern_getrusage(td, uap->who, &s);
895 if (error != 0)
896 return (error);
897 if (uap->rusage != NULL)
898 error = linux_copyout_rusage(&s, uap->rusage);
899 return (error);
900 }
901
902 int
903 linux_set_thread_area(struct thread *td,
904 struct linux_set_thread_area_args *args)
905 {
906 struct l_user_desc info;
907 struct user_segment_descriptor sd;
908 struct pcb *pcb;
909 int a[2];
910 int error;
911
912 error = copyin(args->desc, &info, sizeof(struct l_user_desc));
913 if (error)
914 return (error);
915
916 #ifdef DEBUG
917 if (ldebug(set_thread_area))
918 printf(ARGS(set_thread_area, "%i, %x, %x, %i, %i, %i, "
919 "%i, %i, %i"), info.entry_number, info.base_addr,
920 info.limit, info.seg_32bit, info.contents,
921 info.read_exec_only, info.limit_in_pages,
922 info.seg_not_present, info.useable);
923 #endif
924
925 /*
926 * Semantics of Linux version: every thread in the system has array
927 * of three TLS descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown.
928 * This syscall loads one of the selected TLS decriptors with a value
929 * and also loads GDT descriptors 6, 7 and 8 with the content of
930 * the per-thread descriptors.
931 *
932 * Semantics of FreeBSD version: I think we can ignore that Linux has
933 * three per-thread descriptors and use just the first one.
934 * The tls_array[] is used only in [gs]et_thread_area() syscalls and
935 * for loading the GDT descriptors. We use just one GDT descriptor
936 * for TLS, so we will load just one.
937 *
938 * XXX: This doesn't work when a user space process tries to use more
939 * than one TLS segment. Comment in the Linux source says wine might
940 * do this.
941 */
942
943 /*
944 * GLIBC reads current %gs and call set_thread_area() with it.
945 * We should let GUDATA_SEL and GUGS32_SEL proceed as well because
946 * we use these segments.
947 */
948 switch (info.entry_number) {
949 case GUGS32_SEL:
950 case GUDATA_SEL:
951 case 6:
952 case -1:
953 info.entry_number = GUGS32_SEL;
954 break;
955 default:
956 return (EINVAL);
957 }
958
959 /*
960 * We have to copy out the GDT entry we use.
961 *
962 * XXX: What if a user space program does not check the return value
963 * and tries to use 6, 7 or 8?
964 */
965 error = copyout(&info, args->desc, sizeof(struct l_user_desc));
966 if (error)
967 return (error);
968
969 if (LINUX_LDT_empty(&info)) {
970 a[0] = 0;
971 a[1] = 0;
972 } else {
973 a[0] = LINUX_LDT_entry_a(&info);
974 a[1] = LINUX_LDT_entry_b(&info);
975 }
976
977 memcpy(&sd, &a, sizeof(a));
978 #ifdef DEBUG
979 if (ldebug(set_thread_area))
980 printf("Segment created in set_thread_area: "
981 "lobase: %x, hibase: %x, lolimit: %x, hilimit: %x, "
982 "type: %i, dpl: %i, p: %i, xx: %i, long: %i, "
983 "def32: %i, gran: %i\n",
984 sd.sd_lobase,
985 sd.sd_hibase,
986 sd.sd_lolimit,
987 sd.sd_hilimit,
988 sd.sd_type,
989 sd.sd_dpl,
990 sd.sd_p,
991 sd.sd_xx,
992 sd.sd_long,
993 sd.sd_def32,
994 sd.sd_gran);
995 #endif
996
997 pcb = td->td_pcb;
998 pcb->pcb_gsbase = (register_t)info.base_addr;
999 set_pcb_flags(pcb, PCB_32BIT);
1000 update_gdt_gsbase(td, info.base_addr);
1001
1002 return (0);
1003 }
Cache object: 81adc9c9fa8b18e52c7a7f98572d9eb1
|