The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/i386/linux/linux_machdep.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2000 Marcel Moolenaar
    3  * All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer
   10  *    in this position and unchanged.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  * 3. The name of the author may not be used to endorse or promote products
   15  *    derived from this software without specific prior written permission.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
   21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
   22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
   26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   27  */
   28 
   29 #include <sys/cdefs.h>
   30 __FBSDID("$FreeBSD: releng/8.4/sys/i386/linux/linux_machdep.c 219193 2011-03-02 19:36:06Z dchagin $");
   31 
   32 #include <sys/param.h>
   33 #include <sys/systm.h>
   34 #include <sys/file.h>
   35 #include <sys/fcntl.h>
   36 #include <sys/imgact.h>
   37 #include <sys/lock.h>
   38 #include <sys/malloc.h>
   39 #include <sys/mman.h>
   40 #include <sys/mutex.h>
   41 #include <sys/sx.h>
   42 #include <sys/priv.h>
   43 #include <sys/proc.h>
   44 #include <sys/queue.h>
   45 #include <sys/resource.h>
   46 #include <sys/resourcevar.h>
   47 #include <sys/signalvar.h>
   48 #include <sys/syscallsubr.h>
   49 #include <sys/sysproto.h>
   50 #include <sys/unistd.h>
   51 #include <sys/wait.h>
   52 #include <sys/sched.h>
   53 
   54 #include <machine/frame.h>
   55 #include <machine/psl.h>
   56 #include <machine/segments.h>
   57 #include <machine/sysarch.h>
   58 
   59 #include <vm/vm.h>
   60 #include <vm/pmap.h>
   61 #include <vm/vm_map.h>
   62 
   63 #include <i386/linux/linux.h>
   64 #include <i386/linux/linux_proto.h>
   65 #include <compat/linux/linux_ipc.h>
   66 #include <compat/linux/linux_misc.h>
   67 #include <compat/linux/linux_signal.h>
   68 #include <compat/linux/linux_util.h>
   69 #include <compat/linux/linux_emul.h>
   70 
   71 #include <i386/include/pcb.h>                   /* needed for pcb definition in linux_set_thread_area */
   72 
   73 #include "opt_posix.h"
   74 
   75 extern struct sysentvec elf32_freebsd_sysvec;   /* defined in i386/i386/elf_machdep.c */
   76 
   77 struct l_descriptor {                        /* copied in by linux_modify_ldt() for write_ldt */
   78         l_uint          entry_number;        /* LDT slot to write; becomes ldt.start */
   79         l_ulong         base_addr;           /* segment base; split into sd_lobase/sd_hibase */
   80         l_uint          limit;               /* segment limit; only the low 20 bits are used */
   81         l_uint          seg_32bit:1;         /* copied to sd_def32 */
   82         l_uint          contents:2;          /* shifted into bits 2-3 of sd_type */
   83         l_uint          read_exec_only:1;    /* inverted, then placed in bit 1 of sd_type */
   84         l_uint          limit_in_pages:1;    /* copied to sd_gran */
   85         l_uint          seg_not_present:1;   /* inverted, then stored in sd_p */
   86         l_uint          useable:1;           /* not consulted by linux_modify_ldt() */
   87 };
   88 
   89 struct l_old_select_argv {                   /* argument block read from user space by linux_old_select() */
   90         l_int           nfds;                /* forwarded as linux_select_args.nfds */
   91         l_fd_set        *readfds;            /* forwarded as linux_select_args.readfds */
   92         l_fd_set        *writefds;           /* forwarded as linux_select_args.writefds */
   93         l_fd_set        *exceptfds;          /* forwarded as linux_select_args.exceptfds */
   94         struct l_timeval        *timeout;    /* forwarded as linux_select_args.timeout */
   95 };
   96 
   97 static int      linux_mmap_common(struct thread *td, l_uintptr_t addr,
   98                     l_size_t len, l_int prot, l_int flags, l_int fd,
   99                     l_loff_t pos);
  100 
  101 int
  102 linux_to_bsd_sigaltstack(int lsa)
  103 {
  104         int bsa = 0;
  105 
  106         if (lsa & LINUX_SS_DISABLE)
  107                 bsa |= SS_DISABLE;
  108         if (lsa & LINUX_SS_ONSTACK)
  109                 bsa |= SS_ONSTACK;
  110         return (bsa);
  111 }
  112 
  113 int
  114 bsd_to_linux_sigaltstack(int bsa)
  115 {
  116         int lsa = 0;
  117 
  118         if (bsa & SS_DISABLE)
  119                 lsa |= LINUX_SS_DISABLE;
  120         if (bsa & SS_ONSTACK)
  121                 lsa |= LINUX_SS_ONSTACK;
  122         return (lsa);
  123 }
  124 
  125 int
  126 linux_execve(struct thread *td, struct linux_execve_args *args)
  127 {
  128         int error;
  129         char *newpath;
  130         struct image_args eargs;
  131 
  132         LCONVPATHEXIST(td, args->path, &newpath);
  133 
  134 #ifdef DEBUG
  135         if (ldebug(execve))
  136                 printf(ARGS(execve, "%s"), newpath);
  137 #endif
  138 
  139         error = exec_copyin_args(&eargs, newpath, UIO_SYSSPACE,
  140             args->argp, args->envp);
  141         free(newpath, M_TEMP);
  142         if (error == 0)
  143                 error = kern_execve(td, &eargs, NULL);
  144         if (error == 0)
  145                 /* linux process can exec fbsd one, dont attempt
  146                  * to create emuldata for such process using
  147                  * linux_proc_init, this leads to a panic on KASSERT
  148                  * because such process has p->p_emuldata == NULL
  149                  */
  150                 if (SV_PROC_ABI(td->td_proc) == SV_ABI_LINUX)
  151                         error = linux_proc_init(td, 0, 0);
  152         return (error);
  153 }
  154 
  155 struct l_ipc_kludge {                 /* read from args->ptr by linux_ipc() for LINUX_MSGRCV when (what >> 16) == 0 */
  156         struct l_msgbuf *msgp;        /* becomes linux_msgrcv_args.msgp */
  157         l_long msgtyp;                /* becomes linux_msgrcv_args.msgtyp */
  158 };
  159 
  160 int
  161 linux_ipc(struct thread *td, struct linux_ipc_args *args)
  162 {
  163 
  164         switch (args->what & 0xFFFF) {
  165         case LINUX_SEMOP: {
  166                 struct linux_semop_args a;
  167 
  168                 a.semid = args->arg1;
  169                 a.tsops = args->ptr;
  170                 a.nsops = args->arg2;
  171                 return (linux_semop(td, &a));
  172         }
  173         case LINUX_SEMGET: {
  174                 struct linux_semget_args a;
  175 
  176                 a.key = args->arg1;
  177                 a.nsems = args->arg2;
  178                 a.semflg = args->arg3;
  179                 return (linux_semget(td, &a));
  180         }
  181         case LINUX_SEMCTL: {
  182                 struct linux_semctl_args a;
  183                 int error;
  184 
  185                 a.semid = args->arg1;
  186                 a.semnum = args->arg2;
  187                 a.cmd = args->arg3;
  188                 error = copyin(args->ptr, &a.arg, sizeof(a.arg));
  189                 if (error)
  190                         return (error);
  191                 return (linux_semctl(td, &a));
  192         }
  193         case LINUX_MSGSND: {
  194                 struct linux_msgsnd_args a;
  195 
  196                 a.msqid = args->arg1;
  197                 a.msgp = args->ptr;
  198                 a.msgsz = args->arg2;
  199                 a.msgflg = args->arg3;
  200                 return (linux_msgsnd(td, &a));
  201         }
  202         case LINUX_MSGRCV: {
  203                 struct linux_msgrcv_args a;
  204 
  205                 a.msqid = args->arg1;
  206                 a.msgsz = args->arg2;
  207                 a.msgflg = args->arg3;
  208                 if ((args->what >> 16) == 0) {
  209                         struct l_ipc_kludge tmp;
  210                         int error;
  211 
  212                         if (args->ptr == NULL)
  213                                 return (EINVAL);
  214                         error = copyin(args->ptr, &tmp, sizeof(tmp));
  215                         if (error)
  216                                 return (error);
  217                         a.msgp = tmp.msgp;
  218                         a.msgtyp = tmp.msgtyp;
  219                 } else {
  220                         a.msgp = args->ptr;
  221                         a.msgtyp = args->arg5;
  222                 }
  223                 return (linux_msgrcv(td, &a));
  224         }
  225         case LINUX_MSGGET: {
  226                 struct linux_msgget_args a;
  227 
  228                 a.key = args->arg1;
  229                 a.msgflg = args->arg2;
  230                 return (linux_msgget(td, &a));
  231         }
  232         case LINUX_MSGCTL: {
  233                 struct linux_msgctl_args a;
  234 
  235                 a.msqid = args->arg1;
  236                 a.cmd = args->arg2;
  237                 a.buf = args->ptr;
  238                 return (linux_msgctl(td, &a));
  239         }
  240         case LINUX_SHMAT: {
  241                 struct linux_shmat_args a;
  242 
  243                 a.shmid = args->arg1;
  244                 a.shmaddr = args->ptr;
  245                 a.shmflg = args->arg2;
  246                 a.raddr = (l_ulong *)args->arg3;
  247                 return (linux_shmat(td, &a));
  248         }
  249         case LINUX_SHMDT: {
  250                 struct linux_shmdt_args a;
  251 
  252                 a.shmaddr = args->ptr;
  253                 return (linux_shmdt(td, &a));
  254         }
  255         case LINUX_SHMGET: {
  256                 struct linux_shmget_args a;
  257 
  258                 a.key = args->arg1;
  259                 a.size = args->arg2;
  260                 a.shmflg = args->arg3;
  261                 return (linux_shmget(td, &a));
  262         }
  263         case LINUX_SHMCTL: {
  264                 struct linux_shmctl_args a;
  265 
  266                 a.shmid = args->arg1;
  267                 a.cmd = args->arg2;
  268                 a.buf = args->ptr;
  269                 return (linux_shmctl(td, &a));
  270         }
  271         default:
  272                 break;
  273         }
  274 
  275         return (EINVAL);
  276 }
  277 
  278 int
  279 linux_old_select(struct thread *td, struct linux_old_select_args *args)
  280 {
  281         struct l_old_select_argv linux_args;
  282         struct linux_select_args newsel;
  283         int error;
  284 
  285 #ifdef DEBUG
  286         if (ldebug(old_select))
  287                 printf(ARGS(old_select, "%p"), args->ptr);
  288 #endif
  289 
  290         error = copyin(args->ptr, &linux_args, sizeof(linux_args));
  291         if (error)
  292                 return (error);
  293 
  294         newsel.nfds = linux_args.nfds;
  295         newsel.readfds = linux_args.readfds;
  296         newsel.writefds = linux_args.writefds;
  297         newsel.exceptfds = linux_args.exceptfds;
  298         newsel.timeout = linux_args.timeout;
  299         return (linux_select(td, &newsel));
  300 }
  301 
  302 int
  303 linux_set_cloned_tls(struct thread *td, void *desc)
  304 {
  305         struct segment_descriptor sd;
  306         struct l_user_desc info;
  307         int idx, error;
  308         int a[2];
  309 
  310         error = copyin(desc, &info, sizeof(struct l_user_desc));
  311         if (error) {
  312                 printf(LMSG("copyin failed!"));
  313         } else {
  314                 idx = info.entry_number;
  315 
  316                 /* 
  317                  * looks like we're getting the idx we returned
  318                  * in the set_thread_area() syscall
  319                  */
  320                 if (idx != 6 && idx != 3) {
  321                         printf(LMSG("resetting idx!"));
  322                         idx = 3;
  323                 }
  324 
  325                 /* this doesnt happen in practice */
  326                 if (idx == 6) {
  327                         /* we might copy out the entry_number as 3 */
  328                         info.entry_number = 3;
  329                         error = copyout(&info, desc, sizeof(struct l_user_desc));
  330                         if (error)
  331                                 printf(LMSG("copyout failed!"));
  332                 }
  333 
  334                 a[0] = LINUX_LDT_entry_a(&info);
  335                 a[1] = LINUX_LDT_entry_b(&info);
  336 
  337                 memcpy(&sd, &a, sizeof(a));
  338 #ifdef DEBUG
  339                 if (ldebug(clone))
  340                         printf("Segment created in clone with "
  341                         "CLONE_SETTLS: lobase: %x, hibase: %x, "
  342                         "lolimit: %x, hilimit: %x, type: %i, "
  343                         "dpl: %i, p: %i, xx: %i, def32: %i, "
  344                         "gran: %i\n", sd.sd_lobase, sd.sd_hibase,
  345                         sd.sd_lolimit, sd.sd_hilimit, sd.sd_type,
  346                         sd.sd_dpl, sd.sd_p, sd.sd_xx,
  347                         sd.sd_def32, sd.sd_gran);
  348 #endif
  349 
  350                 /* set %gs */
  351                 td->td_pcb->pcb_gsd = sd;
  352                 td->td_pcb->pcb_gs = GSEL(GUGS_SEL, SEL_UPL);
  353         }
  354 
  355         return (error);
  356 }
  357 
  358 int
  359 linux_set_upcall_kse(struct thread *td, register_t stack)
  360 {
  361 
  362         td->td_frame->tf_esp = stack;
  363 
  364         return (0);
  365 }
  366 
  367 #define STACK_SIZE  (2 * 1024 * 1024)     /* 2 MiB; stack limit assumed for MAP_GROWSDOWN in linux_mmap_common() */
  368 #define GUARD_SIZE  (4 * PAGE_SIZE)       /* subtracted from STACK_SIZE when sizing a MAP_GROWSDOWN region */
  369 
  370 int
  371 linux_mmap2(struct thread *td, struct linux_mmap2_args *args)
  372 {
  373 
  374 #ifdef DEBUG
  375         if (ldebug(mmap2))
  376                 printf(ARGS(mmap2, "%p, %d, %d, 0x%08x, %d, %d"),
  377                     (void *)args->addr, args->len, args->prot,
  378                     args->flags, args->fd, args->pgoff);
  379 #endif
  380 
  381         return (linux_mmap_common(td, args->addr, args->len, args->prot,
  382                 args->flags, args->fd, (uint64_t)(uint32_t)args->pgoff *
  383                 PAGE_SIZE));
  384 }
  385 
  386 int
  387 linux_mmap(struct thread *td, struct linux_mmap_args *args)
  388 {
  389         int error;
  390         struct l_mmap_argv linux_args;
  391 
  392         error = copyin(args->ptr, &linux_args, sizeof(linux_args));
  393         if (error)
  394                 return (error);
  395 
  396 #ifdef DEBUG
  397         if (ldebug(mmap))
  398                 printf(ARGS(mmap, "%p, %d, %d, 0x%08x, %d, %d"),
  399                     (void *)linux_args.addr, linux_args.len, linux_args.prot,
  400                     linux_args.flags, linux_args.fd, linux_args.pgoff);
  401 #endif
  402 
  403         return (linux_mmap_common(td, linux_args.addr, linux_args.len,
  404             linux_args.prot, linux_args.flags, linux_args.fd,
  405             (uint32_t)linux_args.pgoff));
  406 }
  407 
  408 static int
  409 linux_mmap_common(struct thread *td, l_uintptr_t addr, l_size_t len, l_int prot,
  410     l_int flags, l_int fd, l_loff_t pos)
  411 {
  412         struct proc *p = td->td_proc;
  413         struct mmap_args /* {
  414                 caddr_t addr;
  415                 size_t len;
  416                 int prot;
  417                 int flags;
  418                 int fd;
  419                 long pad;
  420                 off_t pos;
  421         } */ bsd_args;
  422         int error;
  423         struct file *fp;
  424 
  425         error = 0;
  426         bsd_args.flags = 0;
  427         fp = NULL;
  428 
  429         /*
  430          * Linux mmap(2):
  431          * You must specify exactly one of MAP_SHARED and MAP_PRIVATE
  432          */
  433         if (!((flags & LINUX_MAP_SHARED) ^ (flags & LINUX_MAP_PRIVATE)))
  434                 return (EINVAL);
  435 
  436         if (flags & LINUX_MAP_SHARED)
  437                 bsd_args.flags |= MAP_SHARED;
  438         if (flags & LINUX_MAP_PRIVATE)
  439                 bsd_args.flags |= MAP_PRIVATE;
  440         if (flags & LINUX_MAP_FIXED)
  441                 bsd_args.flags |= MAP_FIXED;
  442         if (flags & LINUX_MAP_ANON) {
  443                 /* Enforce pos to be on page boundary, then ignore. */
  444                 if ((pos & PAGE_MASK) != 0)
  445                         return (EINVAL);
  446                 pos = 0;
  447                 bsd_args.flags |= MAP_ANON;
  448         } else
  449                 bsd_args.flags |= MAP_NOSYNC;
  450         if (flags & LINUX_MAP_GROWSDOWN)
  451                 bsd_args.flags |= MAP_STACK;
  452 
  453         /*
  454          * PROT_READ, PROT_WRITE, or PROT_EXEC implies PROT_READ and PROT_EXEC
  455          * on Linux/i386. We do this to ensure maximum compatibility.
  456          * Linux/ia64 does the same in i386 emulation mode.
  457          */
  458         bsd_args.prot = prot;
  459         if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC))
  460                 bsd_args.prot |= PROT_READ | PROT_EXEC;
  461 
  462         /* Linux does not check file descriptor when MAP_ANONYMOUS is set. */
  463         bsd_args.fd = (bsd_args.flags & MAP_ANON) ? -1 : fd;
  464         if (bsd_args.fd != -1) {
  465                 /*
  466                  * Linux follows Solaris mmap(2) description:
  467                  * The file descriptor fildes is opened with
  468                  * read permission, regardless of the
  469                  * protection options specified.
  470                  */
  471 
  472                 if ((error = fget(td, bsd_args.fd, &fp)) != 0)
  473                         return (error);
  474                 if (fp->f_type != DTYPE_VNODE) {
  475                         fdrop(fp, td);
  476                         return (EINVAL);
  477                 }
  478 
  479                 /* Linux mmap() just fails for O_WRONLY files */
  480                 if (!(fp->f_flag & FREAD)) {
  481                         fdrop(fp, td);
  482                         return (EACCES);
  483                 }
  484 
  485                 fdrop(fp, td);
  486         }
  487 
  488         if (flags & LINUX_MAP_GROWSDOWN) {
  489                 /* 
  490                  * The Linux MAP_GROWSDOWN option does not limit auto
  491                  * growth of the region.  Linux mmap with this option
  492                  * takes as addr the inital BOS, and as len, the initial
  493                  * region size.  It can then grow down from addr without
  494                  * limit.  However, linux threads has an implicit internal
  495                  * limit to stack size of STACK_SIZE.  Its just not
  496                  * enforced explicitly in linux.  But, here we impose
  497                  * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
  498                  * region, since we can do this with our mmap.
  499                  *
  500                  * Our mmap with MAP_STACK takes addr as the maximum
  501                  * downsize limit on BOS, and as len the max size of
  502                  * the region.  It them maps the top SGROWSIZ bytes,
  503                  * and auto grows the region down, up to the limit
  504                  * in addr.
  505                  *
  506                  * If we don't use the MAP_STACK option, the effect
  507                  * of this code is to allocate a stack region of a
  508                  * fixed size of (STACK_SIZE - GUARD_SIZE).
  509                  */
  510 
  511                 if ((caddr_t)PTRIN(addr) + len > p->p_vmspace->vm_maxsaddr) {
  512                         /* 
  513                          * Some linux apps will attempt to mmap
  514                          * thread stacks near the top of their
  515                          * address space.  If their TOS is greater
  516                          * than vm_maxsaddr, vm_map_growstack()
  517                          * will confuse the thread stack with the
  518                          * process stack and deliver a SEGV if they
  519                          * attempt to grow the thread stack past their
  520                          * current stacksize rlimit.  To avoid this,
  521                          * adjust vm_maxsaddr upwards to reflect
  522                          * the current stacksize rlimit rather
  523                          * than the maximum possible stacksize.
  524                          * It would be better to adjust the
  525                          * mmap'ed region, but some apps do not check
  526                          * mmap's return value.
  527                          */
  528                         PROC_LOCK(p);
  529                         p->p_vmspace->vm_maxsaddr = (char *)USRSTACK -
  530                             lim_cur(p, RLIMIT_STACK);
  531                         PROC_UNLOCK(p);
  532                 }
  533 
  534                 /*
  535                  * This gives us our maximum stack size and a new BOS.
  536                  * If we're using VM_STACK, then mmap will just map
  537                  * the top SGROWSIZ bytes, and let the stack grow down
  538                  * to the limit at BOS.  If we're not using VM_STACK
  539                  * we map the full stack, since we don't have a way
  540                  * to autogrow it.
  541                  */
  542                 if (len > STACK_SIZE - GUARD_SIZE) {
  543                         bsd_args.addr = (caddr_t)PTRIN(addr);
  544                         bsd_args.len = len;
  545                 } else {
  546                         bsd_args.addr = (caddr_t)PTRIN(addr) -
  547                             (STACK_SIZE - GUARD_SIZE - len);
  548                         bsd_args.len = STACK_SIZE - GUARD_SIZE;
  549                 }
  550         } else {
  551                 bsd_args.addr = (caddr_t)PTRIN(addr);
  552                 bsd_args.len  = len;
  553         }
  554         bsd_args.pos = pos;
  555 
  556 #ifdef DEBUG
  557         if (ldebug(mmap))
  558                 printf("-> %s(%p, %d, %d, 0x%08x, %d, 0x%x)\n",
  559                     __func__,
  560                     (void *)bsd_args.addr, bsd_args.len, bsd_args.prot,
  561                     bsd_args.flags, bsd_args.fd, (int)bsd_args.pos);
  562 #endif
  563         error = mmap(td, &bsd_args);
  564 #ifdef DEBUG
  565         if (ldebug(mmap))
  566                 printf("-> %s() return: 0x%x (0x%08x)\n",
  567                         __func__, error, (u_int)td->td_retval[0]);
  568 #endif
  569         return (error);
  570 }
  571 
  572 int
  573 linux_mprotect(struct thread *td, struct linux_mprotect_args *uap)
  574 {
  575         struct mprotect_args bsd_args;
  576 
  577         bsd_args.addr = uap->addr;
  578         bsd_args.len = uap->len;
  579         bsd_args.prot = uap->prot;
  580         if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC))
  581                 bsd_args.prot |= PROT_READ | PROT_EXEC;
  582         return (mprotect(td, &bsd_args));
  583 }
  584 
  585 int
  586 linux_pipe(struct thread *td, struct linux_pipe_args *args)
  587 {
  588         int error;
  589         int fildes[2];
  590 
  591 #ifdef DEBUG
  592         if (ldebug(pipe))
  593                 printf(ARGS(pipe, "*"));
  594 #endif
  595 
  596         error = kern_pipe(td, fildes);
  597         if (error)
  598                 return (error);
  599 
  600         /* XXX: Close descriptors on error. */
  601         return (copyout(fildes, args->pipefds, sizeof fildes));
  602 }
  603 
  604 int
  605 linux_ioperm(struct thread *td, struct linux_ioperm_args *args)
  606 {
  607         int error;
  608         struct i386_ioperm_args iia;
  609 
  610         iia.start = args->start;
  611         iia.length = args->length;
  612         iia.enable = args->enable;
  613         error = i386_set_ioperm(td, &iia);
  614         return (error);
  615 }
  616 
  617 int
  618 linux_iopl(struct thread *td, struct linux_iopl_args *args)
  619 {
  620         int error;
  621 
  622         if (args->level < 0 || args->level > 3)
  623                 return (EINVAL);
  624         if ((error = priv_check(td, PRIV_IO)) != 0)
  625                 return (error);
  626         if ((error = securelevel_gt(td->td_ucred, 0)) != 0)
  627                 return (error);
  628         td->td_frame->tf_eflags = (td->td_frame->tf_eflags & ~PSL_IOPL) |
  629             (args->level * (PSL_IOPL / 3));
  630         return (0);
  631 }
  632 
  633 int
  634 linux_modify_ldt(struct thread *td, struct linux_modify_ldt_args *uap)
  635 {
  636         int error;
  637         struct i386_ldt_args ldt;
  638         struct l_descriptor ld;
  639         union descriptor desc;
  640         int size, written;
  641 
  642         switch (uap->func) {
  643         case 0x00: /* read_ldt */
  644                 ldt.start = 0;
  645                 ldt.descs = uap->ptr;
  646                 ldt.num = uap->bytecount / sizeof(union descriptor);
  647                 error = i386_get_ldt(td, &ldt);
  648                 td->td_retval[0] *= sizeof(union descriptor);
  649                 break;
  650         case 0x02: /* read_default_ldt = 0 */
  651                 size = 5*sizeof(struct l_desc_struct);
  652                 if (size > uap->bytecount)
  653                         size = uap->bytecount;
  654                 for (written = error = 0; written < size && error == 0; written++)
  655                         error = subyte((char *)uap->ptr + written, 0);
  656                 td->td_retval[0] = written;
  657                 break;
  658         case 0x01: /* write_ldt */
  659         case 0x11: /* write_ldt */
  660                 if (uap->bytecount != sizeof(ld))
  661                         return (EINVAL);
  662 
  663                 error = copyin(uap->ptr, &ld, sizeof(ld));
  664                 if (error)
  665                         return (error);
  666 
  667                 ldt.start = ld.entry_number;
  668                 ldt.descs = &desc;
  669                 ldt.num = 1;
  670                 desc.sd.sd_lolimit = (ld.limit & 0x0000ffff);
  671                 desc.sd.sd_hilimit = (ld.limit & 0x000f0000) >> 16;
  672                 desc.sd.sd_lobase = (ld.base_addr & 0x00ffffff);
  673                 desc.sd.sd_hibase = (ld.base_addr & 0xff000000) >> 24;
  674                 desc.sd.sd_type = SDT_MEMRO | ((ld.read_exec_only ^ 1) << 1) |
  675                         (ld.contents << 2);
  676                 desc.sd.sd_dpl = 3;
  677                 desc.sd.sd_p = (ld.seg_not_present ^ 1);
  678                 desc.sd.sd_xx = 0;
  679                 desc.sd.sd_def32 = ld.seg_32bit;
  680                 desc.sd.sd_gran = ld.limit_in_pages;
  681                 error = i386_set_ldt(td, &ldt, &desc);
  682                 break;
  683         default:
  684                 error = ENOSYS;
  685                 break;
  686         }
  687 
  688         if (error == EOPNOTSUPP) {
  689                 printf("linux: modify_ldt needs kernel option USER_LDT\n");
  690                 error = ENOSYS;
  691         }
  692 
  693         return (error);
  694 }
  695 
  696 int
  697 linux_sigaction(struct thread *td, struct linux_sigaction_args *args)
  698 {
  699         l_osigaction_t osa;
  700         l_sigaction_t act, oact;
  701         int error;
  702 
  703 #ifdef DEBUG
  704         if (ldebug(sigaction))
  705                 printf(ARGS(sigaction, "%d, %p, %p"),
  706                     args->sig, (void *)args->nsa, (void *)args->osa);
  707 #endif
  708 
  709         if (args->nsa != NULL) {
  710                 error = copyin(args->nsa, &osa, sizeof(l_osigaction_t));
  711                 if (error)
  712                         return (error);
  713                 act.lsa_handler = osa.lsa_handler;
  714                 act.lsa_flags = osa.lsa_flags;
  715                 act.lsa_restorer = osa.lsa_restorer;
  716                 LINUX_SIGEMPTYSET(act.lsa_mask);
  717                 act.lsa_mask.__bits[0] = osa.lsa_mask;
  718         }
  719 
  720         error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL,
  721             args->osa ? &oact : NULL);
  722 
  723         if (args->osa != NULL && !error) {
  724                 osa.lsa_handler = oact.lsa_handler;
  725                 osa.lsa_flags = oact.lsa_flags;
  726                 osa.lsa_restorer = oact.lsa_restorer;
  727                 osa.lsa_mask = oact.lsa_mask.__bits[0];
  728                 error = copyout(&osa, args->osa, sizeof(l_osigaction_t));
  729         }
  730 
  731         return (error);
  732 }
  733 
  734 /*
  735  * Linux has two extra args, restart and oldmask.  We dont use these,
  736  * but it seems that "restart" is actually a context pointer that
  737  * enables the signal to happen with a different register set.
  738  */
  739 int
  740 linux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args)
  741 {
  742         sigset_t sigmask;
  743         l_sigset_t mask;
  744 
  745 #ifdef DEBUG
  746         if (ldebug(sigsuspend))
  747                 printf(ARGS(sigsuspend, "%08lx"), (unsigned long)args->mask);
  748 #endif
  749 
  750         LINUX_SIGEMPTYSET(mask);
  751         mask.__bits[0] = args->mask;
  752         linux_to_bsd_sigset(&mask, &sigmask);
  753         return (kern_sigsuspend(td, sigmask));
  754 }
  755 
  756 int
  757 linux_rt_sigsuspend(struct thread *td, struct linux_rt_sigsuspend_args *uap)
  758 {
  759         l_sigset_t lmask;
  760         sigset_t sigmask;
  761         int error;
  762 
  763 #ifdef DEBUG
  764         if (ldebug(rt_sigsuspend))
  765                 printf(ARGS(rt_sigsuspend, "%p, %d"),
  766                     (void *)uap->newset, uap->sigsetsize);
  767 #endif
  768 
  769         if (uap->sigsetsize != sizeof(l_sigset_t))
  770                 return (EINVAL);
  771 
  772         error = copyin(uap->newset, &lmask, sizeof(l_sigset_t));
  773         if (error)
  774                 return (error);
  775 
  776         linux_to_bsd_sigset(&lmask, &sigmask);
  777         return (kern_sigsuspend(td, sigmask));
  778 }
  779 
  780 int
  781 linux_pause(struct thread *td, struct linux_pause_args *args)
  782 {
  783         struct proc *p = td->td_proc;
  784         sigset_t sigmask;
  785 
  786 #ifdef DEBUG
  787         if (ldebug(pause))
  788                 printf(ARGS(pause, ""));
  789 #endif
  790 
  791         PROC_LOCK(p);
  792         sigmask = td->td_sigmask;
  793         PROC_UNLOCK(p);
  794         return (kern_sigsuspend(td, sigmask));
  795 }
  796 
  797 int
  798 linux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap)
  799 {
  800         stack_t ss, oss;
  801         l_stack_t lss;
  802         int error;
  803 
  804 #ifdef DEBUG
  805         if (ldebug(sigaltstack))
  806                 printf(ARGS(sigaltstack, "%p, %p"), uap->uss, uap->uoss);
  807 #endif
  808 
  809         if (uap->uss != NULL) {
  810                 error = copyin(uap->uss, &lss, sizeof(l_stack_t));
  811                 if (error)
  812                         return (error);
  813 
  814                 ss.ss_sp = lss.ss_sp;
  815                 ss.ss_size = lss.ss_size;
  816                 ss.ss_flags = linux_to_bsd_sigaltstack(lss.ss_flags);
  817         }
  818         error = kern_sigaltstack(td, (uap->uss != NULL) ? &ss : NULL,
  819             (uap->uoss != NULL) ? &oss : NULL);
  820         if (!error && uap->uoss != NULL) {
  821                 lss.ss_sp = oss.ss_sp;
  822                 lss.ss_size = oss.ss_size;
  823                 lss.ss_flags = bsd_to_linux_sigaltstack(oss.ss_flags);
  824                 error = copyout(&lss, uap->uoss, sizeof(l_stack_t));
  825         }
  826 
  827         return (error);
  828 }
  829 
  830 int
  831 linux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args)
  832 {
  833         struct ftruncate_args sa;
  834 
  835 #ifdef DEBUG
  836         if (ldebug(ftruncate64))
  837                 printf(ARGS(ftruncate64, "%u, %jd"), args->fd,
  838                     (intmax_t)args->length);
  839 #endif
  840 
  841         sa.fd = args->fd;
  842         sa.length = args->length;
  843         return ftruncate(td, &sa);
  844 }
  845 
  846 int
  847 linux_set_thread_area(struct thread *td, struct linux_set_thread_area_args *args)
  848 {
  849         struct l_user_desc info;
  850         int error;
  851         int idx;
  852         int a[2];
  853         struct segment_descriptor sd;
  854 
  855         error = copyin(args->desc, &info, sizeof(struct l_user_desc));
  856         if (error)
  857                 return (error);
  858 
  859 #ifdef DEBUG
  860         if (ldebug(set_thread_area))
  861                 printf(ARGS(set_thread_area, "%i, %x, %x, %i, %i, %i, %i, %i, %i\n"),
  862                       info.entry_number,
  863                       info.base_addr,
  864                       info.limit,
  865                       info.seg_32bit,
  866                       info.contents,
  867                       info.read_exec_only,
  868                       info.limit_in_pages,
  869                       info.seg_not_present,
  870                       info.useable);
  871 #endif
  872 
  873         idx = info.entry_number;
  874         /* 
  875          * Semantics of linux version: every thread in the system has array of
  876          * 3 tls descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown. This 
  877          * syscall loads one of the selected tls decriptors with a value and
  878          * also loads GDT descriptors 6, 7 and 8 with the content of the
  879          * per-thread descriptors.
  880          *
  881          * Semantics of fbsd version: I think we can ignore that linux has 3 
  882          * per-thread descriptors and use just the 1st one. The tls_array[]
  883          * is used only in set/get-thread_area() syscalls and for loading the
  884          * GDT descriptors. In fbsd we use just one GDT descriptor for TLS so
  885          * we will load just one. 
  886          *
  887          * XXX: this doesn't work when a user space process tries to use more
  888          * than 1 TLS segment. Comment in the linux sources says wine might do
  889          * this.
  890          */
  891 
  892         /* 
  893          * we support just GLIBC TLS now 
  894          * we should let 3 proceed as well because we use this segment so
  895          * if code does two subsequent calls it should succeed
  896          */
  897         if (idx != 6 && idx != -1 && idx != 3)
  898                 return (EINVAL);
  899 
  900         /* 
  901          * we have to copy out the GDT entry we use
  902          * FreeBSD uses GDT entry #3 for storing %gs so load that
  903          *
  904          * XXX: what if a user space program doesn't check this value and tries
  905          * to use 6, 7 or 8? 
  906          */
  907         idx = info.entry_number = 3;
  908         error = copyout(&info, args->desc, sizeof(struct l_user_desc));
  909         if (error)
  910                 return (error);
  911 
  912         if (LINUX_LDT_empty(&info)) {
  913                 a[0] = 0;
  914                 a[1] = 0;
  915         } else {
  916                 a[0] = LINUX_LDT_entry_a(&info);
  917                 a[1] = LINUX_LDT_entry_b(&info);
  918         }
  919 
  920         memcpy(&sd, &a, sizeof(a));
  921 #ifdef DEBUG
  922         if (ldebug(set_thread_area))
  923                 printf("Segment created in set_thread_area: lobase: %x, hibase: %x, lolimit: %x, hilimit: %x, type: %i, dpl: %i, p: %i, xx: %i, def32: %i, gran: %i\n", sd.sd_lobase,
  924                         sd.sd_hibase,
  925                         sd.sd_lolimit,
  926                         sd.sd_hilimit,
  927                         sd.sd_type,
  928                         sd.sd_dpl,
  929                         sd.sd_p,
  930                         sd.sd_xx,
  931                         sd.sd_def32,
  932                         sd.sd_gran);
  933 #endif
  934 
  935         /* this is taken from i386 version of cpu_set_user_tls() */
  936         critical_enter();
  937         /* set %gs */
  938         td->td_pcb->pcb_gsd = sd;
  939         PCPU_GET(fsgs_gdt)[1] = sd;
  940         load_gs(GSEL(GUGS_SEL, SEL_UPL));
  941         critical_exit();
  942    
  943         return (0);
  944 }
  945 
  946 int
  947 linux_get_thread_area(struct thread *td, struct linux_get_thread_area_args *args)
  948 {
  949         
  950         struct l_user_desc info;
  951         int error;
  952         int idx;
  953         struct l_desc_struct desc;
  954         struct segment_descriptor sd;
  955 
  956 #ifdef DEBUG
  957         if (ldebug(get_thread_area))
  958                 printf(ARGS(get_thread_area, "%p"), args->desc);
  959 #endif
  960 
  961         error = copyin(args->desc, &info, sizeof(struct l_user_desc));
  962         if (error)
  963                 return (error);
  964 
  965         idx = info.entry_number;
  966         /* XXX: I am not sure if we want 3 to be allowed too. */
  967         if (idx != 6 && idx != 3)
  968                 return (EINVAL);
  969 
  970         idx = 3;
  971 
  972         memset(&info, 0, sizeof(info));
  973 
  974         sd = PCPU_GET(fsgs_gdt)[1];
  975 
  976         memcpy(&desc, &sd, sizeof(desc));
  977 
  978         info.entry_number = idx;
  979         info.base_addr = LINUX_GET_BASE(&desc);
  980         info.limit = LINUX_GET_LIMIT(&desc);
  981         info.seg_32bit = LINUX_GET_32BIT(&desc);
  982         info.contents = LINUX_GET_CONTENTS(&desc);
  983         info.read_exec_only = !LINUX_GET_WRITABLE(&desc);
  984         info.limit_in_pages = LINUX_GET_LIMIT_PAGES(&desc);
  985         info.seg_not_present = !LINUX_GET_PRESENT(&desc);
  986         info.useable = LINUX_GET_USEABLE(&desc);
  987 
  988         error = copyout(&info, args->desc, sizeof(struct l_user_desc));
  989         if (error)
  990                 return (EFAULT);
  991 
  992         return (0);
  993 }
  994 
  995 /* copied from kern/kern_time.c */
  996 int
  997 linux_timer_create(struct thread *td, struct linux_timer_create_args *args)
  998 {
  999         return ktimer_create(td, (struct ktimer_create_args *) args);
 1000 }
 1001 
 1002 int
 1003 linux_timer_settime(struct thread *td, struct linux_timer_settime_args *args)
 1004 {
 1005         return ktimer_settime(td, (struct ktimer_settime_args *) args);
 1006 }
 1007 
 1008 int
 1009 linux_timer_gettime(struct thread *td, struct linux_timer_gettime_args *args)
 1010 {
 1011         return ktimer_gettime(td, (struct ktimer_gettime_args *) args);
 1012 }
 1013 
 1014 int
 1015 linux_timer_getoverrun(struct thread *td, struct linux_timer_getoverrun_args *args)
 1016 {
 1017         return ktimer_getoverrun(td, (struct ktimer_getoverrun_args *) args);
 1018 }
 1019 
 1020 int
 1021 linux_timer_delete(struct thread *td, struct linux_timer_delete_args *args)
 1022 {
 1023         return ktimer_delete(td, (struct ktimer_delete_args *) args);
 1024 }
 1025 
 1026 /* XXX: this wont work with module - convert it */
 1027 int
 1028 linux_mq_open(struct thread *td, struct linux_mq_open_args *args)
 1029 {
 1030 #ifdef P1003_1B_MQUEUE
 1031         return kmq_open(td, (struct kmq_open_args *) args);
 1032 #else
 1033         return (ENOSYS);
 1034 #endif
 1035 }
 1036 
 1037 int
 1038 linux_mq_unlink(struct thread *td, struct linux_mq_unlink_args *args)
 1039 {
 1040 #ifdef P1003_1B_MQUEUE
 1041         return kmq_unlink(td, (struct kmq_unlink_args *) args);
 1042 #else
 1043         return (ENOSYS);
 1044 #endif
 1045 }
 1046 
 1047 int
 1048 linux_mq_timedsend(struct thread *td, struct linux_mq_timedsend_args *args)
 1049 {
 1050 #ifdef P1003_1B_MQUEUE
 1051         return kmq_timedsend(td, (struct kmq_timedsend_args *) args);
 1052 #else
 1053         return (ENOSYS);
 1054 #endif
 1055 }
 1056 
 1057 int
 1058 linux_mq_timedreceive(struct thread *td, struct linux_mq_timedreceive_args *args)
 1059 {
 1060 #ifdef P1003_1B_MQUEUE
 1061         return kmq_timedreceive(td, (struct kmq_timedreceive_args *) args);
 1062 #else
 1063         return (ENOSYS);
 1064 #endif
 1065 }
 1066 
 1067 int
 1068 linux_mq_notify(struct thread *td, struct linux_mq_notify_args *args)
 1069 {
 1070 #ifdef P1003_1B_MQUEUE
 1071         return kmq_notify(td, (struct kmq_notify_args *) args);
 1072 #else
 1073         return (ENOSYS);
 1074 #endif
 1075 }
 1076 
 1077 int
 1078 linux_mq_getsetattr(struct thread *td, struct linux_mq_getsetattr_args *args)
 1079 {
 1080 #ifdef P1003_1B_MQUEUE
 1081         return kmq_setattr(td, (struct kmq_setattr_args *) args);
 1082 #else
 1083         return (ENOSYS);
 1084 #endif
 1085 }
 1086 
 1087 int
 1088 linux_wait4(struct thread *td, struct linux_wait4_args *args)
 1089 {
 1090         int error, options;
 1091         struct rusage ru, *rup;
 1092 
 1093 #ifdef DEBUG
 1094         if (ldebug(wait4))
 1095                 printf(ARGS(wait4, "%d, %p, %d, %p"),
 1096                     args->pid, (void *)args->status, args->options,
 1097                     (void *)args->rusage);
 1098 #endif
 1099 
 1100         options = (args->options & (WNOHANG | WUNTRACED));
 1101         /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
 1102         if (args->options & __WCLONE)
 1103                 options |= WLINUXCLONE;
 1104 
 1105         if (args->rusage != NULL)
 1106                 rup = &ru;
 1107         else
 1108                 rup = NULL;
 1109         error = linux_common_wait(td, args->pid, args->status, options, rup);
 1110         if (error)
 1111                 return (error);
 1112         if (args->rusage != NULL)
 1113                 error = copyout(&ru, args->rusage, sizeof(ru));
 1114 
 1115         return (error);
 1116 }

Cache object: a1ebffeed4369514577c73ce734ca4d0


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.