1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 2002 Doug Rabson
5 * Copyright (c) 1994-1995 Søren Schmidt
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer
13 * in this position and unchanged.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. The name of the author may not be used to endorse or promote products
18 * derived from this software without specific prior written permission
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34
35 #include "opt_compat.h"
36
37 #include <sys/param.h>
38 #include <sys/blist.h>
39 #include <sys/fcntl.h>
40 #if defined(__i386__)
41 #include <sys/imgact_aout.h>
42 #endif
43 #include <sys/jail.h>
44 #include <sys/kernel.h>
45 #include <sys/limits.h>
46 #include <sys/lock.h>
47 #include <sys/malloc.h>
48 #include <sys/mman.h>
49 #include <sys/mount.h>
50 #include <sys/msgbuf.h>
51 #include <sys/mutex.h>
52 #include <sys/namei.h>
53 #include <sys/priv.h>
54 #include <sys/proc.h>
55 #include <sys/procctl.h>
56 #include <sys/reboot.h>
57 #include <sys/racct.h>
58 #include <sys/random.h>
59 #include <sys/resourcevar.h>
60 #include <sys/sched.h>
61 #include <sys/sdt.h>
62 #include <sys/signalvar.h>
63 #include <sys/stat.h>
64 #include <sys/syscallsubr.h>
65 #include <sys/sysctl.h>
66 #include <sys/sysproto.h>
67 #include <sys/systm.h>
68 #include <sys/time.h>
69 #include <sys/vmmeter.h>
70 #include <sys/vnode.h>
71 #include <sys/wait.h>
72 #include <sys/cpuset.h>
73 #include <sys/uio.h>
74
75 #include <security/mac/mac_framework.h>
76
77 #include <vm/vm.h>
78 #include <vm/pmap.h>
79 #include <vm/vm_kern.h>
80 #include <vm/vm_map.h>
81 #include <vm/vm_extern.h>
82 #include <vm/swap_pager.h>
83
84 #ifdef COMPAT_LINUX32
85 #include <machine/../linux32/linux.h>
86 #include <machine/../linux32/linux32_proto.h>
87 #else
88 #include <machine/../linux/linux.h>
89 #include <machine/../linux/linux_proto.h>
90 #endif
91
92 #include <compat/linux/linux_dtrace.h>
93 #include <compat/linux/linux_file.h>
94 #include <compat/linux/linux_mib.h>
95 #include <compat/linux/linux_signal.h>
96 #include <compat/linux/linux_timer.h>
97 #include <compat/linux/linux_util.h>
98 #include <compat/linux/linux_sysproto.h>
99 #include <compat/linux/linux_emul.h>
100 #include <compat/linux/linux_misc.h>
101
102 int stclohz; /* Statistics clock frequency */
103
/*
 * Translation table from Linux RLIMIT_* resource numbers to the native
 * FreeBSD RLIMIT_* values; indexed by the Linux resource number
 * (0 .. LINUX_RLIM_NLIMITS - 1).
 */
static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = {
	RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
	RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
	RLIMIT_MEMLOCK, RLIMIT_AS
};
109
/* Layout of the structure filled in by Linux sysinfo(2). */
struct l_sysinfo {
	l_long		uptime;		/* Seconds since boot */
	l_ulong		loads[3];	/* 1, 5, and 15 minute load averages */
#define LINUX_SYSINFO_LOADS_SCALE 65536
	l_ulong		totalram;	/* Total usable main memory size */
	l_ulong		freeram;	/* Available memory size */
	l_ulong		sharedram;	/* Amount of shared memory */
	l_ulong		bufferram;	/* Memory used by buffers */
	l_ulong		totalswap;	/* Total swap space size */
	l_ulong		freeswap;	/* swap space still available */
	l_ushort	procs;		/* Number of current processes */
	l_ushort	pads;		/* explicit alignment padding */
	l_ulong		totalhigh;	/* Total high memory (always 0 here) */
	l_ulong		freehigh;	/* Free high memory (always 0 here) */
	l_uint		mem_unit;	/* Unit size of the memory fields */
	char		_f[20-2*sizeof(l_long)-sizeof(l_int)];	/* padding */
};
127
/*
 * Sixth argument of Linux pselect6(2): carries the user pointer to the
 * signal mask and its size, since the syscall ABI has no room for two
 * more direct arguments.
 */
struct l_pselect6arg {
	l_uintptr_t	ss;	/* user address of the signal mask */
	l_size_t	ss_len;	/* size of that mask, in bytes */
};
132
133 static int linux_utimensat_nsec_valid(l_long);
134
135
136 int
137 linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args)
138 {
139 struct l_sysinfo sysinfo;
140 int i, j;
141 struct timespec ts;
142
143 bzero(&sysinfo, sizeof(sysinfo));
144 getnanouptime(&ts);
145 if (ts.tv_nsec != 0)
146 ts.tv_sec++;
147 sysinfo.uptime = ts.tv_sec;
148
149 /* Use the information from the mib to get our load averages */
150 for (i = 0; i < 3; i++)
151 sysinfo.loads[i] = averunnable.ldavg[i] *
152 LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale;
153
154 sysinfo.totalram = physmem * PAGE_SIZE;
155 sysinfo.freeram = (u_long)vm_free_count() * PAGE_SIZE;
156
157 /*
158 * sharedram counts pages allocated to named, swap-backed objects such
159 * as shared memory segments and tmpfs files. There is no cheap way to
160 * compute this, so just leave the field unpopulated. Linux itself only
161 * started setting this field in the 3.x timeframe.
162 */
163 sysinfo.sharedram = 0;
164 sysinfo.bufferram = 0;
165
166 swap_pager_status(&i, &j);
167 sysinfo.totalswap = i * PAGE_SIZE;
168 sysinfo.freeswap = (i - j) * PAGE_SIZE;
169
170 sysinfo.procs = nprocs;
171
172 /*
173 * Platforms supported by the emulation layer do not have a notion of
174 * high memory.
175 */
176 sysinfo.totalhigh = 0;
177 sysinfo.freehigh = 0;
178
179 sysinfo.mem_unit = 1;
180
181 return (copyout(&sysinfo, args->info, sizeof(sysinfo)));
182 }
183
184 #ifdef LINUX_LEGACY_SYSCALLS
185 int
186 linux_alarm(struct thread *td, struct linux_alarm_args *args)
187 {
188 struct itimerval it, old_it;
189 u_int secs;
190 int error;
191
192 secs = args->secs;
193 /*
194 * Linux alarm() is always successful. Limit secs to INT32_MAX / 2
195 * to match kern_setitimer()'s limit to avoid error from it.
196 *
197 * XXX. Linux limit secs to INT_MAX on 32 and does not limit on 64-bit
198 * platforms.
199 */
200 if (secs > INT32_MAX / 2)
201 secs = INT32_MAX / 2;
202
203 it.it_value.tv_sec = secs;
204 it.it_value.tv_usec = 0;
205 timevalclear(&it.it_interval);
206 error = kern_setitimer(td, ITIMER_REAL, &it, &old_it);
207 KASSERT(error == 0, ("kern_setitimer returns %d", error));
208
209 if ((old_it.it_value.tv_sec == 0 && old_it.it_value.tv_usec > 0) ||
210 old_it.it_value.tv_usec >= 500000)
211 old_it.it_value.tv_sec++;
212 td->td_retval[0] = old_it.it_value.tv_sec;
213 return (0);
214 }
215 #endif
216
217 int
218 linux_brk(struct thread *td, struct linux_brk_args *args)
219 {
220 struct vmspace *vm = td->td_proc->p_vmspace;
221 uintptr_t new, old;
222
223 old = (uintptr_t)vm->vm_daddr + ctob(vm->vm_dsize);
224 new = (uintptr_t)args->dsend;
225 if ((caddr_t)new > vm->vm_daddr && !kern_break(td, &new))
226 td->td_retval[0] = (register_t)new;
227 else
228 td->td_retval[0] = (register_t)old;
229
230 return (0);
231 }
232
233 #if defined(__i386__)
234 /* XXX: what about amd64/linux32? */
235
/*
 * uselib(2): map a Linux a.out format shared library into the calling
 * process's address space.  Performs its own exec-style permission
 * checks on the vnode, validates the a.out header, maps text+data
 * (copy-on-write when the file offset is page aligned, otherwise read
 * into an anonymous mapping), and allocates the BSS.
 *
 * Cleanup is via a single goto ladder; the `locked', `opened' and
 * `textset' flags record which vnode state still has to be undone.
 */
int
linux_uselib(struct thread *td, struct linux_uselib_args *args)
{
	struct nameidata ni;
	struct vnode *vp;
	struct exec *a_out;
	vm_map_t map;
	vm_map_entry_t entry;
	struct vattr attr;
	vm_offset_t vmaddr;
	unsigned long file_offset;
	unsigned long bss_size;
	char *library;
	ssize_t aresid;
	int error;
	bool locked, opened, textset;

	/* Convert the Linux path into a native one (may allocate). */
	LCONVPATHEXIST(td, args->library, &library);

	a_out = NULL;
	vp = NULL;
	locked = false;
	textset = false;
	opened = false;

	NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1,
	    UIO_SYSSPACE, library, td);
	error = namei(&ni);
	LFREEPATH(library);
	if (error)
		goto cleanup;

	vp = ni.ni_vp;
	NDFREE(&ni, NDF_ONLY_PNBUF);

	/*
	 * From here on down, we have a locked vnode that must be unlocked.
	 * XXX: The code below largely duplicates exec_check_permissions().
	 */
	locked = true;

	/* Executable? */
	error = VOP_GETATTR(vp, &attr, td->td_ucred);
	if (error)
		goto cleanup;

	if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
	    ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) {
		/* EACCESS is what exec(2) returns. */
		error = ENOEXEC;
		goto cleanup;
	}

	/* Sensible size? */
	if (attr.va_size == 0) {
		error = ENOEXEC;
		goto cleanup;
	}

	/* Can we access it? */
	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
	if (error)
		goto cleanup;

	/*
	 * XXX: This should use vn_open() so that it is properly authorized,
	 * and to reduce code redundancy all over the place here.
	 * XXX: Not really, it duplicates far more of exec_check_permissions()
	 * than vn_open().
	 */
#ifdef MAC
	error = mac_vnode_check_open(td->td_ucred, vp, VREAD);
	if (error)
		goto cleanup;
#endif
	error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL);
	if (error)
		goto cleanup;
	opened = true;

	/* Pull in executable header into exec_map */
	error = vm_mmap(exec_map, (vm_offset_t *)&a_out, PAGE_SIZE,
	    VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0);
	if (error)
		goto cleanup;

	/* Is it a Linux binary ? (0x64 in the upper magic byte) */
	if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
		error = ENOEXEC;
		goto cleanup;
	}

	/*
	 * While we are here, we should REALLY do some more checks
	 */

	/* Set file/virtual offset based on a.out variant. */
	switch ((int)(a_out->a_magic & 0xffff)) {
	case 0413:	/* ZMAGIC */
		file_offset = 1024;
		break;
	case 0314:	/* QMAGIC */
		file_offset = 0;
		break;
	default:
		error = ENOEXEC;
		goto cleanup;
	}

	bss_size = round_page(a_out->a_bss);

	/* Check various fields in header for validity/bounds. */
	if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
		error = ENOEXEC;
		goto cleanup;
	}

	/* text + data can't exceed file size */
	if (a_out->a_data + a_out->a_text > attr.va_size) {
		error = EFAULT;
		goto cleanup;
	}

	/*
	 * text/data/bss must not exceed limits
	 * XXX - this is not complete. it should check current usage PLUS
	 * the resources needed by this library.
	 */
	PROC_LOCK(td->td_proc);
	if (a_out->a_text > maxtsiz ||
	    a_out->a_data + bss_size > lim_cur_proc(td->td_proc, RLIMIT_DATA) ||
	    racct_set(td->td_proc, RACCT_DATA, a_out->a_data +
	    bss_size) != 0) {
		PROC_UNLOCK(td->td_proc);
		error = ENOMEM;
		goto cleanup;
	}
	PROC_UNLOCK(td->td_proc);

	/*
	 * Prevent more writers (mark the vnode as an active text file).
	 */
	error = VOP_SET_TEXT(vp);
	if (error != 0)
		goto cleanup;
	textset = true;

	/*
	 * Lock no longer needed
	 */
	locked = false;
	VOP_UNLOCK(vp, 0);

	/*
	 * Check if file_offset page aligned. Currently we cannot handle
	 * misalinged file offsets, and so we read in the entire image
	 * (what a waste).
	 */
	if (file_offset & PAGE_MASK) {
		/* Map text+data read/write/execute */

		/* a_entry is the load address and is page aligned */
		vmaddr = trunc_page(a_out->a_entry);

		/* get anon user mapping, read+write+execute */
		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
		    &vmaddr, a_out->a_text + a_out->a_data, 0, VMFS_NO_SPACE,
		    VM_PROT_ALL, VM_PROT_ALL, 0);
		if (error)
			goto cleanup;

		/* Read the whole image in; aresid != 0 means short read. */
		error = vn_rdwr(UIO_READ, vp, (void *)vmaddr, file_offset,
		    a_out->a_text + a_out->a_data, UIO_USERSPACE, 0,
		    td->td_ucred, NOCRED, &aresid, td);
		if (error != 0)
			goto cleanup;
		if (aresid != 0) {
			error = ENOEXEC;
			goto cleanup;
		}
	} else {
		/*
		 * for QMAGIC, a_entry is 20 bytes beyond the load address
		 * to skip the executable header
		 */
		vmaddr = trunc_page(a_out->a_entry);

		/*
		 * Map it all into the process's space as a single
		 * copy-on-write "data" segment.
		 */
		map = &td->td_proc->p_vmspace->vm_map;
		error = vm_mmap(map, &vmaddr,
		    a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL,
		    MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset);
		if (error)
			goto cleanup;
		vm_map_lock(map);
		if (!vm_map_lookup_entry(map, vmaddr, &entry)) {
			vm_map_unlock(map);
			error = EDOOFUS;
			goto cleanup;
		}
		/*
		 * The mapping now owns the text reference; cleanup must
		 * no longer undo VOP_SET_TEXT().
		 */
		entry->eflags |= MAP_ENTRY_VN_EXEC;
		vm_map_unlock(map);
		textset = false;
	}

	if (bss_size != 0) {
		/* Calculate BSS start address */
		vmaddr = trunc_page(a_out->a_entry) + a_out->a_text +
		    a_out->a_data;

		/* allocate some 'anon' space */
		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
		    &vmaddr, bss_size, 0, VMFS_NO_SPACE, VM_PROT_ALL,
		    VM_PROT_ALL, 0);
		if (error)
			goto cleanup;
	}

cleanup:
	if (opened) {
		if (locked)
			VOP_UNLOCK(vp, 0);
		locked = false;
		VOP_CLOSE(vp, FREAD, td->td_ucred, td);
	}
	if (textset) {
		/* VOP_UNSET_TEXT_CHECKED() requires the vnode locked. */
		if (!locked) {
			locked = true;
			VOP_LOCK(vp, LK_SHARED | LK_RETRY);
		}
		VOP_UNSET_TEXT_CHECKED(vp);
	}
	if (locked)
		VOP_UNLOCK(vp, 0);

	/* Release the temporary mapping. */
	if (a_out)
		kmap_free_wakeup(exec_map, (vm_offset_t)a_out, PAGE_SIZE);

	return (error);
}
480
481 #endif /* __i386__ */
482
483 #ifdef LINUX_LEGACY_SYSCALLS
/*
 * select(2): wait for readiness on file descriptor sets with an
 * optional timeout.  Unlike the native call, Linux rewrites the
 * timeout with the amount of time that was left unslept, so on
 * success the residual time is computed and copied back out.
 */
int
linux_select(struct thread *td, struct linux_select_args *args)
{
	l_timeval ltv;
	struct timeval tv0, tv1, utv, *tvp;
	int error;

	/*
	 * Store current time for computation of the amount of
	 * time left.
	 */
	if (args->timeout) {
		if ((error = copyin(args->timeout, &ltv, sizeof(ltv))))
			goto select_out;
		utv.tv_sec = ltv.tv_sec;
		utv.tv_usec = ltv.tv_usec;

		if (itimerfix(&utv)) {
			/*
			 * The timeval was invalid.  Convert it to something
			 * valid that will act as it does under Linux.
			 */
			utv.tv_sec += utv.tv_usec / 1000000;
			utv.tv_usec %= 1000000;
			if (utv.tv_usec < 0) {
				utv.tv_sec -= 1;
				utv.tv_usec += 1000000;
			}
			if (utv.tv_sec < 0)
				timevalclear(&utv);
		}
		microtime(&tv0);
		tvp = &utv;
	} else
		tvp = NULL;

	error = kern_select(td, args->nfds, args->readfds, args->writefds,
	    args->exceptfds, tvp, LINUX_NFDBITS);
	if (error)
		goto select_out;

	if (args->timeout) {
		if (td->td_retval[0]) {
			/*
			 * Compute how much time was left of the timeout,
			 * by subtracting the current time and the time
			 * before we started the call, and subtracting
			 * that result from the user-supplied value.
			 */
			microtime(&tv1);
			timevalsub(&tv1, &tv0);
			timevalsub(&utv, &tv1);
			if (utv.tv_sec < 0)
				timevalclear(&utv);
		} else
			/* Timed out: report zero time remaining. */
			timevalclear(&utv);
		ltv.tv_sec = utv.tv_sec;
		ltv.tv_usec = utv.tv_usec;
		if ((error = copyout(&ltv, args->timeout, sizeof(ltv))))
			goto select_out;
	}

select_out:
	return (error);
}
549 #endif
550
551 int
552 linux_mremap(struct thread *td, struct linux_mremap_args *args)
553 {
554 uintptr_t addr;
555 size_t len;
556 int error = 0;
557
558 if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) {
559 td->td_retval[0] = 0;
560 return (EINVAL);
561 }
562
563 /*
564 * Check for the page alignment.
565 * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK.
566 */
567 if (args->addr & PAGE_MASK) {
568 td->td_retval[0] = 0;
569 return (EINVAL);
570 }
571
572 args->new_len = round_page(args->new_len);
573 args->old_len = round_page(args->old_len);
574
575 if (args->new_len > args->old_len) {
576 td->td_retval[0] = 0;
577 return (ENOMEM);
578 }
579
580 if (args->new_len < args->old_len) {
581 addr = args->addr + args->new_len;
582 len = args->old_len - args->new_len;
583 error = kern_munmap(td, addr, len);
584 }
585
586 td->td_retval[0] = error ? 0 : (uintptr_t)args->addr;
587 return (error);
588 }
589
590 #define LINUX_MS_ASYNC 0x0001
591 #define LINUX_MS_INVALIDATE 0x0002
592 #define LINUX_MS_SYNC 0x0004
593
/*
 * msync(2): flush changes in a mapped region back to the file.
 * LINUX_MS_SYNC is masked out before the flags reach kern_msync();
 * the remaining bits are passed through unchanged.
 * NOTE(review): this presumably relies on LINUX_MS_ASYNC and
 * LINUX_MS_INVALIDATE matching the native MS_ASYNC/MS_INVALIDATE
 * values -- confirm against <sys/mman.h>.
 */
int
linux_msync(struct thread *td, struct linux_msync_args *args)
{

	return (kern_msync(td, args->addr, args->len,
	    args->fl & ~LINUX_MS_SYNC));
}
601
602 #ifdef LINUX_LEGACY_SYSCALLS
603 int
604 linux_time(struct thread *td, struct linux_time_args *args)
605 {
606 struct timeval tv;
607 l_time_t tm;
608 int error;
609
610 microtime(&tv);
611 tm = tv.tv_sec;
612 if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm))))
613 return (error);
614 td->td_retval[0] = tm;
615 return (0);
616 }
617 #endif
618
/* Linux struct tms: CPU time accounting returned by times(2), in ticks. */
struct l_times_argv {
	l_clock_t	tms_utime;	/* user CPU time */
	l_clock_t	tms_stime;	/* system CPU time */
	l_clock_t	tms_cutime;	/* user CPU time of waited-for children */
	l_clock_t	tms_cstime;	/* system CPU time of waited-for children */
};
625
626
627 /*
628 * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value.
629 * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK
630 * auxiliary vector entry.
631 */
632 #define CLK_TCK 100
633
634 #define CONVOTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK))
635 #define CONVNTCK(r) (r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz))
636
637 #define CONVTCK(r) (linux_kernver(td) >= LINUX_KERNVER_2004000 ? \
638 CONVNTCK(r) : CONVOTCK(r))
639
/*
 * times(2): report consumed CPU time, in clock ticks, for the calling
 * process and its waited-for children, and return the elapsed uptime
 * in ticks.  CONVTCK() picks the tick rate based on the emulated
 * kernel version (see the macros above).
 */
int
linux_times(struct thread *td, struct linux_times_args *args)
{
	struct timeval tv, utime, stime, cutime, cstime;
	struct l_times_argv tms;
	struct proc *p;
	int error;

	/* Linux allows a NULL buffer; only the return value is wanted then. */
	if (args->buf != NULL) {
		p = td->td_proc;
		PROC_LOCK(p);
		PROC_STATLOCK(p);
		calcru(p, &utime, &stime);
		PROC_STATUNLOCK(p);
		calccru(p, &cutime, &cstime);
		PROC_UNLOCK(p);

		tms.tms_utime = CONVTCK(utime);
		tms.tms_stime = CONVTCK(stime);

		tms.tms_cutime = CONVTCK(cutime);
		tms.tms_cstime = CONVTCK(cstime);

		if ((error = copyout(&tms, args->buf, sizeof(tms))))
			return (error);
	}

	/* The return value is the system uptime expressed in ticks. */
	microuptime(&tv);
	td->td_retval[0] = (int)CONVTCK(tv);
	return (0);
}
671
672 int
673 linux_newuname(struct thread *td, struct linux_newuname_args *args)
674 {
675 struct l_new_utsname utsname;
676 char osname[LINUX_MAX_UTSNAME];
677 char osrelease[LINUX_MAX_UTSNAME];
678 char *p;
679
680 linux_get_osname(td, osname);
681 linux_get_osrelease(td, osrelease);
682
683 bzero(&utsname, sizeof(utsname));
684 strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME);
685 getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME);
686 getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME);
687 strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME);
688 strlcpy(utsname.version, version, LINUX_MAX_UTSNAME);
689 for (p = utsname.version; *p != '\0'; ++p)
690 if (*p == '\n') {
691 *p = '\0';
692 break;
693 }
694 #if defined(__amd64__)
695 /*
696 * On amd64, Linux uname(2) needs to return "x86_64"
697 * for both 64-bit and 32-bit applications. On 32-bit,
698 * the string returned by getauxval(AT_PLATFORM) needs
699 * to remain "i686", though.
700 */
701 strlcpy(utsname.machine, "x86_64", LINUX_MAX_UTSNAME);
702 #else
703 strlcpy(utsname.machine, linux_kplatform, LINUX_MAX_UTSNAME);
704 #endif
705
706 return (copyout(&utsname, args->buf, sizeof(utsname)));
707 }
708
/* Linux struct utimbuf: access/modification times for utime(2). */
struct l_utimbuf {
	l_time_t l_actime;	/* access time, in seconds */
	l_time_t l_modtime;	/* modification time, in seconds */
};
713
714 #ifdef LINUX_LEGACY_SYSCALLS
715 int
716 linux_utime(struct thread *td, struct linux_utime_args *args)
717 {
718 struct timeval tv[2], *tvp;
719 struct l_utimbuf lut;
720 char *fname;
721 int error;
722
723 LCONVPATHEXIST(td, args->fname, &fname);
724
725 if (args->times) {
726 if ((error = copyin(args->times, &lut, sizeof lut))) {
727 LFREEPATH(fname);
728 return (error);
729 }
730 tv[0].tv_sec = lut.l_actime;
731 tv[0].tv_usec = 0;
732 tv[1].tv_sec = lut.l_modtime;
733 tv[1].tv_usec = 0;
734 tvp = tv;
735 } else
736 tvp = NULL;
737
738 error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, tvp,
739 UIO_SYSSPACE);
740 LFREEPATH(fname);
741 return (error);
742 }
743 #endif
744
745 #ifdef LINUX_LEGACY_SYSCALLS
746 int
747 linux_utimes(struct thread *td, struct linux_utimes_args *args)
748 {
749 l_timeval ltv[2];
750 struct timeval tv[2], *tvp = NULL;
751 char *fname;
752 int error;
753
754 LCONVPATHEXIST(td, args->fname, &fname);
755
756 if (args->tptr != NULL) {
757 if ((error = copyin(args->tptr, ltv, sizeof ltv))) {
758 LFREEPATH(fname);
759 return (error);
760 }
761 tv[0].tv_sec = ltv[0].tv_sec;
762 tv[0].tv_usec = ltv[0].tv_usec;
763 tv[1].tv_sec = ltv[1].tv_sec;
764 tv[1].tv_usec = ltv[1].tv_usec;
765 tvp = tv;
766 }
767
768 error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE,
769 tvp, UIO_SYSSPACE);
770 LFREEPATH(fname);
771 return (error);
772 }
773 #endif
774
775 static int
776 linux_utimensat_nsec_valid(l_long nsec)
777 {
778
779 if (nsec == LINUX_UTIME_OMIT || nsec == LINUX_UTIME_NOW)
780 return (0);
781 if (nsec >= 0 && nsec <= 999999999)
782 return (0);
783 return (1);
784 }
785
/*
 * utimensat(2): set a file's access and modification times with
 * nanosecond resolution, relative to a directory descriptor.  The
 * Linux UTIME_OMIT/UTIME_NOW markers are translated to the native
 * ones, and a NULL pathname means "operate on the descriptor itself"
 * (Linux's futimens() is implemented on top of this).
 */
int
linux_utimensat(struct thread *td, struct linux_utimensat_args *args)
{
	struct l_timespec l_times[2];
	struct timespec times[2], *timesp = NULL;
	char *path = NULL;
	int error, dfd, flags = 0;

	/* Translate Linux's AT_FDCWD sentinel to the native one. */
	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;

	if (args->flags & ~LINUX_AT_SYMLINK_NOFOLLOW)
		return (EINVAL);

	if (args->times != NULL) {
		error = copyin(args->times, l_times, sizeof(l_times));
		if (error != 0)
			return (error);

		if (linux_utimensat_nsec_valid(l_times[0].tv_nsec) != 0 ||
		    linux_utimensat_nsec_valid(l_times[1].tv_nsec) != 0)
			return (EINVAL);

		/* Map the Linux special nsec markers to the native ones. */
		times[0].tv_sec = l_times[0].tv_sec;
		switch (l_times[0].tv_nsec)
		{
		case LINUX_UTIME_OMIT:
			times[0].tv_nsec = UTIME_OMIT;
			break;
		case LINUX_UTIME_NOW:
			times[0].tv_nsec = UTIME_NOW;
			break;
		default:
			times[0].tv_nsec = l_times[0].tv_nsec;
		}

		times[1].tv_sec = l_times[1].tv_sec;
		switch (l_times[1].tv_nsec)
		{
		case LINUX_UTIME_OMIT:
			times[1].tv_nsec = UTIME_OMIT;
			break;
		case LINUX_UTIME_NOW:
			times[1].tv_nsec = UTIME_NOW;
			break;
		default:
			times[1].tv_nsec = l_times[1].tv_nsec;
			break;
		}
		timesp = times;

		/* This breaks POSIX, but is what the Linux kernel does
		 * _on purpose_ (documented in the man page for utimensat(2)),
		 * so we must follow that behaviour. */
		if (times[0].tv_nsec == UTIME_OMIT &&
		    times[1].tv_nsec == UTIME_OMIT)
			return (0);
	}

	if (args->pathname != NULL)
		LCONVPATHEXIST_AT(td, args->pathname, &path, dfd);
	else if (args->flags != 0)
		/* Flags are only meaningful together with a pathname. */
		return (EINVAL);

	if (args->flags & LINUX_AT_SYMLINK_NOFOLLOW)
		flags |= AT_SYMLINK_NOFOLLOW;

	if (path == NULL)
		/* NULL pathname: act on the descriptor (futimens case). */
		error = kern_futimens(td, dfd, timesp, UIO_SYSSPACE);
	else {
		error = kern_utimensat(td, dfd, path, UIO_SYSSPACE, timesp,
		    UIO_SYSSPACE, flags);
		LFREEPATH(path);
	}

	return (error);
}
862
863 #ifdef LINUX_LEGACY_SYSCALLS
864 int
865 linux_futimesat(struct thread *td, struct linux_futimesat_args *args)
866 {
867 l_timeval ltv[2];
868 struct timeval tv[2], *tvp = NULL;
869 char *fname;
870 int error, dfd;
871
872 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
873 LCONVPATHEXIST_AT(td, args->filename, &fname, dfd);
874
875 if (args->utimes != NULL) {
876 if ((error = copyin(args->utimes, ltv, sizeof ltv))) {
877 LFREEPATH(fname);
878 return (error);
879 }
880 tv[0].tv_sec = ltv[0].tv_sec;
881 tv[0].tv_usec = ltv[0].tv_usec;
882 tv[1].tv_sec = ltv[1].tv_sec;
883 tv[1].tv_usec = ltv[1].tv_usec;
884 tvp = tv;
885 }
886
887 error = kern_utimesat(td, dfd, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE);
888 LFREEPATH(fname);
889 return (error);
890 }
891 #endif
892
/*
 * Common back end for the Linux wait-family syscalls.  Translates the
 * Linux pid encoding (-1: any child, <0: process group, >0: specific
 * pid) into a kern_wait6() idtype/id pair, performs the wait, and
 * rewrites the returned status word with Linux signal numbers.
 */
static int
linux_common_wait(struct thread *td, int pid, int *statusp,
    int options, struct __wrusage *wrup)
{
	siginfo_t siginfo;
	idtype_t idtype;
	id_t id;
	int error, status, tmpstat;

	if (pid == WAIT_ANY) {
		idtype = P_ALL;
		id = 0;
	} else if (pid < 0) {
		idtype = P_PGID;
		id = (id_t)-pid;
	} else {
		idtype = P_PID;
		id = (id_t)pid;
	}

	/*
	 * For backward compatibility we implicitly add flags WEXITED
	 * and WTRAPPED here.
	 */
	options |= WEXITED | WTRAPPED;
	error = kern_wait6(td, idtype, id, &status, options, wrup, &siginfo);
	if (error)
		return (error);

	if (statusp) {
		tmpstat = status & 0xffff;
		if (WIFSIGNALED(tmpstat)) {
			/* Replace the BSD signal number (low 7 bits). */
			tmpstat = (tmpstat & 0xffffff80) |
			    bsd_to_linux_signal(WTERMSIG(tmpstat));
		} else if (WIFSTOPPED(tmpstat)) {
			/* The stop signal is carried in bits 8-15. */
			tmpstat = (tmpstat & 0xffff00ff) |
			    (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8);
#if defined(__amd64__) && !defined(COMPAT_LINUX32)
			if (WSTOPSIG(status) == SIGTRAP) {
				/* Let the ptrace emulation adjust the status. */
				tmpstat = linux_ptrace_status(td,
				    siginfo.si_pid, tmpstat);
			}
#endif
		} else if (WIFCONTINUED(tmpstat)) {
			/* Linux encodes continuation as 0xffff. */
			tmpstat = 0xffff;
		}
		error = copyout(&tmpstat, statusp, sizeof(int));
	}

	return (error);
}
944
945 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
946 int
947 linux_waitpid(struct thread *td, struct linux_waitpid_args *args)
948 {
949 struct linux_wait4_args wait4_args;
950
951 wait4_args.pid = args->pid;
952 wait4_args.status = args->status;
953 wait4_args.options = args->options;
954 wait4_args.rusage = NULL;
955
956 return (linux_wait4(td, &wait4_args));
957 }
958 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
959
960 int
961 linux_wait4(struct thread *td, struct linux_wait4_args *args)
962 {
963 int error, options;
964 struct __wrusage wru, *wrup;
965
966 if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG |
967 LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL))
968 return (EINVAL);
969
970 options = WEXITED;
971 linux_to_bsd_waitopts(args->options, &options);
972
973 if (args->rusage != NULL)
974 wrup = &wru;
975 else
976 wrup = NULL;
977 error = linux_common_wait(td, args->pid, args->status, options, wrup);
978 if (error != 0)
979 return (error);
980 if (args->rusage != NULL)
981 error = linux_copyout_rusage(&wru.wru_self, args->rusage);
982 return (error);
983 }
984
/*
 * waitid(2): wait for a state change in a child selected by an
 * idtype/id pair, returning the result as a siginfo record rather
 * than a packed status word.
 */
int
linux_waitid(struct thread *td, struct linux_waitid_args *args)
{
	int status, options, sig;
	struct __wrusage wru;
	siginfo_t siginfo;
	l_siginfo_t lsi;
	idtype_t idtype;
	struct proc *p;
	int error;

	options = 0;
	linux_to_bsd_waitopts(args->options, &options);

	/* Only these option bits are understood. */
	if (options & ~(WNOHANG | WNOWAIT | WEXITED | WUNTRACED | WCONTINUED))
		return (EINVAL);
	/* At least one state to wait for must be requested. */
	if (!(options & (WEXITED | WUNTRACED | WCONTINUED)))
		return (EINVAL);

	switch (args->idtype) {
	case LINUX_P_ALL:
		idtype = P_ALL;
		break;
	case LINUX_P_PID:
		if (args->id <= 0)
			return (EINVAL);
		idtype = P_PID;
		break;
	case LINUX_P_PGID:
		if (args->id <= 0)
			return (EINVAL);
		idtype = P_PGID;
		break;
	default:
		return (EINVAL);
	}

	error = kern_wait6(td, idtype, args->id, &status, options,
	    &wru, &siginfo);
	if (error != 0)
		return (error);
	if (args->rusage != NULL) {
		error = linux_copyout_rusage(&wru.wru_children,
		    args->rusage);
		if (error != 0)
			return (error);
	}
	if (args->info != NULL) {
		p = td->td_proc;
		bzero(&lsi, sizeof(lsi));
		/* A nonzero retval means a child actually changed state. */
		if (td->td_retval[0] != 0) {
			sig = bsd_to_linux_signal(siginfo.si_signo);
			siginfo_to_lsiginfo(&siginfo, &lsi, sig);
		}
		error = copyout(&lsi, args->info, sizeof(lsi));
	}
	/* Linux waitid() always returns 0 on success. */
	td->td_retval[0] = 0;

	return (error);
}
1045
1046 #ifdef LINUX_LEGACY_SYSCALLS
/*
 * mknod(2): create a filesystem node.  Dispatches on the file type
 * bits of the requested mode; an absent type defaults to a regular
 * file, which Linux permits and which is emulated via open+close.
 */
int
linux_mknod(struct thread *td, struct linux_mknod_args *args)
{
	char *path;
	int error;

	LCONVPATHCREAT(td, args->path, &path);

	switch (args->mode & S_IFMT) {
	case S_IFIFO:
	case S_IFSOCK:
		/* Sockets and FIFOs are both created as FIFOs. */
		error = kern_mkfifoat(td, AT_FDCWD, path, UIO_SYSSPACE,
		    args->mode);
		break;

	case S_IFCHR:
	case S_IFBLK:
		error = kern_mknodat(td, AT_FDCWD, path, UIO_SYSSPACE,
		    args->mode, args->dev);
		break;

	case S_IFDIR:
		/* Directories cannot be created with mknod(2). */
		error = EPERM;
		break;

	case 0:
		/* No type given: Linux treats this as a regular file. */
		args->mode |= S_IFREG;
		/* FALLTHROUGH */
	case S_IFREG:
		/* Create (or truncate) the file, then drop the descriptor. */
		error = kern_openat(td, AT_FDCWD, path, UIO_SYSSPACE,
		    O_WRONLY | O_CREAT | O_TRUNC, args->mode);
		if (error == 0)
			kern_close(td, td->td_retval[0]);
		break;

	default:
		error = EINVAL;
		break;
	}
	LFREEPATH(path);
	return (error);
}
1089 #endif
1090
/*
 * mknodat(2): like mknod(2) above, but the path is resolved relative
 * to the directory descriptor dfd.
 */
int
linux_mknodat(struct thread *td, struct linux_mknodat_args *args)
{
	char *path;
	int error, dfd;

	/* Translate Linux's AT_FDCWD sentinel to the native one. */
	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
	LCONVPATHCREAT_AT(td, args->filename, &path, dfd);

	switch (args->mode & S_IFMT) {
	case S_IFIFO:
	case S_IFSOCK:
		/* Sockets and FIFOs are both created as FIFOs. */
		error = kern_mkfifoat(td, dfd, path, UIO_SYSSPACE, args->mode);
		break;

	case S_IFCHR:
	case S_IFBLK:
		error = kern_mknodat(td, dfd, path, UIO_SYSSPACE, args->mode,
		    args->dev);
		break;

	case S_IFDIR:
		/* Directories cannot be created with mknod(2). */
		error = EPERM;
		break;

	case 0:
		/* No type given: Linux treats this as a regular file. */
		args->mode |= S_IFREG;
		/* FALLTHROUGH */
	case S_IFREG:
		/* Create (or truncate) the file, then drop the descriptor. */
		error = kern_openat(td, dfd, path, UIO_SYSSPACE,
		    O_WRONLY | O_CREAT | O_TRUNC, args->mode);
		if (error == 0)
			kern_close(td, td->td_retval[0]);
		break;

	default:
		error = EINVAL;
		break;
	}
	LFREEPATH(path);
	return (error);
}
1133
1134 /*
1135 * UGH! This is just about the dumbest idea I've ever heard!!
1136 */
1137 int
1138 linux_personality(struct thread *td, struct linux_personality_args *args)
1139 {
1140 struct linux_pemuldata *pem;
1141 struct proc *p = td->td_proc;
1142 uint32_t old;
1143
1144 PROC_LOCK(p);
1145 pem = pem_find(p);
1146 old = pem->persona;
1147 if (args->per != 0xffffffff)
1148 pem->persona = args->per;
1149 PROC_UNLOCK(p);
1150
1151 td->td_retval[0] = old;
1152 return (0);
1153 }
1154
/* Linux struct itimerval; mirrors the native layout field-for-field. */
struct l_itimerval {
	l_timeval it_interval;	/* reload value after each expiration */
	l_timeval it_value;	/* time until the next expiration */
};
1159
/*
 * Copy the four timeval fields between a native struct itimerval and a
 * Linux struct l_itimerval; the layouts match field-for-field, but the
 * field widths may differ, hence the member-wise assignment.
 *
 * Wrapped in do { } while (0) so the macro expands to exactly one
 * statement and is safe inside unbraced if/else bodies (the previous
 * form expanded to four statements, of which only the first would be
 * governed by a surrounding unbraced conditional).
 */
#define	B2L_ITIMERVAL(bip, lip)						\
	do {								\
		(bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec;	\
		(bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec; \
		(bip)->it_value.tv_sec = (lip)->it_value.tv_sec;	\
		(bip)->it_value.tv_usec = (lip)->it_value.tv_usec;	\
	} while (0)
1165
/*
 * setitimer(2): arm or disarm one of the interval timers, optionally
 * returning the previous setting.
 */
int
linux_setitimer(struct thread *td, struct linux_setitimer_args *uap)
{
	int error;
	struct l_itimerval ls;
	struct itimerval aitv, oitv;

	if (uap->itv == NULL) {
		/*
		 * A NULL new-value pointer turns this into a query:
		 * reuse the getitimer path, using oitv as its output
		 * buffer.  The cast relies on the two argument structs
		 * sharing their leading layout -- TODO confirm against
		 * the generated linux_proto.h.
		 */
		uap->itv = uap->oitv;
		return (linux_getitimer(td, (struct linux_getitimer_args *)uap));
	}

	error = copyin(uap->itv, &ls, sizeof(ls));
	if (error != 0)
		return (error);
	B2L_ITIMERVAL(&aitv, &ls);
	error = kern_setitimer(td, uap->which, &aitv, &oitv);
	if (error != 0 || uap->oitv == NULL)
		return (error);
	/* Hand the previous timer setting back to the caller. */
	B2L_ITIMERVAL(&ls, &oitv);

	return (copyout(&ls, uap->oitv, sizeof(ls)));
}
1189
1190 int
1191 linux_getitimer(struct thread *td, struct linux_getitimer_args *uap)
1192 {
1193 int error;
1194 struct l_itimerval ls;
1195 struct itimerval aitv;
1196
1197 error = kern_getitimer(td, uap->which, &aitv);
1198 if (error != 0)
1199 return (error);
1200 B2L_ITIMERVAL(&ls, &aitv);
1201 return (copyout(&ls, uap->itv, sizeof(ls)));
1202 }
1203
#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/*
 * nice(2): adjust the calling process' priority by args->inc.
 */
int
linux_nice(struct thread *td, struct linux_nice_args *args)
{
	struct setpriority_args pargs;

	pargs.which = PRIO_PROCESS;
	pargs.who = 0;			/* 0 selects the calling process */
	pargs.prio = args->inc;
	return (sys_setpriority(td, &pargs));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1216
/*
 * setgroups(2): replace the supplementary group set of the calling
 * process.  The new set is installed on a fresh credential so the old
 * one stays immutable for concurrent readers.
 */
int
linux_setgroups(struct thread *td, struct linux_setgroups_args *args)
{
	struct ucred *newcred, *oldcred;
	l_gid_t *linux_gidset;
	gid_t *bsd_gidset;
	int ngrp, error;
	struct proc *p;

	ngrp = args->gidsetsize;
	/* Reject negative counts and sets larger than the system limit. */
	if (ngrp < 0 || ngrp >= ngroups_max + 1)
		return (EINVAL);
	linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK);
	error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t));
	if (error)
		goto out;
	newcred = crget();
	/* Make room for the egid slot plus the new supplementary set. */
	crextend(newcred, ngrp + 1);
	p = td->td_proc;
	PROC_LOCK(p);
	oldcred = p->p_ucred;
	crcopy(newcred, oldcred);

	/*
	 * cr_groups[0] holds egid. Setting the whole set from
	 * the supplied set will cause egid to be changed too.
	 * Keep cr_groups[0] unchanged to prevent that.
	 */

	if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS, 0)) != 0) {
		PROC_UNLOCK(p);
		crfree(newcred);
		goto out;
	}

	if (ngrp > 0) {
		newcred->cr_ngroups = ngrp + 1;

		/* Copy the new set in behind the preserved egid slot. */
		bsd_gidset = newcred->cr_groups;
		ngrp--;
		while (ngrp >= 0) {
			bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
			ngrp--;
		}
	} else
		newcred->cr_ngroups = 1;

	/* Credentials changed: taint for setuid-sensitive consumers. */
	setsugid(p);
	proc_set_cred(p, newcred);
	PROC_UNLOCK(p);
	crfree(oldcred);
	error = 0;
out:
	free(linux_gidset, M_LINUX);
	return (error);
}
1273
1274 int
1275 linux_getgroups(struct thread *td, struct linux_getgroups_args *args)
1276 {
1277 struct ucred *cred;
1278 l_gid_t *linux_gidset;
1279 gid_t *bsd_gidset;
1280 int bsd_gidsetsz, ngrp, error;
1281
1282 cred = td->td_ucred;
1283 bsd_gidset = cred->cr_groups;
1284 bsd_gidsetsz = cred->cr_ngroups - 1;
1285
1286 /*
1287 * cr_groups[0] holds egid. Returning the whole set
1288 * here will cause a duplicate. Exclude cr_groups[0]
1289 * to prevent that.
1290 */
1291
1292 if ((ngrp = args->gidsetsize) == 0) {
1293 td->td_retval[0] = bsd_gidsetsz;
1294 return (0);
1295 }
1296
1297 if (ngrp < bsd_gidsetsz)
1298 return (EINVAL);
1299
1300 ngrp = 0;
1301 linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset),
1302 M_LINUX, M_WAITOK);
1303 while (ngrp < bsd_gidsetsz) {
1304 linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
1305 ngrp++;
1306 }
1307
1308 error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t));
1309 free(linux_gidset, M_LINUX);
1310 if (error)
1311 return (error);
1312
1313 td->td_retval[0] = ngrp;
1314 return (0);
1315 }
1316
1317 int
1318 linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args)
1319 {
1320 struct rlimit bsd_rlim;
1321 struct l_rlimit rlim;
1322 u_int which;
1323 int error;
1324
1325 if (args->resource >= LINUX_RLIM_NLIMITS)
1326 return (EINVAL);
1327
1328 which = linux_to_bsd_resource[args->resource];
1329 if (which == -1)
1330 return (EINVAL);
1331
1332 error = copyin(args->rlim, &rlim, sizeof(rlim));
1333 if (error)
1334 return (error);
1335
1336 bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur;
1337 bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max;
1338 return (kern_setrlimit(td, which, &bsd_rlim));
1339 }
1340
#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/*
 * Old-style getrlimit(2) with word-sized limit fields.  Infinity is
 * clamped so it round-trips through the guest's narrower signed type.
 */
int
linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args)
{
	struct l_rlimit rlim;
	struct rlimit bsd_rlim;
	u_int which;

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	/* -1 in the table marks a resource with no BSD analogue. */
	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	lim_rlimit(td, which, &bsd_rlim);

#ifdef COMPAT_LINUX32
	/* 32-bit guest: unsigned infinity would read as -1, clamp it. */
	rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur;
	if (rlim.rlim_cur == UINT_MAX)
		rlim.rlim_cur = INT_MAX;
	rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max;
	if (rlim.rlim_max == UINT_MAX)
		rlim.rlim_max = INT_MAX;
#else
	/* Native word size: same clamping at the long boundary. */
	rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur;
	if (rlim.rlim_cur == ULONG_MAX)
		rlim.rlim_cur = LONG_MAX;
	rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max;
	if (rlim.rlim_max == ULONG_MAX)
		rlim.rlim_max = LONG_MAX;
#endif
	return (copyout(&rlim, args->rlim, sizeof(rlim)));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1376
1377 int
1378 linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args)
1379 {
1380 struct l_rlimit rlim;
1381 struct rlimit bsd_rlim;
1382 u_int which;
1383
1384 if (args->resource >= LINUX_RLIM_NLIMITS)
1385 return (EINVAL);
1386
1387 which = linux_to_bsd_resource[args->resource];
1388 if (which == -1)
1389 return (EINVAL);
1390
1391 lim_rlimit(td, which, &bsd_rlim);
1392
1393 rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur;
1394 rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max;
1395 return (copyout(&rlim, args->rlim, sizeof(rlim)));
1396 }
1397
/*
 * sched_setscheduler(2): set scheduling policy and priority for the
 * thread identified by a Linux tid.
 */
int
linux_sched_setscheduler(struct thread *td,
    struct linux_sched_setscheduler_args *args)
{
	struct sched_param sched_param;
	struct thread *tdt;
	int error, policy;

	switch (args->policy) {
	case LINUX_SCHED_OTHER:
		policy = SCHED_OTHER;
		break;
	case LINUX_SCHED_FIFO:
		policy = SCHED_FIFO;
		break;
	case LINUX_SCHED_RR:
		policy = SCHED_RR;
		break;
	default:
		return (EINVAL);
	}

	error = copyin(args->param, &sched_param, sizeof(sched_param));
	if (error)
		return (error);

	if (linux_map_sched_prio) {
		switch (policy) {
		case SCHED_OTHER:
			/* Linux allows only priority 0 for SCHED_OTHER. */
			if (sched_param.sched_priority != 0)
				return (EINVAL);

			sched_param.sched_priority =
			    PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE;
			break;
		case SCHED_FIFO:
		case SCHED_RR:
			if (sched_param.sched_priority < 1 ||
			    sched_param.sched_priority >= LINUX_MAX_RT_PRIO)
				return (EINVAL);

			/*
			 * Map [1, LINUX_MAX_RT_PRIO - 1] to
			 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down).
			 */
			sched_param.sched_priority =
			    (sched_param.sched_priority - 1) *
			    (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) /
			    (LINUX_MAX_RT_PRIO - 1);
			break;
		}
	}

	tdt = linux_tdfind(td, args->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	/* linux_tdfind() returned with the target proc locked. */
	error = kern_sched_setscheduler(td, tdt, policy, &sched_param);
	PROC_UNLOCK(tdt->td_proc);
	return (error);
}
1459
1460 int
1461 linux_sched_getscheduler(struct thread *td,
1462 struct linux_sched_getscheduler_args *args)
1463 {
1464 struct thread *tdt;
1465 int error, policy;
1466
1467 tdt = linux_tdfind(td, args->pid, -1);
1468 if (tdt == NULL)
1469 return (ESRCH);
1470
1471 error = kern_sched_getscheduler(td, tdt, &policy);
1472 PROC_UNLOCK(tdt->td_proc);
1473
1474 switch (policy) {
1475 case SCHED_OTHER:
1476 td->td_retval[0] = LINUX_SCHED_OTHER;
1477 break;
1478 case SCHED_FIFO:
1479 td->td_retval[0] = LINUX_SCHED_FIFO;
1480 break;
1481 case SCHED_RR:
1482 td->td_retval[0] = LINUX_SCHED_RR;
1483 break;
1484 }
1485 return (error);
1486 }
1487
1488 int
1489 linux_sched_get_priority_max(struct thread *td,
1490 struct linux_sched_get_priority_max_args *args)
1491 {
1492 struct sched_get_priority_max_args bsd;
1493
1494 if (linux_map_sched_prio) {
1495 switch (args->policy) {
1496 case LINUX_SCHED_OTHER:
1497 td->td_retval[0] = 0;
1498 return (0);
1499 case LINUX_SCHED_FIFO:
1500 case LINUX_SCHED_RR:
1501 td->td_retval[0] = LINUX_MAX_RT_PRIO - 1;
1502 return (0);
1503 default:
1504 return (EINVAL);
1505 }
1506 }
1507
1508 switch (args->policy) {
1509 case LINUX_SCHED_OTHER:
1510 bsd.policy = SCHED_OTHER;
1511 break;
1512 case LINUX_SCHED_FIFO:
1513 bsd.policy = SCHED_FIFO;
1514 break;
1515 case LINUX_SCHED_RR:
1516 bsd.policy = SCHED_RR;
1517 break;
1518 default:
1519 return (EINVAL);
1520 }
1521 return (sys_sched_get_priority_max(td, &bsd));
1522 }
1523
1524 int
1525 linux_sched_get_priority_min(struct thread *td,
1526 struct linux_sched_get_priority_min_args *args)
1527 {
1528 struct sched_get_priority_min_args bsd;
1529
1530 if (linux_map_sched_prio) {
1531 switch (args->policy) {
1532 case LINUX_SCHED_OTHER:
1533 td->td_retval[0] = 0;
1534 return (0);
1535 case LINUX_SCHED_FIFO:
1536 case LINUX_SCHED_RR:
1537 td->td_retval[0] = 1;
1538 return (0);
1539 default:
1540 return (EINVAL);
1541 }
1542 }
1543
1544 switch (args->policy) {
1545 case LINUX_SCHED_OTHER:
1546 bsd.policy = SCHED_OTHER;
1547 break;
1548 case LINUX_SCHED_FIFO:
1549 bsd.policy = SCHED_FIFO;
1550 break;
1551 case LINUX_SCHED_RR:
1552 bsd.policy = SCHED_RR;
1553 break;
1554 default:
1555 return (EINVAL);
1556 }
1557 return (sys_sched_get_priority_min(td, &bsd));
1558 }
1559
/* Linux reboot(2) command and magic constants (see the Linux man page). */
#define	REBOOT_CAD_ON	0x89abcdef
#define	REBOOT_CAD_OFF	0
#define	REBOOT_HALT	0xcdef0123
#define	REBOOT_RESTART	0x01234567
#define	REBOOT_RESTART2	0xA1B2C3D4
#define	REBOOT_POWEROFF	0x4321FEDC
#define	REBOOT_MAGIC1	0xfee1dead
#define	REBOOT_MAGIC2	0x28121969
#define	REBOOT_MAGIC2A	0x05121996
#define	REBOOT_MAGIC2B	0x16041998

/*
 * reboot(2): validate the Linux magic numbers, then map the command
 * to native reboot flags.
 */
int
linux_reboot(struct thread *td, struct linux_reboot_args *args)
{
	struct reboot_args bsd_args;

	if (args->magic1 != REBOOT_MAGIC1)
		return (EINVAL);

	switch (args->magic2) {
	case REBOOT_MAGIC2:
	case REBOOT_MAGIC2A:
	case REBOOT_MAGIC2B:
		break;
	default:
		return (EINVAL);
	}

	switch (args->cmd) {
	case REBOOT_CAD_ON:
	case REBOOT_CAD_OFF:
		/*
		 * Ctrl-Alt-Del toggling has no native equivalent; only
		 * the privilege check is performed.
		 */
		return (priv_check(td, PRIV_REBOOT));
	case REBOOT_HALT:
		bsd_args.opt = RB_HALT;
		break;
	case REBOOT_RESTART:
	case REBOOT_RESTART2:
		bsd_args.opt = 0;
		break;
	case REBOOT_POWEROFF:
		bsd_args.opt = RB_POWEROFF;
		break;
	default:
		return (EINVAL);
	}
	return (sys_reboot(td, &bsd_args));
}
1607
1608
/*
 * getpid(2): return the pid of the calling process.
 */
int
linux_getpid(struct thread *td, struct linux_getpid_args *args)
{

	td->td_retval[0] = td->td_proc->p_pid;

	return (0);
}
1617
/*
 * gettid(2): return the Linux tid of the calling thread, which is
 * kept in the per-thread emulator data.
 */
int
linux_gettid(struct thread *td, struct linux_gettid_args *args)
{
	struct linux_emuldata *em;

	/* Every Linuxulator thread has emuldata; its absence is a bug. */
	em = em_find(td);
	KASSERT(em != NULL, ("gettid: emuldata not found.\n"));

	td->td_retval[0] = em->em_tid;

	return (0);
}
1630
1631
/*
 * getppid(2): return the parent pid of the calling process.
 */
int
linux_getppid(struct thread *td, struct linux_getppid_args *args)
{

	td->td_retval[0] = kern_getppid(td);
	return (0);
}
1639
/*
 * getgid(2): return the real group id of the calling process.
 */
int
linux_getgid(struct thread *td, struct linux_getgid_args *args)
{

	td->td_retval[0] = td->td_ucred->cr_rgid;
	return (0);
}
1647
/*
 * getuid(2): return the real user id of the calling process.
 */
int
linux_getuid(struct thread *td, struct linux_getuid_args *args)
{

	td->td_retval[0] = td->td_ucred->cr_ruid;
	return (0);
}
1655
1656
/*
 * getsid(2): semantics match the native syscall, so forward directly.
 */
int
linux_getsid(struct thread *td, struct linux_getsid_args *args)
{
	struct getsid_args bsd;

	bsd.pid = args->pid;
	return (sys_getsid(td, &bsd));
}
1665
/*
 * Placeholder for unimplemented Linux syscalls.
 */
int
linux_nosys(struct thread *td, struct nosys_args *ignore)
{

	return (ENOSYS);
}
1672
1673 int
1674 linux_getpriority(struct thread *td, struct linux_getpriority_args *args)
1675 {
1676 struct getpriority_args bsd_args;
1677 int error;
1678
1679 bsd_args.which = args->which;
1680 bsd_args.who = args->who;
1681 error = sys_getpriority(td, &bsd_args);
1682 td->td_retval[0] = 20 - td->td_retval[0];
1683 return (error);
1684 }
1685
1686 int
1687 linux_sethostname(struct thread *td, struct linux_sethostname_args *args)
1688 {
1689 int name[2];
1690
1691 name[0] = CTL_KERN;
1692 name[1] = KERN_HOSTNAME;
1693 return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname,
1694 args->len, 0, 0));
1695 }
1696
1697 int
1698 linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args)
1699 {
1700 int name[2];
1701
1702 name[0] = CTL_KERN;
1703 name[1] = KERN_NISDOMAINNAME;
1704 return (userland_sysctl(td, name, 2, 0, 0, 0, args->name,
1705 args->len, 0, 0));
1706 }
1707
/*
 * exit_group(2): terminate all threads in the process.  exit1() does
 * not return.
 */
int
linux_exit_group(struct thread *td, struct linux_exit_group_args *args)
{

	LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid,
	    args->error_code);

	/*
	 * XXX: we should send a signal to the parent if
	 * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?)
	 * as it doesnt occur often.
	 */
	exit1(td, args->error_code, 0);
	/* NOTREACHED */
}
1723
/* Header versions understood by Linux capget(2)/capset(2). */
#define	_LINUX_CAPABILITY_VERSION_1	0x19980330
#define	_LINUX_CAPABILITY_VERSION_2	0x20071026
#define	_LINUX_CAPABILITY_VERSION_3	0x20080522

/* User-visible header: version negotiation plus target pid. */
struct l_user_cap_header {
	l_int	version;
	l_int	pid;
};

/* One 32-bit word of each capability set; v2/v3 use two of these. */
struct l_user_cap_data {
	l_int	effective;
	l_int	permitted;
	l_int	inheritable;
};
1738
1739 int
1740 linux_capget(struct thread *td, struct linux_capget_args *uap)
1741 {
1742 struct l_user_cap_header luch;
1743 struct l_user_cap_data lucd[2];
1744 int error, u32s;
1745
1746 if (uap->hdrp == NULL)
1747 return (EFAULT);
1748
1749 error = copyin(uap->hdrp, &luch, sizeof(luch));
1750 if (error != 0)
1751 return (error);
1752
1753 switch (luch.version) {
1754 case _LINUX_CAPABILITY_VERSION_1:
1755 u32s = 1;
1756 break;
1757 case _LINUX_CAPABILITY_VERSION_2:
1758 case _LINUX_CAPABILITY_VERSION_3:
1759 u32s = 2;
1760 break;
1761 default:
1762 luch.version = _LINUX_CAPABILITY_VERSION_1;
1763 error = copyout(&luch, uap->hdrp, sizeof(luch));
1764 if (error)
1765 return (error);
1766 return (EINVAL);
1767 }
1768
1769 if (luch.pid)
1770 return (EPERM);
1771
1772 if (uap->datap) {
1773 /*
1774 * The current implementation doesn't support setting
1775 * a capability (it's essentially a stub) so indicate
1776 * that no capabilities are currently set or available
1777 * to request.
1778 */
1779 memset(&lucd, 0, u32s * sizeof(lucd[0]));
1780 error = copyout(&lucd, uap->datap, u32s * sizeof(lucd[0]));
1781 }
1782
1783 return (error);
1784 }
1785
1786 int
1787 linux_capset(struct thread *td, struct linux_capset_args *uap)
1788 {
1789 struct l_user_cap_header luch;
1790 struct l_user_cap_data lucd[2];
1791 int error, i, u32s;
1792
1793 if (uap->hdrp == NULL || uap->datap == NULL)
1794 return (EFAULT);
1795
1796 error = copyin(uap->hdrp, &luch, sizeof(luch));
1797 if (error != 0)
1798 return (error);
1799
1800 switch (luch.version) {
1801 case _LINUX_CAPABILITY_VERSION_1:
1802 u32s = 1;
1803 break;
1804 case _LINUX_CAPABILITY_VERSION_2:
1805 case _LINUX_CAPABILITY_VERSION_3:
1806 u32s = 2;
1807 break;
1808 default:
1809 luch.version = _LINUX_CAPABILITY_VERSION_1;
1810 error = copyout(&luch, uap->hdrp, sizeof(luch));
1811 if (error)
1812 return (error);
1813 return (EINVAL);
1814 }
1815
1816 if (luch.pid)
1817 return (EPERM);
1818
1819 error = copyin(uap->datap, &lucd, u32s * sizeof(lucd[0]));
1820 if (error != 0)
1821 return (error);
1822
1823 /* We currently don't support setting any capabilities. */
1824 for (i = 0; i < u32s; i++) {
1825 if (lucd[i].effective || lucd[i].permitted ||
1826 lucd[i].inheritable) {
1827 linux_msg(td,
1828 "capset[%d] effective=0x%x, permitted=0x%x, "
1829 "inheritable=0x%x is not implemented", i,
1830 (int)lucd[i].effective, (int)lucd[i].permitted,
1831 (int)lucd[i].inheritable);
1832 return (EPERM);
1833 }
1834 }
1835
1836 return (0);
1837 }
1838
/*
 * prctl(2): per-process control operations.  Only a subset of the
 * Linux options is implemented; anything else returns EINVAL.
 */
int
linux_prctl(struct thread *td, struct linux_prctl_args *args)
{
	int error = 0, max_size;
	struct proc *p = td->td_proc;
	char comm[LINUX_MAX_COMM_LEN];
	int pdeath_signal;

	switch (args->option) {
	case LINUX_PR_SET_PDEATHSIG:
		if (!LINUX_SIG_VALID(args->arg2))
			return (EINVAL);
		/* Translate the Linux signal number to the native one. */
		pdeath_signal = linux_to_bsd_signal(args->arg2);
		return (kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_CTL,
		    &pdeath_signal));
	case LINUX_PR_GET_PDEATHSIG:
		error = kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_STATUS,
		    &pdeath_signal);
		if (error != 0)
			return (error);
		pdeath_signal = bsd_to_linux_signal(pdeath_signal);
		/* arg2 is a user pointer to an int for this option. */
		return (copyout(&pdeath_signal,
		    (void *)(register_t)args->arg2,
		    sizeof(pdeath_signal)));
		break;
	case LINUX_PR_GET_KEEPCAPS:
		/*
		 * Indicate that we always clear the effective and
		 * permitted capability sets when the user id becomes
		 * non-zero (actually the capability sets are simply
		 * always zero in the current implementation).
		 */
		td->td_retval[0] = 0;
		break;
	case LINUX_PR_SET_KEEPCAPS:
		/*
		 * Ignore requests to keep the effective and permitted
		 * capability sets when the user id becomes non-zero.
		 */
		break;
	case LINUX_PR_SET_NAME:
		/*
		 * To be on the safe side we need to make sure to not
		 * overflow the size a Linux program expects. We already
		 * do this here in the copyin, so that we don't need to
		 * check on copyout.
		 */
		max_size = MIN(sizeof(comm), sizeof(p->p_comm));
		error = copyinstr((void *)(register_t)args->arg2, comm,
		    max_size, NULL);

		/* Linux silently truncates the name if it is too long. */
		if (error == ENAMETOOLONG) {
			/*
			 * XXX: copyinstr() isn't documented to populate the
			 * array completely, so do a copyin() to be on the
			 * safe side. This should be changed in case
			 * copyinstr() is changed to guarantee this.
			 */
			error = copyin((void *)(register_t)args->arg2, comm,
			    max_size - 1);
			comm[max_size - 1] = '\0';
		}
		if (error)
			return (error);

		/* p_comm is modified under the proc lock. */
		PROC_LOCK(p);
		strlcpy(p->p_comm, comm, sizeof(p->p_comm));
		PROC_UNLOCK(p);
		break;
	case LINUX_PR_GET_NAME:
		PROC_LOCK(p);
		strlcpy(comm, p->p_comm, sizeof(comm));
		PROC_UNLOCK(p);
		error = copyout(comm, (void *)(register_t)args->arg2,
		    strlen(comm) + 1);
		break;
	default:
		error = EINVAL;
		break;
	}

	return (error);
}
1923
/*
 * sched_setparam(2): set the scheduling priority of the thread
 * identified by a Linux tid, mapping Linux priority ranges to native
 * ones when emulation is enabled.
 */
int
linux_sched_setparam(struct thread *td,
    struct linux_sched_setparam_args *uap)
{
	struct sched_param sched_param;
	struct thread *tdt;
	int error, policy;

	error = copyin(uap->param, &sched_param, sizeof(sched_param));
	if (error)
		return (error);

	tdt = linux_tdfind(td, uap->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	/* linux_tdfind() returned with the target proc locked; it stays
	 * locked until the single unlock at the "out" label below. */
	if (linux_map_sched_prio) {
		error = kern_sched_getscheduler(td, tdt, &policy);
		if (error)
			goto out;

		switch (policy) {
		case SCHED_OTHER:
			/* Linux allows only priority 0 for SCHED_OTHER. */
			if (sched_param.sched_priority != 0) {
				error = EINVAL;
				goto out;
			}
			sched_param.sched_priority =
			    PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE;
			break;
		case SCHED_FIFO:
		case SCHED_RR:
			if (sched_param.sched_priority < 1 ||
			    sched_param.sched_priority >= LINUX_MAX_RT_PRIO) {
				error = EINVAL;
				goto out;
			}
			/*
			 * Map [1, LINUX_MAX_RT_PRIO - 1] to
			 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down).
			 */
			sched_param.sched_priority =
			    (sched_param.sched_priority - 1) *
			    (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) /
			    (LINUX_MAX_RT_PRIO - 1);
			break;
		}
	}

	error = kern_sched_setparam(td, tdt, &sched_param);
out:	PROC_UNLOCK(tdt->td_proc);
	return (error);
}
1977
/*
 * sched_getparam(2): fetch the scheduling priority of the thread
 * identified by a Linux tid, mapping native priorities back to Linux
 * ranges when emulation is enabled.  Note the proc lock acquired by
 * linux_tdfind() is released on three different paths below.
 */
int
linux_sched_getparam(struct thread *td,
    struct linux_sched_getparam_args *uap)
{
	struct sched_param sched_param;
	struct thread *tdt;
	int error, policy;

	tdt = linux_tdfind(td, uap->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_getparam(td, tdt, &sched_param);
	if (error) {
		PROC_UNLOCK(tdt->td_proc);
		return (error);
	}

	if (linux_map_sched_prio) {
		error = kern_sched_getscheduler(td, tdt, &policy);
		PROC_UNLOCK(tdt->td_proc);
		if (error)
			return (error);

		switch (policy) {
		case SCHED_OTHER:
			sched_param.sched_priority = 0;
			break;
		case SCHED_FIFO:
		case SCHED_RR:
			/*
			 * Map [0, RTP_PRIO_MAX - RTP_PRIO_MIN] to
			 * [1, LINUX_MAX_RT_PRIO - 1] (rounding up).
			 */
			sched_param.sched_priority =
			    (sched_param.sched_priority *
			    (LINUX_MAX_RT_PRIO - 1) +
			    (RTP_PRIO_MAX - RTP_PRIO_MIN - 1)) /
			    (RTP_PRIO_MAX - RTP_PRIO_MIN) + 1;
			break;
		}
	} else
		PROC_UNLOCK(tdt->td_proc);

	error = copyout(&sched_param, uap->param, sizeof(sched_param));
	return (error);
}
2025
/* Copy callbacks for the cpuset syscalls: masks live in user space. */
static const struct cpuset_copy_cb copy_set = {
	.cpuset_copyin = copyin,
	.cpuset_copyout = copyout
};
2030
2031 /*
2032 * Get affinity of a process.
2033 */
2034 int
2035 linux_sched_getaffinity(struct thread *td,
2036 struct linux_sched_getaffinity_args *args)
2037 {
2038 int error;
2039 struct thread *tdt;
2040
2041 if (args->len < sizeof(cpuset_t))
2042 return (EINVAL);
2043
2044 tdt = linux_tdfind(td, args->pid, -1);
2045 if (tdt == NULL)
2046 return (ESRCH);
2047
2048 PROC_UNLOCK(tdt->td_proc);
2049
2050 error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
2051 tdt->td_tid, sizeof(cpuset_t), (cpuset_t *)args->user_mask_ptr,
2052 ©_set);
2053 if (error == 0)
2054 td->td_retval[0] = sizeof(cpuset_t);
2055
2056 return (error);
2057 }
2058
2059 /*
2060 * Set affinity of a process.
2061 */
2062 int
2063 linux_sched_setaffinity(struct thread *td,
2064 struct linux_sched_setaffinity_args *args)
2065 {
2066 struct thread *tdt;
2067
2068 if (args->len < sizeof(cpuset_t))
2069 return (EINVAL);
2070
2071 tdt = linux_tdfind(td, args->pid, -1);
2072 if (tdt == NULL)
2073 return (ESRCH);
2074
2075 PROC_UNLOCK(tdt->td_proc);
2076
2077 return (kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
2078 tdt->td_tid, sizeof(cpuset_t), (cpuset_t *) args->user_mask_ptr,
2079 ©_set));
2080 }
2081
/* Linux rlimit64 layout for prlimit64(2): both fields unsigned 64-bit. */
struct linux_rlimit64 {
	uint64_t	rlim_cur;
	uint64_t	rlim_max;
};
2086
2087 int
2088 linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args)
2089 {
2090 struct rlimit rlim, nrlim;
2091 struct linux_rlimit64 lrlim;
2092 struct proc *p;
2093 u_int which;
2094 int flags;
2095 int error;
2096
2097 if (args->resource >= LINUX_RLIM_NLIMITS)
2098 return (EINVAL);
2099
2100 which = linux_to_bsd_resource[args->resource];
2101 if (which == -1)
2102 return (EINVAL);
2103
2104 if (args->new != NULL) {
2105 /*
2106 * Note. Unlike FreeBSD where rlim is signed 64-bit Linux
2107 * rlim is unsigned 64-bit. FreeBSD treats negative limits
2108 * as INFINITY so we do not need a conversion even.
2109 */
2110 error = copyin(args->new, &nrlim, sizeof(nrlim));
2111 if (error != 0)
2112 return (error);
2113 }
2114
2115 flags = PGET_HOLD | PGET_NOTWEXIT;
2116 if (args->new != NULL)
2117 flags |= PGET_CANDEBUG;
2118 else
2119 flags |= PGET_CANSEE;
2120 if (args->pid == 0) {
2121 p = td->td_proc;
2122 PHOLD(p);
2123 } else {
2124 error = pget(args->pid, flags, &p);
2125 if (error != 0)
2126 return (error);
2127 }
2128 if (args->old != NULL) {
2129 PROC_LOCK(p);
2130 lim_rlimit_proc(p, which, &rlim);
2131 PROC_UNLOCK(p);
2132 if (rlim.rlim_cur == RLIM_INFINITY)
2133 lrlim.rlim_cur = LINUX_RLIM_INFINITY;
2134 else
2135 lrlim.rlim_cur = rlim.rlim_cur;
2136 if (rlim.rlim_max == RLIM_INFINITY)
2137 lrlim.rlim_max = LINUX_RLIM_INFINITY;
2138 else
2139 lrlim.rlim_max = rlim.rlim_max;
2140 error = copyout(&lrlim, args->old, sizeof(lrlim));
2141 if (error != 0)
2142 goto out;
2143 }
2144
2145 if (args->new != NULL)
2146 error = kern_proc_setrlimit(td, p, which, &nrlim);
2147
2148 out:
2149 PRELE(p);
2150 return (error);
2151 }
2152
/*
 * pselect6(2): select with a signal mask and nanosecond timeout.  The
 * sixth argument is a (sigset pointer, size) pair passed indirectly
 * through a small user structure.  On return the remaining timeout is
 * written back, matching Linux behavior.
 */
int
linux_pselect6(struct thread *td, struct linux_pselect6_args *args)
{
	struct timeval utv, tv0, tv1, *tvp;
	struct l_pselect6arg lpse6;
	struct l_timespec lts;
	struct timespec uts;
	l_sigset_t l_ss;
	sigset_t *ssp;
	sigset_t ss;
	int error;

	ssp = NULL;
	if (args->sig != NULL) {
		error = copyin(args->sig, &lpse6, sizeof(lpse6));
		if (error != 0)
			return (error);
		/* Linux requires the exact sigset size. */
		if (lpse6.ss_len != sizeof(l_ss))
			return (EINVAL);
		if (lpse6.ss != 0) {
			error = copyin(PTRIN(lpse6.ss), &l_ss,
			    sizeof(l_ss));
			if (error != 0)
				return (error);
			linux_to_bsd_sigset(&l_ss, &ss);
			ssp = &ss;
		}
	}

	/*
	 * Currently glibc changes nanosecond number to microsecond.
	 * This mean losing precision but for now it is hardly seen.
	 */
	if (args->tsp != NULL) {
		error = copyin(args->tsp, &lts, sizeof(lts));
		if (error != 0)
			return (error);
		error = linux_to_native_timespec(&uts, &lts);
		if (error != 0)
			return (error);

		TIMESPEC_TO_TIMEVAL(&utv, &uts);
		if (itimerfix(&utv))
			return (EINVAL);

		/* Timestamp the start so the remaining time can be
		 * computed after the wait. */
		microtime(&tv0);
		tvp = &utv;
	} else
		tvp = NULL;

	error = kern_pselect(td, args->nfds, args->readfds, args->writefds,
	    args->exceptfds, tvp, ssp, LINUX_NFDBITS);

	if (error == 0 && args->tsp != NULL) {
		if (td->td_retval[0] != 0) {
			/*
			 * Compute how much time was left of the timeout,
			 * by subtracting the current time and the time
			 * before we started the call, and subtracting
			 * that result from the user-supplied value.
			 */

			microtime(&tv1);
			timevalsub(&tv1, &tv0);
			timevalsub(&utv, &tv1);
			if (utv.tv_sec < 0)
				timevalclear(&utv);
		} else
			timevalclear(&utv);

		TIMEVAL_TO_TIMESPEC(&utv, &uts);

		error = native_to_linux_timespec(&lts, &uts);
		if (error == 0)
			error = copyout(&lts, args->tsp, sizeof(lts));
	}

	return (error);
}
2232
/*
 * ppoll(2): poll with a signal mask and nanosecond timeout.  As with
 * pselect6, the remaining timeout is written back on return.
 */
int
linux_ppoll(struct thread *td, struct linux_ppoll_args *args)
{
	struct timespec ts0, ts1;
	struct l_timespec lts;
	struct timespec uts, *tsp;
	l_sigset_t l_ss;
	sigset_t *ssp;
	sigset_t ss;
	int error;

	if (args->sset != NULL) {
		/* Linux requires the exact sigset size. */
		if (args->ssize != sizeof(l_ss))
			return (EINVAL);
		error = copyin(args->sset, &l_ss, sizeof(l_ss));
		if (error)
			return (error);
		linux_to_bsd_sigset(&l_ss, &ss);
		ssp = &ss;
	} else
		ssp = NULL;
	if (args->tsp != NULL) {
		error = copyin(args->tsp, &lts, sizeof(lts));
		if (error)
			return (error);
		error = linux_to_native_timespec(&uts, &lts);
		if (error != 0)
			return (error);

		/* Timestamp the start so the remaining time can be
		 * computed after the wait. */
		nanotime(&ts0);
		tsp = &uts;
	} else
		tsp = NULL;

	error = kern_poll(td, args->fds, args->nfds, tsp, ssp);

	if (error == 0 && args->tsp != NULL) {
		if (td->td_retval[0]) {
			/* Events arrived: report the unexpired remainder. */
			nanotime(&ts1);
			timespecsub(&ts1, &ts0, &ts1);
			timespecsub(&uts, &ts1, &uts);
			if (uts.tv_sec < 0)
				timespecclear(&uts);
		} else
			timespecclear(&uts);

		error = native_to_linux_timespec(&lts, &uts);
		if (error == 0)
			error = copyout(&lts, args->tsp, sizeof(lts));
	}

	return (error);
}
2286
2287 int
2288 linux_sched_rr_get_interval(struct thread *td,
2289 struct linux_sched_rr_get_interval_args *uap)
2290 {
2291 struct timespec ts;
2292 struct l_timespec lts;
2293 struct thread *tdt;
2294 int error;
2295
2296 /*
2297 * According to man in case the invalid pid specified
2298 * EINVAL should be returned.
2299 */
2300 if (uap->pid < 0)
2301 return (EINVAL);
2302
2303 tdt = linux_tdfind(td, uap->pid, -1);
2304 if (tdt == NULL)
2305 return (ESRCH);
2306
2307 error = kern_sched_rr_get_interval_td(td, tdt, &ts);
2308 PROC_UNLOCK(tdt->td_proc);
2309 if (error != 0)
2310 return (error);
2311 error = native_to_linux_timespec(<s, &ts);
2312 if (error != 0)
2313 return (error);
2314 return (copyout(<s, uap->interval, sizeof(lts)));
2315 }
2316
2317 /*
2318 * In case when the Linux thread is the initial thread in
2319 * the thread group thread id is equal to the process id.
2320 * Glibc depends on this magic (assert in pthread_getattr_np.c).
2321 */
2322 struct thread *
2323 linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid)
2324 {
2325 struct linux_emuldata *em;
2326 struct thread *tdt;
2327 struct proc *p;
2328
2329 tdt = NULL;
2330 if (tid == 0 || tid == td->td_tid) {
2331 tdt = td;
2332 PROC_LOCK(tdt->td_proc);
2333 } else if (tid > PID_MAX)
2334 tdt = tdfind(tid, pid);
2335 else {
2336 /*
2337 * Initial thread where the tid equal to the pid.
2338 */
2339 p = pfind(tid);
2340 if (p != NULL) {
2341 if (SV_PROC_ABI(p) != SV_ABI_LINUX) {
2342 /*
2343 * p is not a Linuxulator process.
2344 */
2345 PROC_UNLOCK(p);
2346 return (NULL);
2347 }
2348 FOREACH_THREAD_IN_PROC(p, tdt) {
2349 em = em_find(tdt);
2350 if (tid == em->em_tid)
2351 return (tdt);
2352 }
2353 PROC_UNLOCK(p);
2354 }
2355 return (NULL);
2356 }
2357
2358 return (tdt);
2359 }
2360
2361 void
2362 linux_to_bsd_waitopts(int options, int *bsdopts)
2363 {
2364
2365 if (options & LINUX_WNOHANG)
2366 *bsdopts |= WNOHANG;
2367 if (options & LINUX_WUNTRACED)
2368 *bsdopts |= WUNTRACED;
2369 if (options & LINUX_WEXITED)
2370 *bsdopts |= WEXITED;
2371 if (options & LINUX_WCONTINUED)
2372 *bsdopts |= WCONTINUED;
2373 if (options & LINUX_WNOWAIT)
2374 *bsdopts |= WNOWAIT;
2375
2376 if (options & __WCLONE)
2377 *bsdopts |= WLINUXCLONE;
2378 }
2379
2380 int
2381 linux_getrandom(struct thread *td, struct linux_getrandom_args *args)
2382 {
2383 struct uio uio;
2384 struct iovec iov;
2385 int error;
2386
2387 if (args->flags & ~(LINUX_GRND_NONBLOCK|LINUX_GRND_RANDOM))
2388 return (EINVAL);
2389 if (args->count > INT_MAX)
2390 args->count = INT_MAX;
2391
2392 iov.iov_base = args->buf;
2393 iov.iov_len = args->count;
2394
2395 uio.uio_iov = &iov;
2396 uio.uio_iovcnt = 1;
2397 uio.uio_resid = iov.iov_len;
2398 uio.uio_segflg = UIO_USERSPACE;
2399 uio.uio_rw = UIO_READ;
2400 uio.uio_td = td;
2401
2402 error = read_random_uio(&uio, args->flags & LINUX_GRND_NONBLOCK);
2403 if (error == 0)
2404 td->td_retval[0] = args->count - uio.uio_resid;
2405 return (error);
2406 }
2407
/*
 * mincore(2): Linux, unlike FreeBSD, requires the start address to be
 * page-aligned; otherwise forward to the native implementation.
 */
int
linux_mincore(struct thread *td, struct linux_mincore_args *args)
{

	/* Needs to be page-aligned */
	if (args->start & PAGE_MASK)
		return (EINVAL);
	return (kern_mincore(td, args->start, args->len, args->vec));
}
2417
/* Priority tag prepended to each line, Linux printk style ("<6>" = info). */
#define	SYSLOG_TAG	"<6>"

/*
 * syslog(2): only the READ_ALL action is supported.  The kernel
 * message buffer is streamed to userland with a SYSLOG_TAG inserted
 * at the start and after each newline.
 */
int
linux_syslog(struct thread *td, struct linux_syslog_args *args)
{
	char buf[128], *src, *dst;
	u_int seq;
	int buflen, error;

	if (args->type != LINUX_SYSLOG_ACTION_READ_ALL) {
		linux_msg(td, "syslog unsupported type 0x%x", args->type);
		return (EINVAL);
	}

	/* Too small to hold even one tagged character; report nothing. */
	if (args->len < 6) {
		td->td_retval[0] = 0;
		return (0);
	}

	error = priv_check(td, PRIV_MSGBUF);
	if (error)
		return (error);

	/* Prime the sequence cursor for incremental peeking. */
	mtx_lock(&msgbuf_lock);
	msgbuf_peekbytes(msgbufp, NULL, 0, &seq);
	mtx_unlock(&msgbuf_lock);

	dst = args->buf;
	error = copyout(&SYSLOG_TAG, dst, sizeof(SYSLOG_TAG));
	/* The -1 is to skip the trailing '\0'. */
	dst += sizeof(SYSLOG_TAG) - 1;

	while (error == 0) {
		mtx_lock(&msgbuf_lock);
		buflen = msgbuf_peekbytes(msgbufp, buf, sizeof(buf), &seq);
		mtx_unlock(&msgbuf_lock);

		if (buflen == 0)
			break;

		for (src = buf; src < buf + buflen && error == 0; src++) {
			if (*src == '\0')
				continue;

			if (dst >= args->buf + args->len)
				goto out;

			error = copyout(src, dst, 1);
			dst++;

			/*
			 * NOTE(review): when src is the last filled byte
			 * and buflen == sizeof(buf), *(src + 1) reads one
			 * byte past the filled region (buf[buflen]) —
			 * looks like a one-past read; confirm and guard.
			 */
			if (*src == '\n' && *(src + 1) != '<' &&
			    dst + sizeof(SYSLOG_TAG) < args->buf + args->len) {
				error = copyout(&SYSLOG_TAG,
				    dst, sizeof(SYSLOG_TAG));
				dst += sizeof(SYSLOG_TAG) - 1;
			}
		}
	}
out:
	td->td_retval[0] = dst - args->buf;
	return (error);
}
2480
2481 int
2482 linux_getcpu(struct thread *td, struct linux_getcpu_args *args)
2483 {
2484 int cpu, error, node;
2485
2486 cpu = td->td_oncpu; /* Make sure it doesn't change during copyout(9) */
2487 error = 0;
2488 node = cpuid_to_pcpu[cpu]->pc_domain;
2489
2490 if (args->cpu != NULL)
2491 error = copyout(&cpu, args->cpu, sizeof(l_int));
2492 if (args->node != NULL)
2493 error = copyout(&node, args->node, sizeof(l_int));
2494 return (error);
2495 }
Cache object: c46062af77db4d6acb41adf8cefa9b41
|