The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/compat/linux/linux_misc.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-3-Clause
    3  *
    4  * Copyright (c) 2002 Doug Rabson
    5  * Copyright (c) 1994-1995 Søren Schmidt
    6  * All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer
   13  *    in this position and unchanged.
   14  * 2. Redistributions in binary form must reproduce the above copyright
   15  *    notice, this list of conditions and the following disclaimer in the
   16  *    documentation and/or other materials provided with the distribution.
   17  * 3. The name of the author may not be used to endorse or promote products
   18  *    derived from this software without specific prior written permission
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   21  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   22  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   23  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
   24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
   25  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
   29  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   30  */
   31 
   32 #include <sys/cdefs.h>
   33 __FBSDID("$FreeBSD$");
   34 
   35 #include "opt_compat.h"
   36 
   37 #include <sys/param.h>
   38 #include <sys/blist.h>
   39 #include <sys/fcntl.h>
   40 #if defined(__i386__)
   41 #include <sys/imgact_aout.h>
   42 #endif
   43 #include <sys/jail.h>
   44 #include <sys/imgact.h>
   45 #include <sys/kernel.h>
   46 #include <sys/limits.h>
   47 #include <sys/lock.h>
   48 #include <sys/malloc.h>
   49 #include <sys/mman.h>
   50 #include <sys/mount.h>
   51 #include <sys/msgbuf.h>
   52 #include <sys/mutex.h>
   53 #include <sys/namei.h>
   54 #include <sys/poll.h>
   55 #include <sys/priv.h>
   56 #include <sys/proc.h>
   57 #include <sys/procctl.h>
   58 #include <sys/reboot.h>
   59 #include <sys/racct.h>
   60 #include <sys/random.h>
   61 #include <sys/resourcevar.h>
   62 #include <sys/sched.h>
   63 #include <sys/sdt.h>
   64 #include <sys/signalvar.h>
   65 #include <sys/smp.h>
   66 #include <sys/stat.h>
   67 #include <sys/syscallsubr.h>
   68 #include <sys/sysctl.h>
   69 #include <sys/sysproto.h>
   70 #include <sys/systm.h>
   71 #include <sys/time.h>
   72 #include <sys/vmmeter.h>
   73 #include <sys/vnode.h>
   74 #include <sys/wait.h>
   75 #include <sys/cpuset.h>
   76 #include <sys/uio.h>
   77 
   78 #include <security/audit/audit.h>
   79 #include <security/mac/mac_framework.h>
   80 
   81 #include <vm/vm.h>
   82 #include <vm/pmap.h>
   83 #include <vm/vm_kern.h>
   84 #include <vm/vm_map.h>
   85 #include <vm/vm_extern.h>
   86 #include <vm/swap_pager.h>
   87 
   88 #ifdef COMPAT_LINUX32
   89 #include <machine/../linux32/linux.h>
   90 #include <machine/../linux32/linux32_proto.h>
   91 #else
   92 #include <machine/../linux/linux.h>
   93 #include <machine/../linux/linux_proto.h>
   94 #endif
   95 
   96 #include <compat/linux/linux_common.h>
   97 #include <compat/linux/linux_dtrace.h>
   98 #include <compat/linux/linux_file.h>
   99 #include <compat/linux/linux_mib.h>
  100 #include <compat/linux/linux_signal.h>
  101 #include <compat/linux/linux_timer.h>
  102 #include <compat/linux/linux_util.h>
  103 #include <compat/linux/linux_sysproto.h>
  104 #include <compat/linux/linux_emul.h>
  105 #include <compat/linux/linux_misc.h>
  106 
  107 int stclohz;                            /* Statistics clock frequency */
  108 
  109 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = {
  110         RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
  111         RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
  112         RLIMIT_MEMLOCK, RLIMIT_AS
  113 };
  114 
  115 struct l_sysinfo {
  116         l_long          uptime;         /* Seconds since boot */
  117         l_ulong         loads[3];       /* 1, 5, and 15 minute load averages */
  118 #define LINUX_SYSINFO_LOADS_SCALE 65536
  119         l_ulong         totalram;       /* Total usable main memory size */
  120         l_ulong         freeram;        /* Available memory size */
  121         l_ulong         sharedram;      /* Amount of shared memory */
  122         l_ulong         bufferram;      /* Memory used by buffers */
  123         l_ulong         totalswap;      /* Total swap space size */
  124         l_ulong         freeswap;       /* swap space still available */
  125         l_ushort        procs;          /* Number of current processes */
  126         l_ushort        pads;
  127         l_ulong         totalhigh;
  128         l_ulong         freehigh;
  129         l_uint          mem_unit;
  130         char            _f[20-2*sizeof(l_long)-sizeof(l_int)];  /* padding */
  131 };
  132 
  133 struct l_pselect6arg {
  134         l_uintptr_t     ss;
  135         l_size_t        ss_len;
  136 };
  137 
  138 static int      linux_utimensat_lts_to_ts(struct l_timespec *,
  139                         struct timespec *);
  140 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
  141 static int      linux_utimensat_lts64_to_ts(struct l_timespec64 *,
  142                         struct timespec *);
  143 #endif
  144 static int      linux_common_utimensat(struct thread *, int,
  145                         const char *, struct timespec *, int);
  146 static int      linux_common_pselect6(struct thread *, l_int,
  147                         l_fd_set *, l_fd_set *, l_fd_set *,
  148                         struct timespec *, l_uintptr_t *);
  149 static int      linux_common_ppoll(struct thread *, struct pollfd *,
  150                         uint32_t, struct timespec *, l_sigset_t *,
  151                         l_size_t);
  152 static int      linux_pollin(struct thread *, struct pollfd *,
  153                         struct pollfd *, u_int);
  154 static int      linux_pollout(struct thread *, struct pollfd *,
  155                         struct pollfd *, u_int);
  156 
  157 int
  158 linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args)
  159 {
  160         struct l_sysinfo sysinfo;
  161         int i, j;
  162         struct timespec ts;
  163 
  164         bzero(&sysinfo, sizeof(sysinfo));
  165         getnanouptime(&ts);
  166         if (ts.tv_nsec != 0)
  167                 ts.tv_sec++;
  168         sysinfo.uptime = ts.tv_sec;
  169 
  170         /* Use the information from the mib to get our load averages */
  171         for (i = 0; i < 3; i++)
  172                 sysinfo.loads[i] = averunnable.ldavg[i] *
  173                     LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale;
  174 
  175         sysinfo.totalram = physmem * PAGE_SIZE;
  176         sysinfo.freeram = (u_long)vm_free_count() * PAGE_SIZE;
  177 
  178         /*
  179          * sharedram counts pages allocated to named, swap-backed objects such
  180          * as shared memory segments and tmpfs files.  There is no cheap way to
  181          * compute this, so just leave the field unpopulated.  Linux itself only
  182          * started setting this field in the 3.x timeframe.
  183          */
  184         sysinfo.sharedram = 0;
  185         sysinfo.bufferram = 0;
  186 
  187         swap_pager_status(&i, &j);
  188         sysinfo.totalswap = i * PAGE_SIZE;
  189         sysinfo.freeswap = (i - j) * PAGE_SIZE;
  190 
  191         sysinfo.procs = nprocs;
  192 
  193         /*
  194          * Platforms supported by the emulation layer do not have a notion of
  195          * high memory.
  196          */
  197         sysinfo.totalhigh = 0;
  198         sysinfo.freehigh = 0;
  199 
  200         sysinfo.mem_unit = 1;
  201 
  202         return (copyout(&sysinfo, args->info, sizeof(sysinfo)));
  203 }
  204 
  205 #ifdef LINUX_LEGACY_SYSCALLS
  206 int
  207 linux_alarm(struct thread *td, struct linux_alarm_args *args)
  208 {
  209         struct itimerval it, old_it;
  210         u_int secs;
  211         int error __diagused;
  212 
  213         secs = args->secs;
  214         /*
  215          * Linux alarm() is always successful. Limit secs to INT32_MAX / 2
  216          * to match kern_setitimer()'s limit to avoid error from it.
  217          *
  218          * XXX. Linux limit secs to INT_MAX on 32 and does not limit on 64-bit
  219          * platforms.
  220          */
  221         if (secs > INT32_MAX / 2)
  222                 secs = INT32_MAX / 2;
  223 
  224         it.it_value.tv_sec = secs;
  225         it.it_value.tv_usec = 0;
  226         timevalclear(&it.it_interval);
  227         error = kern_setitimer(td, ITIMER_REAL, &it, &old_it);
  228         KASSERT(error == 0, ("kern_setitimer returns %d", error));
  229 
  230         if ((old_it.it_value.tv_sec == 0 && old_it.it_value.tv_usec > 0) ||
  231             old_it.it_value.tv_usec >= 500000)
  232                 old_it.it_value.tv_sec++;
  233         td->td_retval[0] = old_it.it_value.tv_sec;
  234         return (0);
  235 }
  236 #endif
  237 
  238 int
  239 linux_brk(struct thread *td, struct linux_brk_args *args)
  240 {
  241         struct vmspace *vm = td->td_proc->p_vmspace;
  242         uintptr_t new, old;
  243 
  244         old = (uintptr_t)vm->vm_daddr + ctob(vm->vm_dsize);
  245         new = (uintptr_t)args->dsend;
  246         if ((caddr_t)new > vm->vm_daddr && !kern_break(td, &new))
  247                 td->td_retval[0] = (register_t)new;
  248         else
  249                 td->td_retval[0] = (register_t)old;
  250 
  251         return (0);
  252 }
  253 
  254 #if defined(__i386__)
  255 /* XXX: what about amd64/linux32? */
  256 
  257 int
  258 linux_uselib(struct thread *td, struct linux_uselib_args *args)
  259 {
  260         struct nameidata ni;
  261         struct vnode *vp;
  262         struct exec *a_out;
  263         vm_map_t map;
  264         vm_map_entry_t entry;
  265         struct vattr attr;
  266         vm_offset_t vmaddr;
  267         unsigned long file_offset;
  268         unsigned long bss_size;
  269         char *library;
  270         ssize_t aresid;
  271         int error;
  272         bool locked, opened, textset;
  273 
  274         a_out = NULL;
  275         vp = NULL;
  276         locked = false;
  277         textset = false;
  278         opened = false;
  279 
  280         if (!LUSECONVPATH(td)) {
  281                 NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1,
  282                     UIO_USERSPACE, args->library);
  283                 error = namei(&ni);
  284         } else {
  285                 LCONVPATHEXIST(args->library, &library);
  286                 NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1,
  287                     UIO_SYSSPACE, library);
  288                 error = namei(&ni);
  289                 LFREEPATH(library);
  290         }
  291         if (error)
  292                 goto cleanup;
  293 
  294         vp = ni.ni_vp;
  295         NDFREE_PNBUF(&ni);
  296 
  297         /*
  298          * From here on down, we have a locked vnode that must be unlocked.
  299          * XXX: The code below largely duplicates exec_check_permissions().
  300          */
  301         locked = true;
  302 
  303         /* Executable? */
  304         error = VOP_GETATTR(vp, &attr, td->td_ucred);
  305         if (error)
  306                 goto cleanup;
  307 
  308         if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
  309             ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) {
  310                 /* EACCESS is what exec(2) returns. */
  311                 error = ENOEXEC;
  312                 goto cleanup;
  313         }
  314 
  315         /* Sensible size? */
  316         if (attr.va_size == 0) {
  317                 error = ENOEXEC;
  318                 goto cleanup;
  319         }
  320 
  321         /* Can we access it? */
  322         error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
  323         if (error)
  324                 goto cleanup;
  325 
  326         /*
  327          * XXX: This should use vn_open() so that it is properly authorized,
  328          * and to reduce code redundancy all over the place here.
  329          * XXX: Not really, it duplicates far more of exec_check_permissions()
  330          * than vn_open().
  331          */
  332 #ifdef MAC
  333         error = mac_vnode_check_open(td->td_ucred, vp, VREAD);
  334         if (error)
  335                 goto cleanup;
  336 #endif
  337         error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL);
  338         if (error)
  339                 goto cleanup;
  340         opened = true;
  341 
  342         /* Pull in executable header into exec_map */
  343         error = vm_mmap(exec_map, (vm_offset_t *)&a_out, PAGE_SIZE,
  344             VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0);
  345         if (error)
  346                 goto cleanup;
  347 
  348         /* Is it a Linux binary ? */
  349         if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
  350                 error = ENOEXEC;
  351                 goto cleanup;
  352         }
  353 
  354         /*
  355          * While we are here, we should REALLY do some more checks
  356          */
  357 
  358         /* Set file/virtual offset based on a.out variant. */
  359         switch ((int)(a_out->a_magic & 0xffff)) {
  360         case 0413:                      /* ZMAGIC */
  361                 file_offset = 1024;
  362                 break;
  363         case 0314:                      /* QMAGIC */
  364                 file_offset = 0;
  365                 break;
  366         default:
  367                 error = ENOEXEC;
  368                 goto cleanup;
  369         }
  370 
  371         bss_size = round_page(a_out->a_bss);
  372 
  373         /* Check various fields in header for validity/bounds. */
  374         if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
  375                 error = ENOEXEC;
  376                 goto cleanup;
  377         }
  378 
  379         /* text + data can't exceed file size */
  380         if (a_out->a_data + a_out->a_text > attr.va_size) {
  381                 error = EFAULT;
  382                 goto cleanup;
  383         }
  384 
  385         /*
  386          * text/data/bss must not exceed limits
  387          * XXX - this is not complete. it should check current usage PLUS
  388          * the resources needed by this library.
  389          */
  390         PROC_LOCK(td->td_proc);
  391         if (a_out->a_text > maxtsiz ||
  392             a_out->a_data + bss_size > lim_cur_proc(td->td_proc, RLIMIT_DATA) ||
  393             racct_set(td->td_proc, RACCT_DATA, a_out->a_data +
  394             bss_size) != 0) {
  395                 PROC_UNLOCK(td->td_proc);
  396                 error = ENOMEM;
  397                 goto cleanup;
  398         }
  399         PROC_UNLOCK(td->td_proc);
  400 
  401         /*
  402          * Prevent more writers.
  403          */
  404         error = VOP_SET_TEXT(vp);
  405         if (error != 0)
  406                 goto cleanup;
  407         textset = true;
  408 
  409         /*
  410          * Lock no longer needed
  411          */
  412         locked = false;
  413         VOP_UNLOCK(vp);
  414 
  415         /*
  416          * Check if file_offset page aligned. Currently we cannot handle
  417          * misalinged file offsets, and so we read in the entire image
  418          * (what a waste).
  419          */
  420         if (file_offset & PAGE_MASK) {
  421                 /* Map text+data read/write/execute */
  422 
  423                 /* a_entry is the load address and is page aligned */
  424                 vmaddr = trunc_page(a_out->a_entry);
  425 
  426                 /* get anon user mapping, read+write+execute */
  427                 error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
  428                     &vmaddr, a_out->a_text + a_out->a_data, 0, VMFS_NO_SPACE,
  429                     VM_PROT_ALL, VM_PROT_ALL, 0);
  430                 if (error)
  431                         goto cleanup;
  432 
  433                 error = vn_rdwr(UIO_READ, vp, (void *)vmaddr, file_offset,
  434                     a_out->a_text + a_out->a_data, UIO_USERSPACE, 0,
  435                     td->td_ucred, NOCRED, &aresid, td);
  436                 if (error != 0)
  437                         goto cleanup;
  438                 if (aresid != 0) {
  439                         error = ENOEXEC;
  440                         goto cleanup;
  441                 }
  442         } else {
  443                 /*
  444                  * for QMAGIC, a_entry is 20 bytes beyond the load address
  445                  * to skip the executable header
  446                  */
  447                 vmaddr = trunc_page(a_out->a_entry);
  448 
  449                 /*
  450                  * Map it all into the process's space as a single
  451                  * copy-on-write "data" segment.
  452                  */
  453                 map = &td->td_proc->p_vmspace->vm_map;
  454                 error = vm_mmap(map, &vmaddr,
  455                     a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL,
  456                     MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset);
  457                 if (error)
  458                         goto cleanup;
  459                 vm_map_lock(map);
  460                 if (!vm_map_lookup_entry(map, vmaddr, &entry)) {
  461                         vm_map_unlock(map);
  462                         error = EDOOFUS;
  463                         goto cleanup;
  464                 }
  465                 entry->eflags |= MAP_ENTRY_VN_EXEC;
  466                 vm_map_unlock(map);
  467                 textset = false;
  468         }
  469 
  470         if (bss_size != 0) {
  471                 /* Calculate BSS start address */
  472                 vmaddr = trunc_page(a_out->a_entry) + a_out->a_text +
  473                     a_out->a_data;
  474 
  475                 /* allocate some 'anon' space */
  476                 error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
  477                     &vmaddr, bss_size, 0, VMFS_NO_SPACE, VM_PROT_ALL,
  478                     VM_PROT_ALL, 0);
  479                 if (error)
  480                         goto cleanup;
  481         }
  482 
  483 cleanup:
  484         if (opened) {
  485                 if (locked)
  486                         VOP_UNLOCK(vp);
  487                 locked = false;
  488                 VOP_CLOSE(vp, FREAD, td->td_ucred, td);
  489         }
  490         if (textset) {
  491                 if (!locked) {
  492                         locked = true;
  493                         VOP_LOCK(vp, LK_SHARED | LK_RETRY);
  494                 }
  495                 VOP_UNSET_TEXT_CHECKED(vp);
  496         }
  497         if (locked)
  498                 VOP_UNLOCK(vp);
  499 
  500         /* Release the temporary mapping. */
  501         if (a_out)
  502                 kmap_free_wakeup(exec_map, (vm_offset_t)a_out, PAGE_SIZE);
  503 
  504         return (error);
  505 }
  506 
  507 #endif  /* __i386__ */
  508 
  509 #ifdef LINUX_LEGACY_SYSCALLS
  510 int
  511 linux_select(struct thread *td, struct linux_select_args *args)
  512 {
  513         l_timeval ltv;
  514         struct timeval tv0, tv1, utv, *tvp;
  515         int error;
  516 
  517         /*
  518          * Store current time for computation of the amount of
  519          * time left.
  520          */
  521         if (args->timeout) {
  522                 if ((error = copyin(args->timeout, &ltv, sizeof(ltv))))
  523                         goto select_out;
  524                 utv.tv_sec = ltv.tv_sec;
  525                 utv.tv_usec = ltv.tv_usec;
  526 
  527                 if (itimerfix(&utv)) {
  528                         /*
  529                          * The timeval was invalid.  Convert it to something
  530                          * valid that will act as it does under Linux.
  531                          */
  532                         utv.tv_sec += utv.tv_usec / 1000000;
  533                         utv.tv_usec %= 1000000;
  534                         if (utv.tv_usec < 0) {
  535                                 utv.tv_sec -= 1;
  536                                 utv.tv_usec += 1000000;
  537                         }
  538                         if (utv.tv_sec < 0)
  539                                 timevalclear(&utv);
  540                 }
  541                 microtime(&tv0);
  542                 tvp = &utv;
  543         } else
  544                 tvp = NULL;
  545 
  546         error = kern_select(td, args->nfds, args->readfds, args->writefds,
  547             args->exceptfds, tvp, LINUX_NFDBITS);
  548         if (error)
  549                 goto select_out;
  550 
  551         if (args->timeout) {
  552                 if (td->td_retval[0]) {
  553                         /*
  554                          * Compute how much time was left of the timeout,
  555                          * by subtracting the current time and the time
  556                          * before we started the call, and subtracting
  557                          * that result from the user-supplied value.
  558                          */
  559                         microtime(&tv1);
  560                         timevalsub(&tv1, &tv0);
  561                         timevalsub(&utv, &tv1);
  562                         if (utv.tv_sec < 0)
  563                                 timevalclear(&utv);
  564                 } else
  565                         timevalclear(&utv);
  566                 ltv.tv_sec = utv.tv_sec;
  567                 ltv.tv_usec = utv.tv_usec;
  568                 if ((error = copyout(&ltv, args->timeout, sizeof(ltv))))
  569                         goto select_out;
  570         }
  571 
  572 select_out:
  573         return (error);
  574 }
  575 #endif
  576 
  577 int
  578 linux_mremap(struct thread *td, struct linux_mremap_args *args)
  579 {
  580         uintptr_t addr;
  581         size_t len;
  582         int error = 0;
  583 
  584         if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) {
  585                 td->td_retval[0] = 0;
  586                 return (EINVAL);
  587         }
  588 
  589         /*
  590          * Check for the page alignment.
  591          * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK.
  592          */
  593         if (args->addr & PAGE_MASK) {
  594                 td->td_retval[0] = 0;
  595                 return (EINVAL);
  596         }
  597 
  598         args->new_len = round_page(args->new_len);
  599         args->old_len = round_page(args->old_len);
  600 
  601         if (args->new_len > args->old_len) {
  602                 td->td_retval[0] = 0;
  603                 return (ENOMEM);
  604         }
  605 
  606         if (args->new_len < args->old_len) {
  607                 addr = args->addr + args->new_len;
  608                 len = args->old_len - args->new_len;
  609                 error = kern_munmap(td, addr, len);
  610         }
  611 
  612         td->td_retval[0] = error ? 0 : (uintptr_t)args->addr;
  613         return (error);
  614 }
  615 
  616 #define LINUX_MS_ASYNC       0x0001
  617 #define LINUX_MS_INVALIDATE  0x0002
  618 #define LINUX_MS_SYNC        0x0004
  619 
  620 int
  621 linux_msync(struct thread *td, struct linux_msync_args *args)
  622 {
  623 
  624         return (kern_msync(td, args->addr, args->len,
  625             args->fl & ~LINUX_MS_SYNC));
  626 }
  627 
  628 #ifdef LINUX_LEGACY_SYSCALLS
  629 int
  630 linux_time(struct thread *td, struct linux_time_args *args)
  631 {
  632         struct timeval tv;
  633         l_time_t tm;
  634         int error;
  635 
  636         microtime(&tv);
  637         tm = tv.tv_sec;
  638         if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm))))
  639                 return (error);
  640         td->td_retval[0] = tm;
  641         return (0);
  642 }
  643 #endif
  644 
  645 struct l_times_argv {
  646         l_clock_t       tms_utime;
  647         l_clock_t       tms_stime;
  648         l_clock_t       tms_cutime;
  649         l_clock_t       tms_cstime;
  650 };
  651 
  652 /*
  653  * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value.
  654  * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK
  655  * auxiliary vector entry.
  656  */
  657 #define CLK_TCK         100
  658 
  659 #define CONVOTCK(r)     (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK))
  660 #define CONVNTCK(r)     (r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz))
  661 
  662 #define CONVTCK(r)      (linux_kernver(td) >= LINUX_KERNVER_2004000 ?           \
  663                             CONVNTCK(r) : CONVOTCK(r))
  664 
  665 int
  666 linux_times(struct thread *td, struct linux_times_args *args)
  667 {
  668         struct timeval tv, utime, stime, cutime, cstime;
  669         struct l_times_argv tms;
  670         struct proc *p;
  671         int error;
  672 
  673         if (args->buf != NULL) {
  674                 p = td->td_proc;
  675                 PROC_LOCK(p);
  676                 PROC_STATLOCK(p);
  677                 calcru(p, &utime, &stime);
  678                 PROC_STATUNLOCK(p);
  679                 calccru(p, &cutime, &cstime);
  680                 PROC_UNLOCK(p);
  681 
  682                 tms.tms_utime = CONVTCK(utime);
  683                 tms.tms_stime = CONVTCK(stime);
  684 
  685                 tms.tms_cutime = CONVTCK(cutime);
  686                 tms.tms_cstime = CONVTCK(cstime);
  687 
  688                 if ((error = copyout(&tms, args->buf, sizeof(tms))))
  689                         return (error);
  690         }
  691 
  692         microuptime(&tv);
  693         td->td_retval[0] = (int)CONVTCK(tv);
  694         return (0);
  695 }
  696 
  697 int
  698 linux_newuname(struct thread *td, struct linux_newuname_args *args)
  699 {
  700         struct l_new_utsname utsname;
  701         char osname[LINUX_MAX_UTSNAME];
  702         char osrelease[LINUX_MAX_UTSNAME];
  703         char *p;
  704 
  705         linux_get_osname(td, osname);
  706         linux_get_osrelease(td, osrelease);
  707 
  708         bzero(&utsname, sizeof(utsname));
  709         strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME);
  710         getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME);
  711         getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME);
  712         strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME);
  713         strlcpy(utsname.version, version, LINUX_MAX_UTSNAME);
  714         for (p = utsname.version; *p != '\0'; ++p)
  715                 if (*p == '\n') {
  716                         *p = '\0';
  717                         break;
  718                 }
  719 #if defined(__amd64__)
  720         /*
  721          * On amd64, Linux uname(2) needs to return "x86_64"
  722          * for both 64-bit and 32-bit applications.  On 32-bit,
  723          * the string returned by getauxval(AT_PLATFORM) needs
  724          * to remain "i686", though.
  725          */
  726 #if defined(COMPAT_LINUX32)
  727         if (linux32_emulate_i386)
  728                 strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME);
  729         else
  730 #endif
  731         strlcpy(utsname.machine, "x86_64", LINUX_MAX_UTSNAME);
  732 #elif defined(__aarch64__)
  733         strlcpy(utsname.machine, "aarch64", LINUX_MAX_UTSNAME);
  734 #elif defined(__i386__)
  735         strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME);
  736 #endif
  737 
  738         return (copyout(&utsname, args->buf, sizeof(utsname)));
  739 }
  740 
  741 struct l_utimbuf {
  742         l_time_t l_actime;
  743         l_time_t l_modtime;
  744 };
  745 
  746 #ifdef LINUX_LEGACY_SYSCALLS
  747 int
  748 linux_utime(struct thread *td, struct linux_utime_args *args)
  749 {
  750         struct timeval tv[2], *tvp;
  751         struct l_utimbuf lut;
  752         char *fname;
  753         int error;
  754 
  755         if (args->times) {
  756                 if ((error = copyin(args->times, &lut, sizeof lut)) != 0)
  757                         return (error);
  758                 tv[0].tv_sec = lut.l_actime;
  759                 tv[0].tv_usec = 0;
  760                 tv[1].tv_sec = lut.l_modtime;
  761                 tv[1].tv_usec = 0;
  762                 tvp = tv;
  763         } else
  764                 tvp = NULL;
  765 
  766         if (!LUSECONVPATH(td)) {
  767                 error = kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE,
  768                     tvp, UIO_SYSSPACE);
  769         } else {
  770                 LCONVPATHEXIST(args->fname, &fname);
  771                 error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, tvp,
  772                     UIO_SYSSPACE);
  773                 LFREEPATH(fname);
  774         }
  775         return (error);
  776 }
  777 #endif
  778 
  779 #ifdef LINUX_LEGACY_SYSCALLS
  780 int
  781 linux_utimes(struct thread *td, struct linux_utimes_args *args)
  782 {
  783         l_timeval ltv[2];
  784         struct timeval tv[2], *tvp = NULL;
  785         char *fname;
  786         int error;
  787 
  788         if (args->tptr != NULL) {
  789                 if ((error = copyin(args->tptr, ltv, sizeof ltv)) != 0)
  790                         return (error);
  791                 tv[0].tv_sec = ltv[0].tv_sec;
  792                 tv[0].tv_usec = ltv[0].tv_usec;
  793                 tv[1].tv_sec = ltv[1].tv_sec;
  794                 tv[1].tv_usec = ltv[1].tv_usec;
  795                 tvp = tv;
  796         }
  797 
  798         if (!LUSECONVPATH(td)) {
  799                 error = kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE,
  800                     tvp, UIO_SYSSPACE);
  801         } else {
  802                 LCONVPATHEXIST(args->fname, &fname);
  803                 error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE,
  804                     tvp, UIO_SYSSPACE);
  805                 LFREEPATH(fname);
  806         }
  807         return (error);
  808 }
  809 #endif
  810 
  811 static int
  812 linux_utimensat_lts_to_ts(struct l_timespec *l_times, struct timespec *times)
  813 {
  814 
  815         if (l_times->tv_nsec != LINUX_UTIME_OMIT &&
  816             l_times->tv_nsec != LINUX_UTIME_NOW &&
  817             (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999))
  818                 return (EINVAL);
  819 
  820         times->tv_sec = l_times->tv_sec;
  821         switch (l_times->tv_nsec)
  822         {
  823         case LINUX_UTIME_OMIT:
  824                 times->tv_nsec = UTIME_OMIT;
  825                 break;
  826         case LINUX_UTIME_NOW:
  827                 times->tv_nsec = UTIME_NOW;
  828                 break;
  829         default:
  830                 times->tv_nsec = l_times->tv_nsec;
  831         }
  832 
  833         return (0);
  834 }
  835 
  836 static int
  837 linux_common_utimensat(struct thread *td, int ldfd, const char *pathname,
  838     struct timespec *timesp, int lflags)
  839 {
  840         char *path = NULL;
  841         int error, dfd, flags = 0;
  842 
  843         dfd = (ldfd == LINUX_AT_FDCWD) ? AT_FDCWD : ldfd;
  844 
  845         if (lflags & ~(LINUX_AT_SYMLINK_NOFOLLOW | LINUX_AT_EMPTY_PATH))
  846                 return (EINVAL);
  847 
  848         if (timesp != NULL) {
  849                 /* This breaks POSIX, but is what the Linux kernel does
  850                  * _on purpose_ (documented in the man page for utimensat(2)),
  851                  * so we must follow that behaviour. */
  852                 if (timesp[0].tv_nsec == UTIME_OMIT &&
  853                     timesp[1].tv_nsec == UTIME_OMIT)
  854                         return (0);
  855         }
  856 
  857         if (lflags & LINUX_AT_SYMLINK_NOFOLLOW)
  858                 flags |= AT_SYMLINK_NOFOLLOW;
  859         if (lflags & LINUX_AT_EMPTY_PATH)
  860                 flags |= AT_EMPTY_PATH;
  861 
  862         if (!LUSECONVPATH(td)) {
  863                 if (pathname != NULL) {
  864                         return (kern_utimensat(td, dfd, pathname,
  865                             UIO_USERSPACE, timesp, UIO_SYSSPACE, flags));
  866                 }
  867         }
  868 
  869         if (pathname != NULL)
  870                 LCONVPATHEXIST_AT(pathname, &path, dfd);
  871         else if (lflags != 0)
  872                 return (EINVAL);
  873 
  874         if (path == NULL)
  875                 error = kern_futimens(td, dfd, timesp, UIO_SYSSPACE);
  876         else {
  877                 error = kern_utimensat(td, dfd, path, UIO_SYSSPACE, timesp,
  878                         UIO_SYSSPACE, flags);
  879                 LFREEPATH(path);
  880         }
  881 
  882         return (error);
  883 }
  884 
  885 int
  886 linux_utimensat(struct thread *td, struct linux_utimensat_args *args)
  887 {
  888         struct l_timespec l_times[2];
  889         struct timespec times[2], *timesp;
  890         int error;
  891 
  892         if (args->times != NULL) {
  893                 error = copyin(args->times, l_times, sizeof(l_times));
  894                 if (error != 0)
  895                         return (error);
  896 
  897                 error = linux_utimensat_lts_to_ts(&l_times[0], &times[0]);
  898                 if (error != 0)
  899                         return (error);
  900                 error = linux_utimensat_lts_to_ts(&l_times[1], &times[1]);
  901                 if (error != 0)
  902                         return (error);
  903                 timesp = times;
  904         } else
  905                 timesp = NULL;
  906 
  907         return (linux_common_utimensat(td, args->dfd, args->pathname,
  908             timesp, args->flags));
  909 }
  910 
  911 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
  912 static int
  913 linux_utimensat_lts64_to_ts(struct l_timespec64 *l_times, struct timespec *times)
  914 {
  915 
  916         /* Zero out the padding in compat mode. */
  917         l_times->tv_nsec &= 0xFFFFFFFFUL;
  918 
  919         if (l_times->tv_nsec != LINUX_UTIME_OMIT &&
  920             l_times->tv_nsec != LINUX_UTIME_NOW &&
  921             (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999))
  922                 return (EINVAL);
  923 
  924         times->tv_sec = l_times->tv_sec;
  925         switch (l_times->tv_nsec)
  926         {
  927         case LINUX_UTIME_OMIT:
  928                 times->tv_nsec = UTIME_OMIT;
  929                 break;
  930         case LINUX_UTIME_NOW:
  931                 times->tv_nsec = UTIME_NOW;
  932                 break;
  933         default:
  934                 times->tv_nsec = l_times->tv_nsec;
  935         }
  936 
  937         return (0);
  938 }
  939 
  940 int
  941 linux_utimensat_time64(struct thread *td, struct linux_utimensat_time64_args *args)
  942 {
  943         struct l_timespec64 l_times[2];
  944         struct timespec times[2], *timesp;
  945         int error;
  946 
  947         if (args->times64 != NULL) {
  948                 error = copyin(args->times64, l_times, sizeof(l_times));
  949                 if (error != 0)
  950                         return (error);
  951 
  952                 error = linux_utimensat_lts64_to_ts(&l_times[0], &times[0]);
  953                 if (error != 0)
  954                         return (error);
  955                 error = linux_utimensat_lts64_to_ts(&l_times[1], &times[1]);
  956                 if (error != 0)
  957                         return (error);
  958                 timesp = times;
  959         } else
  960                 timesp = NULL;
  961 
  962         return (linux_common_utimensat(td, args->dfd, args->pathname,
  963             timesp, args->flags));
  964 }
  965 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
  966 
  967 #ifdef LINUX_LEGACY_SYSCALLS
  968 int
  969 linux_futimesat(struct thread *td, struct linux_futimesat_args *args)
  970 {
  971         l_timeval ltv[2];
  972         struct timeval tv[2], *tvp = NULL;
  973         char *fname;
  974         int error, dfd;
  975 
  976         dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
  977 
  978         if (args->utimes != NULL) {
  979                 if ((error = copyin(args->utimes, ltv, sizeof ltv)) != 0)
  980                         return (error);
  981                 tv[0].tv_sec = ltv[0].tv_sec;
  982                 tv[0].tv_usec = ltv[0].tv_usec;
  983                 tv[1].tv_sec = ltv[1].tv_sec;
  984                 tv[1].tv_usec = ltv[1].tv_usec;
  985                 tvp = tv;
  986         }
  987 
  988         if (!LUSECONVPATH(td)) {
  989                 error = kern_utimesat(td, dfd, args->filename, UIO_USERSPACE,
  990                     tvp, UIO_SYSSPACE);
  991         } else {
  992                 LCONVPATHEXIST_AT(args->filename, &fname, dfd);
  993                 error = kern_utimesat(td, dfd, fname, UIO_SYSSPACE,
  994                     tvp, UIO_SYSSPACE);
  995                 LFREEPATH(fname);
  996         }
  997         return (error);
  998 }
  999 #endif
 1000 
 1001 static int
 1002 linux_common_wait(struct thread *td, idtype_t idtype, int id, int *statusp,
 1003     int options, void *rup, l_siginfo_t *infop)
 1004 {
 1005         l_siginfo_t lsi;
 1006         siginfo_t siginfo;
 1007         struct __wrusage wru;
 1008         int error, status, tmpstat, sig;
 1009 
 1010         error = kern_wait6(td, idtype, id, &status, options,
 1011             rup != NULL ? &wru : NULL, &siginfo);
 1012 
 1013         if (error == 0 && statusp) {
 1014                 tmpstat = status & 0xffff;
 1015                 if (WIFSIGNALED(tmpstat)) {
 1016                         tmpstat = (tmpstat & 0xffffff80) |
 1017                             bsd_to_linux_signal(WTERMSIG(tmpstat));
 1018                 } else if (WIFSTOPPED(tmpstat)) {
 1019                         tmpstat = (tmpstat & 0xffff00ff) |
 1020                             (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8);
 1021 #if defined(__aarch64__) || (defined(__amd64__) && !defined(COMPAT_LINUX32))
 1022                         if (WSTOPSIG(status) == SIGTRAP) {
 1023                                 tmpstat = linux_ptrace_status(td,
 1024                                     siginfo.si_pid, tmpstat);
 1025                         }
 1026 #endif
 1027                 } else if (WIFCONTINUED(tmpstat)) {
 1028                         tmpstat = 0xffff;
 1029                 }
 1030                 error = copyout(&tmpstat, statusp, sizeof(int));
 1031         }
 1032         if (error == 0 && rup != NULL)
 1033                 error = linux_copyout_rusage(&wru.wru_self, rup);
 1034         if (error == 0 && infop != NULL && td->td_retval[0] != 0) {
 1035                 sig = bsd_to_linux_signal(siginfo.si_signo);
 1036                 siginfo_to_lsiginfo(&siginfo, &lsi, sig);
 1037                 error = copyout(&lsi, infop, sizeof(lsi));
 1038         }
 1039 
 1040         return (error);
 1041 }
 1042 
 1043 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
 1044 int
 1045 linux_waitpid(struct thread *td, struct linux_waitpid_args *args)
 1046 {
 1047         struct linux_wait4_args wait4_args;
 1048 
 1049         wait4_args.pid = args->pid;
 1050         wait4_args.status = args->status;
 1051         wait4_args.options = args->options;
 1052         wait4_args.rusage = NULL;
 1053 
 1054         return (linux_wait4(td, &wait4_args));
 1055 }
 1056 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
 1057 
 1058 int
 1059 linux_wait4(struct thread *td, struct linux_wait4_args *args)
 1060 {
 1061         struct proc *p;
 1062         int options, id, idtype;
 1063 
 1064         if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG |
 1065             LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL))
 1066                 return (EINVAL);
 1067 
 1068         /* -INT_MIN is not defined. */
 1069         if (args->pid == INT_MIN)
 1070                 return (ESRCH);
 1071 
 1072         options = 0;
 1073         linux_to_bsd_waitopts(args->options, &options);
 1074 
 1075         /*
 1076          * For backward compatibility we implicitly add flags WEXITED
 1077          * and WTRAPPED here.
 1078          */
 1079         options |= WEXITED | WTRAPPED;
 1080 
 1081         if (args->pid == WAIT_ANY) {
 1082                 idtype = P_ALL;
 1083                 id = 0;
 1084         } else if (args->pid < 0) {
 1085                 idtype = P_PGID;
 1086                 id = (id_t)-args->pid;
 1087         } else if (args->pid == 0) {
 1088                 idtype = P_PGID;
 1089                 p = td->td_proc;
 1090                 PROC_LOCK(p);
 1091                 id = p->p_pgid;
 1092                 PROC_UNLOCK(p);
 1093         } else {
 1094                 idtype = P_PID;
 1095                 id = (id_t)args->pid;
 1096         }
 1097 
 1098         return (linux_common_wait(td, idtype, id, args->status, options,
 1099             args->rusage, NULL));
 1100 }
 1101 
 1102 int
 1103 linux_waitid(struct thread *td, struct linux_waitid_args *args)
 1104 {
 1105         idtype_t idtype;
 1106         int error, options;
 1107         struct proc *p;
 1108         pid_t id;
 1109 
 1110         if (args->options & ~(LINUX_WNOHANG | LINUX_WNOWAIT | LINUX_WEXITED |
 1111             LINUX_WSTOPPED | LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL))
 1112                 return (EINVAL);
 1113 
 1114         options = 0;
 1115         linux_to_bsd_waitopts(args->options, &options);
 1116 
 1117         id = args->id;
 1118         switch (args->idtype) {
 1119         case LINUX_P_ALL:
 1120                 idtype = P_ALL;
 1121                 break;
 1122         case LINUX_P_PID:
 1123                 if (args->id <= 0)
 1124                         return (EINVAL);
 1125                 idtype = P_PID;
 1126                 break;
 1127         case LINUX_P_PGID:
 1128                 if (linux_use54(td) && args->id == 0) {
 1129                         p = td->td_proc;
 1130                         PROC_LOCK(p);
 1131                         id = p->p_pgid;
 1132                         PROC_UNLOCK(p);
 1133                 } else if (args->id <= 0)
 1134                         return (EINVAL);
 1135                 idtype = P_PGID;
 1136                 break;
 1137         case LINUX_P_PIDFD:
 1138                 LINUX_RATELIMIT_MSG("unsupported waitid P_PIDFD idtype");
 1139                 return (ENOSYS);
 1140         default:
 1141                 return (EINVAL);
 1142         }
 1143 
 1144         error = linux_common_wait(td, idtype, id, NULL, options,
 1145             args->rusage, args->info);
 1146         td->td_retval[0] = 0;
 1147 
 1148         return (error);
 1149 }
 1150 
 1151 #ifdef LINUX_LEGACY_SYSCALLS
 1152 int
 1153 linux_mknod(struct thread *td, struct linux_mknod_args *args)
 1154 {
 1155         char *path;
 1156         int error;
 1157         enum uio_seg seg;
 1158         bool convpath;
 1159 
 1160         convpath = LUSECONVPATH(td);
 1161         if (!convpath) {
 1162                 path = args->path;
 1163                 seg = UIO_USERSPACE;
 1164         } else {
 1165                 LCONVPATHCREAT(args->path, &path);
 1166                 seg = UIO_SYSSPACE;
 1167         }
 1168 
 1169         switch (args->mode & S_IFMT) {
 1170         case S_IFIFO:
 1171         case S_IFSOCK:
 1172                 error = kern_mkfifoat(td, AT_FDCWD, path, seg,
 1173                     args->mode);
 1174                 break;
 1175 
 1176         case S_IFCHR:
 1177         case S_IFBLK:
 1178                 error = kern_mknodat(td, AT_FDCWD, path, seg,
 1179                     args->mode, args->dev);
 1180                 break;
 1181 
 1182         case S_IFDIR:
 1183                 error = EPERM;
 1184                 break;
 1185 
 1186         case 0:
 1187                 args->mode |= S_IFREG;
 1188                 /* FALLTHROUGH */
 1189         case S_IFREG:
 1190                 error = kern_openat(td, AT_FDCWD, path, seg,
 1191                     O_WRONLY | O_CREAT | O_TRUNC, args->mode);
 1192                 if (error == 0)
 1193                         kern_close(td, td->td_retval[0]);
 1194                 break;
 1195 
 1196         default:
 1197                 error = EINVAL;
 1198                 break;
 1199         }
 1200         if (convpath)
 1201                 LFREEPATH(path);
 1202         return (error);
 1203 }
 1204 #endif
 1205 
 1206 int
 1207 linux_mknodat(struct thread *td, struct linux_mknodat_args *args)
 1208 {
 1209         char *path;
 1210         int error, dfd;
 1211         enum uio_seg seg;
 1212         bool convpath;
 1213 
 1214         dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
 1215 
 1216         convpath = LUSECONVPATH(td);
 1217         if (!convpath) {
 1218                 path = __DECONST(char *, args->filename);
 1219                 seg = UIO_USERSPACE;
 1220         } else {
 1221                 LCONVPATHCREAT_AT(args->filename, &path, dfd);
 1222                 seg = UIO_SYSSPACE;
 1223         }
 1224 
 1225         switch (args->mode & S_IFMT) {
 1226         case S_IFIFO:
 1227         case S_IFSOCK:
 1228                 error = kern_mkfifoat(td, dfd, path, seg, args->mode);
 1229                 break;
 1230 
 1231         case S_IFCHR:
 1232         case S_IFBLK:
 1233                 error = kern_mknodat(td, dfd, path, seg, args->mode,
 1234                     args->dev);
 1235                 break;
 1236 
 1237         case S_IFDIR:
 1238                 error = EPERM;
 1239                 break;
 1240 
 1241         case 0:
 1242                 args->mode |= S_IFREG;
 1243                 /* FALLTHROUGH */
 1244         case S_IFREG:
 1245                 error = kern_openat(td, dfd, path, seg,
 1246                     O_WRONLY | O_CREAT | O_TRUNC, args->mode);
 1247                 if (error == 0)
 1248                         kern_close(td, td->td_retval[0]);
 1249                 break;
 1250 
 1251         default:
 1252                 error = EINVAL;
 1253                 break;
 1254         }
 1255         if (convpath)
 1256                 LFREEPATH(path);
 1257         return (error);
 1258 }
 1259 
 1260 /*
 1261  * UGH! This is just about the dumbest idea I've ever heard!!
 1262  */
 1263 int
 1264 linux_personality(struct thread *td, struct linux_personality_args *args)
 1265 {
 1266         struct linux_pemuldata *pem;
 1267         struct proc *p = td->td_proc;
 1268         uint32_t old;
 1269 
 1270         PROC_LOCK(p);
 1271         pem = pem_find(p);
 1272         old = pem->persona;
 1273         if (args->per != 0xffffffff)
 1274                 pem->persona = args->per;
 1275         PROC_UNLOCK(p);
 1276 
 1277         td->td_retval[0] = old;
 1278         return (0);
 1279 }
 1280 
/*
 * Interval timer value as exchanged with Linux user space by
 * linux_setitimer() and linux_getitimer(): two l_timeval members that are
 * converted to and from the native struct itimerval with B2L_ITIMERVAL().
 */
struct l_itimerval {
        l_timeval it_interval;
        l_timeval it_value;
};
 1285 
 1286 #define B2L_ITIMERVAL(bip, lip)                                         \
 1287         (bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec;          \
 1288         (bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec;        \
 1289         (bip)->it_value.tv_sec = (lip)->it_value.tv_sec;                \
 1290         (bip)->it_value.tv_usec = (lip)->it_value.tv_usec;
 1291 
 1292 int
 1293 linux_setitimer(struct thread *td, struct linux_setitimer_args *uap)
 1294 {
 1295         int error;
 1296         struct l_itimerval ls;
 1297         struct itimerval aitv, oitv;
 1298 
 1299         if (uap->itv == NULL) {
 1300                 uap->itv = uap->oitv;
 1301                 return (linux_getitimer(td, (struct linux_getitimer_args *)uap));
 1302         }
 1303 
 1304         error = copyin(uap->itv, &ls, sizeof(ls));
 1305         if (error != 0)
 1306                 return (error);
 1307         B2L_ITIMERVAL(&aitv, &ls);
 1308         error = kern_setitimer(td, uap->which, &aitv, &oitv);
 1309         if (error != 0 || uap->oitv == NULL)
 1310                 return (error);
 1311         B2L_ITIMERVAL(&ls, &oitv);
 1312 
 1313         return (copyout(&ls, uap->oitv, sizeof(ls)));
 1314 }
 1315 
 1316 int
 1317 linux_getitimer(struct thread *td, struct linux_getitimer_args *uap)
 1318 {
 1319         int error;
 1320         struct l_itimerval ls;
 1321         struct itimerval aitv;
 1322 
 1323         error = kern_getitimer(td, uap->which, &aitv);
 1324         if (error != 0)
 1325                 return (error);
 1326         B2L_ITIMERVAL(&ls, &aitv);
 1327         return (copyout(&ls, uap->itv, sizeof(ls)));
 1328 }
 1329 
 1330 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
 1331 int
 1332 linux_nice(struct thread *td, struct linux_nice_args *args)
 1333 {
 1334 
 1335         return (kern_setpriority(td, PRIO_PROCESS, 0, args->inc));
 1336 }
 1337 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
 1338 
 1339 int
 1340 linux_setgroups(struct thread *td, struct linux_setgroups_args *args)
 1341 {
 1342         struct ucred *newcred, *oldcred;
 1343         l_gid_t *linux_gidset;
 1344         gid_t *bsd_gidset;
 1345         int ngrp, error;
 1346         struct proc *p;
 1347 
 1348         ngrp = args->gidsetsize;
 1349         if (ngrp < 0 || ngrp >= ngroups_max + 1)
 1350                 return (EINVAL);
 1351         linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK);
 1352         error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t));
 1353         if (error)
 1354                 goto out;
 1355         newcred = crget();
 1356         crextend(newcred, ngrp + 1);
 1357         p = td->td_proc;
 1358         PROC_LOCK(p);
 1359         oldcred = p->p_ucred;
 1360         crcopy(newcred, oldcred);
 1361 
 1362         /*
 1363          * cr_groups[0] holds egid. Setting the whole set from
 1364          * the supplied set will cause egid to be changed too.
 1365          * Keep cr_groups[0] unchanged to prevent that.
 1366          */
 1367 
 1368         if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS)) != 0) {
 1369                 PROC_UNLOCK(p);
 1370                 crfree(newcred);
 1371                 goto out;
 1372         }
 1373 
 1374         if (ngrp > 0) {
 1375                 newcred->cr_ngroups = ngrp + 1;
 1376 
 1377                 bsd_gidset = newcred->cr_groups;
 1378                 ngrp--;
 1379                 while (ngrp >= 0) {
 1380                         bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
 1381                         ngrp--;
 1382                 }
 1383         } else
 1384                 newcred->cr_ngroups = 1;
 1385 
 1386         setsugid(p);
 1387         proc_set_cred(p, newcred);
 1388         PROC_UNLOCK(p);
 1389         crfree(oldcred);
 1390         error = 0;
 1391 out:
 1392         free(linux_gidset, M_LINUX);
 1393         return (error);
 1394 }
 1395 
 1396 int
 1397 linux_getgroups(struct thread *td, struct linux_getgroups_args *args)
 1398 {
 1399         struct ucred *cred;
 1400         l_gid_t *linux_gidset;
 1401         gid_t *bsd_gidset;
 1402         int bsd_gidsetsz, ngrp, error;
 1403 
 1404         cred = td->td_ucred;
 1405         bsd_gidset = cred->cr_groups;
 1406         bsd_gidsetsz = cred->cr_ngroups - 1;
 1407 
 1408         /*
 1409          * cr_groups[0] holds egid. Returning the whole set
 1410          * here will cause a duplicate. Exclude cr_groups[0]
 1411          * to prevent that.
 1412          */
 1413 
 1414         if ((ngrp = args->gidsetsize) == 0) {
 1415                 td->td_retval[0] = bsd_gidsetsz;
 1416                 return (0);
 1417         }
 1418 
 1419         if (ngrp < bsd_gidsetsz)
 1420                 return (EINVAL);
 1421 
 1422         ngrp = 0;
 1423         linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset),
 1424             M_LINUX, M_WAITOK);
 1425         while (ngrp < bsd_gidsetsz) {
 1426                 linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
 1427                 ngrp++;
 1428         }
 1429 
 1430         error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t));
 1431         free(linux_gidset, M_LINUX);
 1432         if (error)
 1433                 return (error);
 1434 
 1435         td->td_retval[0] = ngrp;
 1436         return (0);
 1437 }
 1438 
 1439 static bool
 1440 linux_get_dummy_limit(l_uint resource, struct rlimit *rlim)
 1441 {
 1442 
 1443         if (linux_dummy_rlimits == 0)
 1444                 return (false);
 1445 
 1446         switch (resource) {
 1447         case LINUX_RLIMIT_LOCKS:
 1448         case LINUX_RLIMIT_SIGPENDING:
 1449         case LINUX_RLIMIT_MSGQUEUE:
 1450         case LINUX_RLIMIT_RTTIME:
 1451                 rlim->rlim_cur = LINUX_RLIM_INFINITY;
 1452                 rlim->rlim_max = LINUX_RLIM_INFINITY;
 1453                 return (true);
 1454         case LINUX_RLIMIT_NICE:
 1455         case LINUX_RLIMIT_RTPRIO:
 1456                 rlim->rlim_cur = 0;
 1457                 rlim->rlim_max = 0;
 1458                 return (true);
 1459         default:
 1460                 return (false);
 1461         }
 1462 }
 1463 
 1464 int
 1465 linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args)
 1466 {
 1467         struct rlimit bsd_rlim;
 1468         struct l_rlimit rlim;
 1469         u_int which;
 1470         int error;
 1471 
 1472         if (args->resource >= LINUX_RLIM_NLIMITS)
 1473                 return (EINVAL);
 1474 
 1475         which = linux_to_bsd_resource[args->resource];
 1476         if (which == -1)
 1477                 return (EINVAL);
 1478 
 1479         error = copyin(args->rlim, &rlim, sizeof(rlim));
 1480         if (error)
 1481                 return (error);
 1482 
 1483         bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur;
 1484         bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max;
 1485         return (kern_setrlimit(td, which, &bsd_rlim));
 1486 }
 1487 
 1488 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
 1489 int
 1490 linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args)
 1491 {
 1492         struct l_rlimit rlim;
 1493         struct rlimit bsd_rlim;
 1494         u_int which;
 1495 
 1496         if (linux_get_dummy_limit(args->resource, &bsd_rlim)) {
 1497                 rlim.rlim_cur = bsd_rlim.rlim_cur;
 1498                 rlim.rlim_max = bsd_rlim.rlim_max;
 1499                 return (copyout(&rlim, args->rlim, sizeof(rlim)));
 1500         }
 1501 
 1502         if (args->resource >= LINUX_RLIM_NLIMITS)
 1503                 return (EINVAL);
 1504 
 1505         which = linux_to_bsd_resource[args->resource];
 1506         if (which == -1)
 1507                 return (EINVAL);
 1508 
 1509         lim_rlimit(td, which, &bsd_rlim);
 1510 
 1511 #ifdef COMPAT_LINUX32
 1512         rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur;
 1513         if (rlim.rlim_cur == UINT_MAX)
 1514                 rlim.rlim_cur = INT_MAX;
 1515         rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max;
 1516         if (rlim.rlim_max == UINT_MAX)
 1517                 rlim.rlim_max = INT_MAX;
 1518 #else
 1519         rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur;
 1520         if (rlim.rlim_cur == ULONG_MAX)
 1521                 rlim.rlim_cur = LONG_MAX;
 1522         rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max;
 1523         if (rlim.rlim_max == ULONG_MAX)
 1524                 rlim.rlim_max = LONG_MAX;
 1525 #endif
 1526         return (copyout(&rlim, args->rlim, sizeof(rlim)));
 1527 }
 1528 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
 1529 
 1530 int
 1531 linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args)
 1532 {
 1533         struct l_rlimit rlim;
 1534         struct rlimit bsd_rlim;
 1535         u_int which;
 1536 
 1537         if (linux_get_dummy_limit(args->resource, &bsd_rlim)) {
 1538                 rlim.rlim_cur = bsd_rlim.rlim_cur;
 1539                 rlim.rlim_max = bsd_rlim.rlim_max;
 1540                 return (copyout(&rlim, args->rlim, sizeof(rlim)));
 1541         }
 1542 
 1543         if (args->resource >= LINUX_RLIM_NLIMITS)
 1544                 return (EINVAL);
 1545 
 1546         which = linux_to_bsd_resource[args->resource];
 1547         if (which == -1)
 1548                 return (EINVAL);
 1549 
 1550         lim_rlimit(td, which, &bsd_rlim);
 1551 
 1552         rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur;
 1553         rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max;
 1554         return (copyout(&rlim, args->rlim, sizeof(rlim)));
 1555 }
 1556 
 1557 int
 1558 linux_sched_setscheduler(struct thread *td,
 1559     struct linux_sched_setscheduler_args *args)
 1560 {
 1561         struct sched_param sched_param;
 1562         struct thread *tdt;
 1563         int error, policy;
 1564 
 1565         switch (args->policy) {
 1566         case LINUX_SCHED_OTHER:
 1567                 policy = SCHED_OTHER;
 1568                 break;
 1569         case LINUX_SCHED_FIFO:
 1570                 policy = SCHED_FIFO;
 1571                 break;
 1572         case LINUX_SCHED_RR:
 1573                 policy = SCHED_RR;
 1574                 break;
 1575         default:
 1576                 return (EINVAL);
 1577         }
 1578 
 1579         error = copyin(args->param, &sched_param, sizeof(sched_param));
 1580         if (error)
 1581                 return (error);
 1582 
 1583         if (linux_map_sched_prio) {
 1584                 switch (policy) {
 1585                 case SCHED_OTHER:
 1586                         if (sched_param.sched_priority != 0)
 1587                                 return (EINVAL);
 1588 
 1589                         sched_param.sched_priority =
 1590                             PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE;
 1591                         break;
 1592                 case SCHED_FIFO:
 1593                 case SCHED_RR:
 1594                         if (sched_param.sched_priority < 1 ||
 1595                             sched_param.sched_priority >= LINUX_MAX_RT_PRIO)
 1596                                 return (EINVAL);
 1597 
 1598                         /*
 1599                          * Map [1, LINUX_MAX_RT_PRIO - 1] to
 1600                          * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down).
 1601                          */
 1602                         sched_param.sched_priority =
 1603                             (sched_param.sched_priority - 1) *
 1604                             (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) /
 1605                             (LINUX_MAX_RT_PRIO - 1);
 1606                         break;
 1607                 }
 1608         }
 1609 
 1610         tdt = linux_tdfind(td, args->pid, -1);
 1611         if (tdt == NULL)
 1612                 return (ESRCH);
 1613 
 1614         error = kern_sched_setscheduler(td, tdt, policy, &sched_param);
 1615         PROC_UNLOCK(tdt->td_proc);
 1616         return (error);
 1617 }
 1618 
 1619 int
 1620 linux_sched_getscheduler(struct thread *td,
 1621     struct linux_sched_getscheduler_args *args)
 1622 {
 1623         struct thread *tdt;
 1624         int error, policy;
 1625 
 1626         tdt = linux_tdfind(td, args->pid, -1);
 1627         if (tdt == NULL)
 1628                 return (ESRCH);
 1629 
 1630         error = kern_sched_getscheduler(td, tdt, &policy);
 1631         PROC_UNLOCK(tdt->td_proc);
 1632 
 1633         switch (policy) {
 1634         case SCHED_OTHER:
 1635                 td->td_retval[0] = LINUX_SCHED_OTHER;
 1636                 break;
 1637         case SCHED_FIFO:
 1638                 td->td_retval[0] = LINUX_SCHED_FIFO;
 1639                 break;
 1640         case SCHED_RR:
 1641                 td->td_retval[0] = LINUX_SCHED_RR;
 1642                 break;
 1643         }
 1644         return (error);
 1645 }
 1646 
 1647 int
 1648 linux_sched_get_priority_max(struct thread *td,
 1649     struct linux_sched_get_priority_max_args *args)
 1650 {
 1651         struct sched_get_priority_max_args bsd;
 1652 
 1653         if (linux_map_sched_prio) {
 1654                 switch (args->policy) {
 1655                 case LINUX_SCHED_OTHER:
 1656                         td->td_retval[0] = 0;
 1657                         return (0);
 1658                 case LINUX_SCHED_FIFO:
 1659                 case LINUX_SCHED_RR:
 1660                         td->td_retval[0] = LINUX_MAX_RT_PRIO - 1;
 1661                         return (0);
 1662                 default:
 1663                         return (EINVAL);
 1664                 }
 1665         }
 1666 
 1667         switch (args->policy) {
 1668         case LINUX_SCHED_OTHER:
 1669                 bsd.policy = SCHED_OTHER;
 1670                 break;
 1671         case LINUX_SCHED_FIFO:
 1672                 bsd.policy = SCHED_FIFO;
 1673                 break;
 1674         case LINUX_SCHED_RR:
 1675                 bsd.policy = SCHED_RR;
 1676                 break;
 1677         default:
 1678                 return (EINVAL);
 1679         }
 1680         return (sys_sched_get_priority_max(td, &bsd));
 1681 }
 1682 
 1683 int
 1684 linux_sched_get_priority_min(struct thread *td,
 1685     struct linux_sched_get_priority_min_args *args)
 1686 {
 1687         struct sched_get_priority_min_args bsd;
 1688 
 1689         if (linux_map_sched_prio) {
 1690                 switch (args->policy) {
 1691                 case LINUX_SCHED_OTHER:
 1692                         td->td_retval[0] = 0;
 1693                         return (0);
 1694                 case LINUX_SCHED_FIFO:
 1695                 case LINUX_SCHED_RR:
 1696                         td->td_retval[0] = 1;
 1697                         return (0);
 1698                 default:
 1699                         return (EINVAL);
 1700                 }
 1701         }
 1702 
 1703         switch (args->policy) {
 1704         case LINUX_SCHED_OTHER:
 1705                 bsd.policy = SCHED_OTHER;
 1706                 break;
 1707         case LINUX_SCHED_FIFO:
 1708                 bsd.policy = SCHED_FIFO;
 1709                 break;
 1710         case LINUX_SCHED_RR:
 1711                 bsd.policy = SCHED_RR;
 1712                 break;
 1713         default:
 1714                 return (EINVAL);
 1715         }
 1716         return (sys_sched_get_priority_min(td, &bsd));
 1717 }
 1718 
 1719 #define REBOOT_CAD_ON   0x89abcdef
 1720 #define REBOOT_CAD_OFF  0
 1721 #define REBOOT_HALT     0xcdef0123
 1722 #define REBOOT_RESTART  0x01234567
 1723 #define REBOOT_RESTART2 0xA1B2C3D4
 1724 #define REBOOT_POWEROFF 0x4321FEDC
 1725 #define REBOOT_MAGIC1   0xfee1dead
 1726 #define REBOOT_MAGIC2   0x28121969
 1727 #define REBOOT_MAGIC2A  0x05121996
 1728 #define REBOOT_MAGIC2B  0x16041998
 1729 
 1730 int
 1731 linux_reboot(struct thread *td, struct linux_reboot_args *args)
 1732 {
 1733         struct reboot_args bsd_args;
 1734 
 1735         if (args->magic1 != REBOOT_MAGIC1)
 1736                 return (EINVAL);
 1737 
 1738         switch (args->magic2) {
 1739         case REBOOT_MAGIC2:
 1740         case REBOOT_MAGIC2A:
 1741         case REBOOT_MAGIC2B:
 1742                 break;
 1743         default:
 1744                 return (EINVAL);
 1745         }
 1746 
 1747         switch (args->cmd) {
 1748         case REBOOT_CAD_ON:
 1749         case REBOOT_CAD_OFF:
 1750                 return (priv_check(td, PRIV_REBOOT));
 1751         case REBOOT_HALT:
 1752                 bsd_args.opt = RB_HALT;
 1753                 break;
 1754         case REBOOT_RESTART:
 1755         case REBOOT_RESTART2:
 1756                 bsd_args.opt = 0;
 1757                 break;
 1758         case REBOOT_POWEROFF:
 1759                 bsd_args.opt = RB_POWEROFF;
 1760                 break;
 1761         default:
 1762                 return (EINVAL);
 1763         }
 1764         return (sys_reboot(td, &bsd_args));
 1765 }
 1766 
 1767 int
 1768 linux_getpid(struct thread *td, struct linux_getpid_args *args)
 1769 {
 1770 
 1771         td->td_retval[0] = td->td_proc->p_pid;
 1772 
 1773         return (0);
 1774 }
 1775 
 1776 int
 1777 linux_gettid(struct thread *td, struct linux_gettid_args *args)
 1778 {
 1779         struct linux_emuldata *em;
 1780 
 1781         em = em_find(td);
 1782         KASSERT(em != NULL, ("gettid: emuldata not found.\n"));
 1783 
 1784         td->td_retval[0] = em->em_tid;
 1785 
 1786         return (0);
 1787 }
 1788 
 1789 int
 1790 linux_getppid(struct thread *td, struct linux_getppid_args *args)
 1791 {
 1792 
 1793         td->td_retval[0] = kern_getppid(td);
 1794         return (0);
 1795 }
 1796 
 1797 int
 1798 linux_getgid(struct thread *td, struct linux_getgid_args *args)
 1799 {
 1800 
 1801         td->td_retval[0] = td->td_ucred->cr_rgid;
 1802         return (0);
 1803 }
 1804 
 1805 int
 1806 linux_getuid(struct thread *td, struct linux_getuid_args *args)
 1807 {
 1808 
 1809         td->td_retval[0] = td->td_ucred->cr_ruid;
 1810         return (0);
 1811 }
 1812 
 1813 int
 1814 linux_getsid(struct thread *td, struct linux_getsid_args *args)
 1815 {
 1816 
 1817         return (kern_getsid(td, args->pid));
 1818 }
 1819 
 1820 int
 1821 linux_nosys(struct thread *td, struct nosys_args *ignore)
 1822 {
 1823 
 1824         return (ENOSYS);
 1825 }
 1826 
 1827 int
 1828 linux_getpriority(struct thread *td, struct linux_getpriority_args *args)
 1829 {
 1830         int error;
 1831 
 1832         error = kern_getpriority(td, args->which, args->who);
 1833         td->td_retval[0] = 20 - td->td_retval[0];
 1834         return (error);
 1835 }
 1836 
 1837 int
 1838 linux_sethostname(struct thread *td, struct linux_sethostname_args *args)
 1839 {
 1840         int name[2];
 1841 
 1842         name[0] = CTL_KERN;
 1843         name[1] = KERN_HOSTNAME;
 1844         return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname,
 1845             args->len, 0, 0));
 1846 }
 1847 
 1848 int
 1849 linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args)
 1850 {
 1851         int name[2];
 1852 
 1853         name[0] = CTL_KERN;
 1854         name[1] = KERN_NISDOMAINNAME;
 1855         return (userland_sysctl(td, name, 2, 0, 0, 0, args->name,
 1856             args->len, 0, 0));
 1857 }
 1858 
 1859 int
 1860 linux_exit_group(struct thread *td, struct linux_exit_group_args *args)
 1861 {
 1862 
 1863         LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid,
 1864             args->error_code);
 1865 
 1866         /*
 1867          * XXX: we should send a signal to the parent if
 1868          * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?)
 1869          * as it doesnt occur often.
 1870          */
 1871         exit1(td, args->error_code, 0);
 1872                 /* NOTREACHED */
 1873 }
 1874 
 1875 #define _LINUX_CAPABILITY_VERSION_1  0x19980330
 1876 #define _LINUX_CAPABILITY_VERSION_2  0x20071026
 1877 #define _LINUX_CAPABILITY_VERSION_3  0x20080522
 1878 
 1879 struct l_user_cap_header {
 1880         l_int   version;
 1881         l_int   pid;
 1882 };
 1883 
 1884 struct l_user_cap_data {
 1885         l_int   effective;
 1886         l_int   permitted;
 1887         l_int   inheritable;
 1888 };
 1889 
 1890 int
 1891 linux_capget(struct thread *td, struct linux_capget_args *uap)
 1892 {
 1893         struct l_user_cap_header luch;
 1894         struct l_user_cap_data lucd[2];
 1895         int error, u32s;
 1896 
 1897         if (uap->hdrp == NULL)
 1898                 return (EFAULT);
 1899 
 1900         error = copyin(uap->hdrp, &luch, sizeof(luch));
 1901         if (error != 0)
 1902                 return (error);
 1903 
 1904         switch (luch.version) {
 1905         case _LINUX_CAPABILITY_VERSION_1:
 1906                 u32s = 1;
 1907                 break;
 1908         case _LINUX_CAPABILITY_VERSION_2:
 1909         case _LINUX_CAPABILITY_VERSION_3:
 1910                 u32s = 2;
 1911                 break;
 1912         default:
 1913                 luch.version = _LINUX_CAPABILITY_VERSION_1;
 1914                 error = copyout(&luch, uap->hdrp, sizeof(luch));
 1915                 if (error)
 1916                         return (error);
 1917                 return (EINVAL);
 1918         }
 1919 
 1920         if (luch.pid)
 1921                 return (EPERM);
 1922 
 1923         if (uap->datap) {
 1924                 /*
 1925                  * The current implementation doesn't support setting
 1926                  * a capability (it's essentially a stub) so indicate
 1927                  * that no capabilities are currently set or available
 1928                  * to request.
 1929                  */
 1930                 memset(&lucd, 0, u32s * sizeof(lucd[0]));
 1931                 error = copyout(&lucd, uap->datap, u32s * sizeof(lucd[0]));
 1932         }
 1933 
 1934         return (error);
 1935 }
 1936 
 1937 int
 1938 linux_capset(struct thread *td, struct linux_capset_args *uap)
 1939 {
 1940         struct l_user_cap_header luch;
 1941         struct l_user_cap_data lucd[2];
 1942         int error, i, u32s;
 1943 
 1944         if (uap->hdrp == NULL || uap->datap == NULL)
 1945                 return (EFAULT);
 1946 
 1947         error = copyin(uap->hdrp, &luch, sizeof(luch));
 1948         if (error != 0)
 1949                 return (error);
 1950 
 1951         switch (luch.version) {
 1952         case _LINUX_CAPABILITY_VERSION_1:
 1953                 u32s = 1;
 1954                 break;
 1955         case _LINUX_CAPABILITY_VERSION_2:
 1956         case _LINUX_CAPABILITY_VERSION_3:
 1957                 u32s = 2;
 1958                 break;
 1959         default:
 1960                 luch.version = _LINUX_CAPABILITY_VERSION_1;
 1961                 error = copyout(&luch, uap->hdrp, sizeof(luch));
 1962                 if (error)
 1963                         return (error);
 1964                 return (EINVAL);
 1965         }
 1966 
 1967         if (luch.pid)
 1968                 return (EPERM);
 1969 
 1970         error = copyin(uap->datap, &lucd, u32s * sizeof(lucd[0]));
 1971         if (error != 0)
 1972                 return (error);
 1973 
 1974         /* We currently don't support setting any capabilities. */
 1975         for (i = 0; i < u32s; i++) {
 1976                 if (lucd[i].effective || lucd[i].permitted ||
 1977                     lucd[i].inheritable) {
 1978                         linux_msg(td,
 1979                             "capset[%d] effective=0x%x, permitted=0x%x, "
 1980                             "inheritable=0x%x is not implemented", i,
 1981                             (int)lucd[i].effective, (int)lucd[i].permitted,
 1982                             (int)lucd[i].inheritable);
 1983                         return (EPERM);
 1984                 }
 1985         }
 1986 
 1987         return (0);
 1988 }
 1989 
 1990 int
 1991 linux_prctl(struct thread *td, struct linux_prctl_args *args)
 1992 {
 1993         int error = 0, max_size, arg;
 1994         struct proc *p = td->td_proc;
 1995         char comm[LINUX_MAX_COMM_LEN];
 1996         int pdeath_signal, trace_state;
 1997 
 1998         switch (args->option) {
 1999         case LINUX_PR_SET_PDEATHSIG:
 2000                 if (!LINUX_SIG_VALID(args->arg2))
 2001                         return (EINVAL);
 2002                 pdeath_signal = linux_to_bsd_signal(args->arg2);
 2003                 return (kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_CTL,
 2004                     &pdeath_signal));
 2005         case LINUX_PR_GET_PDEATHSIG:
 2006                 error = kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_STATUS,
 2007                     &pdeath_signal);
 2008                 if (error != 0)
 2009                         return (error);
 2010                 pdeath_signal = bsd_to_linux_signal(pdeath_signal);
 2011                 return (copyout(&pdeath_signal,
 2012                     (void *)(register_t)args->arg2,
 2013                     sizeof(pdeath_signal)));
 2014         /*
 2015          * In Linux, this flag controls if set[gu]id processes can coredump.
 2016          * There are additional semantics imposed on processes that cannot
 2017          * coredump:
 2018          * - Such processes can not be ptraced.
 2019          * - There are some semantics around ownership of process-related files
 2020          *   in the /proc namespace.
 2021          *
 2022          * In FreeBSD, we can (and by default, do) disable setuid coredump
 2023          * system-wide with 'sugid_coredump.'  We control tracability on a
 2024          * per-process basis with the procctl PROC_TRACE (=> P2_NOTRACE flag).
 2025          * By happy coincidence, P2_NOTRACE also prevents coredumping.  So the
 2026          * procctl is roughly analogous to Linux's DUMPABLE.
 2027          *
 2028          * So, proxy these knobs to the corresponding PROC_TRACE setting.
 2029          */
 2030         case LINUX_PR_GET_DUMPABLE:
 2031                 error = kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_STATUS,
 2032                     &trace_state);
 2033                 if (error != 0)
 2034                         return (error);
 2035                 td->td_retval[0] = (trace_state != -1);
 2036                 return (0);
 2037         case LINUX_PR_SET_DUMPABLE:
 2038                 /*
 2039                  * It is only valid for userspace to set one of these two
 2040                  * flags, and only one at a time.
 2041                  */
 2042                 switch (args->arg2) {
 2043                 case LINUX_SUID_DUMP_DISABLE:
 2044                         trace_state = PROC_TRACE_CTL_DISABLE_EXEC;
 2045                         break;
 2046                 case LINUX_SUID_DUMP_USER:
 2047                         trace_state = PROC_TRACE_CTL_ENABLE;
 2048                         break;
 2049                 default:
 2050                         return (EINVAL);
 2051                 }
 2052                 return (kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_CTL,
 2053                     &trace_state));
 2054         case LINUX_PR_GET_KEEPCAPS:
 2055                 /*
 2056                  * Indicate that we always clear the effective and
 2057                  * permitted capability sets when the user id becomes
 2058                  * non-zero (actually the capability sets are simply
 2059                  * always zero in the current implementation).
 2060                  */
 2061                 td->td_retval[0] = 0;
 2062                 break;
 2063         case LINUX_PR_SET_KEEPCAPS:
 2064                 /*
 2065                  * Ignore requests to keep the effective and permitted
 2066                  * capability sets when the user id becomes non-zero.
 2067                  */
 2068                 break;
 2069         case LINUX_PR_SET_NAME:
 2070                 /*
 2071                  * To be on the safe side we need to make sure to not
 2072                  * overflow the size a Linux program expects. We already
 2073                  * do this here in the copyin, so that we don't need to
 2074                  * check on copyout.
 2075                  */
 2076                 max_size = MIN(sizeof(comm), sizeof(p->p_comm));
 2077                 error = copyinstr((void *)(register_t)args->arg2, comm,
 2078                     max_size, NULL);
 2079 
 2080                 /* Linux silently truncates the name if it is too long. */
 2081                 if (error == ENAMETOOLONG) {
 2082                         /*
 2083                          * XXX: copyinstr() isn't documented to populate the
 2084                          * array completely, so do a copyin() to be on the
 2085                          * safe side. This should be changed in case
 2086                          * copyinstr() is changed to guarantee this.
 2087                          */
 2088                         error = copyin((void *)(register_t)args->arg2, comm,
 2089                             max_size - 1);
 2090                         comm[max_size - 1] = '\0';
 2091                 }
 2092                 if (error)
 2093                         return (error);
 2094 
 2095                 PROC_LOCK(p);
 2096                 strlcpy(p->p_comm, comm, sizeof(p->p_comm));
 2097                 PROC_UNLOCK(p);
 2098                 break;
 2099         case LINUX_PR_GET_NAME:
 2100                 PROC_LOCK(p);
 2101                 strlcpy(comm, p->p_comm, sizeof(comm));
 2102                 PROC_UNLOCK(p);
 2103                 error = copyout(comm, (void *)(register_t)args->arg2,
 2104                     strlen(comm) + 1);
 2105                 break;
 2106         case LINUX_PR_GET_SECCOMP:
 2107         case LINUX_PR_SET_SECCOMP:
 2108                 /*
 2109                  * Same as returned by Linux without CONFIG_SECCOMP enabled.
 2110                  */
 2111                 error = EINVAL;
 2112                 break;
 2113         case LINUX_PR_CAPBSET_READ:
 2114 #if 0
 2115                 /*
 2116                  * This makes too much noise with Ubuntu Focal.
 2117                  */
 2118                 linux_msg(td, "unsupported prctl PR_CAPBSET_READ %d",
 2119                     (int)args->arg2);
 2120 #endif
 2121                 error = EINVAL;
 2122                 break;
 2123         case LINUX_PR_SET_NO_NEW_PRIVS:
 2124                 arg = args->arg2 == 1 ?
 2125                     PROC_NO_NEW_PRIVS_ENABLE : PROC_NO_NEW_PRIVS_DISABLE;
 2126                 error = kern_procctl(td, P_PID, p->p_pid,
 2127                     PROC_NO_NEW_PRIVS_CTL, &arg);
 2128                 break;
 2129         case LINUX_PR_SET_PTRACER:
 2130                 linux_msg(td, "unsupported prctl PR_SET_PTRACER");
 2131                 error = EINVAL;
 2132                 break;
 2133         default:
 2134                 linux_msg(td, "unsupported prctl option %d", args->option);
 2135                 error = EINVAL;
 2136                 break;
 2137         }
 2138 
 2139         return (error);
 2140 }
 2141 
 2142 int
 2143 linux_sched_setparam(struct thread *td,
 2144     struct linux_sched_setparam_args *uap)
 2145 {
 2146         struct sched_param sched_param;
 2147         struct thread *tdt;
 2148         int error, policy;
 2149 
 2150         error = copyin(uap->param, &sched_param, sizeof(sched_param));
 2151         if (error)
 2152                 return (error);
 2153 
 2154         tdt = linux_tdfind(td, uap->pid, -1);
 2155         if (tdt == NULL)
 2156                 return (ESRCH);
 2157 
 2158         if (linux_map_sched_prio) {
 2159                 error = kern_sched_getscheduler(td, tdt, &policy);
 2160                 if (error)
 2161                         goto out;
 2162 
 2163                 switch (policy) {
 2164                 case SCHED_OTHER:
 2165                         if (sched_param.sched_priority != 0) {
 2166                                 error = EINVAL;
 2167                                 goto out;
 2168                         }
 2169                         sched_param.sched_priority =
 2170                             PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE;
 2171                         break;
 2172                 case SCHED_FIFO:
 2173                 case SCHED_RR:
 2174                         if (sched_param.sched_priority < 1 ||
 2175                             sched_param.sched_priority >= LINUX_MAX_RT_PRIO) {
 2176                                 error = EINVAL;
 2177                                 goto out;
 2178                         }
 2179                         /*
 2180                          * Map [1, LINUX_MAX_RT_PRIO - 1] to
 2181                          * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down).
 2182                          */
 2183                         sched_param.sched_priority =
 2184                             (sched_param.sched_priority - 1) *
 2185                             (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) /
 2186                             (LINUX_MAX_RT_PRIO - 1);
 2187                         break;
 2188                 }
 2189         }
 2190 
 2191         error = kern_sched_setparam(td, tdt, &sched_param);
 2192 out:    PROC_UNLOCK(tdt->td_proc);
 2193         return (error);
 2194 }
 2195 
 2196 int
 2197 linux_sched_getparam(struct thread *td,
 2198     struct linux_sched_getparam_args *uap)
 2199 {
 2200         struct sched_param sched_param;
 2201         struct thread *tdt;
 2202         int error, policy;
 2203 
 2204         tdt = linux_tdfind(td, uap->pid, -1);
 2205         if (tdt == NULL)
 2206                 return (ESRCH);
 2207 
 2208         error = kern_sched_getparam(td, tdt, &sched_param);
 2209         if (error) {
 2210                 PROC_UNLOCK(tdt->td_proc);
 2211                 return (error);
 2212         }
 2213 
 2214         if (linux_map_sched_prio) {
 2215                 error = kern_sched_getscheduler(td, tdt, &policy);
 2216                 PROC_UNLOCK(tdt->td_proc);
 2217                 if (error)
 2218                         return (error);
 2219 
 2220                 switch (policy) {
 2221                 case SCHED_OTHER:
 2222                         sched_param.sched_priority = 0;
 2223                         break;
 2224                 case SCHED_FIFO:
 2225                 case SCHED_RR:
 2226                         /*
 2227                          * Map [0, RTP_PRIO_MAX - RTP_PRIO_MIN] to
 2228                          * [1, LINUX_MAX_RT_PRIO - 1] (rounding up).
 2229                          */
 2230                         sched_param.sched_priority =
 2231                             (sched_param.sched_priority *
 2232                             (LINUX_MAX_RT_PRIO - 1) +
 2233                             (RTP_PRIO_MAX - RTP_PRIO_MIN - 1)) /
 2234                             (RTP_PRIO_MAX - RTP_PRIO_MIN) + 1;
 2235                         break;
 2236                 }
 2237         } else
 2238                 PROC_UNLOCK(tdt->td_proc);
 2239 
 2240         error = copyout(&sched_param, uap->param, sizeof(sched_param));
 2241         return (error);
 2242 }
 2243 
 2244 /*
 2245  * Get affinity of a process.
 2246  */
 2247 int
 2248 linux_sched_getaffinity(struct thread *td,
 2249     struct linux_sched_getaffinity_args *args)
 2250 {
 2251         struct thread *tdt;
 2252         cpuset_t *mask;
 2253         size_t size;
 2254         int error;
 2255         id_t tid;
 2256 
 2257         tdt = linux_tdfind(td, args->pid, -1);
 2258         if (tdt == NULL)
 2259                 return (ESRCH);
 2260         tid = tdt->td_tid;
 2261         PROC_UNLOCK(tdt->td_proc);
 2262 
 2263         mask = malloc(sizeof(cpuset_t), M_LINUX, M_WAITOK | M_ZERO);
 2264         size = min(args->len, sizeof(cpuset_t));
 2265         error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
 2266             tid, size, mask);
 2267         if (error == ERANGE)
 2268                 error = EINVAL;
 2269         if (error == 0)
 2270                 error = copyout(mask, args->user_mask_ptr, size);
 2271         if (error == 0)
 2272                 td->td_retval[0] = size;
 2273         free(mask, M_LINUX);
 2274         return (error);
 2275 }
 2276 
 2277 /*
 2278  *  Set affinity of a process.
 2279  */
 2280 int
 2281 linux_sched_setaffinity(struct thread *td,
 2282     struct linux_sched_setaffinity_args *args)
 2283 {
 2284         struct thread *tdt;
 2285         cpuset_t *mask;
 2286         int cpu, error;
 2287         size_t len;
 2288         id_t tid;
 2289 
 2290         tdt = linux_tdfind(td, args->pid, -1);
 2291         if (tdt == NULL)
 2292                 return (ESRCH);
 2293         tid = tdt->td_tid;
 2294         PROC_UNLOCK(tdt->td_proc);
 2295 
 2296         len = min(args->len, sizeof(cpuset_t));
 2297         mask = malloc(sizeof(cpuset_t), M_TEMP, M_WAITOK | M_ZERO);;
 2298         error = copyin(args->user_mask_ptr, mask, len);
 2299         if (error != 0)
 2300                 goto out;
 2301         /* Linux ignore high bits */
 2302         CPU_FOREACH_ISSET(cpu, mask)
 2303                 if (cpu > mp_maxid)
 2304                         CPU_CLR(cpu, mask);
 2305 
 2306         error = kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
 2307             tid, mask);
 2308         if (error == EDEADLK)
 2309                 error = EINVAL;
 2310 out:
 2311         free(mask, M_TEMP);
 2312         return (error);
 2313 }
 2314 
 2315 struct linux_rlimit64 {
 2316         uint64_t        rlim_cur;
 2317         uint64_t        rlim_max;
 2318 };
 2319 
 2320 int
 2321 linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args)
 2322 {
 2323         struct rlimit rlim, nrlim;
 2324         struct linux_rlimit64 lrlim;
 2325         struct proc *p;
 2326         u_int which;
 2327         int flags;
 2328         int error;
 2329 
 2330         if (args->new == NULL && args->old != NULL) {
 2331                 if (linux_get_dummy_limit(args->resource, &rlim)) {
 2332                         lrlim.rlim_cur = rlim.rlim_cur;
 2333                         lrlim.rlim_max = rlim.rlim_max;
 2334                         return (copyout(&lrlim, args->old, sizeof(lrlim)));
 2335                 }
 2336         }
 2337 
 2338         if (args->resource >= LINUX_RLIM_NLIMITS)
 2339                 return (EINVAL);
 2340 
 2341         which = linux_to_bsd_resource[args->resource];
 2342         if (which == -1)
 2343                 return (EINVAL);
 2344 
 2345         if (args->new != NULL) {
 2346                 /*
 2347                  * Note. Unlike FreeBSD where rlim is signed 64-bit Linux
 2348                  * rlim is unsigned 64-bit. FreeBSD treats negative limits
 2349                  * as INFINITY so we do not need a conversion even.
 2350                  */
 2351                 error = copyin(args->new, &nrlim, sizeof(nrlim));
 2352                 if (error != 0)
 2353                         return (error);
 2354         }
 2355 
 2356         flags = PGET_HOLD | PGET_NOTWEXIT;
 2357         if (args->new != NULL)
 2358                 flags |= PGET_CANDEBUG;
 2359         else
 2360                 flags |= PGET_CANSEE;
 2361         if (args->pid == 0) {
 2362                 p = td->td_proc;
 2363                 PHOLD(p);
 2364         } else {
 2365                 error = pget(args->pid, flags, &p);
 2366                 if (error != 0)
 2367                         return (error);
 2368         }
 2369         if (args->old != NULL) {
 2370                 PROC_LOCK(p);
 2371                 lim_rlimit_proc(p, which, &rlim);
 2372                 PROC_UNLOCK(p);
 2373                 if (rlim.rlim_cur == RLIM_INFINITY)
 2374                         lrlim.rlim_cur = LINUX_RLIM_INFINITY;
 2375                 else
 2376                         lrlim.rlim_cur = rlim.rlim_cur;
 2377                 if (rlim.rlim_max == RLIM_INFINITY)
 2378                         lrlim.rlim_max = LINUX_RLIM_INFINITY;
 2379                 else
 2380                         lrlim.rlim_max = rlim.rlim_max;
 2381                 error = copyout(&lrlim, args->old, sizeof(lrlim));
 2382                 if (error != 0)
 2383                         goto out;
 2384         }
 2385 
 2386         if (args->new != NULL)
 2387                 error = kern_proc_setrlimit(td, p, which, &nrlim);
 2388 
 2389  out:
 2390         PRELE(p);
 2391         return (error);
 2392 }
 2393 
 2394 int
 2395 linux_pselect6(struct thread *td, struct linux_pselect6_args *args)
 2396 {
 2397         struct timespec ts, *tsp;
 2398         int error;
 2399 
 2400         if (args->tsp != NULL) {
 2401                 error = linux_get_timespec(&ts, args->tsp);
 2402                 if (error != 0)
 2403                         return (error);
 2404                 tsp = &ts;
 2405         } else
 2406                 tsp = NULL;
 2407 
 2408         error = linux_common_pselect6(td, args->nfds, args->readfds,
 2409             args->writefds, args->exceptfds, tsp, args->sig);
 2410 
 2411         if (args->tsp != NULL)
 2412                 linux_put_timespec(&ts, args->tsp);
 2413         return (error);
 2414 }
 2415 
 2416 static int
 2417 linux_common_pselect6(struct thread *td, l_int nfds, l_fd_set *readfds,
 2418     l_fd_set *writefds, l_fd_set *exceptfds, struct timespec *tsp,
 2419     l_uintptr_t *sig)
 2420 {
 2421         struct timeval utv, tv0, tv1, *tvp;
 2422         struct l_pselect6arg lpse6;
 2423         sigset_t *ssp;
 2424         sigset_t ss;
 2425         int error;
 2426 
 2427         ssp = NULL;
 2428         if (sig != NULL) {
 2429                 error = copyin(sig, &lpse6, sizeof(lpse6));
 2430                 if (error != 0)
 2431                         return (error);
 2432                 error = linux_copyin_sigset(td, PTRIN(lpse6.ss),
 2433                     lpse6.ss_len, &ss, &ssp);
 2434                 if (error != 0)
 2435                     return (error);
 2436         } else
 2437                 ssp = NULL;
 2438 
 2439         /*
 2440          * Currently glibc changes nanosecond number to microsecond.
 2441          * This mean losing precision but for now it is hardly seen.
 2442          */
 2443         if (tsp != NULL) {
 2444                 TIMESPEC_TO_TIMEVAL(&utv, tsp);
 2445                 if (itimerfix(&utv))
 2446                         return (EINVAL);
 2447 
 2448                 microtime(&tv0);
 2449                 tvp = &utv;
 2450         } else
 2451                 tvp = NULL;
 2452 
 2453         error = kern_pselect(td, nfds, readfds, writefds,
 2454             exceptfds, tvp, ssp, LINUX_NFDBITS);
 2455 
 2456         if (tsp != NULL) {
 2457                 /*
 2458                  * Compute how much time was left of the timeout,
 2459                  * by subtracting the current time and the time
 2460                  * before we started the call, and subtracting
 2461                  * that result from the user-supplied value.
 2462                  */
 2463                 microtime(&tv1);
 2464                 timevalsub(&tv1, &tv0);
 2465                 timevalsub(&utv, &tv1);
 2466                 if (utv.tv_sec < 0)
 2467                         timevalclear(&utv);
 2468                 TIMEVAL_TO_TIMESPEC(&utv, tsp);
 2469         }
 2470         return (error);
 2471 }
 2472 
 2473 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
 2474 int
 2475 linux_pselect6_time64(struct thread *td,
 2476     struct linux_pselect6_time64_args *args)
 2477 {
 2478         struct timespec ts, *tsp;
 2479         int error;
 2480 
 2481         if (args->tsp != NULL) {
 2482                 error = linux_get_timespec64(&ts, args->tsp);
 2483                 if (error != 0)
 2484                         return (error);
 2485                 tsp = &ts;
 2486         } else
 2487                 tsp = NULL;
 2488 
 2489         error = linux_common_pselect6(td, args->nfds, args->readfds,
 2490             args->writefds, args->exceptfds, tsp, args->sig);
 2491 
 2492         if (args->tsp != NULL)
 2493                 linux_put_timespec64(&ts, args->tsp);
 2494         return (error);
 2495 }
 2496 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
 2497 
 2498 int
 2499 linux_ppoll(struct thread *td, struct linux_ppoll_args *args)
 2500 {
 2501         struct timespec uts, *tsp;
 2502         int error;
 2503 
 2504         if (args->tsp != NULL) {
 2505                 error = linux_get_timespec(&uts, args->tsp);
 2506                 if (error != 0)
 2507                         return (error);
 2508                 tsp = &uts;
 2509         } else
 2510                 tsp = NULL;
 2511 
 2512         error = linux_common_ppoll(td, args->fds, args->nfds, tsp,
 2513             args->sset, args->ssize);
 2514         if (error == 0 && args->tsp != NULL)
 2515                 error = linux_put_timespec(&uts, args->tsp);
 2516         return (error);
 2517 }
 2518 
 2519 static int
 2520 linux_common_ppoll(struct thread *td, struct pollfd *fds, uint32_t nfds,
 2521     struct timespec *tsp, l_sigset_t *sset, l_size_t ssize)
 2522 {
 2523         struct timespec ts0, ts1;
 2524         struct pollfd stackfds[32];
 2525         struct pollfd *kfds;
 2526         sigset_t *ssp;
 2527         sigset_t ss;
 2528         int error;
 2529 
 2530         if (kern_poll_maxfds(nfds))
 2531                 return (EINVAL);
 2532         if (sset != NULL) {
 2533                 error = linux_copyin_sigset(td, sset, ssize, &ss, &ssp);
 2534                 if (error != 0)
 2535                     return (error);
 2536         } else
 2537                 ssp = NULL;
 2538         if (tsp != NULL)
 2539                 nanotime(&ts0);
 2540 
 2541         if (nfds > nitems(stackfds))
 2542                 kfds = mallocarray(nfds, sizeof(*kfds), M_TEMP, M_WAITOK);
 2543         else
 2544                 kfds = stackfds;
 2545         error = linux_pollin(td, kfds, fds, nfds);
 2546         if (error != 0)
 2547                 goto out;
 2548 
 2549         error = kern_poll_kfds(td, kfds, nfds, tsp, ssp);
 2550         if (error == 0)
 2551                 error = linux_pollout(td, kfds, fds, nfds);
 2552 
 2553         if (error == 0 && tsp != NULL) {
 2554                 if (td->td_retval[0]) {
 2555                         nanotime(&ts1);
 2556                         timespecsub(&ts1, &ts0, &ts1);
 2557                         timespecsub(tsp, &ts1, tsp);
 2558                         if (tsp->tv_sec < 0)
 2559                                 timespecclear(tsp);
 2560                 } else
 2561                         timespecclear(tsp);
 2562         }
 2563 
 2564 out:
 2565         if (nfds > nitems(stackfds))
 2566                 free(kfds, M_TEMP);
 2567         return (error);
 2568 }
 2569 
 2570 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
 2571 int
 2572 linux_ppoll_time64(struct thread *td, struct linux_ppoll_time64_args *args)
 2573 {
 2574         struct timespec uts, *tsp;
 2575         int error;
 2576 
 2577         if (args->tsp != NULL) {
 2578                 error = linux_get_timespec64(&uts, args->tsp);
 2579                 if (error != 0)
 2580                         return (error);
 2581                 tsp = &uts;
 2582         } else
 2583                 tsp = NULL;
 2584         error = linux_common_ppoll(td, args->fds, args->nfds, tsp,
 2585             args->sset, args->ssize);
 2586         if (error == 0 && args->tsp != NULL)
 2587                 error = linux_put_timespec64(&uts, args->tsp);
 2588         return (error);
 2589 }
 2590 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
 2591 
 2592 static int
 2593 linux_pollin(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd)
 2594 {
 2595         int error;
 2596         u_int i;
 2597 
 2598         error = copyin(ufds, fds, nfd * sizeof(*fds));
 2599         if (error != 0)
 2600                 return (error);
 2601 
 2602         for (i = 0; i < nfd; i++) {
 2603                 if (fds->events != 0)
 2604                         linux_to_bsd_poll_events(td, fds->fd,
 2605                             fds->events, &fds->events);
 2606                 fds++;
 2607         }
 2608         return (0);
 2609 }
 2610 
 2611 static int
 2612 linux_pollout(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd)
 2613 {
 2614         int error = 0;
 2615         u_int i, n = 0;
 2616 
 2617         for (i = 0; i < nfd; i++) {
 2618                 if (fds->revents != 0) {
 2619                         bsd_to_linux_poll_events(fds->revents,
 2620                             &fds->revents);
 2621                         n++;
 2622                 }
 2623                 error = copyout(&fds->revents, &ufds->revents,
 2624                     sizeof(ufds->revents));
 2625                 if (error)
 2626                         return (error);
 2627                 fds++;
 2628                 ufds++;
 2629         }
 2630         td->td_retval[0] = n;
 2631         return (0);
 2632 }
 2633 
 2634 static int
 2635 linux_sched_rr_get_interval_common(struct thread *td, pid_t pid,
 2636     struct timespec *ts)
 2637 {
 2638         struct thread *tdt;
 2639         int error;
 2640 
 2641         /*
 2642          * According to man in case the invalid pid specified
 2643          * EINVAL should be returned.
 2644          */
 2645         if (pid < 0)
 2646                 return (EINVAL);
 2647 
 2648         tdt = linux_tdfind(td, pid, -1);
 2649         if (tdt == NULL)
 2650                 return (ESRCH);
 2651 
 2652         error = kern_sched_rr_get_interval_td(td, tdt, ts);
 2653         PROC_UNLOCK(tdt->td_proc);
 2654         return (error);
 2655 }
 2656 
 2657 int
 2658 linux_sched_rr_get_interval(struct thread *td,
 2659     struct linux_sched_rr_get_interval_args *uap)
 2660 {
 2661         struct timespec ts;
 2662         int error;
 2663 
 2664         error = linux_sched_rr_get_interval_common(td, uap->pid, &ts);
 2665         if (error != 0)
 2666                 return (error);
 2667         return (linux_put_timespec(&ts, uap->interval));
 2668 }
 2669 
 2670 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
 2671 int
 2672 linux_sched_rr_get_interval_time64(struct thread *td,
 2673     struct linux_sched_rr_get_interval_time64_args *uap)
 2674 {
 2675         struct timespec ts;
 2676         int error;
 2677 
 2678         error = linux_sched_rr_get_interval_common(td, uap->pid, &ts);
 2679         if (error != 0)
 2680                 return (error);
 2681         return (linux_put_timespec64(&ts, uap->interval));
 2682 }
 2683 #endif
 2684 
 2685 /*
 2686  * In case when the Linux thread is the initial thread in
 2687  * the thread group thread id is equal to the process id.
 2688  * Glibc depends on this magic (assert in pthread_getattr_np.c).
 2689  */
 2690 struct thread *
 2691 linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid)
 2692 {
 2693         struct linux_emuldata *em;
 2694         struct thread *tdt;
 2695         struct proc *p;
 2696 
 2697         tdt = NULL;
 2698         if (tid == 0 || tid == td->td_tid) {
 2699                 if (pid != -1 && td->td_proc->p_pid != pid)
 2700                         return (NULL);
 2701                 PROC_LOCK(td->td_proc);
 2702                 return (td);
 2703         } else if (tid > PID_MAX)
 2704                 return (tdfind(tid, pid));
 2705 
 2706         /*
 2707          * Initial thread where the tid equal to the pid.
 2708          */
 2709         p = pfind(tid);
 2710         if (p != NULL) {
 2711                 if (SV_PROC_ABI(p) != SV_ABI_LINUX ||
 2712                     (pid != -1 && tid != pid)) {
 2713                         /*
 2714                          * p is not a Linuxulator process.
 2715                          */
 2716                         PROC_UNLOCK(p);
 2717                         return (NULL);
 2718                 }
 2719                 FOREACH_THREAD_IN_PROC(p, tdt) {
 2720                         em = em_find(tdt);
 2721                         if (tid == em->em_tid)
 2722                                 return (tdt);
 2723                 }
 2724                 PROC_UNLOCK(p);
 2725         }
 2726         return (NULL);
 2727 }
 2728 
 2729 void
 2730 linux_to_bsd_waitopts(int options, int *bsdopts)
 2731 {
 2732 
 2733         if (options & LINUX_WNOHANG)
 2734                 *bsdopts |= WNOHANG;
 2735         if (options & LINUX_WUNTRACED)
 2736                 *bsdopts |= WUNTRACED;
 2737         if (options & LINUX_WEXITED)
 2738                 *bsdopts |= WEXITED;
 2739         if (options & LINUX_WCONTINUED)
 2740                 *bsdopts |= WCONTINUED;
 2741         if (options & LINUX_WNOWAIT)
 2742                 *bsdopts |= WNOWAIT;
 2743 
 2744         if (options & __WCLONE)
 2745                 *bsdopts |= WLINUXCLONE;
 2746 }
 2747 
 2748 int
 2749 linux_getrandom(struct thread *td, struct linux_getrandom_args *args)
 2750 {
 2751         struct uio uio;
 2752         struct iovec iov;
 2753         int error;
 2754 
 2755         if (args->flags & ~(LINUX_GRND_NONBLOCK|LINUX_GRND_RANDOM))
 2756                 return (EINVAL);
 2757         if (args->count > INT_MAX)
 2758                 args->count = INT_MAX;
 2759 
 2760         iov.iov_base = args->buf;
 2761         iov.iov_len = args->count;
 2762 
 2763         uio.uio_iov = &iov;
 2764         uio.uio_iovcnt = 1;
 2765         uio.uio_resid = iov.iov_len;
 2766         uio.uio_segflg = UIO_USERSPACE;
 2767         uio.uio_rw = UIO_READ;
 2768         uio.uio_td = td;
 2769 
 2770         error = read_random_uio(&uio, args->flags & LINUX_GRND_NONBLOCK);
 2771         if (error == 0)
 2772                 td->td_retval[0] = args->count - uio.uio_resid;
 2773         return (error);
 2774 }
 2775 
 2776 int
 2777 linux_mincore(struct thread *td, struct linux_mincore_args *args)
 2778 {
 2779 
 2780         /* Needs to be page-aligned */
 2781         if (args->start & PAGE_MASK)
 2782                 return (EINVAL);
 2783         return (kern_mincore(td, args->start, args->len, args->vec));
 2784 }
 2785 
 2786 #define SYSLOG_TAG      "<6>"
 2787 
 2788 int
 2789 linux_syslog(struct thread *td, struct linux_syslog_args *args)
 2790 {
 2791         char buf[128], *src, *dst;
 2792         u_int seq;
 2793         int buflen, error;
 2794 
 2795         if (args->type != LINUX_SYSLOG_ACTION_READ_ALL) {
 2796                 linux_msg(td, "syslog unsupported type 0x%x", args->type);
 2797                 return (EINVAL);
 2798         }
 2799 
 2800         if (args->len < 6) {
 2801                 td->td_retval[0] = 0;
 2802                 return (0);
 2803         }
 2804 
 2805         error = priv_check(td, PRIV_MSGBUF);
 2806         if (error)
 2807                 return (error);
 2808 
 2809         mtx_lock(&msgbuf_lock);
 2810         msgbuf_peekbytes(msgbufp, NULL, 0, &seq);
 2811         mtx_unlock(&msgbuf_lock);
 2812 
 2813         dst = args->buf;
 2814         error = copyout(&SYSLOG_TAG, dst, sizeof(SYSLOG_TAG));
 2815         /* The -1 is to skip the trailing '\0'. */
 2816         dst += sizeof(SYSLOG_TAG) - 1;
 2817 
 2818         while (error == 0) {
 2819                 mtx_lock(&msgbuf_lock);
 2820                 buflen = msgbuf_peekbytes(msgbufp, buf, sizeof(buf), &seq);
 2821                 mtx_unlock(&msgbuf_lock);
 2822 
 2823                 if (buflen == 0)
 2824                         break;
 2825 
 2826                 for (src = buf; src < buf + buflen && error == 0; src++) {
 2827                         if (*src == '\0')
 2828                                 continue;
 2829 
 2830                         if (dst >= args->buf + args->len)
 2831                                 goto out;
 2832 
 2833                         error = copyout(src, dst, 1);
 2834                         dst++;
 2835 
 2836                         if (*src == '\n' && *(src + 1) != '<' &&
 2837                             dst + sizeof(SYSLOG_TAG) < args->buf + args->len) {
 2838                                 error = copyout(&SYSLOG_TAG,
 2839                                     dst, sizeof(SYSLOG_TAG));
 2840                                 dst += sizeof(SYSLOG_TAG) - 1;
 2841                         }
 2842                 }
 2843         }
 2844 out:
 2845         td->td_retval[0] = dst - args->buf;
 2846         return (error);
 2847 }
 2848 
 2849 int
 2850 linux_getcpu(struct thread *td, struct linux_getcpu_args *args)
 2851 {
 2852         int cpu, error, node;
 2853 
 2854         cpu = td->td_oncpu; /* Make sure it doesn't change during copyout(9) */
 2855         error = 0;
 2856         node = cpuid_to_pcpu[cpu]->pc_domain;
 2857 
 2858         if (args->cpu != NULL)
 2859                 error = copyout(&cpu, args->cpu, sizeof(l_int));
 2860         if (args->node != NULL)
 2861                 error = copyout(&node, args->node, sizeof(l_int));
 2862         return (error);
 2863 }
 2864 
 2865 #if defined(__i386__) || defined(__amd64__)
 2866 int
 2867 linux_poll(struct thread *td, struct linux_poll_args *args)
 2868 {
 2869         struct timespec ts, *tsp;
 2870 
 2871         if (args->timeout != INFTIM) {
 2872                 if (args->timeout < 0)
 2873                         return (EINVAL);
 2874                 ts.tv_sec = args->timeout / 1000;
 2875                 ts.tv_nsec = (args->timeout % 1000) * 1000000;
 2876                 tsp = &ts;
 2877         } else
 2878                 tsp = NULL;
 2879 
 2880         return (linux_common_ppoll(td, args->fds, args->nfds,
 2881             tsp, NULL, 0));
 2882 }
 2883 #endif /* __i386__ || __amd64__ */
 2884 
 2885 int
 2886 linux_seccomp(struct thread *td, struct linux_seccomp_args *args)
 2887 {
 2888 
 2889         switch (args->op) {
 2890         case LINUX_SECCOMP_GET_ACTION_AVAIL:
 2891                 return (EOPNOTSUPP);
 2892         default:
 2893                 /*
 2894                  * Ignore unknown operations, just like Linux kernel built
 2895                  * without CONFIG_SECCOMP.
 2896                  */
 2897                 return (EINVAL);
 2898         }
 2899 }
 2900 
 2901 #ifndef COMPAT_LINUX32
 2902 int
 2903 linux_execve(struct thread *td, struct linux_execve_args *args)
 2904 {
 2905         struct image_args eargs;
 2906         char *path;
 2907         int error;
 2908 
 2909         LINUX_CTR(execve);
 2910 
 2911         if (!LUSECONVPATH(td)) {
 2912                 error = exec_copyin_args(&eargs, args->path, UIO_USERSPACE,
 2913                     args->argp, args->envp);
 2914         } else {
 2915                 LCONVPATHEXIST(args->path, &path);
 2916                 error = exec_copyin_args(&eargs, path, UIO_SYSSPACE, args->argp,
 2917                     args->envp);
 2918                 LFREEPATH(path);
 2919         }
 2920         if (error == 0)
 2921                 error = linux_common_execve(td, &eargs);
 2922         AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td);
 2923         return (error);
 2924 }
 2925 #endif

Cache object: becdce99a80516ee1a2154b39fd0f548


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.