FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_proc.c


    1 /*      $NetBSD: kern_proc.c,v 1.269 2022/10/26 23:20:36 riastradh Exp $        */
    2 
    3 /*-
    4  * Copyright (c) 1999, 2006, 2007, 2008, 2020 The NetBSD Foundation, Inc.
    5  * All rights reserved.
    6  *
    7  * This code is derived from software contributed to The NetBSD Foundation
    8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
    9  * NASA Ames Research Center, and by Andrew Doran.
   10  *
   11  * Redistribution and use in source and binary forms, with or without
   12  * modification, are permitted provided that the following conditions
   13  * are met:
   14  * 1. Redistributions of source code must retain the above copyright
   15  *    notice, this list of conditions and the following disclaimer.
   16  * 2. Redistributions in binary form must reproduce the above copyright
   17  *    notice, this list of conditions and the following disclaimer in the
   18  *    documentation and/or other materials provided with the distribution.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   30  * POSSIBILITY OF SUCH DAMAGE.
   31  */
   32 
   33 /*
   34  * Copyright (c) 1982, 1986, 1989, 1991, 1993
   35  *      The Regents of the University of California.  All rights reserved.
   36  *
   37  * Redistribution and use in source and binary forms, with or without
   38  * modification, are permitted provided that the following conditions
   39  * are met:
   40  * 1. Redistributions of source code must retain the above copyright
   41  *    notice, this list of conditions and the following disclaimer.
   42  * 2. Redistributions in binary form must reproduce the above copyright
   43  *    notice, this list of conditions and the following disclaimer in the
   44  *    documentation and/or other materials provided with the distribution.
   45  * 3. Neither the name of the University nor the names of its contributors
   46  *    may be used to endorse or promote products derived from this software
   47  *    without specific prior written permission.
   48  *
   49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   59  * SUCH DAMAGE.
   60  *
   61  *      @(#)kern_proc.c 8.7 (Berkeley) 2/14/95
   62  */
   63 
   64 #include <sys/cdefs.h>
   65 __KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.269 2022/10/26 23:20:36 riastradh Exp $");
   66 
   67 #ifdef _KERNEL_OPT
   68 #include "opt_kstack.h"
   69 #include "opt_maxuprc.h"
   70 #include "opt_dtrace.h"
   71 #include "opt_compat_netbsd32.h"
   72 #include "opt_kaslr.h"
   73 #endif
   74 
   75 #if defined(__HAVE_COMPAT_NETBSD32) && !defined(COMPAT_NETBSD32) \
   76     && !defined(_RUMPKERNEL)
   77 #define COMPAT_NETBSD32
   78 #endif
   79 
   80 #include <sys/param.h>
   81 #include <sys/systm.h>
   82 #include <sys/kernel.h>
   83 #include <sys/proc.h>
   84 #include <sys/resourcevar.h>
   85 #include <sys/buf.h>
   86 #include <sys/acct.h>
   87 #include <sys/wait.h>
   88 #include <sys/file.h>
   89 #include <ufs/ufs/quota.h>
   90 #include <sys/uio.h>
   91 #include <sys/pool.h>
   92 #include <sys/pset.h>
   93 #include <sys/ioctl.h>
   94 #include <sys/tty.h>
   95 #include <sys/signalvar.h>
   96 #include <sys/ras.h>
   97 #include <sys/filedesc.h>
   98 #include <sys/syscall_stats.h>
   99 #include <sys/kauth.h>
  100 #include <sys/sleepq.h>
  101 #include <sys/atomic.h>
  102 #include <sys/kmem.h>
  103 #include <sys/namei.h>
  104 #include <sys/dtrace_bsd.h>
  105 #include <sys/sysctl.h>
  106 #include <sys/exec.h>
  107 #include <sys/cpu.h>
  108 #include <sys/compat_stub.h>
  109 #include <sys/futex.h>
  110 #include <sys/pserialize.h>
  111 
  112 #include <uvm/uvm_extern.h>
  113 
  114 /*
  115  * Process lists.
  116  */
  117 
  118 struct proclist         allproc         __cacheline_aligned;
  119 struct proclist         zombproc        __cacheline_aligned;
  120 
  121 kmutex_t                proc_lock       __cacheline_aligned;
  122 static pserialize_t     proc_psz;
  123 
  124 /*
  125  * pid to lwp/proc lookup is done by indexing the pid_table array.
  126  * Since pid numbers are only allocated when an empty slot
  127  * has been found, there is no need to search any lists ever.
  128  * (an orphaned pgrp will lock the slot, a session will lock
  129  * the pgrp with the same number.)
  130  * If the table is too small it is reallocated with twice the
  131  * previous size and the entries 'unzipped' into the two halves.
  132  * A linked list of free entries is passed through the pt_lwp
  133  * field of 'free' items - set odd to be an invalid ptr.  Two
  134  * additional bits are also used to indicate if the slot is
  135  * currently occupied by a proc or lwp, and if the PID is
  136  * hidden from certain kinds of lookups.  We thus require a
  137  * minimum alignment for proc and lwp structures (LWPs are
  138  * at least 32-byte aligned).
  139  */
  140 
  141 struct pid_table {
  142         uintptr_t       pt_slot;
  143         struct pgrp     *pt_pgrp;
  144         pid_t           pt_pid;
  145 };
  146 
  147 #define PT_F_FREE               ((uintptr_t)__BIT(0))
  148 #define PT_F_LWP                0       /* pseudo-flag */
  149 #define PT_F_PROC               ((uintptr_t)__BIT(1))
  150 
  151 #define PT_F_TYPEBITS           (PT_F_FREE|PT_F_PROC)
  152 #define PT_F_ALLBITS            (PT_F_FREE|PT_F_PROC)
  153 
  154 #define PT_VALID(s)             (((s) & PT_F_FREE) == 0)
  155 #define PT_RESERVED(s)          ((s) == 0)
  156 #define PT_NEXT(s)              ((u_int)(s) >> 1)
  157 #define PT_SET_FREE(pid)        (((pid) << 1) | PT_F_FREE)
  158 #define PT_SET_LWP(l)           ((uintptr_t)(l))
  159 #define PT_SET_PROC(p)          (((uintptr_t)(p)) | PT_F_PROC)
  160 #define PT_SET_RESERVED         0
  161 #define PT_GET_LWP(s)           ((struct lwp *)((s) & ~PT_F_ALLBITS))
  162 #define PT_GET_PROC(s)          ((struct proc *)((s) & ~PT_F_ALLBITS))
  163 #define PT_GET_TYPE(s)          ((s) & PT_F_TYPEBITS)
  164 #define PT_IS_LWP(s)            (PT_GET_TYPE(s) == PT_F_LWP && (s) != 0)
  165 #define PT_IS_PROC(s)           (PT_GET_TYPE(s) == PT_F_PROC)
  166 
  167 #define MIN_PROC_ALIGNMENT      (PT_F_ALLBITS + 1)
  168 
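To make the encoding concrete, here is a minimal sketch (ours, not part of kern_proc.c) of how a single pt_slot word carries all three states; it assumes p and l satisfy MIN_PROC_ALIGNMENT, so their two low tag bits are clear:

        static void
        pt_slot_example(struct proc *p, struct lwp *l)
        {
                uintptr_t slot;

                slot = PT_SET_PROC(p);          /* occupied by a proc: ptr | PT_F_PROC */
                KASSERT(PT_IS_PROC(slot));
                KASSERT(PT_GET_PROC(slot) == p);

                slot = PT_SET_LWP(l);           /* occupied by an lwp: the bare pointer */
                KASSERT(PT_IS_LWP(slot));
                KASSERT(PT_GET_LWP(slot) == l);

                slot = PT_SET_FREE(1234);       /* free: next-free link with PT_F_FREE set */
                KASSERT(!PT_VALID(slot));
                KASSERT(PT_NEXT(slot) == 1234);
        }
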
  169 /*
  170  * Table of process IDs (PIDs).
  171  */
  172 static struct pid_table *pid_table      __read_mostly;
  173 
  174 #define INITIAL_PID_TABLE_SIZE          (1 << 5)
  175 
  176 /* Table mask, threshold for growing and number of allocated PIDs. */
  177 static u_int            pid_tbl_mask    __read_mostly;
  178 static u_int            pid_alloc_lim   __read_mostly;
  179 static u_int            pid_alloc_cnt   __cacheline_aligned;
  180 
  181 /* Next free, last free and maximum PIDs. */
  182 static u_int            next_free_pt    __cacheline_aligned;
  183 static u_int            last_free_pt    __cacheline_aligned;
  184 static pid_t            pid_max         __read_mostly;
  185 
  186 /* Components of the first process -- never freed. */
  187 
  188 struct session session0 = {
  189         .s_count = 1,
  190         .s_sid = 0,
  191 };
  192 struct pgrp pgrp0 = {
  193         .pg_members = LIST_HEAD_INITIALIZER(&pgrp0.pg_members),
  194         .pg_session = &session0,
  195 };
  196 filedesc_t filedesc0;
  197 struct cwdinfo cwdi0 = {
  198         .cwdi_cmask = CMASK,
  199         .cwdi_refcnt = 1,
  200 };
  201 struct plimit limit0;
  202 struct pstats pstat0;
  203 struct vmspace vmspace0;
  204 struct sigacts sigacts0;
  205 struct proc proc0 = {
  206         .p_lwps = LIST_HEAD_INITIALIZER(&proc0.p_lwps),
  207         .p_sigwaiters = LIST_HEAD_INITIALIZER(&proc0.p_sigwaiters),
  208         .p_nlwps = 1,
  209         .p_nrlwps = 1,
  210         .p_pgrp = &pgrp0,
  211         .p_comm = "system",
  212         /*
  213          * Set P_NOCLDWAIT so that kernel threads are reparented to init(8)
  214          * when they exit.  init(8) can easily wait them out for us.
  215          */
  216         .p_flag = PK_SYSTEM | PK_NOCLDWAIT,
  217         .p_stat = SACTIVE,
  218         .p_nice = NZERO,
  219         .p_emul = &emul_netbsd,
  220         .p_cwdi = &cwdi0,
  221         .p_limit = &limit0,
  222         .p_fd = &filedesc0,
  223         .p_vmspace = &vmspace0,
  224         .p_stats = &pstat0,
  225         .p_sigacts = &sigacts0,
  226 #ifdef PROC0_MD_INITIALIZERS
  227         PROC0_MD_INITIALIZERS
  228 #endif
  229 };
  230 kauth_cred_t cred0;
  231 
  232 static const int        nofile  = NOFILE;
  233 static const int        maxuprc = MAXUPRC;
  234 
  235 static int sysctl_doeproc(SYSCTLFN_PROTO);
  236 static int sysctl_kern_proc_args(SYSCTLFN_PROTO);
  237 static int sysctl_security_expose_address(SYSCTLFN_PROTO);
  238 
  239 #ifdef KASLR
  240 static int kern_expose_address = 0;
  241 #else
  242 static int kern_expose_address = 1;
  243 #endif
  244 /*
  245  * The process list descriptors, used during pid allocation and
  246  * by sysctl.  No locking on this data structure is needed since
  247  * it is completely static.
  248  */
  249 const struct proclist_desc proclists[] = {
  250         { &allproc      },
  251         { &zombproc     },
  252         { NULL          },
  253 };
  254 
  255 static struct pgrp *    pg_remove(pid_t);
  256 static void             pg_delete(pid_t);
  257 static void             orphanpg(struct pgrp *);
  258 
  259 static specificdata_domain_t proc_specificdata_domain;
  260 
  261 static pool_cache_t proc_cache;
  262 
  263 static kauth_listener_t proc_listener;
  264 
  265 static void fill_proc(const struct proc *, struct proc *, bool);
  266 static int fill_pathname(struct lwp *, pid_t, void *, size_t *);
  267 static int fill_cwd(struct lwp *, pid_t, void *, size_t *);
  268 
  269 static int
  270 proc_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
  271     void *arg0, void *arg1, void *arg2, void *arg3)
  272 {
  273         struct proc *p;
  274         int result;
  275 
  276         result = KAUTH_RESULT_DEFER;
  277         p = arg0;
  278 
  279         switch (action) {
  280         case KAUTH_PROCESS_CANSEE: {
  281                 enum kauth_process_req req;
  282 
  283                 req = (enum kauth_process_req)(uintptr_t)arg1;
  284 
  285                 switch (req) {
  286                 case KAUTH_REQ_PROCESS_CANSEE_ARGS:
  287                 case KAUTH_REQ_PROCESS_CANSEE_ENTRY:
  288                 case KAUTH_REQ_PROCESS_CANSEE_OPENFILES:
  289                 case KAUTH_REQ_PROCESS_CANSEE_EPROC:
  290                         result = KAUTH_RESULT_ALLOW;
  291                         break;
  292 
  293                 case KAUTH_REQ_PROCESS_CANSEE_ENV:
  294                         if (kauth_cred_getuid(cred) !=
  295                             kauth_cred_getuid(p->p_cred) ||
  296                             kauth_cred_getuid(cred) !=
  297                             kauth_cred_getsvuid(p->p_cred))
  298                                 break;
  299 
  300                         result = KAUTH_RESULT_ALLOW;
  301 
  302                         break;
  303 
  304                 case KAUTH_REQ_PROCESS_CANSEE_KPTR:
  305                         if (!kern_expose_address)
  306                                 break;
  307 
  308                         if (kern_expose_address == 1 && !(p->p_flag & PK_KMEM))
  309                                 break;
  310 
  311                         result = KAUTH_RESULT_ALLOW;
  312 
  313                         break;
  314 
  315                 default:
  316                         break;
  317                 }
  318 
  319                 break;
  320                 }
  321 
  322         case KAUTH_PROCESS_FORK: {
  323                 int lnprocs = (int)(unsigned long)arg2;
  324 
  325                 /*
  326                  * Don't allow a nonprivileged user to use the last few
  327                  * processes. The variable lnprocs is the current number of
  328                  * processes, maxproc is the limit.
  329                  */
  330                 if (__predict_false((lnprocs >= maxproc - 5)))
  331                         break;
  332 
  333                 result = KAUTH_RESULT_ALLOW;
  334 
  335                 break;
  336                 }
  337 
  338         case KAUTH_PROCESS_CORENAME:
  339         case KAUTH_PROCESS_STOPFLAG:
  340                 if (proc_uidmatch(cred, p->p_cred) == 0)
  341                         result = KAUTH_RESULT_ALLOW;
  342 
  343                 break;
  344 
  345         default:
  346                 break;
  347         }
  348 
  349         return result;
  350 }
  351 
  352 static int
  353 proc_ctor(void *arg __unused, void *obj, int flags __unused)
  354 {
  355         struct proc *p = obj;
  356 
  357         memset(p, 0, sizeof(*p));
  358         klist_init(&p->p_klist);
  359 
  360         /*
  361          * There is no need for a proc_dtor() to do a klist_fini(),
  362          * since knote_proc_exit() ensures that p->p_klist is empty
  363          * when a process exits.
  364          */
  365 
  366         return 0;
  367 }
  368 
  369 static pid_t proc_alloc_pid_slot(struct proc *, uintptr_t);
  370 
  371 /*
  372  * Initialize global process hashing structures.
  373  */
  374 void
  375 procinit(void)
  376 {
  377         const struct proclist_desc *pd;
  378         u_int i;
  379 #define LINK_EMPTY ((PID_MAX + INITIAL_PID_TABLE_SIZE) & ~(INITIAL_PID_TABLE_SIZE - 1))
  380 
  381         for (pd = proclists; pd->pd_list != NULL; pd++)
  382                 LIST_INIT(pd->pd_list);
  383 
  384         mutex_init(&proc_lock, MUTEX_DEFAULT, IPL_NONE);
  385 
  386         proc_psz = pserialize_create();
  387 
  388         pid_table = kmem_alloc(INITIAL_PID_TABLE_SIZE
  389             * sizeof(struct pid_table), KM_SLEEP);
  390         pid_tbl_mask = INITIAL_PID_TABLE_SIZE - 1;
  391         pid_max = PID_MAX;
  392 
  393         /* Set free list running through table...
  394            Preset 'use count' above PID_MAX so we allocate pid 1 next. */
  395         for (i = 0; i <= pid_tbl_mask; i++) {
  396                 pid_table[i].pt_slot = PT_SET_FREE(LINK_EMPTY + i + 1);
  397                 pid_table[i].pt_pgrp = 0;
  398                 pid_table[i].pt_pid = 0;
  399         }
  400         /* slot 0 is just grabbed */
  401         next_free_pt = 1;
  402         /* Need to fix last entry. */
  403         last_free_pt = pid_tbl_mask;
  404         pid_table[last_free_pt].pt_slot = PT_SET_FREE(LINK_EMPTY);
  405         /* point at which we grow table - to avoid reusing pids too often */
  406         pid_alloc_lim = pid_tbl_mask - 1;
  407 #undef LINK_EMPTY
  408 
  409         /* Reserve PID 1 for init(8). */        /* XXX slightly gross */
  410         mutex_enter(&proc_lock);
  411         if (proc_alloc_pid_slot(&proc0, PT_SET_RESERVED) != 1)
  412                 panic("failed to reserve PID 1 for init(8)");
  413         mutex_exit(&proc_lock);
  414 
  415         proc_specificdata_domain = specificdata_domain_create();
  416         KASSERT(proc_specificdata_domain != NULL);
  417 
  418         size_t proc_alignment = coherency_unit;
  419         if (proc_alignment < MIN_PROC_ALIGNMENT)
  420                 proc_alignment = MIN_PROC_ALIGNMENT;
  421 
  422         proc_cache = pool_cache_init(sizeof(struct proc), proc_alignment, 0, 0,
  423             "procpl", NULL, IPL_NONE, proc_ctor, NULL, NULL);
  424 
  425         proc_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS,
  426             proc_listener_cb, NULL);
  427 }
  428 
  429 void
  430 procinit_sysctl(void)
  431 {
  432         static struct sysctllog *clog;
  433 
  434         sysctl_createv(&clog, 0, NULL, NULL,
  435                        CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
  436                        CTLTYPE_INT, "expose_address",
  437                        SYSCTL_DESCR("Enable exposing kernel addresses"),
  438                        sysctl_security_expose_address, 0,
  439                        &kern_expose_address, 0, CTL_KERN, CTL_CREATE, CTL_EOL);
  440         sysctl_createv(&clog, 0, NULL, NULL,
  441                        CTLFLAG_PERMANENT,
  442                        CTLTYPE_NODE, "proc",
  443                        SYSCTL_DESCR("System-wide process information"),
  444                        sysctl_doeproc, 0, NULL, 0,
  445                        CTL_KERN, KERN_PROC, CTL_EOL);
  446         sysctl_createv(&clog, 0, NULL, NULL,
  447                        CTLFLAG_PERMANENT,
  448                        CTLTYPE_NODE, "proc2",
  449                        SYSCTL_DESCR("Machine-independent process information"),
  450                        sysctl_doeproc, 0, NULL, 0,
  451                        CTL_KERN, KERN_PROC2, CTL_EOL);
  452         sysctl_createv(&clog, 0, NULL, NULL,
  453                        CTLFLAG_PERMANENT,
  454                        CTLTYPE_NODE, "proc_args",
  455                        SYSCTL_DESCR("Process argument information"),
  456                        sysctl_kern_proc_args, 0, NULL, 0,
  457                        CTL_KERN, KERN_PROC_ARGS, CTL_EOL);
  458 
  459         /*
  460           "nodes" under these:
  461 
  462           KERN_PROC_ALL
  463           KERN_PROC_PID pid
  464           KERN_PROC_PGRP pgrp
  465           KERN_PROC_SESSION sess
  466           KERN_PROC_TTY tty
  467           KERN_PROC_UID uid
  468           KERN_PROC_RUID uid
  469           KERN_PROC_GID gid
  470           KERN_PROC_RGID gid
  471 
  472           all in all, probably not worth the effort...
  473         */
  474 }
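
For reference, a userland consumer of the kern.proc2 node created above might look like the following sketch, using struct kinfo_proc2 from sys/sysctl.h (error handling kept minimal):

        #include <sys/param.h>
        #include <sys/sysctl.h>
        #include <stdio.h>
        #include <unistd.h>

        int
        main(void)
        {
                struct kinfo_proc2 kp;
                size_t len = sizeof(kp);
                int mib[6] = { CTL_KERN, KERN_PROC2, KERN_PROC_PID, getpid(),
                               (int)sizeof(kp), 1 };

                if (sysctl(mib, 6, &kp, &len, NULL, 0) == -1) {
                        perror("sysctl");
                        return 1;
                }
                printf("pid %d ppid %d comm %s\n",
                    (int)kp.p_pid, (int)kp.p_ppid, kp.p_comm);
                return 0;
        }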
  475 
  476 /*
  477  * Initialize process 0.
  478  */
  479 void
  480 proc0_init(void)
  481 {
  482         struct proc *p;
  483         struct pgrp *pg;
  484         struct rlimit *rlim;
  485         rlim_t lim;
  486         int i;
  487 
  488         p = &proc0;
  489         pg = &pgrp0;
  490 
  491         mutex_init(&p->p_stmutex, MUTEX_DEFAULT, IPL_HIGH);
  492         mutex_init(&p->p_auxlock, MUTEX_DEFAULT, IPL_NONE);
  493         p->p_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
  494 
  495         rw_init(&p->p_reflock);
  496         cv_init(&p->p_waitcv, "wait");
  497         cv_init(&p->p_lwpcv, "lwpwait");
  498 
  499         LIST_INSERT_HEAD(&p->p_lwps, &lwp0, l_sibling);
  500 
  501         KASSERT(lwp0.l_lid == 0);
  502         pid_table[lwp0.l_lid].pt_slot = PT_SET_LWP(&lwp0);
  503         LIST_INSERT_HEAD(&allproc, p, p_list);
  504 
  505         pid_table[lwp0.l_lid].pt_pgrp = pg;
  506         LIST_INSERT_HEAD(&pg->pg_members, p, p_pglist);
  507 
  508 #ifdef __HAVE_SYSCALL_INTERN
  509         (*p->p_emul->e_syscall_intern)(p);
  510 #endif
  511 
  512         /* Create credentials. */
  513         cred0 = kauth_cred_alloc();
  514         p->p_cred = cred0;
  515 
  516         /* Create the CWD info. */
  517         rw_init(&cwdi0.cwdi_lock);
  518 
  519         /* Create the limits structures. */
  520         mutex_init(&limit0.pl_lock, MUTEX_DEFAULT, IPL_NONE);
  521 
  522         rlim = limit0.pl_rlimit;
  523         for (i = 0; i < __arraycount(limit0.pl_rlimit); i++) {
  524                 rlim[i].rlim_cur = RLIM_INFINITY;
  525                 rlim[i].rlim_max = RLIM_INFINITY;
  526         }
  527 
  528         rlim[RLIMIT_NOFILE].rlim_max = maxfiles;
  529         rlim[RLIMIT_NOFILE].rlim_cur = maxfiles < nofile ? maxfiles : nofile;
  530 
  531         rlim[RLIMIT_NPROC].rlim_max = maxproc;
  532         rlim[RLIMIT_NPROC].rlim_cur = maxproc < maxuprc ? maxproc : maxuprc;
  533 
  534         lim = MIN(VM_MAXUSER_ADDRESS, ctob((rlim_t)uvm_availmem(false)));
  535         rlim[RLIMIT_RSS].rlim_max = lim;
  536         rlim[RLIMIT_MEMLOCK].rlim_max = lim;
  537         rlim[RLIMIT_MEMLOCK].rlim_cur = lim / 3;
  538 
  539         rlim[RLIMIT_NTHR].rlim_max = maxlwp;
  540         rlim[RLIMIT_NTHR].rlim_cur = maxlwp / 2;
  541 
  542         /* Note that default core name has zero length. */
  543         limit0.pl_corename = defcorename;
  544         limit0.pl_cnlen = 0;
  545         limit0.pl_refcnt = 1;
  546         limit0.pl_writeable = false;
  547         limit0.pl_sv_limit = NULL;
  548 
  549         /* Configure virtual memory system, set vm rlimits. */
  550         uvm_init_limits(p);
  551 
  552         /* Initialize file descriptor table for proc0. */
  553         fd_init(&filedesc0);
  554 
  555         /*
  556          * Initialize proc0's vmspace, which uses the kernel pmap.
  557          * All kernel processes (which never have user space mappings)
  558          * share proc0's vmspace, and thus, the kernel pmap.
  559          */
  560         uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS),
  561             trunc_page(VM_MAXUSER_ADDRESS),
  562 #ifdef __USE_TOPDOWN_VM
  563             true
  564 #else
  565             false
  566 #endif
  567             );
  568 
  569         /* Initialize signal state for proc0. XXX IPL_SCHED */
  570         mutex_init(&p->p_sigacts->sa_mutex, MUTEX_DEFAULT, IPL_SCHED);
  571         siginit(p);
  572 
  573         proc_initspecific(p);
  574         kdtrace_proc_ctor(NULL, p);
  575 }
  576 
  577 /*
  578  * Session reference counting.
  579  */
  580 
  581 void
  582 proc_sesshold(struct session *ss)
  583 {
  584 
  585         KASSERT(mutex_owned(&proc_lock));
  586         ss->s_count++;
  587 }
  588 
  589 void
  590 proc_sessrele(struct session *ss)
  591 {
  592         struct pgrp *pg;
  593 
  594         KASSERT(mutex_owned(&proc_lock));
  595         KASSERT(ss->s_count > 0);
  596 
  597         /*
  598          * We keep the pgrp with the same id as the session in order to
  599          * stop a process being given the same pid.  Since the pgrp holds
  600          * a reference to the session, it must be a 'zombie' pgrp by now.
  601          */
  602         if (--ss->s_count == 0) {
  603                 pg = pg_remove(ss->s_sid);
  604         } else {
  605                 pg = NULL;
  606                 ss = NULL;
  607         }
  608 
  609         mutex_exit(&proc_lock);
  610 
  611         if (pg)
  612                 kmem_free(pg, sizeof(struct pgrp));
  613         if (ss)
  614                 kmem_free(ss, sizeof(struct session));
  615 }
  616 
  617 /*
  618  * Check that the specified process group is in the session of the
  619  * specified process.
  620  * Treats -ve ids as process ids.
  621  * Used to validate TIOCSPGRP requests.
  622  */
  623 int
  624 pgid_in_session(struct proc *p, pid_t pg_id)
  625 {
  626         struct pgrp *pgrp;
  627         struct session *session;
  628         int error;
  629 
  630         if (pg_id == INT_MIN)
  631                 return EINVAL;
  632 
  633         mutex_enter(&proc_lock);
  634         if (pg_id < 0) {
  635                 struct proc *p1 = proc_find(-pg_id);
  636                 if (p1 == NULL) {
  637                         error = EINVAL;
  638                         goto fail;
  639                 }
  640                 pgrp = p1->p_pgrp;
  641         } else {
  642                 pgrp = pgrp_find(pg_id);
  643                 if (pgrp == NULL) {
  644                         error = EINVAL;
  645                         goto fail;
  646                 }
  647         }
  648         session = pgrp->pg_session;
  649         error = (session != p->p_pgrp->pg_session) ? EPERM : 0;
  650 fail:
  651         mutex_exit(&proc_lock);
  652         return error;
  653 }
  654 
  655 /*
  656  * p_inferior: is p an inferior of q?
  657  */
  658 static inline bool
  659 p_inferior(struct proc *p, struct proc *q)
  660 {
  661 
  662         KASSERT(mutex_owned(&proc_lock));
  663 
  664         for (; p != q; p = p->p_pptr)
  665                 if (p->p_pid == 0)
  666                         return false;
  667         return true;
  668 }
  669 
  670 /*
  671  * proc_find_lwp: locate an lwp in said proc by the ID.
  672  *
  673  * => Must be called with p::p_lock held.
  674  * => LSIDL lwps are not returned because they are only partially
  675  *    constructed while occupying the slot.
  676  * => Callers need to be careful about lwp::l_stat of the returned
  677  *    lwp.
  678  */
  679 struct lwp *
  680 proc_find_lwp(proc_t *p, pid_t pid)
  681 {
  682         struct pid_table *pt;
  683         unsigned pt_mask;
  684         struct lwp *l = NULL;
  685         uintptr_t slot;
  686         int s;
  687 
  688         KASSERT(mutex_owned(p->p_lock));
  689 
  690         /*
  691          * Look in the pid_table.  This is done unlocked inside a
  692          * pserialize read section covering pid_table's memory
  693          * allocation only, so take care to read things in the correct
  694          * order:
  695          *
  696          * 1. First read the table mask -- this only ever increases, in
  697          *    expand_pid_table, so a stale value is safely
  698          *    conservative.
  699          *
  700          * 2. Next read the pid table -- this is always set _before_
  701          *    the mask increases, so if we see a new table and stale
  702          *    mask, the mask is still valid for the table.
  703          */
  704         s = pserialize_read_enter();
  705         pt_mask = atomic_load_acquire(&pid_tbl_mask);
  706         pt = &atomic_load_consume(&pid_table)[pid & pt_mask];
  707         slot = atomic_load_consume(&pt->pt_slot);
  708         if (__predict_false(!PT_IS_LWP(slot))) {
  709                 pserialize_read_exit(s);
  710                 return NULL;
  711         }
  712 
  713         /*
  714          * Check to see if the LWP is from the correct process.  We won't
  715          * see entries in pid_table from a prior process that also used "p",
  716          * by virtue of the fact that allocating "p" means all prior updates
  717  * to dependent data structures are visible to this thread.
  718          */
  719         l = PT_GET_LWP(slot);
  720         if (__predict_false(atomic_load_relaxed(&l->l_proc) != p)) {
  721                 pserialize_read_exit(s);
  722                 return NULL;
  723         }
  724 
  725         /*
  726          * We now know that p->p_lock holds this LWP stable.
  727          *
  728          * If the status is not LSIDL, it means the LWP is intended to be
  729          * findable by LID and l_lid cannot change behind us.
  730          *
  731          * No need to acquire the LWP's lock to check for LSIDL, as
  732          * p->p_lock must be held to transition in and out of LSIDL.
  733  * Any other observed state is of no particular interest.
  734          */
  735         pserialize_read_exit(s);
  736         return l->l_stat != LSIDL && l->l_lid == pid ? l : NULL;
  737 }
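
The matching release side of this protocol is in expand_pid_table(), which publishes in the mirror order:

        atomic_store_release(&pid_table, new_pt);
        atomic_store_release(&pid_tbl_mask, new_pt_mask);

Because the table pointer is released before the mask, a reader that observes a given mask with the acquire load above is guaranteed to observe a table at least that large.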
  738 
  739 /*
  740  * proc_find_lwp_unlocked: locate an lwp in said proc by the ID.
  741  *
  742  * => Called in a pserialize read section with no locks held.
  743  * => LSIDL lwps are not returned because they are only partially
  744  *    constructed while occupying the slot.
  745  * => Callers need to be careful about lwp::l_stat of the returned
  746  *    lwp.
  747  * => If an LWP is found, it's returned locked.
  748  */
  749 struct lwp *
  750 proc_find_lwp_unlocked(proc_t *p, pid_t pid)
  751 {
  752         struct pid_table *pt;
  753         unsigned pt_mask;
  754         struct lwp *l = NULL;
  755         uintptr_t slot;
  756 
  757         KASSERT(pserialize_in_read_section());
  758 
  759         /*
  760          * Look in the pid_table.  This is done unlocked inside a
  761          * pserialize read section covering pid_table's memory
  762          * allocation only, so take care to read things in the correct
  763          * order:
  764          *
  765          * 1. First read the table mask -- this only ever increases, in
  766          *    expand_pid_table, so a stale value is safely
  767          *    conservative.
  768          *
  769          * 2. Next read the pid table -- this is always set _before_
  770          *    the mask increases, so if we see a new table and stale
  771          *    mask, the mask is still valid for the table.
  772          */
  773         pt_mask = atomic_load_acquire(&pid_tbl_mask);
  774         pt = &atomic_load_consume(&pid_table)[pid & pt_mask];
  775         slot = atomic_load_consume(&pt->pt_slot);
  776         if (__predict_false(!PT_IS_LWP(slot))) {
  777                 return NULL;
  778         }
  779 
  780         /*
  781          * Lock the LWP we found to get it stable.  If it's embryonic or
  782          * reaped (LSIDL) then none of the other fields can safely be
  783          * checked.
  784          */
  785         l = PT_GET_LWP(slot);
  786         lwp_lock(l);
  787         if (__predict_false(l->l_stat == LSIDL)) {
  788                 lwp_unlock(l);
  789                 return NULL;
  790         }
  791 
  792         /*
  793          * l_proc and l_lid are now known stable because the LWP is not
  794          * LSIDL, so check those fields too to make sure we found the
  795          * right thing.
  796          */
  797         if (__predict_false(l->l_proc != p || l->l_lid != pid)) {
  798                 lwp_unlock(l);
  799                 return NULL;
  800         }
  801 
  802         /* Everything checks out, return it locked. */
  803         return l;
  804 }
  805 
  806 /*
  807  * proc_find_lwp_acquire_proc: locate an lwp and acquire a lock
  808  * on its containing proc.
  809  *
  810  * => Similar to proc_find_lwp(), but does not require you to have
  811  *    the proc a priori.
  812  * => Also returns proc * to caller, with p::p_lock held.
  813  * => Same caveats apply.
  814  */
  815 struct lwp *
  816 proc_find_lwp_acquire_proc(pid_t pid, struct proc **pp)
  817 {
  818         struct pid_table *pt;
  819         struct proc *p = NULL;
  820         struct lwp *l = NULL;
  821         uintptr_t slot;
  822 
  823         KASSERT(pp != NULL);
  824         mutex_enter(&proc_lock);
  825         pt = &pid_table[pid & pid_tbl_mask];
  826 
  827         slot = pt->pt_slot;
  828         if (__predict_true(PT_IS_LWP(slot) && pt->pt_pid == pid)) {
  829                 l = PT_GET_LWP(slot);
  830                 p = l->l_proc;
  831                 mutex_enter(p->p_lock);
  832                 if (__predict_false(l->l_stat == LSIDL)) {
  833                         mutex_exit(p->p_lock);
  834                         l = NULL;
  835                         p = NULL;
  836                 }
  837         }
  838         mutex_exit(&proc_lock);
  839 
  840         KASSERT(p == NULL || mutex_owned(p->p_lock));
  841         *pp = p;
  842         return l;
  843 }
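
A caller sketch (illustrative, not from this file): on success the proc is returned with p::p_lock held, so the caller must drop it when done with the LWP:

        static void
        inspect_lwp_example(pid_t tid)
        {
                struct proc *p;
                struct lwp *l;

                l = proc_find_lwp_acquire_proc(tid, &p);
                if (l != NULL) {
                        /* ... l is held stable by p->p_lock here ... */
                        mutex_exit(p->p_lock);
                }
        }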
  844 
  845 /*
  846  * proc_find_raw_pid_table_locked: locate a process by the ID.
  847  *
  848  * => Must be called with proc_lock held.
  849  */
  850 static proc_t *
  851 proc_find_raw_pid_table_locked(pid_t pid, bool any_lwpid)
  852 {
  853         struct pid_table *pt;
  854         proc_t *p = NULL;
  855         uintptr_t slot;
  856 
  857         /* No - used by DDB.  KASSERT(mutex_owned(&proc_lock)); */
  858         pt = &pid_table[pid & pid_tbl_mask];
  859 
  860         slot = pt->pt_slot;
  861         if (__predict_true(PT_IS_LWP(slot) && pt->pt_pid == pid)) {
  862                 /*
  863                  * When looking up processes, require a direct match
  864                  * on the PID assigned to the proc, not just one of
  865                  * its LWPs.
  866                  *
  867                  * N.B. We require lwp::l_proc of LSIDL LWPs to be
  868                  * valid here.
  869                  */
  870                 p = PT_GET_LWP(slot)->l_proc;
  871                 if (__predict_false(p->p_pid != pid && !any_lwpid))
  872                         p = NULL;
  873         } else if (PT_IS_PROC(slot) && pt->pt_pid == pid) {
  874                 p = PT_GET_PROC(slot);
  875         }
  876         return p;
  877 }
  878 
  879 proc_t *
  880 proc_find_raw(pid_t pid)
  881 {
  882 
  883         return proc_find_raw_pid_table_locked(pid, false);
  884 }
  885 
  886 static proc_t *
  887 proc_find_internal(pid_t pid, bool any_lwpid)
  888 {
  889         proc_t *p;
  890 
  891         KASSERT(mutex_owned(&proc_lock));
  892 
  893         p = proc_find_raw_pid_table_locked(pid, any_lwpid);
  894         if (__predict_false(p == NULL)) {
  895                 return NULL;
  896         }
  897 
  898         /*
  899          * Only allow live processes to be found by PID.
  900          * XXX: p_stat might change, since proc unlocked.
  901          */
  902         if (__predict_true(p->p_stat == SACTIVE || p->p_stat == SSTOP)) {
  903                 return p;
  904         }
  905         return NULL;
  906 }
  907 
  908 proc_t *
  909 proc_find(pid_t pid)
  910 {
  911         return proc_find_internal(pid, false);
  912 }
  913 
  914 proc_t *
  915 proc_find_lwpid(pid_t pid)
  916 {
  917         return proc_find_internal(pid, true);
  918 }
  919 
  920 /*
  921  * pgrp_find: locate a process group by the ID.
  922  *
  923  * => Must be called with proc_lock held.
  924  */
  925 struct pgrp *
  926 pgrp_find(pid_t pgid)
  927 {
  928         struct pgrp *pg;
  929 
  930         KASSERT(mutex_owned(&proc_lock));
  931 
  932         pg = pid_table[pgid & pid_tbl_mask].pt_pgrp;
  933 
  934         /*
  935          * Cannot look up a process group that only exists because the
  936          * session has not died yet (traditional).
  937          */
  938         if (pg == NULL || pg->pg_id != pgid || LIST_EMPTY(&pg->pg_members)) {
  939                 return NULL;
  940         }
  941         return pg;
  942 }
  943 
  944 static void
  945 expand_pid_table(void)
  946 {
  947         size_t pt_size, tsz;
  948         struct pid_table *n_pt, *new_pt;
  949         uintptr_t slot;
  950         struct pgrp *pgrp;
  951         pid_t pid, rpid;
  952         u_int i;
  953         uint new_pt_mask;
  954 
  955         KASSERT(mutex_owned(&proc_lock));
  956 
  957         /* Unlock the pid_table briefly to allocate memory. */
  958         pt_size = pid_tbl_mask + 1;
  959         mutex_exit(&proc_lock);
  960 
  961         tsz = pt_size * 2 * sizeof(struct pid_table);
  962         new_pt = kmem_alloc(tsz, KM_SLEEP);
  963         new_pt_mask = pt_size * 2 - 1;
  964 
  965         /* XXX For now.  The practical limit is much lower anyway. */
  966         KASSERT(new_pt_mask <= FUTEX_TID_MASK);
  967 
  968         mutex_enter(&proc_lock);
  969         if (pt_size != pid_tbl_mask + 1) {
  970                 /* Another process beat us to it... */
  971                 mutex_exit(&proc_lock);
  972                 kmem_free(new_pt, tsz);
  973                 goto out;
  974         }
  975 
  976         /*
  977          * Copy entries from old table into new one.
  978          * If 'pid' is 'odd' we need to place in the upper half,
  979          * even pid's to the lower half.
  980          * Free items stay in the low half so we don't have to
  981          * fixup the reference to them.
  982          * We stuff free items on the front of the freelist
  983          * because we can't write to unmodified entries.
  984          * Processing the table backwards maintains a semblance
  985          * of issuing pid numbers that increase with time.
  986          */
  987         i = pt_size - 1;
  988         n_pt = new_pt + i;
  989         for (; ; i--, n_pt--) {
  990                 slot = pid_table[i].pt_slot;
  991                 pgrp = pid_table[i].pt_pgrp;
  992                 if (!PT_VALID(slot)) {
  993                         /* Up 'use count' so that link is valid */
  994                         pid = (PT_NEXT(slot) + pt_size) & ~pt_size;
  995                         rpid = 0;
  996                         slot = PT_SET_FREE(pid);
  997                         if (pgrp)
  998                                 pid = pgrp->pg_id;
  999                 } else {
 1000                         pid = pid_table[i].pt_pid;
 1001                         rpid = pid;
 1002                 }
 1003 
 1004                 /* Save entry in appropriate half of table */
 1005                 n_pt[pid & pt_size].pt_slot = slot;
 1006                 n_pt[pid & pt_size].pt_pgrp = pgrp;
 1007                 n_pt[pid & pt_size].pt_pid = rpid;
 1008 
 1009                 /* Put other piece on start of free list */
 1010                 pid = (pid ^ pt_size) & ~pid_tbl_mask;
 1011                 n_pt[pid & pt_size].pt_slot =
 1012                         PT_SET_FREE((pid & ~pt_size) | next_free_pt);
 1013                 n_pt[pid & pt_size].pt_pgrp = 0;
 1014                 n_pt[pid & pt_size].pt_pid = 0;
 1015 
 1016                 next_free_pt = i | (pid & pt_size);
 1017                 if (i == 0)
 1018                         break;
 1019         }
 1020 
 1021         /* Save old table size and switch tables */
 1022         tsz = pt_size * sizeof(struct pid_table);
 1023         n_pt = pid_table;
 1024         atomic_store_release(&pid_table, new_pt);
 1025         KASSERT(new_pt_mask >= pid_tbl_mask);
 1026         atomic_store_release(&pid_tbl_mask, new_pt_mask);
 1027 
 1028         /*
 1029          * pid_max starts as PID_MAX (= 30000), once we have 16384
 1030          * allocated pids we need it to be larger!
 1031          */
 1032         if (pid_tbl_mask > PID_MAX) {
 1033                 pid_max = pid_tbl_mask * 2 + 1;
 1034                 pid_alloc_lim |= pid_alloc_lim << 1;
 1035         } else
 1036                 pid_alloc_lim <<= 1;    /* doubles number of free slots... */
 1037 
 1038         mutex_exit(&proc_lock);
 1039 
 1040         /*
 1041          * Make sure that unlocked access to the old pid_table is complete
 1042          * and then free it.
 1043          */
 1044         pserialize_perform(proc_psz);
 1045         kmem_free(n_pt, tsz);
 1046 
 1047  out:   /* Return with proc_lock held again. */
 1048         mutex_enter(&proc_lock);
 1049 }
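
A worked example of the unzip (ours, for a table growing from 32 to 64 entries, so pt_size == 32): every old slot i splits into new slots i and i + 32, and `pid & pt_size' selects the half that keeps the occupant.

        old table (size 32): index = pid & 31
        new table (size 64): index = pid & 63

        occupant pid 37 in old slot 5: 37 & 63 == 37, the entry moves to
            new slot 37; new slot 5 is pushed onto the front of the free list.
        occupant pid  5 in old slot 5:  5 & 63 ==  5, the entry stays in
            new slot 5; new slot 37 is pushed onto the free list.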
 1050 
 1051 struct proc *
 1052 proc_alloc(void)
 1053 {
 1054         struct proc *p;
 1055 
 1056         p = pool_cache_get(proc_cache, PR_WAITOK);
 1057         p->p_stat = SIDL;                       /* protect against others */
 1058         proc_initspecific(p);
 1059         kdtrace_proc_ctor(NULL, p);
 1060 
 1061         /*
 1062          * Allocate a placeholder in the pid_table.  When we create the
 1063          * first LWP for this process, it will take ownership of the
 1064          * slot.
 1065          */
 1066         if (__predict_false(proc_alloc_pid(p) == -1)) {
 1067                 /* Allocating the PID failed; unwind. */
 1068                 proc_finispecific(p);
 1069                 proc_free_mem(p);
 1070                 p = NULL;
 1071         }
 1072         return p;
 1073 }
 1074 
 1075 /*
 1076  * proc_alloc_pid_slot: allocate PID and record the occupant so that
 1077  * proc_find_raw() can find it by the PID.
 1078  */
 1079 static pid_t __noinline
 1080 proc_alloc_pid_slot(struct proc *p, uintptr_t slot)
 1081 {
 1082         struct pid_table *pt;
 1083         pid_t pid;
 1084         int nxt;
 1085 
 1086         KASSERT(mutex_owned(&proc_lock));
 1087 
 1088         for (;;expand_pid_table()) {
 1089                 if (__predict_false(pid_alloc_cnt >= pid_alloc_lim)) {
 1090                         /* ensure pids cycle through 2000+ values */
 1091                         continue;
 1092                 }
 1093                 /*
 1094                  * The first user process *must* be given PID 1.
 1095                  * it has already been reserved for us.  This
 1096                  * will be coming in from the proc_alloc() call
 1097                  * above, and the entry will be usurped later when
 1098                  * the first user LWP is created.
 1099                  * XXX this is slightly gross.
 1100                  */
 1101                 if (__predict_false(PT_RESERVED(pid_table[1].pt_slot) &&
 1102                                     p != &proc0)) {
 1103                         KASSERT(PT_IS_PROC(slot));
 1104                         pt = &pid_table[1];
 1105                         pt->pt_slot = slot;
 1106                         return 1;
 1107                 }
 1108                 pt = &pid_table[next_free_pt];
 1109 #ifdef DIAGNOSTIC
 1110                 if (__predict_false(PT_VALID(pt->pt_slot) || pt->pt_pgrp))
 1111                         panic("proc_alloc: slot busy");
 1112 #endif
 1113                 nxt = PT_NEXT(pt->pt_slot);
 1114                 if (nxt & pid_tbl_mask)
 1115                         break;
 1116                 /* Table full - expand (NB last entry not used....) */
 1117         }
 1118 
 1119         /* pid is 'saved use count' + 'size' + entry */
 1120         pid = (nxt & ~pid_tbl_mask) + pid_tbl_mask + 1 + next_free_pt;
 1121         if ((uint)pid > (uint)pid_max)
 1122                 pid &= pid_tbl_mask;
 1123         next_free_pt = nxt & pid_tbl_mask;
 1124 
 1125         /* XXX For now.  The practical limit is much lower anyway. */
 1126         KASSERT(pid <= FUTEX_TID_MASK);
 1127 
 1128         /* Grab table slot */
 1129         pt->pt_slot = slot;
 1130 
 1131         KASSERT(pt->pt_pid == 0);
 1132         pt->pt_pid = pid;
 1133         pid_alloc_cnt++;
 1134 
 1135         return pid;
 1136 }
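
A worked example of the "saved use count" arithmetic (ours, for a 32-entry table, pid_tbl_mask == 31): each recycling of a slot advances its pid by the table size, so a pid value is not reissued until the counter wraps past pid_max.

        slot 5, first use:  preset count is above pid_max, so pid &= 31 -> 5
        free pid 5:         slot saves  5 & ~31 ==  0
        next allocation:    pid =  0 + (31 + 1) + 5 == 37
        free pid 37:        slot saves 37 & ~31 == 32
        next allocation:    pid = 32 + (31 + 1) + 5 == 69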
 1137 
 1138 pid_t
 1139 proc_alloc_pid(struct proc *p)
 1140 {
 1141         pid_t pid;
 1142 
 1143         KASSERT((((uintptr_t)p) & PT_F_ALLBITS) == 0);
 1144         KASSERT(p->p_stat == SIDL);
 1145 
 1146         mutex_enter(&proc_lock);
 1147         pid = proc_alloc_pid_slot(p, PT_SET_PROC(p));
 1148         if (pid != -1)
 1149                 p->p_pid = pid;
 1150         mutex_exit(&proc_lock);
 1151 
 1152         return pid;
 1153 }
 1154 
 1155 pid_t
 1156 proc_alloc_lwpid(struct proc *p, struct lwp *l)
 1157 {
 1158         struct pid_table *pt;
 1159         pid_t pid;
 1160 
 1161         KASSERT((((uintptr_t)l) & PT_F_ALLBITS) == 0);
 1162         KASSERT(l->l_proc == p);
 1163         KASSERT(l->l_stat == LSIDL);
 1164 
 1165         /*
 1166          * For unlocked lookup in proc_find_lwp(), make sure l->l_proc
 1167          * is globally visible before the LWP becomes visible via the
 1168          * pid_table.
 1169          */
 1170 #ifndef __HAVE_ATOMIC_AS_MEMBAR
 1171         membar_producer();
 1172 #endif
 1173 
 1174         /*
 1175          * If the slot for p->p_pid currently points to the proc,
 1176          * then we should usurp this ID for the LWP.  This happens
 1177          * at least once per process (for the first LWP), and can
 1178          * happen again if the first LWP for a process exits and
 1179          * before the process creates another.
 1180          */
 1181         mutex_enter(&proc_lock);
 1182         pid = p->p_pid;
 1183         pt = &pid_table[pid & pid_tbl_mask];
 1184         KASSERT(pt->pt_pid == pid);
 1185         if (PT_IS_PROC(pt->pt_slot)) {
 1186                 KASSERT(PT_GET_PROC(pt->pt_slot) == p);
 1187                 l->l_lid = pid;
 1188                 pt->pt_slot = PT_SET_LWP(l);
 1189         } else {
 1190                 /* Need to allocate a new slot. */
 1191                 pid = proc_alloc_pid_slot(p, PT_SET_LWP(l));
 1192                 if (pid != -1)
 1193                         l->l_lid = pid;
 1194         }
 1195         mutex_exit(&proc_lock);
 1196 
 1197         return pid;
 1198 }
 1199 
 1200 static void __noinline
 1201 proc_free_pid_internal(pid_t pid, uintptr_t type __diagused)
 1202 {
 1203         struct pid_table *pt;
 1204 
 1205         KASSERT(mutex_owned(&proc_lock));
 1206 
 1207         pt = &pid_table[pid & pid_tbl_mask];
 1208 
 1209         KASSERT(PT_GET_TYPE(pt->pt_slot) == type);
 1210         KASSERT(pt->pt_pid == pid);
 1211 
 1212         /* save pid use count in slot */
 1213         pt->pt_slot = PT_SET_FREE(pid & ~pid_tbl_mask);
 1214         pt->pt_pid = 0;
 1215 
 1216         if (pt->pt_pgrp == NULL) {
 1217                 /* link last freed entry onto ours */
 1218                 pid &= pid_tbl_mask;
 1219                 pt = &pid_table[last_free_pt];
 1220                 pt->pt_slot = PT_SET_FREE(PT_NEXT(pt->pt_slot) | pid);
 1221                 pt->pt_pid = 0;
 1222                 last_free_pt = pid;
 1223                 pid_alloc_cnt--;
 1224         }
 1225 }
 1226 
 1227 /*
 1228  * Free a process id - called from proc_free (in kern_exit.c)
 1229  *
 1230  * Called with the proc_lock held.
 1231  */
 1232 void
 1233 proc_free_pid(pid_t pid)
 1234 {
 1235 
 1236         KASSERT(mutex_owned(&proc_lock));
 1237         proc_free_pid_internal(pid, PT_F_PROC);
 1238 }
 1239 
 1240 /*
 1241  * Free a process id used by an LWP.  If this was the process's
 1242  * first LWP, we convert the slot to point to the process; the
 1243  * entry will get cleaned up later when the process finishes exiting.
 1244  *
 1245  * If not, then it's the same as proc_free_pid().
 1246  */
 1247 void
 1248 proc_free_lwpid(struct proc *p, pid_t pid)
 1249 {
 1250 
 1251         KASSERT(mutex_owned(&proc_lock));
 1252 
 1253         if (__predict_true(p->p_pid == pid)) {
 1254                 struct pid_table *pt;
 1255 
 1256                 pt = &pid_table[pid & pid_tbl_mask];
 1257 
 1258                 KASSERT(pt->pt_pid == pid);
 1259                 KASSERT(PT_IS_LWP(pt->pt_slot));
 1260                 KASSERT(PT_GET_LWP(pt->pt_slot)->l_proc == p);
 1261 
 1262                 pt->pt_slot = PT_SET_PROC(p);
 1263                 return;
 1264         }
 1265         proc_free_pid_internal(pid, PT_F_LWP);
 1266 }
 1267 
 1268 void
 1269 proc_free_mem(struct proc *p)
 1270 {
 1271 
 1272         kdtrace_proc_dtor(NULL, p);
 1273         pool_cache_put(proc_cache, p);
 1274 }
 1275 
 1276 /*
 1277  * proc_enterpgrp: move p to a new or existing process group (and session).
 1278  *
 1279  * If we are creating a new pgrp, the pgid should equal
 1280  * the calling process' pid.
 1281  * It is only valid to enter a process group that is in the session
 1282  * of the process.
 1283  * Also, mksess should only be set if we are creating a process group.
 1284  *
 1285  * Only called from sys_setsid, sys_setpgid and posix_spawn/spawn_return.
 1286  */
 1287 int
 1288 proc_enterpgrp(struct proc *curp, pid_t pid, pid_t pgid, bool mksess)
 1289 {
 1290         struct pgrp *new_pgrp, *pgrp;
 1291         struct session *sess;
 1292         struct proc *p;
 1293         int rval;
 1294         pid_t pg_id = NO_PGID;
 1295 
 1296         /* Allocate data areas we might need before doing any validity checks */
 1297         sess = mksess ? kmem_alloc(sizeof(*sess), KM_SLEEP) : NULL;
 1298         new_pgrp = kmem_alloc(sizeof(*new_pgrp), KM_SLEEP);
 1299 
 1300         mutex_enter(&proc_lock);
 1301         rval = EPERM;   /* most common error (to save typing) */
 1302 
 1303         /* Check pgrp exists or can be created */
 1304         pgrp = pid_table[pgid & pid_tbl_mask].pt_pgrp;
 1305         if (pgrp != NULL && pgrp->pg_id != pgid)
 1306                 goto done;
 1307 
 1308         /* Can only set another process under restricted circumstances. */
 1309         if (pid != curp->p_pid) {
 1310                 /* Must exist and be one of our children... */
 1311                 p = proc_find_internal(pid, false);
 1312                 if (p == NULL || !p_inferior(p, curp)) {
 1313                         rval = ESRCH;
 1314                         goto done;
 1315                 }
 1316                 /* ... in the same session... */
 1317                 if (sess != NULL || p->p_session != curp->p_session)
 1318                         goto done;
 1319                 /* ... existing pgid must be in same session ... */
 1320                 if (pgrp != NULL && pgrp->pg_session != p->p_session)
 1321                         goto done;
 1322                 /* ... and not done an exec. */
 1323                 if (p->p_flag & PK_EXEC) {
 1324                         rval = EACCES;
 1325                         goto done;
 1326                 }
 1327         } else {
 1328                 /* ... setsid() cannot re-enter a pgrp */
 1329                 if (mksess && (curp->p_pgid == curp->p_pid ||
 1330                     pgrp_find(curp->p_pid)))
 1331                         goto done;
 1332                 p = curp;
 1333         }
 1334 
 1335         /* Changing the process group/session of a session
 1336            leader is definitely off limits. */
 1337         if (SESS_LEADER(p)) {
 1338                 if (sess == NULL && p->p_pgrp == pgrp)
 1339                         /* unless it's a definite noop */
 1340                         rval = 0;
 1341                 goto done;
 1342         }
 1343 
 1344         /* Can only create a process group with id of process */
 1345         if (pgrp == NULL && pgid != pid)
 1346                 goto done;
 1347 
 1348         /* Can only create a session if creating pgrp */
 1349         if (sess != NULL && pgrp != NULL)
 1350                 goto done;
 1351 
 1352         /* Check we allocated memory for a pgrp... */
 1353         if (pgrp == NULL && new_pgrp == NULL)
 1354                 goto done;
 1355 
 1356         /* Don't attach to 'zombie' pgrp */
 1357         if (pgrp != NULL && LIST_EMPTY(&pgrp->pg_members))
 1358                 goto done;
 1359 
 1360         /* Expect to succeed now */
 1361         rval = 0;
 1362 
 1363         if (pgrp == p->p_pgrp)
 1364                 /* nothing to do */
 1365                 goto done;
 1366 
 1367         /* Ok all setup, link up required structures */
 1368 
 1369         if (pgrp == NULL) {
 1370                 pgrp = new_pgrp;
 1371                 new_pgrp = NULL;
 1372                 if (sess != NULL) {
 1373                         sess->s_sid = p->p_pid;
 1374                         sess->s_leader = p;
 1375                         sess->s_count = 1;
 1376                         sess->s_ttyvp = NULL;
 1377                         sess->s_ttyp = NULL;
 1378                         sess->s_flags = p->p_session->s_flags & ~S_LOGIN_SET;
 1379                         memcpy(sess->s_login, p->p_session->s_login,
 1380                             sizeof(sess->s_login));
 1381                         p->p_lflag &= ~PL_CONTROLT;
 1382                 } else {
 1383                         sess = p->p_pgrp->pg_session;
 1384                         proc_sesshold(sess);
 1385                 }
 1386                 pgrp->pg_session = sess;
 1387                 sess = NULL;
 1388 
 1389                 pgrp->pg_id = pgid;
 1390                 LIST_INIT(&pgrp->pg_members);
 1391 #ifdef DIAGNOSTIC
 1392                 if (__predict_false(pid_table[pgid & pid_tbl_mask].pt_pgrp))
 1393                         panic("enterpgrp: pgrp table slot in use");
 1394                 if (__predict_false(mksess && p != curp))
 1395                         panic("enterpgrp: mksession and p != curproc");
 1396 #endif
 1397                 pid_table[pgid & pid_tbl_mask].pt_pgrp = pgrp;
 1398                 pgrp->pg_jobc = 0;
 1399         }
 1400 
 1401         /*
 1402          * Adjust eligibility of affected pgrps to participate in job control.
 1403          * Increment eligibility counts before decrementing, otherwise we
 1404          * could reach 0 spuriously during the first call.
 1405          */
 1406         fixjobc(p, pgrp, 1);
 1407         fixjobc(p, p->p_pgrp, 0);
 1408 
 1409         /* Interlock with ttread(). */
 1410         mutex_spin_enter(&tty_lock);
 1411 
 1412         /* Move process to requested group. */
 1413         LIST_REMOVE(p, p_pglist);
 1414         if (LIST_EMPTY(&p->p_pgrp->pg_members))
 1415                 /* defer delete until we've dumped the lock */
 1416                 pg_id = p->p_pgrp->pg_id;
 1417         p->p_pgrp = pgrp;
 1418         LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);
 1419 
 1420         /* Done with the swap; we can release the tty mutex. */
 1421         mutex_spin_exit(&tty_lock);
 1422 
 1423     done:
 1424         if (pg_id != NO_PGID) {
 1425                 /* Releases proc_lock. */
 1426                 pg_delete(pg_id);
 1427         } else {
 1428                 mutex_exit(&proc_lock);
 1429         }
 1430         if (sess != NULL)
 1431                 kmem_free(sess, sizeof(*sess));
 1432         if (new_pgrp != NULL)
 1433                 kmem_free(new_pgrp, sizeof(*new_pgrp));
 1434 #ifdef DEBUG_PGRP
 1435         if (__predict_false(rval))
 1436                 printf("enterpgrp(%d,%d,%d), curproc %d, rval %d\n",
 1437                         pid, pgid, mksess, curp->p_pid, rval);
 1438 #endif
 1439         return rval;
 1440 }
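
From userland, the two main callers map onto this function roughly as follows (an illustrative sketch; the comments note which proc_enterpgrp() call each step reaches):

        #include <stdio.h>
        #include <unistd.h>

        int
        main(void)
        {
                pid_t child = fork();

                if (child == -1) {
                        perror("fork");
                        return 1;
                }
                if (child == 0) {
                        /*
                         * A daemon would detach here with setsid(), which
                         * enters proc_enterpgrp() with pid == pgid == its
                         * own pid and mksess == true.
                         */
                        pause();
                        _exit(0);
                }

                /*
                 * Give the child its own process group within our session;
                 * sys_setpgid() enters proc_enterpgrp() with mksess == false.
                 */
                if (setpgid(child, child) == -1)
                        perror("setpgid");
                return 0;
        }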
 1441 
 1442 /*
 1443  * proc_leavepgrp: remove a process from its process group.
 1444  *  => must be called with the proc_lock held, which will be released;
 1445  */
 1446 void
 1447 proc_leavepgrp(struct proc *p)
 1448 {
 1449         struct pgrp *pgrp;
 1450 
 1451         KASSERT(mutex_owned(&proc_lock));
 1452 
 1453         /* Interlock with ttread() */
 1454         mutex_spin_enter(&tty_lock);
 1455         pgrp = p->p_pgrp;
 1456         LIST_REMOVE(p, p_pglist);
 1457         p->p_pgrp = NULL;
 1458         mutex_spin_exit(&tty_lock);
 1459 
 1460         if (LIST_EMPTY(&pgrp->pg_members)) {
 1461                 /* Releases proc_lock. */
 1462                 pg_delete(pgrp->pg_id);
 1463         } else {
 1464                 mutex_exit(&proc_lock);
 1465         }
 1466 }
 1467 
 1468 /*
 1469  * pg_remove: remove a process group from the table.
 1470  *  => must be called with the proc_lock held;
 1471  *  => returns process group to free;
 1472  */
 1473 static struct pgrp *
 1474 pg_remove(pid_t pg_id)
 1475 {
 1476         struct pgrp *pgrp;
 1477         struct pid_table *pt;
 1478 
 1479         KASSERT(mutex_owned(&proc_lock));
 1480 
 1481         pt = &pid_table[pg_id & pid_tbl_mask];
 1482         pgrp = pt->pt_pgrp;
 1483 
 1484         KASSERT(pgrp != NULL);
 1485         KASSERT(pgrp->pg_id == pg_id);
 1486         KASSERT(LIST_EMPTY(&pgrp->pg_members));
 1487 
 1488         pt->pt_pgrp = NULL;
 1489 
 1490         if (!PT_VALID(pt->pt_slot)) {
 1491                 /* Orphaned pgrp, put slot onto free list. */
 1492                 KASSERT((PT_NEXT(pt->pt_slot) & pid_tbl_mask) == 0);
 1493                 pg_id &= pid_tbl_mask;
 1494                 pt = &pid_table[last_free_pt];
 1495                 pt->pt_slot = PT_SET_FREE(PT_NEXT(pt->pt_slot) | pg_id);
 1496                 KASSERT(pt->pt_pid == 0);
 1497                 last_free_pt = pg_id;
 1498                 pid_alloc_cnt--;
 1499         }
 1500         return pgrp;
 1501 }
 1502 
 1503 /*
 1504  * pg_delete: delete and free a process group.
 1505  *  => must be called with the proc_lock held, which will be released.
 1506  */
 1507 static void
 1508 pg_delete(pid_t pg_id)
 1509 {
 1510         struct pgrp *pg;
 1511         struct tty *ttyp;
 1512         struct session *ss;
 1513 
 1514         KASSERT(mutex_owned(&proc_lock));
 1515 
 1516         pg = pid_table[pg_id & pid_tbl_mask].pt_pgrp;
 1517         if (pg == NULL || pg->pg_id != pg_id || !LIST_EMPTY(&pg->pg_members)) {
 1518                 mutex_exit(&proc_lock);
 1519                 return;
 1520         }
 1521 
 1522         ss = pg->pg_session;
 1523 
 1524         /* Remove reference (if any) from tty to this process group */
 1525         mutex_spin_enter(&tty_lock);
 1526         ttyp = ss->s_ttyp;
 1527         if (ttyp != NULL && ttyp->t_pgrp == pg) {
 1528                 ttyp->t_pgrp = NULL;
 1529                 KASSERT(ttyp->t_session == ss);
 1530         }
 1531         mutex_spin_exit(&tty_lock);
 1532 
 1533         /*
 1534          * A session's leading process group is freed by proc_sessrele()
 1535          * when the last reference is dropped; it also releases proc_lock.
 1536          */
 1537         pg = (ss->s_sid != pg->pg_id) ? pg_remove(pg_id) : NULL;
 1538         proc_sessrele(ss);
 1539 
 1540         if (pg != NULL) {
 1541                 /* Free it, if was not done above. */
 1542                 kmem_free(pg, sizeof(struct pgrp));
 1543         }
 1544 }
 1545 
 1546 /*
 1547  * Adjust pgrp jobc counters when the specified process changes process group.
 1548  * We count the number of processes in each process group that "qualify"
 1549  * the group for terminal job control (those with a parent in a different
 1550  * process group of the same session).  If that count reaches zero, the
 1551  * process group becomes orphaned.  Check both the specified process'
 1552  * process group and that of its children.
 1553  * entering == 0 => p is leaving specified group.
 1554  * entering == 1 => p is entering specified group.
 1555  *
 1556  * Call with proc_lock held.
 1557  */
 1558 void
 1559 fixjobc(struct proc *p, struct pgrp *pgrp, int entering)
 1560 {
 1561         struct pgrp *hispgrp;
 1562         struct session *mysession = pgrp->pg_session;
 1563         struct proc *child;
 1564 
 1565         KASSERT(mutex_owned(&proc_lock));
 1566 
 1567         /*
 1568          * Check p's parent to see whether p qualifies its own process
 1569          * group; if so, adjust count for p's process group.
 1570          */
 1571         hispgrp = p->p_pptr->p_pgrp;
 1572         if (hispgrp != pgrp && hispgrp->pg_session == mysession) {
 1573                 if (entering) {
 1574                         pgrp->pg_jobc++;
 1575                         p->p_lflag &= ~PL_ORPHANPG;
 1576                 } else {
 1577                         /* KASSERT(pgrp->pg_jobc > 0); */
 1578                         if (--pgrp->pg_jobc == 0)
 1579                                 orphanpg(pgrp);
 1580                 }
 1581         }
 1582 
 1583         /*
 1584          * Check this process' children to see whether they qualify
 1585          * their process groups; if so, adjust counts for children's
 1586          * process groups.
 1587          */
 1588         LIST_FOREACH(child, &p->p_children, p_sibling) {
 1589                 hispgrp = child->p_pgrp;
 1590                 if (hispgrp != pgrp && hispgrp->pg_session == mysession &&
 1591                     !P_ZOMBIE(child)) {
 1592                         if (entering) {
 1593                                 child->p_lflag &= ~PL_ORPHANPG;
 1594                                 hispgrp->pg_jobc++;
 1595                         } else {
 1596                                 KASSERT(hispgrp->pg_jobc > 0);
 1597                                 if (--hispgrp->pg_jobc == 0)
 1598                                         orphanpg(hispgrp);
 1599                         }
 1600                 }
 1601         }
 1602 }
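/*
 * Worked example (illustrative, not part of the original source): in a
 * session with process groups A and B, a shell S in A forks a pipeline
 * child P into B.  P's parent is in a different pgrp of the same session,
 * so P "qualifies" B: fixjobc(P, B, 1) raises B's pg_jobc to 1.  When S
 * exits or P leaves B, the matching fixjobc(..., 0) call drops pg_jobc
 * back to 0 and orphanpg() is invoked on B.
 */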
 1603 
 1604 /*
 1605  * A process group has become orphaned;
 1606  * if there are any stopped processes in the group,
 1607  * hang up all processes in that group.
 1608  *
 1609  * Call with proc_lock held.
 1610  */
 1611 static void
 1612 orphanpg(struct pgrp *pg)
 1613 {
 1614         struct proc *p;
 1615 
 1616         KASSERT(mutex_owned(&proc_lock));
 1617 
 1618         LIST_FOREACH(p, &pg->pg_members, p_pglist) {
 1619                 if (p->p_stat == SSTOP) {
 1620                         p->p_lflag |= PL_ORPHANPG;
 1621                         psignal(p, SIGHUP);
 1622                         psignal(p, SIGCONT);
 1623                 }
 1624         }
 1625 }
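/*
 * This implements the POSIX job-control rule: once a process group is
 * orphaned, no ancestor remains to continue its stopped members, so each
 * stopped member is sent SIGHUP and then SIGCONT rather than being left
 * stopped forever.
 */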
 1626 
 1627 #ifdef DDB
 1628 #include <ddb/db_output.h>
 1629 void pidtbl_dump(void);
 1630 void
 1631 pidtbl_dump(void)
 1632 {
 1633         struct pid_table *pt;
 1634         struct proc *p;
 1635         struct pgrp *pgrp;
 1636         uintptr_t slot;
 1637         int id;
 1638 
 1639         db_printf("pid table %p size %x, next %x, last %x\n",
 1640                 pid_table, pid_tbl_mask+1,
 1641                 next_free_pt, last_free_pt);
 1642         for (pt = pid_table, id = 0; id <= pid_tbl_mask; id++, pt++) {
 1643                 slot = pt->pt_slot;
 1644                 if (!PT_VALID(slot) && !pt->pt_pgrp)
 1645                         continue;
 1646                 if (PT_IS_LWP(slot)) {
 1647                         p = PT_GET_LWP(slot)->l_proc;
 1648                 } else if (PT_IS_PROC(slot)) {
 1649                         p = PT_GET_PROC(slot);
 1650                 } else {
 1651                         p = NULL;
 1652                 }
 1653                 db_printf("  id %x: ", id);
 1654                 if (p != NULL)
 1655                         db_printf("slotpid %d proc %p id %d (0x%x) %s\n",
 1656                                 pt->pt_pid, p, p->p_pid, p->p_pid, p->p_comm);
 1657                 else
 1658                         db_printf("next %x use %x\n",
 1659                                 PT_NEXT(slot) & pid_tbl_mask,
 1660                                 PT_NEXT(slot) & ~pid_tbl_mask);
 1661                 if ((pgrp = pt->pt_pgrp)) {
 1662                         db_printf("\tsession %p, sid %d, count %d, login %s\n",
 1663                             pgrp->pg_session, pgrp->pg_session->s_sid,
 1664                             pgrp->pg_session->s_count,
 1665                             pgrp->pg_session->s_login);
 1666                         db_printf("\tpgrp %p, pg_id %d, pg_jobc %d, members %p\n",
 1667                             pgrp, pgrp->pg_id, pgrp->pg_jobc,
 1668                             LIST_FIRST(&pgrp->pg_members));
 1669                         LIST_FOREACH(p, &pgrp->pg_members, p_pglist) {
 1670                                 db_printf("\t\tpid %d addr %p pgrp %p %s\n",
 1671                                     p->p_pid, p, p->p_pgrp, p->p_comm);
 1672                         }
 1673                 }
 1674         }
 1675 }
 1676 #endif /* DDB */
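/*
 * Usage sketch (illustrative): with a DDB kernel, the dump above can be
 * run from the debugger prompt with the "call" command:
 *
 *	db> call pidtbl_dump
 *
 * printing every occupied pid_table slot, its proc, and any attached pgrp.
 */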
 1677 
 1678 #ifdef KSTACK_CHECK_MAGIC
 1679 
 1680 #define KSTACK_MAGIC    0xdeadbeaf
 1681 
 1682 /* XXX should this be tracked per-process? */
 1683 static int      kstackleftmin = KSTACK_SIZE;
 1684 static int      kstackleftthres = KSTACK_SIZE / 8;
 1685 
 1686 void
 1687 kstack_setup_magic(const struct lwp *l)
 1688 {
 1689         uint32_t *ip;
 1690         uint32_t const *end;
 1691 
 1692         KASSERT(l != NULL);
 1693         KASSERT(l != &lwp0);
 1694 
 1695         /*
 1696          * Fill the whole stack with the magic number so that any
 1697          * later modification of it can be detected.
 1698          */
 1699         ip = (uint32_t *)KSTACK_LOWEST_ADDR(l);
 1700         end = (uint32_t *)((char *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
 1701         for (; ip < end; ip++) {
 1702                 *ip = KSTACK_MAGIC;
 1703         }
 1704 }
 1705 
 1706 void
 1707 kstack_check_magic(const struct lwp *l)
 1708 {
 1709         uint32_t const *ip, *end;
 1710         int stackleft;
 1711 
 1712         KASSERT(l != NULL);
 1713 
 1714         /* don't check lwp0 (proc0) */ /* XXX */
 1715         if (l == &lwp0)
 1716                 return;
 1717 
 1718 #ifdef __MACHINE_STACK_GROWS_UP
 1719         /* stack grows upwards (eg. hppa) */
 1720         ip = (uint32_t *)((char *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
 1721         end = (uint32_t *)KSTACK_LOWEST_ADDR(l);
 1722         for (ip--; ip >= end; ip--)
 1723                 if (*ip != KSTACK_MAGIC)
 1724                         break;
 1725 
 1726         stackleft = (char *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE - (char *)ip;
 1727 #else /* __MACHINE_STACK_GROWS_UP */
 1728         /* stack grows downwards (eg. i386) */
 1729         ip = (uint32_t *)KSTACK_LOWEST_ADDR(l);
 1730         end = (uint32_t *)((char *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
 1731         for (; ip < end; ip++)
 1732                 if (*ip != KSTACK_MAGIC)
 1733                         break;
 1734 
 1735         stackleft = ((const char *)ip) - (const char *)KSTACK_LOWEST_ADDR(l);
 1736 #endif /* __MACHINE_STACK_GROWS_UP */
 1737 
 1738         if (kstackleftmin > stackleft) {
 1739                 kstackleftmin = stackleft;
 1740                 if (stackleft < kstackleftthres)
 1741                         printf("warning: kernel stack left %d bytes "
 1742                             "(pid %u:lid %u)\n", stackleft,
 1743                             (u_int)l->l_proc->p_pid, (u_int)l->l_lid);
 1744         }
 1745 
 1746         if (stackleft <= 0) {
 1747                 panic("magic on the top of kernel stack changed for "
 1748                     "pid %u, lid %u: maybe kernel stack overflow",
 1749                     (u_int)l->l_proc->p_pid, (u_int)l->l_lid);
 1750         }
 1751 }
 1752 #endif /* KSTACK_CHECK_MAGIC */
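/*
 * Worked example (illustrative; assumes a hypothetical 16 KiB KSTACK_SIZE):
 * kstackleftthres = 16384 / 8 = 2048 bytes.  On a downward-growing stack,
 * if the scan finds the first overwritten word 1500 bytes above
 * KSTACK_LOWEST_ADDR(), then stackleft = 1500 < 2048 and the warning
 * fires; stackleft == 0 means even the lowest word was overwritten, which
 * triggers the overflow panic.
 */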
 1753 
 1754 int
 1755 proclist_foreach_call(struct proclist *list,
 1756     int (*callback)(struct proc *, void *arg), void *arg)
 1757 {
 1758         struct proc marker;
 1759         struct proc *p;
 1760         int ret = 0;
 1761 
 1762         marker.p_flag = PK_MARKER;
 1763         mutex_enter(&proc_lock);
 1764         for (p = LIST_FIRST(list); ret == 0 && p != NULL;) {
 1765                 if (p->p_flag & PK_MARKER) {
 1766                         p = LIST_NEXT(p, p_list);
 1767                         continue;
 1768                 }
 1769                 LIST_INSERT_AFTER(p, &marker, p_list);
 1770                 ret = (*callback)(p, arg);
 1771                 KASSERT(mutex_owned(&proc_lock));
 1772                 p = LIST_NEXT(&marker, p_list);
 1773                 LIST_REMOVE(&marker, p_list);
 1774         }
 1775         mutex_exit(&proc_lock);
 1776 
 1777         return ret;
 1778 }
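/*
 * Usage sketch (illustrative; "example_count_cb" is a hypothetical caller,
 * not part of this file).  The callback runs with proc_lock held, must
 * leave it held, and a non-zero return stops the walk early:
 */
#if 0
static int
example_count_cb(struct proc *p, void *arg)
{
	(*(int *)arg)++;	/* count each live (non-marker) process */
	return 0;		/* keep iterating */
}

	/* ... in some caller ... */
	int nproc = 0;
	(void)proclist_foreach_call(&allproc, example_count_cb, &nproc);
#endif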
 1779 
 1780 int
 1781 proc_vmspace_getref(struct proc *p, struct vmspace **vm)
 1782 {
 1783 
 1784         /* XXXCDC: how should locking work here? */
 1785 
 1786         /* curproc exception is for coredump. */
 1787 
 1788         if ((p != curproc && (p->p_sflag & PS_WEXIT) != 0) ||
 1789             (p->p_vmspace->vm_refcnt < 1)) {
 1790                 return EFAULT;
 1791         }
 1792 
 1793         uvmspace_addref(p->p_vmspace);
 1794         *vm = p->p_vmspace;
 1795 
 1796         return 0;
 1797 }
 1798 
 1799 /*
 1800  * Acquire a write lock on the process credential.
 1801  */
 1802 void
 1803 proc_crmod_enter(void)
 1804 {
 1805         struct lwp *l = curlwp;
 1806         struct proc *p = l->l_proc;
 1807         kauth_cred_t oc;
 1808 
 1809         /* Reset what needs to be reset in plimit. */
 1810         if (p->p_limit->pl_corename != defcorename) {
 1811                 lim_setcorename(p, defcorename, 0);
 1812         }
 1813 
 1814         mutex_enter(p->p_lock);
 1815 
 1816         /* Ensure the LWP cached credentials are up to date. */
 1817         if ((oc = l->l_cred) != p->p_cred) {
 1818                 kauth_cred_hold(p->p_cred);
 1819                 l->l_cred = p->p_cred;
 1820                 kauth_cred_free(oc);
 1821         }
 1822 }
 1823 
 1824 /*
 1825  * Install a new process credential and drop the write lock.  The new
 1826  * credential must already hold a reference.  Optionally, free a credential
 1827  * that is no longer required.
 1828  */
 1829 void
 1830 proc_crmod_leave(kauth_cred_t scred, kauth_cred_t fcred, bool sugid)
 1831 {
 1832         struct lwp *l = curlwp, *l2;
 1833         struct proc *p = l->l_proc;
 1834         kauth_cred_t oc;
 1835 
 1836         KASSERT(mutex_owned(p->p_lock));
 1837 
 1838         /* Is there a new credential to set in? */
 1839         if (scred != NULL) {
 1840                 p->p_cred = scred;
 1841                 LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
 1842                         if (l2 != l)
 1843                                 l2->l_prflag |= LPR_CRMOD;
 1844                 }
 1845 
 1846                 /* Ensure the LWP cached credentials are up to date. */
 1847                 if ((oc = l->l_cred) != scred) {
 1848                         kauth_cred_hold(scred);
 1849                         l->l_cred = scred;
 1850                 }
 1851         } else
 1852                 oc = NULL;      /* XXXgcc */
 1853 
 1854         if (sugid) {
 1855                 /*
 1856                  * Mark process as having changed credentials, stops
 1857                  * tracing etc.
 1858                  */
 1859                 p->p_flag |= PK_SUGID;
 1860         }
 1861 
 1862         mutex_exit(p->p_lock);
 1863 
 1864         /* If there is a credential to be released, free it now. */
 1865         if (fcred != NULL) {
 1866                 KASSERT(scred != NULL);
 1867                 kauth_cred_free(fcred);
 1868                 if (oc != scred)
 1869                         kauth_cred_free(oc);
 1870         }
 1871 }
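/*
 * Usage sketch (illustrative) of the enter/leave pairing, roughly as a
 * set*id-style path would use it; kauth_cred_dup() and kauth_cred_setuid()
 * are standard kauth(9) calls, the rest of the logic is hypothetical:
 */
#if 0
	kauth_cred_t cred, ncred;

	proc_crmod_enter();		/* takes p->p_lock */
	cred = p->p_cred;
	ncred = kauth_cred_dup(cred);	/* private copy to modify */
	kauth_cred_setuid(ncred, uid);
	/* Install ncred, free the old cred, set PK_SUGID, drop p->p_lock. */
	proc_crmod_leave(ncred, cred, true);
#endif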
 1872 
 1873 /*
 1874  * proc_specific_key_create --
 1875  *      Create a key for subsystem proc-specific data.
 1876  */
 1877 int
 1878 proc_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor)
 1879 {
 1880 
 1881         return (specificdata_key_create(proc_specificdata_domain, keyp, dtor));
 1882 }
 1883 
 1884 /*
 1885  * proc_specific_key_delete --
 1886  *      Delete a key for subsystem proc-specific data.
 1887  */
 1888 void
 1889 proc_specific_key_delete(specificdata_key_t key)
 1890 {
 1891 
 1892         specificdata_key_delete(proc_specificdata_domain, key);
 1893 }
 1894 
 1895 /*
 1896  * proc_initspecific --
 1897  *      Initialize a proc's specificdata container.
 1898  */
 1899 void
 1900 proc_initspecific(struct proc *p)
 1901 {
 1902         int error __diagused;
 1903 
 1904         error = specificdata_init(proc_specificdata_domain, &p->p_specdataref);
 1905         KASSERT(error == 0);
 1906 }
 1907 
 1908 /*
 1909  * proc_finispecific --
 1910  *      Finalize a proc's specificdata container.
 1911  */
 1912 void
 1913 proc_finispecific(struct proc *p)
 1914 {
 1915 
 1916         specificdata_fini(proc_specificdata_domain, &p->p_specdataref);
 1917 }
 1918 
 1919 /*
 1920  * proc_getspecific --
 1921  *      Return proc-specific data corresponding to the specified key.
 1922  */
 1923 void *
 1924 proc_getspecific(struct proc *p, specificdata_key_t key)
 1925 {
 1926 
 1927         return (specificdata_getspecific(proc_specificdata_domain,
 1928                                          &p->p_specdataref, key));
 1929 }
 1930 
 1931 /*
 1932  * proc_setspecific --
 1933  *      Set proc-specific data corresponding to the specified key.
 1934  */
 1935 void
 1936 proc_setspecific(struct proc *p, specificdata_key_t key, void *data)
 1937 {
 1938 
 1939         specificdata_setspecific(proc_specificdata_domain,
 1940                                  &p->p_specdataref, key, data);
 1941 }
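/*
 * Usage sketch (illustrative; "example_key", "example_dtor" and "data"
 * are hypothetical).  A subsystem creates one key at initialization and
 * then attaches private per-process data; the dtor runs when the process's
 * specificdata container is finalized:
 */
#if 0
	static specificdata_key_t example_key;

	/* once, at subsystem initialization */
	error = proc_specific_key_create(&example_key, example_dtor);

	/* per process, later */
	proc_setspecific(p, example_key, data);
	data = proc_getspecific(p, example_key);
#endif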
 1942 
 1943 int
 1944 proc_uidmatch(kauth_cred_t cred, kauth_cred_t target)
 1945 {
 1946         int r = 0;
 1947 
 1948         if (kauth_cred_getuid(cred) != kauth_cred_getuid(target) ||
 1949             kauth_cred_getuid(cred) != kauth_cred_getsvuid(target)) {
 1950                 /*
 1951                  * Target is not ours, or it is ours but set-uid.
 1952                  */
 1953                 r = EPERM;
 1954         } else if (kauth_cred_getgid(target) != kauth_cred_getsvgid(target)) {
 1955                 /*
 1956                  * Target is set-gid; its gid may match ours only temporarily.
 1957                  */
 1958                 r = EPERM;
 1959         } else {
 1960                 /*
 1961                  * Our rgid must be in the target's group list (i.e.,
 1962                  * sub-processes started by a set-gid process).
 1963                  */
 1964                 int ismember = 0;
 1965 
 1966                 if (kauth_cred_ismember_gid(cred,
 1967                     kauth_cred_getgid(target), &ismember) != 0 ||
 1968                     !ismember)
 1969                         r = EPERM;
 1970         }
 1971 
 1972         return (r);
 1973 }
 1974 
 1975 /*
 1976  * sysctl stuff
 1977  */
 1978 
 1979 #define KERN_PROCSLOP   (5 * sizeof(struct kinfo_proc))
 1980 
 1981 static const u_int sysctl_flagmap[] = {
 1982         PK_ADVLOCK, P_ADVLOCK,
 1983         PK_EXEC, P_EXEC,
 1984         PK_NOCLDWAIT, P_NOCLDWAIT,
 1985         PK_32, P_32,
 1986         PK_CLDSIGIGN, P_CLDSIGIGN,
 1987         PK_SUGID, P_SUGID,
 1988         0
 1989 };
 1990 
 1991 static const u_int sysctl_sflagmap[] = {
 1992         PS_NOCLDSTOP, P_NOCLDSTOP,
 1993         PS_WEXIT, P_WEXIT,
 1994         PS_STOPFORK, P_STOPFORK,
 1995         PS_STOPEXEC, P_STOPEXEC,
 1996         PS_STOPEXIT, P_STOPEXIT,
 1997         0
 1998 };
 1999 
 2000 static const u_int sysctl_slflagmap[] = {
 2001         PSL_TRACED, P_TRACED,
 2002         PSL_CHTRACED, P_CHTRACED,
 2003         PSL_SYSCALL, P_SYSCALL,
 2004         0
 2005 };
 2006 
 2007 static const u_int sysctl_lflagmap[] = {
 2008         PL_CONTROLT, P_CONTROLT,
 2009         PL_PPWAIT, P_PPWAIT,
 2010         0
 2011 };
 2012 
 2013 static const u_int sysctl_stflagmap[] = {
 2014         PST_PROFIL, P_PROFIL,
 2015         0
 2016 
 2017 };
 2018 
 2019 /* used by kern_lwp also */
 2020 const u_int sysctl_lwpflagmap[] = {
 2021         LW_SINTR, L_SINTR,
 2022         LW_SYSTEM, L_SYSTEM,
 2023         0
 2024 };
 2025 
 2026 /*
 2027  * Find the most ``active'' LWP of a process for ps(1) display purposes:
 2028  * rank by state (LSONPROC highest), then by recent CPU ticks (l_cpticks).
 2029  */
 2030 static struct lwp *
 2031 proc_active_lwp(struct proc *p)
 2032 {
 2033         static const int ostat[] = {
 2034                 0,
 2035                 2,      /* LSIDL */
 2036                 6,      /* LSRUN */
 2037                 5,      /* LSSLEEP */
 2038                 4,      /* LSSTOP */
 2039                 0,      /* LSZOMB */
 2040                 1,      /* LSDEAD */
 2041                 7,      /* LSONPROC */
 2042                 3       /* LSSUSPENDED */
 2043         };
 2044 
 2045         struct lwp *l, *lp = NULL;
 2046         LIST_FOREACH(l, &p->p_lwps, l_sibling) {
 2047                 KASSERT(l->l_stat >= 0 && l->l_stat < __arraycount(ostat));
 2048                 if (lp == NULL ||
 2049                     ostat[l->l_stat] > ostat[lp->l_stat] ||
 2050                     (ostat[l->l_stat] == ostat[lp->l_stat] &&
 2051                     l->l_cpticks > lp->l_cpticks)) {
 2052                         lp = l;
 2053                         continue;
 2054                 }
 2055         }
 2056         return lp;
 2057 }
 2058 
 2059 static int
 2060 sysctl_doeproc(SYSCTLFN_ARGS)
 2061 {
 2062         union {
 2063                 struct kinfo_proc kproc;
 2064                 struct kinfo_proc2 kproc2;
 2065         } *kbuf;
 2066         struct proc *p, *next, *marker;
 2067         char *where, *dp;
 2068         int type, op, arg, error;
 2069         u_int elem_size, kelem_size, elem_count;
 2070         size_t buflen, needed;
 2071         bool match, zombie, mmmbrains;
 2072         const bool allowaddr = get_expose_address(curproc);
 2073 
 2074         if (namelen == 1 && name[0] == CTL_QUERY)
 2075                 return (sysctl_query(SYSCTLFN_CALL(rnode)));
 2076 
 2077         dp = where = oldp;
 2078         buflen = where != NULL ? *oldlenp : 0;
 2079         error = 0;
 2080         needed = 0;
 2081         type = rnode->sysctl_num;
 2082 
 2083         if (type == KERN_PROC) {
 2084                 if (namelen == 0)
 2085                         return EINVAL;
 2086                 switch (op = name[0]) {
 2087                 case KERN_PROC_ALL:
 2088                         if (namelen != 1)
 2089                                 return EINVAL;
 2090                         arg = 0;
 2091                         break;
 2092                 default:
 2093                         if (namelen != 2)
 2094                                 return EINVAL;
 2095                         arg = name[1];
 2096                         break;
 2097                 }
 2098                 elem_count = 0; /* Hush little compiler, don't you cry */
 2099                 kelem_size = elem_size = sizeof(kbuf->kproc);
 2100         } else {
 2101                 if (namelen != 4)
 2102                         return EINVAL;
 2103                 op = name[0];
 2104                 arg = name[1];
 2105                 elem_size = name[2];
 2106                 elem_count = name[3];
 2107                 kelem_size = sizeof(kbuf->kproc2);
 2108         }
 2109 
 2110         sysctl_unlock();
 2111 
 2112         kbuf = kmem_zalloc(sizeof(*kbuf), KM_SLEEP);
 2113         marker = kmem_alloc(sizeof(*marker), KM_SLEEP);
 2114         marker->p_flag = PK_MARKER;
 2115 
 2116         mutex_enter(&proc_lock);
 2117         /*
 2118          * Start with zombies to prevent reporting processes twice, in case they
 2119          * are dying and being moved from the list of alive processes to zombies.
 2120          */
 2121         mmmbrains = true;
 2122         for (p = LIST_FIRST(&zombproc);; p = next) {
 2123                 if (p == NULL) {
 2124                         if (mmmbrains) {
 2125                                 p = LIST_FIRST(&allproc);
 2126                                 mmmbrains = false;
 2127                         }
 2128                         if (p == NULL)
 2129                                 break;
 2130                 }
 2131                 next = LIST_NEXT(p, p_list);
 2132                 if ((p->p_flag & PK_MARKER) != 0)
 2133                         continue;
 2134 
 2135                 /*
 2136                  * Skip embryonic processes.
 2137                  */
 2138                 if (p->p_stat == SIDL)
 2139                         continue;
 2140 
 2141                 mutex_enter(p->p_lock);
 2142                 error = kauth_authorize_process(l->l_cred,
 2143                     KAUTH_PROCESS_CANSEE, p,
 2144                     KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_EPROC), NULL, NULL);
 2145                 if (error != 0) {
 2146                         mutex_exit(p->p_lock);
 2147                         continue;
 2148                 }
 2149 
 2150                 /*
 2151                  * Handling all the operations in one switch, at some cost
 2152                  * in algorithmic complexity, is deliberate: splitting this
 2153                  * function into several near-identical copies would add
 2154                  * maintenance burden and code growth for a speedup that is
 2155                  * negligible on practical systems.
 2156                  */
 2157                 switch (op) {
 2158                 case KERN_PROC_PID:
 2159                         match = (p->p_pid == (pid_t)arg);
 2160                         break;
 2161 
 2162                 case KERN_PROC_PGRP:
 2163                         match = (p->p_pgrp->pg_id == (pid_t)arg);
 2164                         break;
 2165 
 2166                 case KERN_PROC_SESSION:
 2167                         match = (p->p_session->s_sid == (pid_t)arg);
 2168                         break;
 2169 
 2170                 case KERN_PROC_TTY:
 2171                         match = true;
 2172                         if (arg == (int) KERN_PROC_TTY_REVOKE) {
 2173                                 if ((p->p_lflag & PL_CONTROLT) == 0 ||
 2174                                     p->p_session->s_ttyp == NULL ||
 2175                                     p->p_session->s_ttyvp != NULL) {
 2176                                         match = false;
 2177                                 }
 2178                         } else if ((p->p_lflag & PL_CONTROLT) == 0 ||
 2179                             p->p_session->s_ttyp == NULL) {
 2180                                 if ((dev_t)arg != KERN_PROC_TTY_NODEV) {
 2181                                         match = false;
 2182                                 }
 2183                         } else if (p->p_session->s_ttyp->t_dev != (dev_t)arg) {
 2184                                 match = false;
 2185                         }
 2186                         break;
 2187 
 2188                 case KERN_PROC_UID:
 2189                         match = (kauth_cred_geteuid(p->p_cred) == (uid_t)arg);
 2190                         break;
 2191 
 2192                 case KERN_PROC_RUID:
 2193                         match = (kauth_cred_getuid(p->p_cred) == (uid_t)arg);
 2194                         break;
 2195 
 2196                 case KERN_PROC_GID:
 2197                         match = (kauth_cred_getegid(p->p_cred) == (gid_t)arg);
 2198                         break;
 2199 
 2200                 case KERN_PROC_RGID:
 2201                         match = (kauth_cred_getgid(p->p_cred) == (gid_t)arg);
 2202                         break;
 2203 
 2204                 case KERN_PROC_ALL:
 2205                         match = true;
 2206                         /* allow everything */
 2207                         break;
 2208 
 2209                 default:
 2210                         error = EINVAL;
 2211                         mutex_exit(p->p_lock);
 2212                         goto cleanup;
 2213                 }
 2214                 if (!match) {
 2215                         mutex_exit(p->p_lock);
 2216                         continue;
 2217                 }
 2218 
 2219                 /*
 2220                  * Grab a hold on the process.
 2221                  */
 2222                 if (mmmbrains) {
 2223                         zombie = true;
 2224                 } else {
 2225                         zombie = !rw_tryenter(&p->p_reflock, RW_READER);
 2226                 }
 2227                 if (zombie) {
 2228                         LIST_INSERT_AFTER(p, marker, p_list);
 2229                 }
 2230 
 2231                 if (buflen >= elem_size &&
 2232                     (type == KERN_PROC || elem_count > 0)) {
 2233                         ruspace(p);     /* Update process vm resource use */
 2234 
 2235                         if (type == KERN_PROC) {
 2236                                 fill_proc(p, &kbuf->kproc.kp_proc, allowaddr);
 2237                                 fill_eproc(p, &kbuf->kproc.kp_eproc, zombie,
 2238                                     allowaddr);
 2239                         } else {
 2240                                 fill_kproc2(p, &kbuf->kproc2, zombie,
 2241                                     allowaddr);
 2242                                 elem_count--;
 2243                         }
 2244                         mutex_exit(p->p_lock);
 2245                         mutex_exit(&proc_lock);
 2246                         /*
 2247                          * Copy out elem_size, but not larger than kelem_size
 2248                          */
 2249                         error = sysctl_copyout(l, kbuf, dp,
 2250                             uimin(kelem_size, elem_size));
 2251                         mutex_enter(&proc_lock);
 2252                         if (error) {
 2253                                 goto bah;
 2254                         }
 2255                         dp += elem_size;
 2256                         buflen -= elem_size;
 2257                 } else {
 2258                         mutex_exit(p->p_lock);
 2259                 }
 2260                 needed += elem_size;
 2261 
 2262                 /*
 2263                  * Release reference to process.
 2264                  */
 2265                 if (zombie) {
 2266                         next = LIST_NEXT(marker, p_list);
 2267                         LIST_REMOVE(marker, p_list);
 2268                 } else {
 2269                         rw_exit(&p->p_reflock);
 2270                         next = LIST_NEXT(p, p_list);
 2271                 }
 2272 
 2273                 /*
 2274                  * PIDs are unique, so stop after the first match.
 2275                  */
 2276                 if (op == KERN_PROC_PID)
 2277                         break;
 2278         }
 2279         mutex_exit(&proc_lock);
 2280 
 2281         if (where != NULL) {
 2282                 *oldlenp = dp - where;
 2283                 if (needed > *oldlenp) {
 2284                         error = ENOMEM;
 2285                         goto out;
 2286                 }
 2287         } else {
 2288                 needed += KERN_PROCSLOP;
 2289                 *oldlenp = needed;
 2290         }
 2291         kmem_free(kbuf, sizeof(*kbuf));
 2292         kmem_free(marker, sizeof(*marker));
 2293         sysctl_relock();
 2294         return 0;
 2295  bah:
 2296         if (zombie)
 2297                 LIST_REMOVE(marker, p_list);
 2298         else
 2299                 rw_exit(&p->p_reflock);
 2300  cleanup:
 2301         mutex_exit(&proc_lock);
 2302  out:
 2303         kmem_free(kbuf, sizeof(*kbuf));
 2304         kmem_free(marker, sizeof(*marker));
 2305         sysctl_relock();
 2306         return error;
 2307 }
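/*
 * Userland usage sketch (illustrative): the KERN_PROC2 node handled above
 * is normally consumed via sysctl(3), e.g. fetching one process by PID:
 */
#if 0
	struct kinfo_proc2 kp;
	size_t len = sizeof(kp);
	int mib[6] = { CTL_KERN, KERN_PROC2, KERN_PROC_PID, getpid(),
	    sizeof(kp), 1 };

	if (sysctl(mib, 6, &kp, &len, NULL, 0) == -1)
		err(EXIT_FAILURE, "sysctl");
	printf("%s: ppid %d\n", kp.p_comm, (int)kp.p_ppid);
#endif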
 2308 
 2309 int
 2310 copyin_psstrings(struct proc *p, struct ps_strings *arginfo)
 2311 {
 2312 #if !defined(_RUMPKERNEL)
 2313         int retval;
 2314 
 2315         if (p->p_flag & PK_32) {
 2316                 MODULE_HOOK_CALL(kern_proc32_copyin_hook, (p, arginfo),
 2317                     enosys(), retval);
 2318                 return retval;
 2319         }
 2320 #endif /* !defined(_RUMPKERNEL) */
 2321 
 2322         return copyin_proc(p, (void *)p->p_psstrp, arginfo, sizeof(*arginfo));
 2323 }
 2324 
 2325 static int
 2326 copy_procargs_sysctl_cb(void *cookie_, const void *src, size_t off, size_t len)
 2327 {
 2328         void **cookie = cookie_;
 2329         struct lwp *l = cookie[0];
 2330         char *dst = cookie[1];
 2331 
 2332         return sysctl_copyout(l, src, dst + off, len);
 2333 }
 2334 
 2335 /*
 2336  * sysctl helper routine for kern.proc_args pseudo-subtree.
 2337  */
 2338 static int
 2339 sysctl_kern_proc_args(SYSCTLFN_ARGS)
 2340 {
 2341         struct ps_strings pss;
 2342         struct proc *p;
 2343         pid_t pid;
 2344         int type, error;
 2345         void *cookie[2];
 2346 
 2347         if (namelen == 1 && name[0] == CTL_QUERY)
 2348                 return (sysctl_query(SYSCTLFN_CALL(rnode)));
 2349 
 2350         if (newp != NULL || namelen != 2)
 2351                 return (EINVAL);
 2352         pid = name[0];
 2353         type = name[1];
 2354 
 2355         switch (type) {
 2356         case KERN_PROC_PATHNAME:
 2357                 sysctl_unlock();
 2358                 error = fill_pathname(l, pid, oldp, oldlenp);
 2359                 sysctl_relock();
 2360                 return error;
 2361 
 2362         case KERN_PROC_CWD:
 2363                 sysctl_unlock();
 2364                 error = fill_cwd(l, pid, oldp, oldlenp);
 2365                 sysctl_relock();
 2366                 return error;
 2367 
 2368         case KERN_PROC_ARGV:
 2369         case KERN_PROC_NARGV:
 2370         case KERN_PROC_ENV:
 2371         case KERN_PROC_NENV:
 2372                 /* ok */
 2373                 break;
 2374         default:
 2375                 return (EINVAL);
 2376         }
 2377 
 2378         sysctl_unlock();
 2379 
 2380         /* check pid */
 2381         mutex_enter(&proc_lock);
 2382         if ((p = proc_find(pid)) == NULL) {
 2383                 error = EINVAL;
 2384                 goto out_locked;
 2385         }
 2386         mutex_enter(p->p_lock);
 2387 
 2388         /* Check permission. */
 2389         if (type == KERN_PROC_ARGV || type == KERN_PROC_NARGV)
 2390                 error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE,
 2391                     p, KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ARGS), NULL, NULL);
 2392         else if (type == KERN_PROC_ENV || type == KERN_PROC_NENV)
 2393                 error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE,
 2394                     p, KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENV), NULL, NULL);
 2395         else
 2396                 error = EINVAL; /* XXXGCC */
 2397         if (error) {
 2398                 mutex_exit(p->p_lock);
 2399                 goto out_locked;
 2400         }
 2401 
 2402         if (oldp == NULL) {
 2403                 if (type == KERN_PROC_NARGV || type == KERN_PROC_NENV)
 2404                         *oldlenp = sizeof (int);
 2405                 else
 2406                         *oldlenp = ARG_MAX;     /* XXX XXX XXX */
 2407                 error = 0;
 2408                 mutex_exit(p->p_lock);
 2409                 goto out_locked;
 2410         }
 2411 
 2412         /*
 2413          * Zombies don't have a stack, so we can't read their psstrings.
 2414          * System processes also don't have a user stack.
 2415          */
 2416         if (P_ZOMBIE(p) || (p->p_flag & PK_SYSTEM) != 0) {
 2417                 error = EINVAL;
 2418                 mutex_exit(p->p_lock);
 2419                 goto out_locked;
 2420         }
 2421 
 2422         error = rw_tryenter(&p->p_reflock, RW_READER) ? 0 : EBUSY;
 2423         mutex_exit(p->p_lock);
 2424         if (error) {
 2425                 goto out_locked;
 2426         }
 2427         mutex_exit(&proc_lock);
 2428 
 2429         if (type == KERN_PROC_NARGV || type == KERN_PROC_NENV) {
 2430                 int value;
 2431                 if ((error = copyin_psstrings(p, &pss)) == 0) {
 2432                         if (type == KERN_PROC_NARGV)
 2433                                 value = pss.ps_nargvstr;
 2434                         else
 2435                                 value = pss.ps_nenvstr;
 2436                         error = sysctl_copyout(l, &value, oldp, sizeof(value));
 2437                         *oldlenp = sizeof(value);
 2438                 }
 2439         } else {
 2440                 cookie[0] = l;
 2441                 cookie[1] = oldp;
 2442                 error = copy_procargs(p, type, oldlenp,
 2443                     copy_procargs_sysctl_cb, cookie);
 2444         }
 2445         rw_exit(&p->p_reflock);
 2446         sysctl_relock();
 2447         return error;
 2448 
 2449 out_locked:
 2450         mutex_exit(&proc_lock);
 2451         sysctl_relock();
 2452         return error;
 2453 }
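/*
 * Userland usage sketch (illustrative): fetching a process's argument
 * vector through this handler, probing the needed size with oldp == NULL
 * first (the handler then reports ARG_MAX as an upper bound):
 */
#if 0
	int mib[4] = { CTL_KERN, KERN_PROC_ARGS, pid, KERN_PROC_ARGV };
	size_t len;

	if (sysctl(mib, 4, NULL, &len, NULL, 0) == -1)	/* probe size */
		err(EXIT_FAILURE, "sysctl");
	char *buf = malloc(len);
	if (buf == NULL || sysctl(mib, 4, buf, &len, NULL, 0) == -1)
		err(EXIT_FAILURE, "kern.proc_args");
#endif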
 2454 
 2455 int
 2456 copy_procargs(struct proc *p, int oid, size_t *limit,
 2457     int (*cb)(void *, const void *, size_t, size_t), void *cookie)
 2458 {
 2459         struct ps_strings pss;
 2460         size_t len, i, loaded, entry_len;
 2461         struct uio auio;
 2462         struct iovec aiov;
 2463         int error, argvlen;
 2464         char *arg;
 2465         char **argv;
 2466         vaddr_t user_argv;
 2467         struct vmspace *vmspace;
 2468 
 2469         /*
 2470          * Allocate a temporary buffer to hold the argument vector and
 2471          * the arguments themselves.
 2472          */
 2473         arg = kmem_alloc(PAGE_SIZE, KM_SLEEP);
 2474         argv = kmem_alloc(PAGE_SIZE, KM_SLEEP);
 2475 
 2476         /*
 2477          * Hold a reference on the process's vmspace.
 2478          */
 2479         vmspace = p->p_vmspace;
 2480         uvmspace_addref(vmspace);
 2481 
 2482         /*
 2483          * Read in the ps_strings structure.
 2484          */
 2485         if ((error = copyin_psstrings(p, &pss)) != 0)
 2486                 goto done;
 2487 
 2488         /*
 2489          * Now read the address of the argument vector.
 2490          */
 2491         switch (oid) {
 2492         case KERN_PROC_ARGV:
 2493                 user_argv = (uintptr_t)pss.ps_argvstr;
 2494                 argvlen = pss.ps_nargvstr;
 2495                 break;
 2496         case KERN_PROC_ENV:
 2497                 user_argv = (uintptr_t)pss.ps_envstr;
 2498                 argvlen = pss.ps_nenvstr;
 2499                 break;
 2500         default:
 2501                 error = EINVAL;
 2502                 goto done;
 2503         }
 2504 
 2505         if (argvlen < 0) {
 2506                 error = EIO;
 2507                 goto done;
 2508         }
 2509 
 2510 
 2511         /*
 2512          * Now copy each string.
 2513          */
 2514         len = 0; /* bytes written to user buffer */
 2515         loaded = 0; /* bytes from argv already processed */
 2516         i = 0; /* To make compiler happy */
 2517         entry_len = PROC_PTRSZ(p);
 2518 
 2519         for (; argvlen; --argvlen) {
 2520                 int finished = 0;
 2521                 vaddr_t base;
 2522                 size_t xlen;
 2523                 int j;
 2524 
 2525                 if (loaded == 0) {
 2526                         size_t rem = entry_len * argvlen;
 2527                         loaded = MIN(rem, PAGE_SIZE);
 2528                         error = copyin_vmspace(vmspace,
 2529                             (const void *)user_argv, argv, loaded);
 2530                         if (error)
 2531                                 break;
 2532                         user_argv += loaded;
 2533                         i = 0;
 2534                 }
 2535 
 2536 #if !defined(_RUMPKERNEL)
 2537                 if (p->p_flag & PK_32)
 2538                         MODULE_HOOK_CALL(kern_proc32_base_hook,
 2539                             (argv, i++), 0, base);
 2540                 else
 2541 #endif /* !defined(_RUMPKERNEL) */
 2542                         base = (vaddr_t)argv[i++];
 2543                 loaded -= entry_len;
 2544 
 2545                 /*
 2546                  * The program has messed around with its arguments,
 2547                  * possibly deleting some, and replacing them with
 2548                  * NULLs.  Treat this as the last argument and not
 2549                  * a failure.
 2550                  */
 2551                 if (base == 0)
 2552                         break;
 2553 
 2554                 while (!finished) {
 2555                         xlen = PAGE_SIZE - (base & PAGE_MASK);
 2556 
 2557                         aiov.iov_base = arg;
 2558                         aiov.iov_len = PAGE_SIZE;
 2559                         auio.uio_iov = &aiov;
 2560                         auio.uio_iovcnt = 1;
 2561                         auio.uio_offset = base;
 2562                         auio.uio_resid = xlen;
 2563                         auio.uio_rw = UIO_READ;
 2564                         UIO_SETUP_SYSSPACE(&auio);
 2565                         error = uvm_io(&vmspace->vm_map, &auio, 0);
 2566                         if (error)
 2567                                 goto done;
 2568 
 2569                         /* Look for the end of the string */
 2570                         for (j = 0; j < xlen; j++) {
 2571                                 if (arg[j] == '\0') {
 2572                                         xlen = j + 1;
 2573                                         finished = 1;
 2574                                         break;
 2575                                 }
 2576                         }
 2577 
 2578                         /* Check for user buffer overflow */
 2579                         if (len + xlen > *limit) {
 2580                                 finished = 1;
 2581                                 if (len > *limit)
 2582                                         xlen = 0;
 2583                                 else
 2584                                         xlen = *limit - len;
 2585                         }
 2586 
 2587                         /* Copyout the page */
 2588                         error = (*cb)(cookie, arg, len, xlen);
 2589                         if (error)
 2590                                 goto done;
 2591 
 2592                         len += xlen;
 2593                         base += xlen;
 2594                 }
 2595         }
 2596         *limit = len;
 2597 
 2598 done:
 2599         kmem_free(argv, PAGE_SIZE);
 2600         kmem_free(arg, PAGE_SIZE);
 2601         uvmspace_free(vmspace);
 2602         return error;
 2603 }
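/*
 * Note on the loop above: the pointer vector is refilled from the target
 * vmspace one page at a time, and each string is likewise read page by
 * page with uvm_io(), so neither a large argv nor a long string ever
 * needs more than the two PAGE_SIZE kernel buffers allocated here.
 */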
 2604 
 2605 /*
 2606  * Fill in a proc structure for the specified process.
 2607  */
 2608 static void
 2609 fill_proc(const struct proc *psrc, struct proc *p, bool allowaddr)
 2610 {
 2611         COND_SET_STRUCT(p->p_list, psrc->p_list, allowaddr);
 2612         memset(&p->p_auxlock, 0, sizeof(p->p_auxlock));
 2613         COND_SET_STRUCT(p->p_lock, psrc->p_lock, allowaddr);
 2614         memset(&p->p_stmutex, 0, sizeof(p->p_stmutex));
 2615         memset(&p->p_reflock, 0, sizeof(p->p_reflock));
 2616         COND_SET_STRUCT(p->p_waitcv, psrc->p_waitcv, allowaddr);
 2617         COND_SET_STRUCT(p->p_lwpcv, psrc->p_lwpcv, allowaddr);
 2618         COND_SET_PTR(p->p_cred, psrc->p_cred, allowaddr);
 2619         COND_SET_PTR(p->p_fd, psrc->p_fd, allowaddr);
 2620         COND_SET_PTR(p->p_cwdi, psrc->p_cwdi, allowaddr);
 2621         COND_SET_PTR(p->p_stats, psrc->p_stats, allowaddr);
 2622         COND_SET_PTR(p->p_limit, psrc->p_limit, allowaddr);
 2623         COND_SET_PTR(p->p_vmspace, psrc->p_vmspace, allowaddr);
 2624         COND_SET_PTR(p->p_sigacts, psrc->p_sigacts, allowaddr);
 2625         COND_SET_PTR(p->p_aio, psrc->p_aio, allowaddr);
 2626         p->p_mqueue_cnt = psrc->p_mqueue_cnt;
 2627         memset(&p->p_specdataref, 0, sizeof(p->p_specdataref));
 2628         p->p_exitsig = psrc->p_exitsig;
 2629         p->p_flag = psrc->p_flag;
 2630         p->p_sflag = psrc->p_sflag;
 2631         p->p_slflag = psrc->p_slflag;
 2632         p->p_lflag = psrc->p_lflag;
 2633         p->p_stflag = psrc->p_stflag;
 2634         p->p_stat = psrc->p_stat;
 2635         p->p_trace_enabled = psrc->p_trace_enabled;
 2636         p->p_pid = psrc->p_pid;
 2637         COND_SET_STRUCT(p->p_pglist, psrc->p_pglist, allowaddr);
 2638         COND_SET_PTR(p->p_pptr, psrc->p_pptr, allowaddr);
 2639         COND_SET_STRUCT(p->p_sibling, psrc->p_sibling, allowaddr);
 2640         COND_SET_STRUCT(p->p_children, psrc->p_children, allowaddr);
 2641         COND_SET_STRUCT(p->p_lwps, psrc->p_lwps, allowaddr);
 2642         COND_SET_PTR(p->p_raslist, psrc->p_raslist, allowaddr);
 2643         p->p_nlwps = psrc->p_nlwps;
 2644         p->p_nzlwps = psrc->p_nzlwps;
 2645         p->p_nrlwps = psrc->p_nrlwps;
 2646         p->p_nlwpwait = psrc->p_nlwpwait;
 2647         p->p_ndlwps = psrc->p_ndlwps;
 2648         p->p_nstopchild = psrc->p_nstopchild;
 2649         p->p_waited = psrc->p_waited;
 2650         COND_SET_PTR(p->p_zomblwp, psrc->p_zomblwp, allowaddr);
 2651         COND_SET_PTR(p->p_vforklwp, psrc->p_vforklwp, allowaddr);
 2652         COND_SET_PTR(p->p_sched_info, psrc->p_sched_info, allowaddr);
 2653         p->p_estcpu = psrc->p_estcpu;
 2654         p->p_estcpu_inherited = psrc->p_estcpu_inherited;
 2655         p->p_forktime = psrc->p_forktime;
 2656         p->p_pctcpu = psrc->p_pctcpu;
 2657         COND_SET_PTR(p->p_opptr, psrc->p_opptr, allowaddr);
 2658         COND_SET_PTR(p->p_timers, psrc->p_timers, allowaddr);
 2659         p->p_rtime = psrc->p_rtime;
 2660         p->p_uticks = psrc->p_uticks;
 2661         p->p_sticks = psrc->p_sticks;
 2662         p->p_iticks = psrc->p_iticks;
 2663         p->p_xutime = psrc->p_xutime;
 2664         p->p_xstime = psrc->p_xstime;
 2665         p->p_traceflag = psrc->p_traceflag;
 2666         COND_SET_PTR(p->p_tracep, psrc->p_tracep, allowaddr);
 2667         COND_SET_PTR(p->p_textvp, psrc->p_textvp, allowaddr);
 2668         COND_SET_PTR(p->p_emul, psrc->p_emul, allowaddr);
 2669         COND_SET_PTR(p->p_emuldata, psrc->p_emuldata, allowaddr);
 2670         COND_SET_CPTR(p->p_execsw, psrc->p_execsw, allowaddr);
 2671         COND_SET_STRUCT(p->p_klist, psrc->p_klist, allowaddr);
 2672         COND_SET_STRUCT(p->p_sigwaiters, psrc->p_sigwaiters, allowaddr);
 2673         COND_SET_STRUCT(p->p_sigpend.sp_info, psrc->p_sigpend.sp_info,
 2674             allowaddr);
 2675         p->p_sigpend.sp_set = psrc->p_sigpend.sp_set;
 2676         COND_SET_PTR(p->p_lwpctl, psrc->p_lwpctl, allowaddr);
 2677         p->p_ppid = psrc->p_ppid;
 2678         p->p_oppid = psrc->p_oppid;
 2679         COND_SET_PTR(p->p_path, psrc->p_path, allowaddr);
 2680         p->p_sigctx = psrc->p_sigctx;
 2681         p->p_nice = psrc->p_nice;
 2682         memcpy(p->p_comm, psrc->p_comm, sizeof(p->p_comm));
 2683         COND_SET_PTR(p->p_pgrp, psrc->p_pgrp, allowaddr);
 2684         COND_SET_VALUE(p->p_psstrp, psrc->p_psstrp, allowaddr);
 2685         p->p_pax = psrc->p_pax;
 2686         p->p_xexit = psrc->p_xexit;
 2687         p->p_xsig = psrc->p_xsig;
 2688         p->p_acflag = psrc->p_acflag;
 2689         COND_SET_STRUCT(p->p_md, psrc->p_md, allowaddr);
 2690         p->p_stackbase = psrc->p_stackbase;
 2691         COND_SET_PTR(p->p_dtrace, psrc->p_dtrace, allowaddr);
 2692 }
 2693 
 2694 /*
 2695  * Fill in an eproc structure for the specified process.
 2696  */
 2697 void
 2698 fill_eproc(struct proc *p, struct eproc *ep, bool zombie, bool allowaddr)
 2699 {
 2700         struct tty *tp;
 2701         struct lwp *l;
 2702 
 2703         KASSERT(mutex_owned(&proc_lock));
 2704         KASSERT(mutex_owned(p->p_lock));
 2705 
 2706         COND_SET_PTR(ep->e_paddr, p, allowaddr);
 2707         COND_SET_PTR(ep->e_sess, p->p_session, allowaddr);
 2708         if (p->p_cred) {
 2709                 kauth_cred_topcred(p->p_cred, &ep->e_pcred);
 2710                 kauth_cred_toucred(p->p_cred, &ep->e_ucred);
 2711         }
 2712         if (p->p_stat != SIDL && !P_ZOMBIE(p) && !zombie) {
 2713                 struct vmspace *vm = p->p_vmspace;
 2714 
 2715                 ep->e_vm.vm_rssize = vm_resident_count(vm);
 2716                 ep->e_vm.vm_tsize = vm->vm_tsize;
 2717                 ep->e_vm.vm_dsize = vm->vm_dsize;
 2718                 ep->e_vm.vm_ssize = vm->vm_ssize;
 2719                 ep->e_vm.vm_map.size = vm->vm_map.size;
 2720 
 2721                 /* Pick the most active LWP for display. */
 2722                 l = proc_active_lwp(p);
 2723                 KASSERT(l != NULL);
 2724                 lwp_lock(l);
 2725                 if (l->l_wchan)
 2726                         strncpy(ep->e_wmesg, l->l_wmesg, WMESGLEN);
 2727                 lwp_unlock(l);
 2728         }
 2729         ep->e_ppid = p->p_ppid;
 2730         if (p->p_pgrp && p->p_session) {
 2731                 ep->e_pgid = p->p_pgrp->pg_id;
 2732                 ep->e_jobc = p->p_pgrp->pg_jobc;
 2733                 ep->e_sid = p->p_session->s_sid;
 2734                 if ((p->p_lflag & PL_CONTROLT) &&
 2735                     (tp = p->p_session->s_ttyp)) {
 2736                         ep->e_tdev = tp->t_dev;
 2737                         ep->e_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PGID;
 2738                         COND_SET_PTR(ep->e_tsess, tp->t_session, allowaddr);
 2739                 } else
 2740                         ep->e_tdev = (uint32_t)NODEV;
 2741                 ep->e_flag = p->p_session->s_ttyvp ? EPROC_CTTY : 0;
 2742                 if (SESS_LEADER(p))
 2743                         ep->e_flag |= EPROC_SLEADER;
 2744                 strncpy(ep->e_login, p->p_session->s_login, MAXLOGNAME);
 2745         }
 2746         ep->e_xsize = ep->e_xrssize = 0;
 2747         ep->e_xccount = ep->e_xswrss = 0;
 2748 }
 2749 
 2750 /*
 2751  * Fill in a kinfo_proc2 structure for the specified process.
 2752  */
 2753 void
 2754 fill_kproc2(struct proc *p, struct kinfo_proc2 *ki, bool zombie, bool allowaddr)
 2755 {
 2756         struct tty *tp;
 2757         struct lwp *l, *l2;
 2758         struct timeval ut, st, rt;
 2759         sigset_t ss1, ss2;
 2760         struct rusage ru;
 2761         struct vmspace *vm;
 2762 
 2763         KASSERT(mutex_owned(&proc_lock));
 2764         KASSERT(mutex_owned(p->p_lock));
 2765 
 2766         sigemptyset(&ss1);
 2767         sigemptyset(&ss2);
 2768 
 2769         COND_SET_VALUE(ki->p_paddr, PTRTOUINT64(p), allowaddr);
 2770         COND_SET_VALUE(ki->p_fd, PTRTOUINT64(p->p_fd), allowaddr);
 2771         COND_SET_VALUE(ki->p_cwdi, PTRTOUINT64(p->p_cwdi), allowaddr);
 2772         COND_SET_VALUE(ki->p_stats, PTRTOUINT64(p->p_stats), allowaddr);
 2773         COND_SET_VALUE(ki->p_limit, PTRTOUINT64(p->p_limit), allowaddr);
 2774         COND_SET_VALUE(ki->p_vmspace, PTRTOUINT64(p->p_vmspace), allowaddr);
 2775         COND_SET_VALUE(ki->p_sigacts, PTRTOUINT64(p->p_sigacts), allowaddr);
 2776         COND_SET_VALUE(ki->p_sess, PTRTOUINT64(p->p_session), allowaddr);
 2777         ki->p_tsess = 0;        /* may be changed if controlling tty below */
 2778         COND_SET_VALUE(ki->p_ru, PTRTOUINT64(&p->p_stats->p_ru), allowaddr);
 2779         ki->p_eflag = 0;
 2780         ki->p_exitsig = p->p_exitsig;
 2781         ki->p_flag = L_INMEM;   /* Process never swapped out */
 2782         ki->p_flag |= sysctl_map_flags(sysctl_flagmap, p->p_flag);
 2783         ki->p_flag |= sysctl_map_flags(sysctl_sflagmap, p->p_sflag);
 2784         ki->p_flag |= sysctl_map_flags(sysctl_slflagmap, p->p_slflag);
 2785         ki->p_flag |= sysctl_map_flags(sysctl_lflagmap, p->p_lflag);
 2786         ki->p_flag |= sysctl_map_flags(sysctl_stflagmap, p->p_stflag);
 2787         ki->p_pid = p->p_pid;
 2788         ki->p_ppid = p->p_ppid;
 2789         ki->p_uid = kauth_cred_geteuid(p->p_cred);
 2790         ki->p_ruid = kauth_cred_getuid(p->p_cred);
 2791         ki->p_gid = kauth_cred_getegid(p->p_cred);
 2792         ki->p_rgid = kauth_cred_getgid(p->p_cred);
 2793         ki->p_svuid = kauth_cred_getsvuid(p->p_cred);
 2794         ki->p_svgid = kauth_cred_getsvgid(p->p_cred);
 2795         ki->p_ngroups = kauth_cred_ngroups(p->p_cred);
 2796         kauth_cred_getgroups(p->p_cred, ki->p_groups,
 2797             uimin(ki->p_ngroups, sizeof(ki->p_groups) / sizeof(ki->p_groups[0])),
 2798             UIO_SYSSPACE);
 2799 
 2800         ki->p_uticks = p->p_uticks;
 2801         ki->p_sticks = p->p_sticks;
 2802         ki->p_iticks = p->p_iticks;
 2803         ki->p_tpgid = NO_PGID;  /* may be changed if controlling tty below */
 2804         COND_SET_VALUE(ki->p_tracep, PTRTOUINT64(p->p_tracep), allowaddr);
 2805         ki->p_traceflag = p->p_traceflag;
 2806 
 2807         memcpy(&ki->p_sigignore, &p->p_sigctx.ps_sigignore,sizeof(ki_sigset_t));
 2808         memcpy(&ki->p_sigcatch, &p->p_sigctx.ps_sigcatch, sizeof(ki_sigset_t));
 2809 
 2810         ki->p_cpticks = 0;
 2811         ki->p_pctcpu = p->p_pctcpu;
 2812         ki->p_estcpu = 0;
 2813         ki->p_stat = p->p_stat; /* Will likely be overridden by LWP status */
 2814         ki->p_realstat = p->p_stat;
 2815         ki->p_nice = p->p_nice;
 2816         ki->p_xstat = P_WAITSTATUS(p);
 2817         ki->p_acflag = p->p_acflag;
 2818 
 2819         strncpy(ki->p_comm, p->p_comm,
 2820             uimin(sizeof(ki->p_comm), sizeof(p->p_comm)));
 2821         strncpy(ki->p_ename, p->p_emul->e_name, sizeof(ki->p_ename));
 2822 
 2823         ki->p_nlwps = p->p_nlwps;
 2824         ki->p_realflag = ki->p_flag;
 2825 
 2826         if (p->p_stat != SIDL && !P_ZOMBIE(p) && !zombie) {
 2827                 vm = p->p_vmspace;
 2828                 ki->p_vm_rssize = vm_resident_count(vm);
 2829                 ki->p_vm_tsize = vm->vm_tsize;
 2830                 ki->p_vm_dsize = vm->vm_dsize;
 2831                 ki->p_vm_ssize = vm->vm_ssize;
 2832                 ki->p_vm_vsize = atop(vm->vm_map.size);
 2833                 /*
 2834                  * Since the stack is initially mapped mostly with
 2835                  * PROT_NONE and grown as needed, adjust the "mapped size"
 2836                  * to skip the unused stack portion.
 2837                  */
 2838                 ki->p_vm_msize =
 2839                     atop(vm->vm_map.size) - vm->vm_issize + vm->vm_ssize;
 2840 
 2841                 /* Pick the most active LWP for display. */
 2842                 l = proc_active_lwp(p);
 2843                 KASSERT(l != NULL);
 2844                 lwp_lock(l);
 2845                 ki->p_nrlwps = p->p_nrlwps;
 2846                 ki->p_forw = 0;
 2847                 ki->p_back = 0;
 2848                 COND_SET_VALUE(ki->p_addr, PTRTOUINT64(l->l_addr), allowaddr);
 2849                 ki->p_stat = l->l_stat;
 2850                 ki->p_flag |= sysctl_map_flags(sysctl_lwpflagmap, l->l_flag);
 2851                 ki->p_swtime = l->l_swtime;
 2852                 ki->p_slptime = l->l_slptime;
 2853                 if (l->l_stat == LSONPROC)
 2854                         ki->p_schedflags = l->l_cpu->ci_schedstate.spc_flags;
 2855                 else
 2856                         ki->p_schedflags = 0;
 2857                 ki->p_priority = lwp_eprio(l);
 2858                 ki->p_usrpri = l->l_priority;
 2859                 if (l->l_wchan)
 2860                         strncpy(ki->p_wmesg, l->l_wmesg, sizeof(ki->p_wmesg));
 2861                 COND_SET_VALUE(ki->p_wchan, PTRTOUINT64(l->l_wchan), allowaddr);
 2862                 ki->p_cpuid = cpu_index(l->l_cpu);
 2863                 lwp_unlock(l);
 2864                 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
 2865                         /* This is hardly correct, but... */
 2866                         sigplusset(&l->l_sigpend.sp_set, &ss1);
 2867                         sigplusset(&l->l_sigmask, &ss2);
 2868                         ki->p_cpticks += l->l_cpticks;
 2869                         ki->p_pctcpu += l->l_pctcpu;
 2870                         ki->p_estcpu += l->l_estcpu;
 2871                 }
 2872         }
 2873         sigplusset(&p->p_sigpend.sp_set, &ss1);
 2874         memcpy(&ki->p_siglist, &ss1, sizeof(ki_sigset_t));
 2875         memcpy(&ki->p_sigmask, &ss2, sizeof(ki_sigset_t));
 2876 
 2877         if (p->p_session != NULL) {
 2878                 ki->p_sid = p->p_session->s_sid;
 2879                 ki->p__pgid = p->p_pgrp->pg_id;
 2880                 if (p->p_session->s_ttyvp)
 2881                         ki->p_eflag |= EPROC_CTTY;
 2882                 if (SESS_LEADER(p))
 2883                         ki->p_eflag |= EPROC_SLEADER;
 2884                 strncpy(ki->p_login, p->p_session->s_login,
 2885                     uimin(sizeof ki->p_login - 1, sizeof p->p_session->s_login));
 2886                 ki->p_jobc = p->p_pgrp->pg_jobc;
 2887                 if ((p->p_lflag & PL_CONTROLT) && (tp = p->p_session->s_ttyp)) {
 2888                         ki->p_tdev = tp->t_dev;
 2889                         ki->p_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PGID;
 2890                         COND_SET_VALUE(ki->p_tsess, PTRTOUINT64(tp->t_session),
 2891                             allowaddr);
 2892                 } else {
 2893                         ki->p_tdev = (int32_t)NODEV;
 2894                 }
 2895         }
 2896 
 2897         if (!P_ZOMBIE(p) && !zombie) {
 2898                 ki->p_uvalid = 1;
 2899                 ki->p_ustart_sec = p->p_stats->p_start.tv_sec;
 2900                 ki->p_ustart_usec = p->p_stats->p_start.tv_usec;
 2901 
 2902                 calcru(p, &ut, &st, NULL, &rt);
 2903                 ki->p_rtime_sec = rt.tv_sec;
 2904                 ki->p_rtime_usec = rt.tv_usec;
 2905                 ki->p_uutime_sec = ut.tv_sec;
 2906                 ki->p_uutime_usec = ut.tv_usec;
 2907                 ki->p_ustime_sec = st.tv_sec;
 2908                 ki->p_ustime_usec = st.tv_usec;
 2909 
 2910                 memcpy(&ru, &p->p_stats->p_ru, sizeof(ru));
 2911                 ki->p_uru_nvcsw = 0;
 2912                 ki->p_uru_nivcsw = 0;
 2913                 LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
 2914                         ki->p_uru_nvcsw += (l2->l_ncsw - l2->l_nivcsw);
 2915                         ki->p_uru_nivcsw += l2->l_nivcsw;
 2916                         ruadd(&ru, &l2->l_ru);
 2917                 }
 2918                 ki->p_uru_maxrss = ru.ru_maxrss;
 2919                 ki->p_uru_ixrss = ru.ru_ixrss;
 2920                 ki->p_uru_idrss = ru.ru_idrss;
 2921                 ki->p_uru_isrss = ru.ru_isrss;
 2922                 ki->p_uru_minflt = ru.ru_minflt;
 2923                 ki->p_uru_majflt = ru.ru_majflt;
 2924                 ki->p_uru_nswap = ru.ru_nswap;
 2925                 ki->p_uru_inblock = ru.ru_inblock;
 2926                 ki->p_uru_oublock = ru.ru_oublock;
 2927                 ki->p_uru_msgsnd = ru.ru_msgsnd;
 2928                 ki->p_uru_msgrcv = ru.ru_msgrcv;
 2929                 ki->p_uru_nsignals = ru.ru_nsignals;
 2930 
 2931                 timeradd(&p->p_stats->p_cru.ru_utime,
 2932                          &p->p_stats->p_cru.ru_stime, &ut);
 2933                 ki->p_uctime_sec = ut.tv_sec;
 2934                 ki->p_uctime_usec = ut.tv_usec;
 2935         }
 2936 }
 2937 
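The routine above finishes populating a struct kinfo_proc2, the record that userland retrieves through the KERN_PROC2 sysctl. As an illustration only (a minimal sketch following the six-element mib layout documented in NetBSD's sysctl(3)/sysctl(7), not code from this file), a process's record could be fetched and printed as below; note that p_wmesg was copied with strncpy() above and so may lack a terminating NUL, hence the bounded format.

#include <sys/param.h>
#include <sys/sysctl.h>
#include <stdio.h>

static int
print_kinfo2(pid_t pid)
{
        struct kinfo_proc2 kp;
        size_t len = sizeof(kp);
        int mib[6] = { CTL_KERN, KERN_PROC2, KERN_PROC_PID, (int)pid,
            (int)sizeof(kp), 1 };

        if (sysctl(mib, 6, &kp, &len, NULL, 0) == -1)
                return -1;
        /* p_wmesg may be unterminated; bound the width explicitly. */
        printf("pid %d stat %d wmesg %.*s\n", (int)pid, (int)kp.p_stat,
            (int)sizeof(kp.p_wmesg), kp.p_wmesg);
        return 0;
}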
 2938 
 2939 int
 2940 proc_find_locked(struct lwp *l, struct proc **p, pid_t pid)
 2941 {
 2942         int error;
 2943 
 2944         mutex_enter(&proc_lock);
 2945         if (pid == -1)
 2946                 *p = l->l_proc;
 2947         else
 2948                 *p = proc_find(pid);
 2949 
 2950         if (*p == NULL) {
 2951                 if (pid != -1)
 2952                         mutex_exit(&proc_lock);
 2953                 return ESRCH;
 2954         }
 2955         if (pid != -1)
 2956                 mutex_enter((*p)->p_lock);
 2957         mutex_exit(&proc_lock);
 2958 
 2959         error = kauth_authorize_process(l->l_cred,
 2960             KAUTH_PROCESS_CANSEE, *p,
 2961             KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL);
 2962         if (error) {
 2963                 if (pid != -1)
 2964                         mutex_exit((*p)->p_lock);
 2965         }
 2966         return error;
 2967 }
 2968 
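proc_find_locked() returns with the target's p_lock held on success, except when pid is -1, in which case it resolves to the caller's own process and no lock is taken. A sketch of the expected caller pattern (illustrative; example_inspect is an invented name), mirroring what fill_pathname() below does:

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/mutex.h>

static int
example_inspect(struct lwp *l, pid_t pid)
{
        struct proc *p;
        int error;

        if ((error = proc_find_locked(l, &p, pid)) != 0)
                return error;

        /*
         * p_lock is held here (unless pid was -1, i.e. our own
         * process), so the target cannot exit while inspected.
         */

        if (pid != -1)
                mutex_exit(p->p_lock);
        return 0;
}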
 2969 static int
 2970 fill_pathname(struct lwp *l, pid_t pid, void *oldp, size_t *oldlenp)
 2971 {
 2972         int error;
 2973         struct proc *p;
 2974 
 2975         if ((error = proc_find_locked(l, &p, pid)) != 0)
 2976                 return error;
 2977 
 2978         if (p->p_path == NULL) {
 2979                 if (pid != -1)
 2980                         mutex_exit(p->p_lock);
 2981                 return ENOENT;
 2982         }
 2983 
 2984         size_t len = strlen(p->p_path) + 1;
 2985         if (oldp != NULL) {
 2986                 size_t copylen = uimin(len, *oldlenp);
 2987                 error = sysctl_copyout(l, p->p_path, oldp, copylen);
 2988                 if (error == 0 && *oldlenp < len)
 2989                         error = ENOSPC;
 2990         }
 2991         *oldlenp = len;
 2992         if (pid != -1)
 2993                 mutex_exit(p->p_lock);
 2994         return error;
 2995 }
 2996 
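Handlers like fill_pathname() support the usual two-call sysctl protocol: a first call with oldp == NULL reports the required length in *oldlenp, and a second call copies the data out. A hedged userland sketch, assuming the KERN_PROC_ARGS/KERN_PROC_PATHNAME mib names from NetBSD's sysctl(7) route to this handler:

#include <sys/param.h>
#include <sys/sysctl.h>
#include <stdlib.h>

static char *
get_proc_path(pid_t pid)
{
        int mib[4] = { CTL_KERN, KERN_PROC_ARGS, (int)pid,
            KERN_PROC_PATHNAME };
        size_t len = 0;
        char *buf;

        /* First call: oldp == NULL, the handler reports the length. */
        if (sysctl(mib, 4, NULL, &len, NULL, 0) == -1)
                return NULL;
        if ((buf = malloc(len)) == NULL)
                return NULL;
        /* Second call: copy out the NUL-terminated path. */
        if (sysctl(mib, 4, buf, &len, NULL, 0) == -1) {
                free(buf);
                return NULL;
        }
        return buf;             /* caller frees */
}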
 2997 static int
 2998 fill_cwd(struct lwp *l, pid_t pid, void *oldp, size_t *oldlenp)
 2999 {
 3000         int error;
 3001         struct proc *p;
 3002         char *path;
 3003         char *bp, *bend;
 3004         struct cwdinfo *cwdi;
 3005         struct vnode *vp;
 3006         size_t len, lenused;
 3007 
 3008         if ((error = proc_find_locked(l, &p, pid)) != 0)
 3009                 return error;
 3010 
 3011         len = MAXPATHLEN * 4;
 3012 
 3013         path = kmem_alloc(len, KM_SLEEP);
 3014 
 3015         bp = &path[len];
 3016         bend = bp;
 3017         *(--bp) = '\0';
 3018 
 3019         cwdi = p->p_cwdi;
 3020         rw_enter(&cwdi->cwdi_lock, RW_READER);
 3021         vp = cwdi->cwdi_cdir;
 3022         error = getcwd_common(vp, NULL, &bp, path, len/2, 0, l);
 3023         rw_exit(&cwdi->cwdi_lock);
 3024 
 3025         if (error)
 3026                 goto out;
 3027 
 3028         lenused = bend - bp;
 3029 
 3030         if (oldp != NULL) {
 3031                 size_t copylen = uimin(lenused, *oldlenp);
 3032                 error = sysctl_copyout(l, bp, oldp, copylen);
 3033                 if (error == 0 && *oldlenp < lenused)
 3034                         error = ENOSPC;
 3035         }
 3036         *oldlenp = lenused;
 3037 out:
 3038         if (pid != -1)
 3039                 mutex_exit(p->p_lock);
 3040         kmem_free(path, len);
 3041         return error;
 3042 }
 3043 
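fill_cwd() relies on getcwd_common() building the path backwards: bp starts at the end of the buffer, a NUL is written first, and each component is prepended in front of the cursor, so lenused = bend - bp already counts the terminating NUL and no final reversal or copy is needed. A self-contained sketch of that reverse-fill idiom (prepend() is an invented helper, not kernel code):

#include <string.h>

/*
 * Prepend "/<component>" in front of *bpp, which moves backwards
 * from the end of the buffer starting at bufp; the partial string
 * always begins at *bpp.  Returns 0, or -1 on overflow.
 */
static int
prepend(char **bpp, char *bufp, const char *component)
{
        size_t n = strlen(component);

        if ((size_t)(*bpp - bufp) < n + 1)
                return -1;
        *bpp -= n;
        memcpy(*bpp, component, n);
        *(--(*bpp)) = '/';
        return 0;
}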
 3044 int
 3045 proc_getauxv(struct proc *p, void **buf, size_t *len)
 3046 {
 3047         struct ps_strings pss;
 3048         int error;
 3049         void *uauxv, *kauxv;
 3050         size_t size;
 3051 
 3052         if ((error = copyin_psstrings(p, &pss)) != 0)
 3053                 return error;
 3054         if (pss.ps_envstr == NULL)
 3055                 return EIO;
 3056 
 3057         size = p->p_execsw->es_arglen;
 3058         if (size == 0)
 3059                 return EIO;
 3060 
 3061         size_t ptrsz = PROC_PTRSZ(p);
 3062         uauxv = (void *)((char *)pss.ps_envstr + (pss.ps_nenvstr + 1) * ptrsz);
 3063 
 3064         kauxv = kmem_alloc(size, KM_SLEEP);
 3065 
 3066         error = copyin_proc(p, uauxv, kauxv, size);
 3067         if (error) {
 3068                 kmem_free(kauxv, size);
 3069                 return error;
 3070         }
 3071 
 3072         *buf = kauxv;
 3073         *len = size;
 3074 
 3075         return 0;
 3076 }
 3077 
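On success, proc_getauxv() hands ownership of the kmem_alloc()ed buffer to the caller, who must release it with kmem_free(*buf, *len). A sketch of a consumer (illustrative only; the AuxInfo layout with a_type/a_v fields and the AT_NULL terminator follow NetBSD's <sys/exec_elf.h> and are treated here as an assumption):

#include <sys/exec_elf.h>
#include <sys/kmem.h>

static void
example_auxv_walk(struct proc *p)
{
        void *buf;
        size_t len;
        AuxInfo *a;

        if (proc_getauxv(p, &buf, &len) != 0)
                return;
        /* The vector is AT_NULL-terminated; also bound it by len. */
        for (a = buf; (char *)(a + 1) <= (char *)buf + len; a++) {
                if (a->a_type == AT_NULL)
                        break;
                /* a->a_type and a->a_v are available here. */
        }
        kmem_free(buf, len);    /* caller owns the buffer */
}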
 3078 
 3079 static int
 3080 sysctl_security_expose_address(SYSCTLFN_ARGS)
 3081 {
 3082         int expose_address, error;
 3083         struct sysctlnode node;
 3084 
 3085         node = *rnode;
 3086         node.sysctl_data = &expose_address;
 3087         expose_address = *(int *)rnode->sysctl_data;
 3088         error = sysctl_lookup(SYSCTLFN_CALL(&node));
 3089         if (error || newp == NULL)
 3090                 return error;
 3091 
 3092         if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_KERNADDR,
 3093             0, NULL, NULL, NULL))
 3094                 return EPERM;
 3095 
 3096         switch (expose_address) {
 3097         case 0:
 3098         case 1:
 3099         case 2:
 3100                 break;
 3101         default:
 3102                 return EINVAL;
 3103         }
 3104 
 3105         *(int *)rnode->sysctl_data = expose_address;
 3106 
 3107         return 0;
 3108 }
 3109 
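sysctl_security_expose_address() follows the standard NetBSD sysctl handler idiom: copy the node, aim sysctl_data at a stack local, let sysctl_lookup() perform the copy-out (and copy-in on writes), then validate and commit only after the kauth check passes. A minimal sketch of the same shape for a hypothetical knob (all names invented for illustration):

static int example_knob;

static int
sysctl_example_knob(SYSCTLFN_ARGS)
{
        int val, error;
        struct sysctlnode node;

        node = *rnode;                  /* copy the node ... */
        val = example_knob;             /* ... snapshot the value ... */
        node.sysctl_data = &val;        /* ... and redirect it locally */
        error = sysctl_lookup(SYSCTLFN_CALL(&node));
        if (error || newp == NULL)      /* error, or a read-only query */
                return error;
        if (val != 0 && val != 1)       /* validate before committing */
                return EINVAL;
        example_knob = val;
        return 0;
}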
 3110 bool
 3111 get_expose_address(struct proc *p)
 3112 {
 3113         /* allow only if sysctl variable is set or privileged */
 3114         return kauth_authorize_process(kauth_cred_get(), KAUTH_PROCESS_CANSEE,
 3115             p, KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_KPTR), NULL, NULL) == 0;
 3116 }
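get_expose_address() is what gates the COND_SET_VALUE() uses earlier in this file: when the kauth CANSEE_KPTR check fails, pointer-valued fields stay at zero rather than leaking kernel addresses. A standalone sketch of that gating (an assumption about the macro's intent; its definition lies outside this excerpt):

#include <stdbool.h>
#include <stdint.h>

/* Report a kernel address only to callers allowed to see it. */
static inline uint64_t
cond_kptr(uint64_t kaddr, bool allowaddr)
{
        return allowaddr ? kaddr : 0;
}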
