The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_resource.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1982, 1986, 1991, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * (c) UNIX System Laboratories, Inc.
    5  * All or some portions of this file are derived from material licensed
    6  * to the University of California by American Telephone and Telegraph
    7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8  * the permission of UNIX System Laboratories, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 4. Neither the name of the University nor the names of its contributors
   19  *    may be used to endorse or promote products derived from this software
   20  *    without specific prior written permission.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  *      @(#)kern_resource.c     8.5 (Berkeley) 1/21/94
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __FBSDID("$FreeBSD: releng/10.1/sys/kern/kern_resource.c 262260 2014-02-20 21:52:39Z mjg $");
   39 
   40 #include "opt_compat.h"
   41 
   42 #include <sys/param.h>
   43 #include <sys/systm.h>
   44 #include <sys/sysproto.h>
   45 #include <sys/file.h>
   46 #include <sys/kernel.h>
   47 #include <sys/lock.h>
   48 #include <sys/malloc.h>
   49 #include <sys/mutex.h>
   50 #include <sys/priv.h>
   51 #include <sys/proc.h>
   52 #include <sys/refcount.h>
   53 #include <sys/racct.h>
   54 #include <sys/resourcevar.h>
   55 #include <sys/rwlock.h>
   56 #include <sys/sched.h>
   57 #include <sys/sx.h>
   58 #include <sys/syscallsubr.h>
   59 #include <sys/sysctl.h>
   60 #include <sys/sysent.h>
   61 #include <sys/time.h>
   62 #include <sys/umtx.h>
   63 
   64 #include <vm/vm.h>
   65 #include <vm/vm_param.h>
   66 #include <vm/pmap.h>
   67 #include <vm/vm_map.h>
   68 
   69 
   70 static MALLOC_DEFINE(M_PLIMIT, "plimit", "plimit structures");
   71 static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures");
   72 #define UIHASH(uid)     (&uihashtbl[(uid) & uihash])
   73 static struct rwlock uihashtbl_lock;
   74 static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
   75 static u_long uihash;           /* size of hash table - 1 */
   76 
   77 static void     calcru1(struct proc *p, struct rusage_ext *ruxp,
   78                     struct timeval *up, struct timeval *sp);
   79 static int      donice(struct thread *td, struct proc *chgp, int n);
   80 static struct uidinfo *uilookup(uid_t uid);
   81 static void     ruxagg_locked(struct rusage_ext *rux, struct thread *td);
   82 
   83 static __inline int     lim_shared(struct plimit *limp);
   84 
   85 /*
   86  * Resource controls and accounting.
   87  */
   88 #ifndef _SYS_SYSPROTO_H_
   89 struct getpriority_args {
   90         int     which;
   91         int     who;
   92 };
   93 #endif
   94 int
   95 sys_getpriority(td, uap)
   96         struct thread *td;
   97         register struct getpriority_args *uap;
   98 {
   99         struct proc *p;
  100         struct pgrp *pg;
  101         int error, low;
  102 
  103         error = 0;
  104         low = PRIO_MAX + 1;
  105         switch (uap->which) {
  106 
  107         case PRIO_PROCESS:
  108                 if (uap->who == 0)
  109                         low = td->td_proc->p_nice;
  110                 else {
  111                         p = pfind(uap->who);
  112                         if (p == NULL)
  113                                 break;
  114                         if (p_cansee(td, p) == 0)
  115                                 low = p->p_nice;
  116                         PROC_UNLOCK(p);
  117                 }
  118                 break;
  119 
  120         case PRIO_PGRP:
  121                 sx_slock(&proctree_lock);
  122                 if (uap->who == 0) {
  123                         pg = td->td_proc->p_pgrp;
  124                         PGRP_LOCK(pg);
  125                 } else {
  126                         pg = pgfind(uap->who);
  127                         if (pg == NULL) {
  128                                 sx_sunlock(&proctree_lock);
  129                                 break;
  130                         }
  131                 }
  132                 sx_sunlock(&proctree_lock);
  133                 LIST_FOREACH(p, &pg->pg_members, p_pglist) {
  134                         PROC_LOCK(p);
  135                         if (p->p_state == PRS_NORMAL &&
  136                             p_cansee(td, p) == 0) {
  137                                 if (p->p_nice < low)
  138                                         low = p->p_nice;
  139                         }
  140                         PROC_UNLOCK(p);
  141                 }
  142                 PGRP_UNLOCK(pg);
  143                 break;
  144 
  145         case PRIO_USER:
  146                 if (uap->who == 0)
  147                         uap->who = td->td_ucred->cr_uid;
  148                 sx_slock(&allproc_lock);
  149                 FOREACH_PROC_IN_SYSTEM(p) {
  150                         PROC_LOCK(p);
  151                         if (p->p_state == PRS_NORMAL &&
  152                             p_cansee(td, p) == 0 &&
  153                             p->p_ucred->cr_uid == uap->who) {
  154                                 if (p->p_nice < low)
  155                                         low = p->p_nice;
  156                         }
  157                         PROC_UNLOCK(p);
  158                 }
  159                 sx_sunlock(&allproc_lock);
  160                 break;
  161 
  162         default:
  163                 error = EINVAL;
  164                 break;
  165         }
  166         if (low == PRIO_MAX + 1 && error == 0)
  167                 error = ESRCH;
  168         td->td_retval[0] = low;
  169         return (error);
  170 }
  171 
  172 #ifndef _SYS_SYSPROTO_H_
  173 struct setpriority_args {
  174         int     which;
  175         int     who;
  176         int     prio;
  177 };
  178 #endif
  179 int
  180 sys_setpriority(td, uap)
  181         struct thread *td;
  182         struct setpriority_args *uap;
  183 {
  184         struct proc *curp, *p;
  185         struct pgrp *pg;
  186         int found = 0, error = 0;
  187 
  188         curp = td->td_proc;
  189         switch (uap->which) {
  190         case PRIO_PROCESS:
  191                 if (uap->who == 0) {
  192                         PROC_LOCK(curp);
  193                         error = donice(td, curp, uap->prio);
  194                         PROC_UNLOCK(curp);
  195                 } else {
  196                         p = pfind(uap->who);
  197                         if (p == NULL)
  198                                 break;
  199                         error = p_cansee(td, p);
  200                         if (error == 0)
  201                                 error = donice(td, p, uap->prio);
  202                         PROC_UNLOCK(p);
  203                 }
  204                 found++;
  205                 break;
  206 
  207         case PRIO_PGRP:
  208                 sx_slock(&proctree_lock);
  209                 if (uap->who == 0) {
  210                         pg = curp->p_pgrp;
  211                         PGRP_LOCK(pg);
  212                 } else {
  213                         pg = pgfind(uap->who);
  214                         if (pg == NULL) {
  215                                 sx_sunlock(&proctree_lock);
  216                                 break;
  217                         }
  218                 }
  219                 sx_sunlock(&proctree_lock);
  220                 LIST_FOREACH(p, &pg->pg_members, p_pglist) {
  221                         PROC_LOCK(p);
  222                         if (p->p_state == PRS_NORMAL &&
  223                             p_cansee(td, p) == 0) {
  224                                 error = donice(td, p, uap->prio);
  225                                 found++;
  226                         }
  227                         PROC_UNLOCK(p);
  228                 }
  229                 PGRP_UNLOCK(pg);
  230                 break;
  231 
  232         case PRIO_USER:
  233                 if (uap->who == 0)
  234                         uap->who = td->td_ucred->cr_uid;
  235                 sx_slock(&allproc_lock);
  236                 FOREACH_PROC_IN_SYSTEM(p) {
  237                         PROC_LOCK(p);
  238                         if (p->p_state == PRS_NORMAL &&
  239                             p->p_ucred->cr_uid == uap->who &&
  240                             p_cansee(td, p) == 0) {
  241                                 error = donice(td, p, uap->prio);
  242                                 found++;
  243                         }
  244                         PROC_UNLOCK(p);
  245                 }
  246                 sx_sunlock(&allproc_lock);
  247                 break;
  248 
  249         default:
  250                 error = EINVAL;
  251                 break;
  252         }
  253         if (found == 0 && error == 0)
  254                 error = ESRCH;
  255         return (error);
  256 }
  257 
  258 /*
  259  * Set "nice" for a (whole) process.
  260  */
  261 static int
  262 donice(struct thread *td, struct proc *p, int n)
  263 {
  264         int error;
  265 
  266         PROC_LOCK_ASSERT(p, MA_OWNED);
  267         if ((error = p_cansched(td, p)))
  268                 return (error);
  269         if (n > PRIO_MAX)
  270                 n = PRIO_MAX;
  271         if (n < PRIO_MIN)
  272                 n = PRIO_MIN;
  273         if (n < p->p_nice && priv_check(td, PRIV_SCHED_SETPRIORITY) != 0)
  274                 return (EACCES);
  275         sched_nice(p, n);
  276         return (0);
  277 }
  278 
  279 static int unprivileged_idprio;
  280 SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_idprio, CTLFLAG_RW,
  281     &unprivileged_idprio, 0, "Allow non-root users to set an idle priority");
  282 
  283 /*
  284  * Set realtime priority for LWP.
  285  */
  286 #ifndef _SYS_SYSPROTO_H_
  287 struct rtprio_thread_args {
  288         int             function;
  289         lwpid_t         lwpid;
  290         struct rtprio   *rtp;
  291 };
  292 #endif
  293 int
  294 sys_rtprio_thread(struct thread *td, struct rtprio_thread_args *uap)
  295 {
  296         struct proc *p;
  297         struct rtprio rtp;
  298         struct thread *td1;
  299         int cierror, error;
  300 
  301         /* Perform copyin before acquiring locks if needed. */
  302         if (uap->function == RTP_SET)
  303                 cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
  304         else
  305                 cierror = 0;
  306 
  307         if (uap->lwpid == 0 || uap->lwpid == td->td_tid) {
  308                 p = td->td_proc;
  309                 td1 = td;
  310                 PROC_LOCK(p);
  311         } else {
  312                 /* Only look up thread in current process */
  313                 td1 = tdfind(uap->lwpid, curproc->p_pid);
  314                 if (td1 == NULL)
  315                         return (ESRCH);
  316                 p = td1->td_proc;
  317         }
  318 
  319         switch (uap->function) {
  320         case RTP_LOOKUP:
  321                 if ((error = p_cansee(td, p)))
  322                         break;
  323                 pri_to_rtp(td1, &rtp);
  324                 PROC_UNLOCK(p);
  325                 return (copyout(&rtp, uap->rtp, sizeof(struct rtprio)));
  326         case RTP_SET:
  327                 if ((error = p_cansched(td, p)) || (error = cierror))
  328                         break;
  329 
  330                 /* Disallow setting rtprio in most cases if not superuser. */
  331 
  332                 /*
  333                  * Realtime priority has to be restricted for reasons which
  334                  * should be obvious.  However, for idleprio processes, there is
  335                  * a potential for system deadlock if an idleprio process gains
  336                  * a lock on a resource that other processes need (and the
  337                  * idleprio process can't run due to a CPU-bound normal
  338                  * process).  Fix me!  XXX
  339                  *
  340                  * This problem is not only related to idleprio process.
  341                  * A user level program can obtain a file lock and hold it
  342                  * indefinitely.  Additionally, without idleprio processes it is
  343                  * still conceivable that a program with low priority will never
  344                  * get to run.  In short, allowing this feature might make it
  345                  * easier to lock a resource indefinitely, but it is not the
  346                  * only thing that makes it possible.
  347                  */
  348                 if (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_REALTIME ||
  349                     (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_IDLE &&
  350                     unprivileged_idprio == 0)) {
  351                         error = priv_check(td, PRIV_SCHED_RTPRIO);
  352                         if (error)
  353                                 break;
  354                 }
  355                 error = rtp_to_pri(&rtp, td1);
  356                 break;
  357         default:
  358                 error = EINVAL;
  359                 break;
  360         }
  361         PROC_UNLOCK(p);
  362         return (error);
  363 }
  364 
  365 /*
  366  * Set realtime priority.
  367  */
  368 #ifndef _SYS_SYSPROTO_H_
  369 struct rtprio_args {
  370         int             function;
  371         pid_t           pid;
  372         struct rtprio   *rtp;
  373 };
  374 #endif
  375 int
  376 sys_rtprio(td, uap)
  377         struct thread *td;              /* curthread */
  378         register struct rtprio_args *uap;
  379 {
  380         struct proc *p;
  381         struct thread *tdp;
  382         struct rtprio rtp;
  383         int cierror, error;
  384 
  385         /* Perform copyin before acquiring locks if needed. */
  386         if (uap->function == RTP_SET)
  387                 cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
  388         else
  389                 cierror = 0;
  390 
  391         if (uap->pid == 0) {
  392                 p = td->td_proc;
  393                 PROC_LOCK(p);
  394         } else {
  395                 p = pfind(uap->pid);
  396                 if (p == NULL)
  397                         return (ESRCH);
  398         }
  399 
  400         switch (uap->function) {
  401         case RTP_LOOKUP:
  402                 if ((error = p_cansee(td, p)))
  403                         break;
  404                 /*
  405                  * Return OUR priority if no pid specified,
  406                  * or if one is, report the highest priority
  407                  * in the process.  There isn't much more you can do as
  408                  * there is only room to return a single priority.
  409                  * Note: specifying our own pid is not the same
  410                  * as leaving it zero.
  411                  */
  412                 if (uap->pid == 0) {
  413                         pri_to_rtp(td, &rtp);
  414                 } else {
  415                         struct rtprio rtp2;
  416 
  417                         rtp.type = RTP_PRIO_IDLE;
  418                         rtp.prio = RTP_PRIO_MAX;
  419                         FOREACH_THREAD_IN_PROC(p, tdp) {
  420                                 pri_to_rtp(tdp, &rtp2);
  421                                 if (rtp2.type <  rtp.type ||
  422                                     (rtp2.type == rtp.type &&
  423                                     rtp2.prio < rtp.prio)) {
  424                                         rtp.type = rtp2.type;
  425                                         rtp.prio = rtp2.prio;
  426                                 }
  427                         }
  428                 }
  429                 PROC_UNLOCK(p);
  430                 return (copyout(&rtp, uap->rtp, sizeof(struct rtprio)));
  431         case RTP_SET:
  432                 if ((error = p_cansched(td, p)) || (error = cierror))
  433                         break;
  434 
  435                 /*
  436                  * Disallow setting rtprio in most cases if not superuser.
  437                  * See the comment in sys_rtprio_thread about idprio
  438                  * threads holding a lock.
  439                  */
  440                 if (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_REALTIME ||
  441                     (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_IDLE &&
  442                     !unprivileged_idprio)) {
  443                         error = priv_check(td, PRIV_SCHED_RTPRIO);
  444                         if (error)
  445                                 break;
  446                 }
  447 
  448                 /*
  449                  * If we are setting our own priority, set just our
  450                  * thread but if we are doing another process,
  451                  * do all the threads on that process. If we
  452                  * specify our own pid we do the latter.
  453                  */
  454                 if (uap->pid == 0) {
  455                         error = rtp_to_pri(&rtp, td);
  456                 } else {
  457                         FOREACH_THREAD_IN_PROC(p, td) {
  458                                 if ((error = rtp_to_pri(&rtp, td)) != 0)
  459                                         break;
  460                         }
  461                 }
  462                 break;
  463         default:
  464                 error = EINVAL;
  465                 break;
  466         }
  467         PROC_UNLOCK(p);
  468         return (error);
  469 }
  470 
  471 int
  472 rtp_to_pri(struct rtprio *rtp, struct thread *td)
  473 {
  474         u_char  newpri, oldclass, oldpri;
  475 
  476         switch (RTP_PRIO_BASE(rtp->type)) {
  477         case RTP_PRIO_REALTIME:
  478                 if (rtp->prio > RTP_PRIO_MAX)
  479                         return (EINVAL);
  480                 newpri = PRI_MIN_REALTIME + rtp->prio;
  481                 break;
  482         case RTP_PRIO_NORMAL:
  483                 if (rtp->prio > (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE))
  484                         return (EINVAL);
  485                 newpri = PRI_MIN_TIMESHARE + rtp->prio;
  486                 break;
  487         case RTP_PRIO_IDLE:
  488                 if (rtp->prio > RTP_PRIO_MAX)
  489                         return (EINVAL);
  490                 newpri = PRI_MIN_IDLE + rtp->prio;
  491                 break;
  492         default:
  493                 return (EINVAL);
  494         }
  495 
  496         thread_lock(td);
  497         oldclass = td->td_pri_class;
  498         sched_class(td, rtp->type);     /* XXX fix */
  499         oldpri = td->td_user_pri;
  500         sched_user_prio(td, newpri);
  501         if (td->td_user_pri != oldpri && (oldclass != RTP_PRIO_NORMAL ||
  502             td->td_pri_class != RTP_PRIO_NORMAL))
  503                 sched_prio(td, td->td_user_pri);
  504         if (TD_ON_UPILOCK(td) && oldpri != newpri) {
  505                 critical_enter();
  506                 thread_unlock(td);
  507                 umtx_pi_adjust(td, oldpri);
  508                 critical_exit();
  509         } else
  510                 thread_unlock(td);
  511         return (0);
  512 }
  513 
  514 void
  515 pri_to_rtp(struct thread *td, struct rtprio *rtp)
  516 {
  517 
  518         thread_lock(td);
  519         switch (PRI_BASE(td->td_pri_class)) {
  520         case PRI_REALTIME:
  521                 rtp->prio = td->td_base_user_pri - PRI_MIN_REALTIME;
  522                 break;
  523         case PRI_TIMESHARE:
  524                 rtp->prio = td->td_base_user_pri - PRI_MIN_TIMESHARE;
  525                 break;
  526         case PRI_IDLE:
  527                 rtp->prio = td->td_base_user_pri - PRI_MIN_IDLE;
  528                 break;
  529         default:
  530                 break;
  531         }
  532         rtp->type = td->td_pri_class;
  533         thread_unlock(td);
  534 }
  535 
  536 #if defined(COMPAT_43)
  537 #ifndef _SYS_SYSPROTO_H_
  538 struct osetrlimit_args {
  539         u_int   which;
  540         struct  orlimit *rlp;
  541 };
  542 #endif
  543 int
  544 osetrlimit(td, uap)
  545         struct thread *td;
  546         register struct osetrlimit_args *uap;
  547 {
  548         struct orlimit olim;
  549         struct rlimit lim;
  550         int error;
  551 
  552         if ((error = copyin(uap->rlp, &olim, sizeof(struct orlimit))))
  553                 return (error);
  554         lim.rlim_cur = olim.rlim_cur;
  555         lim.rlim_max = olim.rlim_max;
  556         error = kern_setrlimit(td, uap->which, &lim);
  557         return (error);
  558 }
  559 
  560 #ifndef _SYS_SYSPROTO_H_
  561 struct ogetrlimit_args {
  562         u_int   which;
  563         struct  orlimit *rlp;
  564 };
  565 #endif
  566 int
  567 ogetrlimit(td, uap)
  568         struct thread *td;
  569         register struct ogetrlimit_args *uap;
  570 {
  571         struct orlimit olim;
  572         struct rlimit rl;
  573         struct proc *p;
  574         int error;
  575 
  576         if (uap->which >= RLIM_NLIMITS)
  577                 return (EINVAL);
  578         p = td->td_proc;
  579         PROC_LOCK(p);
  580         lim_rlimit(p, uap->which, &rl);
  581         PROC_UNLOCK(p);
  582 
  583         /*
  584          * XXX would be more correct to convert only RLIM_INFINITY to the
  585          * old RLIM_INFINITY and fail with EOVERFLOW for other larger
  586          * values.  Most 64->32 and 32->16 conversions, including not
  587          * unimportant ones of uids are even more broken than what we
  588          * do here (they blindly truncate).  We don't do this correctly
  589          * here since we have little experience with EOVERFLOW yet.
  590          * Elsewhere, getuid() can't fail...
  591          */
  592         olim.rlim_cur = rl.rlim_cur > 0x7fffffff ? 0x7fffffff : rl.rlim_cur;
  593         olim.rlim_max = rl.rlim_max > 0x7fffffff ? 0x7fffffff : rl.rlim_max;
  594         error = copyout(&olim, uap->rlp, sizeof(olim));
  595         return (error);
  596 }
  597 #endif /* COMPAT_43 */
  598 
  599 #ifndef _SYS_SYSPROTO_H_
  600 struct __setrlimit_args {
  601         u_int   which;
  602         struct  rlimit *rlp;
  603 };
  604 #endif
  605 int
  606 sys_setrlimit(td, uap)
  607         struct thread *td;
  608         register struct __setrlimit_args *uap;
  609 {
  610         struct rlimit alim;
  611         int error;
  612 
  613         if ((error = copyin(uap->rlp, &alim, sizeof(struct rlimit))))
  614                 return (error);
  615         error = kern_setrlimit(td, uap->which, &alim);
  616         return (error);
  617 }
  618 
  619 static void
  620 lim_cb(void *arg)
  621 {
  622         struct rlimit rlim;
  623         struct thread *td;
  624         struct proc *p;
  625 
  626         p = arg;
  627         PROC_LOCK_ASSERT(p, MA_OWNED);
  628         /*
  629          * Check if the process exceeds its cpu resource allocation.  If
  630          * it reaches the max, arrange to kill the process in ast().
  631          */
  632         if (p->p_cpulimit == RLIM_INFINITY)
  633                 return;
  634         PROC_SLOCK(p);
  635         FOREACH_THREAD_IN_PROC(p, td) {
  636                 ruxagg(p, td);
  637         }
  638         PROC_SUNLOCK(p);
  639         if (p->p_rux.rux_runtime > p->p_cpulimit * cpu_tickrate()) {
  640                 lim_rlimit(p, RLIMIT_CPU, &rlim);
  641                 if (p->p_rux.rux_runtime >= rlim.rlim_max * cpu_tickrate()) {
  642                         killproc(p, "exceeded maximum CPU limit");
  643                 } else {
  644                         if (p->p_cpulimit < rlim.rlim_max)
  645                                 p->p_cpulimit += 5;
  646                         kern_psignal(p, SIGXCPU);
  647                 }
  648         }
  649         if ((p->p_flag & P_WEXIT) == 0)
  650                 callout_reset_sbt(&p->p_limco, SBT_1S, 0,
  651                     lim_cb, p, C_PREL(1));
  652 }
  653 
  654 int
  655 kern_setrlimit(struct thread *td, u_int which, struct rlimit *limp)
  656 {
  657 
  658         return (kern_proc_setrlimit(td, td->td_proc, which, limp));
  659 }
  660 
  661 int
  662 kern_proc_setrlimit(struct thread *td, struct proc *p, u_int which,
  663     struct rlimit *limp)
  664 {
  665         struct plimit *newlim, *oldlim;
  666         register struct rlimit *alimp;
  667         struct rlimit oldssiz;
  668         int error;
  669 
  670         if (which >= RLIM_NLIMITS)
  671                 return (EINVAL);
  672 
  673         /*
  674          * Preserve historical bugs by treating negative limits as unsigned.
  675          */
  676         if (limp->rlim_cur < 0)
  677                 limp->rlim_cur = RLIM_INFINITY;
  678         if (limp->rlim_max < 0)
  679                 limp->rlim_max = RLIM_INFINITY;
  680 
  681         oldssiz.rlim_cur = 0;
  682         newlim = NULL;
  683         PROC_LOCK(p);
  684         if (lim_shared(p->p_limit)) {
  685                 PROC_UNLOCK(p);
  686                 newlim = lim_alloc();
  687                 PROC_LOCK(p);
  688         }
  689         oldlim = p->p_limit;
  690         alimp = &oldlim->pl_rlimit[which];
  691         if (limp->rlim_cur > alimp->rlim_max ||
  692             limp->rlim_max > alimp->rlim_max)
  693                 if ((error = priv_check(td, PRIV_PROC_SETRLIMIT))) {
  694                         PROC_UNLOCK(p);
  695                         if (newlim != NULL)
  696                                 lim_free(newlim);
  697                         return (error);
  698                 }
  699         if (limp->rlim_cur > limp->rlim_max)
  700                 limp->rlim_cur = limp->rlim_max;
  701         if (newlim != NULL) {
  702                 lim_copy(newlim, oldlim);
  703                 alimp = &newlim->pl_rlimit[which];
  704         }
  705 
  706         switch (which) {
  707 
  708         case RLIMIT_CPU:
  709                 if (limp->rlim_cur != RLIM_INFINITY &&
  710                     p->p_cpulimit == RLIM_INFINITY)
  711                         callout_reset_sbt(&p->p_limco, SBT_1S, 0,
  712                             lim_cb, p, C_PREL(1));
  713                 p->p_cpulimit = limp->rlim_cur;
  714                 break;
  715         case RLIMIT_DATA:
  716                 if (limp->rlim_cur > maxdsiz)
  717                         limp->rlim_cur = maxdsiz;
  718                 if (limp->rlim_max > maxdsiz)
  719                         limp->rlim_max = maxdsiz;
  720                 break;
  721 
  722         case RLIMIT_STACK:
  723                 if (limp->rlim_cur > maxssiz)
  724                         limp->rlim_cur = maxssiz;
  725                 if (limp->rlim_max > maxssiz)
  726                         limp->rlim_max = maxssiz;
  727                 oldssiz = *alimp;
  728                 if (p->p_sysent->sv_fixlimit != NULL)
  729                         p->p_sysent->sv_fixlimit(&oldssiz,
  730                             RLIMIT_STACK);
  731                 break;
  732 
  733         case RLIMIT_NOFILE:
  734                 if (limp->rlim_cur > maxfilesperproc)
  735                         limp->rlim_cur = maxfilesperproc;
  736                 if (limp->rlim_max > maxfilesperproc)
  737                         limp->rlim_max = maxfilesperproc;
  738                 break;
  739 
  740         case RLIMIT_NPROC:
  741                 if (limp->rlim_cur > maxprocperuid)
  742                         limp->rlim_cur = maxprocperuid;
  743                 if (limp->rlim_max > maxprocperuid)
  744                         limp->rlim_max = maxprocperuid;
  745                 if (limp->rlim_cur < 1)
  746                         limp->rlim_cur = 1;
  747                 if (limp->rlim_max < 1)
  748                         limp->rlim_max = 1;
  749                 break;
  750         }
  751         if (p->p_sysent->sv_fixlimit != NULL)
  752                 p->p_sysent->sv_fixlimit(limp, which);
  753         *alimp = *limp;
  754         if (newlim != NULL)
  755                 p->p_limit = newlim;
  756         PROC_UNLOCK(p);
  757         if (newlim != NULL)
  758                 lim_free(oldlim);
  759 
  760         if (which == RLIMIT_STACK) {
  761                 /*
  762                  * Stack is allocated to the max at exec time with only
  763                  * "rlim_cur" bytes accessible.  If stack limit is going
  764                  * up make more accessible, if going down make inaccessible.
  765                  */
  766                 if (limp->rlim_cur != oldssiz.rlim_cur) {
  767                         vm_offset_t addr;
  768                         vm_size_t size;
  769                         vm_prot_t prot;
  770 
  771                         if (limp->rlim_cur > oldssiz.rlim_cur) {
  772                                 prot = p->p_sysent->sv_stackprot;
  773                                 size = limp->rlim_cur - oldssiz.rlim_cur;
  774                                 addr = p->p_sysent->sv_usrstack -
  775                                     limp->rlim_cur;
  776                         } else {
  777                                 prot = VM_PROT_NONE;
  778                                 size = oldssiz.rlim_cur - limp->rlim_cur;
  779                                 addr = p->p_sysent->sv_usrstack -
  780                                     oldssiz.rlim_cur;
  781                         }
  782                         addr = trunc_page(addr);
  783                         size = round_page(size);
  784                         (void)vm_map_protect(&p->p_vmspace->vm_map,
  785                             addr, addr + size, prot, FALSE);
  786                 }
  787         }
  788 
  789         return (0);
  790 }
  791 
  792 #ifndef _SYS_SYSPROTO_H_
  793 struct __getrlimit_args {
  794         u_int   which;
  795         struct  rlimit *rlp;
  796 };
  797 #endif
  798 /* ARGSUSED */
  799 int
  800 sys_getrlimit(td, uap)
  801         struct thread *td;
  802         register struct __getrlimit_args *uap;
  803 {
  804         struct rlimit rlim;
  805         struct proc *p;
  806         int error;
  807 
  808         if (uap->which >= RLIM_NLIMITS)
  809                 return (EINVAL);
  810         p = td->td_proc;
  811         PROC_LOCK(p);
  812         lim_rlimit(p, uap->which, &rlim);
  813         PROC_UNLOCK(p);
  814         error = copyout(&rlim, uap->rlp, sizeof(struct rlimit));
  815         return (error);
  816 }
  817 
  818 /*
  819  * Transform the running time and tick information for children of proc p
  820  * into user and system time usage.
  821  */
  822 void
  823 calccru(p, up, sp)
  824         struct proc *p;
  825         struct timeval *up;
  826         struct timeval *sp;
  827 {
  828 
  829         PROC_LOCK_ASSERT(p, MA_OWNED);
  830         calcru1(p, &p->p_crux, up, sp);
  831 }
  832 
  833 /*
  834  * Transform the running time and tick information in proc p into user
  835  * and system time usage.  If appropriate, include the current time slice
  836  * on this CPU.
  837  */
  838 void
  839 calcru(struct proc *p, struct timeval *up, struct timeval *sp)
  840 {
  841         struct thread *td;
  842         uint64_t runtime, u;
  843 
  844         PROC_LOCK_ASSERT(p, MA_OWNED);
  845         PROC_SLOCK_ASSERT(p, MA_OWNED);
  846         /*
  847          * If we are getting stats for the current process, then add in the
  848          * stats that this thread has accumulated in its current time slice.
  849          * We reset the thread and CPU state as if we had performed a context
  850          * switch right here.
  851          */
  852         td = curthread;
  853         if (td->td_proc == p) {
  854                 u = cpu_ticks();
  855                 runtime = u - PCPU_GET(switchtime);
  856                 td->td_runtime += runtime;
  857                 td->td_incruntime += runtime;
  858                 PCPU_SET(switchtime, u);
  859         }
  860         /* Make sure the per-thread stats are current. */
  861         FOREACH_THREAD_IN_PROC(p, td) {
  862                 if (td->td_incruntime == 0)
  863                         continue;
  864                 ruxagg(p, td);
  865         }
  866         calcru1(p, &p->p_rux, up, sp);
  867 }
  868 
  869 /* Collect resource usage for a single thread. */
  870 void
  871 rufetchtd(struct thread *td, struct rusage *ru)
  872 {
  873         struct proc *p;
  874         uint64_t runtime, u;
  875 
  876         p = td->td_proc;
  877         PROC_SLOCK_ASSERT(p, MA_OWNED);
  878         THREAD_LOCK_ASSERT(td, MA_OWNED);
  879         /*
  880          * If we are getting stats for the current thread, then add in the
  881          * stats that this thread has accumulated in its current time slice.
  882          * We reset the thread and CPU state as if we had performed a context
  883          * switch right here.
  884          */
  885         if (td == curthread) {
  886                 u = cpu_ticks();
  887                 runtime = u - PCPU_GET(switchtime);
  888                 td->td_runtime += runtime;
  889                 td->td_incruntime += runtime;
  890                 PCPU_SET(switchtime, u);
  891         }
  892         ruxagg(p, td);
  893         *ru = td->td_ru;
  894         calcru1(p, &td->td_rux, &ru->ru_utime, &ru->ru_stime);
  895 }
  896 
  897 static void
  898 calcru1(struct proc *p, struct rusage_ext *ruxp, struct timeval *up,
  899     struct timeval *sp)
  900 {
  901         /* {user, system, interrupt, total} {ticks, usec}: */
  902         uint64_t ut, uu, st, su, it, tt, tu;
  903 
  904         ut = ruxp->rux_uticks;
  905         st = ruxp->rux_sticks;
  906         it = ruxp->rux_iticks;
  907         tt = ut + st + it;
  908         if (tt == 0) {
  909                 /* Avoid divide by zero */
  910                 st = 1;
  911                 tt = 1;
  912         }
  913         tu = cputick2usec(ruxp->rux_runtime);
  914         if ((int64_t)tu < 0) {
  915                 /* XXX: this should be an assert /phk */
  916                 printf("calcru: negative runtime of %jd usec for pid %d (%s)\n",
  917                     (intmax_t)tu, p->p_pid, p->p_comm);
  918                 tu = ruxp->rux_tu;
  919         }
  920 
  921         if (tu >= ruxp->rux_tu) {
  922                 /*
  923                  * The normal case, time increased.
  924                  * Enforce monotonicity of bucketed numbers.
  925                  */
  926                 uu = (tu * ut) / tt;
  927                 if (uu < ruxp->rux_uu)
  928                         uu = ruxp->rux_uu;
  929                 su = (tu * st) / tt;
  930                 if (su < ruxp->rux_su)
  931                         su = ruxp->rux_su;
  932         } else if (tu + 3 > ruxp->rux_tu || 101 * tu > 100 * ruxp->rux_tu) {
  933                 /*
  934                  * When we calibrate the cputicker, it is not uncommon to
  935                  * see the presumably fixed frequency increase slightly over
  936                  * time as a result of thermal stabilization and NTP
  937                  * discipline (of the reference clock).  We therefore ignore
  938                  * a bit of backwards slop because we  expect to catch up
  939                  * shortly.  We use a 3 microsecond limit to catch low
  940                  * counts and a 1% limit for high counts.
  941                  */
  942                 uu = ruxp->rux_uu;
  943                 su = ruxp->rux_su;
  944                 tu = ruxp->rux_tu;
  945         } else { /* tu < ruxp->rux_tu */
  946                 /*
  947                  * What happened here was likely that a laptop, which ran at
  948                  * a reduced clock frequency at boot, kicked into high gear.
  949                  * The wisdom of spamming this message in that case is
  950                  * dubious, but it might also be indicative of something
  951                  * serious, so lets keep it and hope laptops can be made
  952                  * more truthful about their CPU speed via ACPI.
  953                  */
  954                 printf("calcru: runtime went backwards from %ju usec "
  955                     "to %ju usec for pid %d (%s)\n",
  956                     (uintmax_t)ruxp->rux_tu, (uintmax_t)tu,
  957                     p->p_pid, p->p_comm);
  958                 uu = (tu * ut) / tt;
  959                 su = (tu * st) / tt;
  960         }
  961 
  962         ruxp->rux_uu = uu;
  963         ruxp->rux_su = su;
  964         ruxp->rux_tu = tu;
  965 
  966         up->tv_sec = uu / 1000000;
  967         up->tv_usec = uu % 1000000;
  968         sp->tv_sec = su / 1000000;
  969         sp->tv_usec = su % 1000000;
  970 }
  971 
  972 #ifndef _SYS_SYSPROTO_H_
  973 struct getrusage_args {
  974         int     who;
  975         struct  rusage *rusage;
  976 };
  977 #endif
  978 int
  979 sys_getrusage(td, uap)
  980         register struct thread *td;
  981         register struct getrusage_args *uap;
  982 {
  983         struct rusage ru;
  984         int error;
  985 
  986         error = kern_getrusage(td, uap->who, &ru);
  987         if (error == 0)
  988                 error = copyout(&ru, uap->rusage, sizeof(struct rusage));
  989         return (error);
  990 }
  991 
  992 int
  993 kern_getrusage(struct thread *td, int who, struct rusage *rup)
  994 {
  995         struct proc *p;
  996         int error;
  997 
  998         error = 0;
  999         p = td->td_proc;
 1000         PROC_LOCK(p);
 1001         switch (who) {
 1002         case RUSAGE_SELF:
 1003                 rufetchcalc(p, rup, &rup->ru_utime,
 1004                     &rup->ru_stime);
 1005                 break;
 1006 
 1007         case RUSAGE_CHILDREN:
 1008                 *rup = p->p_stats->p_cru;
 1009                 calccru(p, &rup->ru_utime, &rup->ru_stime);
 1010                 break;
 1011 
 1012         case RUSAGE_THREAD:
 1013                 PROC_SLOCK(p);
 1014                 thread_lock(td);
 1015                 rufetchtd(td, rup);
 1016                 thread_unlock(td);
 1017                 PROC_SUNLOCK(p);
 1018                 break;
 1019 
 1020         default:
 1021                 error = EINVAL;
 1022         }
 1023         PROC_UNLOCK(p);
 1024         return (error);
 1025 }
 1026 
 1027 void
 1028 rucollect(struct rusage *ru, struct rusage *ru2)
 1029 {
 1030         long *ip, *ip2;
 1031         int i;
 1032 
 1033         if (ru->ru_maxrss < ru2->ru_maxrss)
 1034                 ru->ru_maxrss = ru2->ru_maxrss;
 1035         ip = &ru->ru_first;
 1036         ip2 = &ru2->ru_first;
 1037         for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
 1038                 *ip++ += *ip2++;
 1039 }
 1040 
 1041 void
 1042 ruadd(struct rusage *ru, struct rusage_ext *rux, struct rusage *ru2,
 1043     struct rusage_ext *rux2)
 1044 {
 1045 
 1046         rux->rux_runtime += rux2->rux_runtime;
 1047         rux->rux_uticks += rux2->rux_uticks;
 1048         rux->rux_sticks += rux2->rux_sticks;
 1049         rux->rux_iticks += rux2->rux_iticks;
 1050         rux->rux_uu += rux2->rux_uu;
 1051         rux->rux_su += rux2->rux_su;
 1052         rux->rux_tu += rux2->rux_tu;
 1053         rucollect(ru, ru2);
 1054 }
 1055 
 1056 /*
 1057  * Aggregate tick counts into the proc's rusage_ext.
 1058  */
 1059 static void
 1060 ruxagg_locked(struct rusage_ext *rux, struct thread *td)
 1061 {
 1062 
 1063         THREAD_LOCK_ASSERT(td, MA_OWNED);
 1064         PROC_SLOCK_ASSERT(td->td_proc, MA_OWNED);
 1065         rux->rux_runtime += td->td_incruntime;
 1066         rux->rux_uticks += td->td_uticks;
 1067         rux->rux_sticks += td->td_sticks;
 1068         rux->rux_iticks += td->td_iticks;
 1069 }
 1070 
 1071 void
 1072 ruxagg(struct proc *p, struct thread *td)
 1073 {
 1074 
 1075         thread_lock(td);
 1076         ruxagg_locked(&p->p_rux, td);
 1077         ruxagg_locked(&td->td_rux, td);
 1078         td->td_incruntime = 0;
 1079         td->td_uticks = 0;
 1080         td->td_iticks = 0;
 1081         td->td_sticks = 0;
 1082         thread_unlock(td);
 1083 }
 1084 
 1085 /*
 1086  * Update the rusage_ext structure and fetch a valid aggregate rusage
 1087  * for proc p if storage for one is supplied.
 1088  */
 1089 void
 1090 rufetch(struct proc *p, struct rusage *ru)
 1091 {
 1092         struct thread *td;
 1093 
 1094         PROC_SLOCK_ASSERT(p, MA_OWNED);
 1095 
 1096         *ru = p->p_ru;
 1097         if (p->p_numthreads > 0)  {
 1098                 FOREACH_THREAD_IN_PROC(p, td) {
 1099                         ruxagg(p, td);
 1100                         rucollect(ru, &td->td_ru);
 1101                 }
 1102         }
 1103 }
 1104 
 1105 /*
 1106  * Atomically perform a rufetch and a calcru together.
 1107  * Consumers, can safely assume the calcru is executed only once
 1108  * rufetch is completed.
 1109  */
 1110 void
 1111 rufetchcalc(struct proc *p, struct rusage *ru, struct timeval *up,
 1112     struct timeval *sp)
 1113 {
 1114 
 1115         PROC_SLOCK(p);
 1116         rufetch(p, ru);
 1117         calcru(p, up, sp);
 1118         PROC_SUNLOCK(p);
 1119 }
 1120 
 1121 /*
 1122  * Allocate a new resource limits structure and initialize its
 1123  * reference count and mutex pointer.
 1124  */
 1125 struct plimit *
 1126 lim_alloc()
 1127 {
 1128         struct plimit *limp;
 1129 
 1130         limp = malloc(sizeof(struct plimit), M_PLIMIT, M_WAITOK);
 1131         refcount_init(&limp->pl_refcnt, 1);
 1132         return (limp);
 1133 }
 1134 
 1135 struct plimit *
 1136 lim_hold(limp)
 1137         struct plimit *limp;
 1138 {
 1139 
 1140         refcount_acquire(&limp->pl_refcnt);
 1141         return (limp);
 1142 }
 1143 
 1144 static __inline int
 1145 lim_shared(limp)
 1146         struct plimit *limp;
 1147 {
 1148 
 1149         return (limp->pl_refcnt > 1);
 1150 }
 1151 
 1152 void
 1153 lim_fork(struct proc *p1, struct proc *p2)
 1154 {
 1155 
 1156         PROC_LOCK_ASSERT(p1, MA_OWNED);
 1157         PROC_LOCK_ASSERT(p2, MA_OWNED);
 1158 
 1159         p2->p_limit = lim_hold(p1->p_limit);
 1160         callout_init_mtx(&p2->p_limco, &p2->p_mtx, 0);
 1161         if (p1->p_cpulimit != RLIM_INFINITY)
 1162                 callout_reset_sbt(&p2->p_limco, SBT_1S, 0,
 1163                     lim_cb, p2, C_PREL(1));
 1164 }
 1165 
 1166 void
 1167 lim_free(limp)
 1168         struct plimit *limp;
 1169 {
 1170 
 1171         KASSERT(limp->pl_refcnt > 0, ("plimit refcnt underflow"));
 1172         if (refcount_release(&limp->pl_refcnt))
 1173                 free((void *)limp, M_PLIMIT);
 1174 }
 1175 
 1176 /*
 1177  * Make a copy of the plimit structure.
 1178  * We share these structures copy-on-write after fork.
 1179  */
 1180 void
 1181 lim_copy(dst, src)
 1182         struct plimit *dst, *src;
 1183 {
 1184 
 1185         KASSERT(!lim_shared(dst), ("lim_copy to shared limit"));
 1186         bcopy(src->pl_rlimit, dst->pl_rlimit, sizeof(src->pl_rlimit));
 1187 }
 1188 
 1189 /*
 1190  * Return the hard limit for a particular system resource.  The
 1191  * which parameter specifies the index into the rlimit array.
 1192  */
 1193 rlim_t
 1194 lim_max(struct proc *p, int which)
 1195 {
 1196         struct rlimit rl;
 1197 
 1198         lim_rlimit(p, which, &rl);
 1199         return (rl.rlim_max);
 1200 }
 1201 
 1202 /*
 1203  * Return the current (soft) limit for a particular system resource.
 1204  * The which parameter which specifies the index into the rlimit array
 1205  */
 1206 rlim_t
 1207 lim_cur(struct proc *p, int which)
 1208 {
 1209         struct rlimit rl;
 1210 
 1211         lim_rlimit(p, which, &rl);
 1212         return (rl.rlim_cur);
 1213 }
 1214 
 1215 /*
 1216  * Return a copy of the entire rlimit structure for the system limit
 1217  * specified by 'which' in the rlimit structure pointed to by 'rlp'.
 1218  */
 1219 void
 1220 lim_rlimit(struct proc *p, int which, struct rlimit *rlp)
 1221 {
 1222 
 1223         PROC_LOCK_ASSERT(p, MA_OWNED);
 1224         KASSERT(which >= 0 && which < RLIM_NLIMITS,
 1225             ("request for invalid resource limit"));
 1226         *rlp = p->p_limit->pl_rlimit[which];
 1227         if (p->p_sysent->sv_fixlimit != NULL)
 1228                 p->p_sysent->sv_fixlimit(rlp, which);
 1229 }
 1230 
 1231 void
 1232 uihashinit()
 1233 {
 1234 
 1235         uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
 1236         rw_init(&uihashtbl_lock, "uidinfo hash");
 1237 }
 1238 
 1239 /*
 1240  * Look up a uidinfo struct for the parameter uid.
 1241  * uihashtbl_lock must be locked.
 1242  */
 1243 static struct uidinfo *
 1244 uilookup(uid)
 1245         uid_t uid;
 1246 {
 1247         struct uihashhead *uipp;
 1248         struct uidinfo *uip;
 1249 
 1250         rw_assert(&uihashtbl_lock, RA_LOCKED);
 1251         uipp = UIHASH(uid);
 1252         LIST_FOREACH(uip, uipp, ui_hash)
 1253                 if (uip->ui_uid == uid)
 1254                         break;
 1255 
 1256         return (uip);
 1257 }
 1258 
 1259 /*
 1260  * Find or allocate a struct uidinfo for a particular uid.
 1261  * Increase refcount on uidinfo struct returned.
 1262  * uifree() should be called on a struct uidinfo when released.
 1263  */
 1264 struct uidinfo *
 1265 uifind(uid)
 1266         uid_t uid;
 1267 {
 1268         struct uidinfo *old_uip, *uip;
 1269 
 1270         rw_rlock(&uihashtbl_lock);
 1271         uip = uilookup(uid);
 1272         if (uip == NULL) {
 1273                 rw_runlock(&uihashtbl_lock);
 1274                 uip = malloc(sizeof(*uip), M_UIDINFO, M_WAITOK | M_ZERO);
 1275                 racct_create(&uip->ui_racct);
 1276                 rw_wlock(&uihashtbl_lock);
 1277                 /*
 1278                  * There's a chance someone created our uidinfo while we
 1279                  * were in malloc and not holding the lock, so we have to
 1280                  * make sure we don't insert a duplicate uidinfo.
 1281                  */
 1282                 if ((old_uip = uilookup(uid)) != NULL) {
 1283                         /* Someone else beat us to it. */
 1284                         racct_destroy(&uip->ui_racct);
 1285                         free(uip, M_UIDINFO);
 1286                         uip = old_uip;
 1287                 } else {
 1288                         refcount_init(&uip->ui_ref, 0);
 1289                         uip->ui_uid = uid;
 1290                         mtx_init(&uip->ui_vmsize_mtx, "ui_vmsize", NULL,
 1291                             MTX_DEF);
 1292                         LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash);
 1293                 }
 1294         }
 1295         uihold(uip);
 1296         rw_unlock(&uihashtbl_lock);
 1297         return (uip);
 1298 }
 1299 
 1300 /*
 1301  * Place another refcount on a uidinfo struct.
 1302  */
 1303 void
 1304 uihold(uip)
 1305         struct uidinfo *uip;
 1306 {
 1307 
 1308         refcount_acquire(&uip->ui_ref);
 1309 }
 1310 
 1311 /*-
 1312  * Since uidinfo structs have a long lifetime, we use an
 1313  * opportunistic refcounting scheme to avoid locking the lookup hash
 1314  * for each release.
 1315  *
 1316  * If the refcount hits 0, we need to free the structure,
 1317  * which means we need to lock the hash.
 1318  * Optimal case:
 1319  *   After locking the struct and lowering the refcount, if we find
 1320  *   that we don't need to free, simply unlock and return.
 1321  * Suboptimal case:
 1322  *   If refcount lowering results in need to free, bump the count
 1323  *   back up, lose the lock and acquire the locks in the proper
 1324  *   order to try again.
 1325  */
 1326 void
 1327 uifree(uip)
 1328         struct uidinfo *uip;
 1329 {
 1330         int old;
 1331 
 1332         /* Prepare for optimal case. */
 1333         old = uip->ui_ref;
 1334         if (old > 1 && atomic_cmpset_int(&uip->ui_ref, old, old - 1))
 1335                 return;
 1336 
 1337         /* Prepare for suboptimal case. */
 1338         rw_wlock(&uihashtbl_lock);
 1339         if (refcount_release(&uip->ui_ref)) {
 1340                 racct_destroy(&uip->ui_racct);
 1341                 LIST_REMOVE(uip, ui_hash);
 1342                 rw_wunlock(&uihashtbl_lock);
 1343                 if (uip->ui_sbsize != 0)
 1344                         printf("freeing uidinfo: uid = %d, sbsize = %ld\n",
 1345                             uip->ui_uid, uip->ui_sbsize);
 1346                 if (uip->ui_proccnt != 0)
 1347                         printf("freeing uidinfo: uid = %d, proccnt = %ld\n",
 1348                             uip->ui_uid, uip->ui_proccnt);
 1349                 if (uip->ui_vmsize != 0)
 1350                         printf("freeing uidinfo: uid = %d, swapuse = %lld\n",
 1351                             uip->ui_uid, (unsigned long long)uip->ui_vmsize);
 1352                 mtx_destroy(&uip->ui_vmsize_mtx);
 1353                 free(uip, M_UIDINFO);
 1354                 return;
 1355         }
 1356         /*
 1357          * Someone added a reference between atomic_cmpset_int() and
 1358          * rw_wlock(&uihashtbl_lock).
 1359          */
 1360         rw_wunlock(&uihashtbl_lock);
 1361 }
 1362 
 1363 void
 1364 ui_racct_foreach(void (*callback)(struct racct *racct,
 1365     void *arg2, void *arg3), void *arg2, void *arg3)
 1366 {
 1367         struct uidinfo *uip;
 1368         struct uihashhead *uih;
 1369 
 1370         rw_rlock(&uihashtbl_lock);
 1371         for (uih = &uihashtbl[uihash]; uih >= uihashtbl; uih--) {
 1372                 LIST_FOREACH(uip, uih, ui_hash) {
 1373                         (callback)(uip->ui_racct, arg2, arg3);
 1374                 }
 1375         }
 1376         rw_runlock(&uihashtbl_lock);
 1377 }
 1378 
 1379 /*
 1380  * Change the count associated with number of processes
 1381  * a given user is using.  When 'max' is 0, don't enforce a limit
 1382  */
 1383 int
 1384 chgproccnt(uip, diff, max)
 1385         struct  uidinfo *uip;
 1386         int     diff;
 1387         rlim_t  max;
 1388 {
 1389 
 1390         /* Don't allow them to exceed max, but allow subtraction. */
 1391         if (diff > 0 && max != 0) {
 1392                 if (atomic_fetchadd_long(&uip->ui_proccnt, (long)diff) + diff > max) {
 1393                         atomic_subtract_long(&uip->ui_proccnt, (long)diff);
 1394                         return (0);
 1395                 }
 1396         } else {
 1397                 atomic_add_long(&uip->ui_proccnt, (long)diff);
 1398                 if (uip->ui_proccnt < 0)
 1399                         printf("negative proccnt for uid = %d\n", uip->ui_uid);
 1400         }
 1401         return (1);
 1402 }
 1403 
 1404 /*
 1405  * Change the total socket buffer size a user has used.
 1406  */
 1407 int
 1408 chgsbsize(uip, hiwat, to, max)
 1409         struct  uidinfo *uip;
 1410         u_int  *hiwat;
 1411         u_int   to;
 1412         rlim_t  max;
 1413 {
 1414         int diff;
 1415 
 1416         diff = to - *hiwat;
 1417         if (diff > 0) {
 1418                 if (atomic_fetchadd_long(&uip->ui_sbsize, (long)diff) + diff > max) {
 1419                         atomic_subtract_long(&uip->ui_sbsize, (long)diff);
 1420                         return (0);
 1421                 }
 1422         } else {
 1423                 atomic_add_long(&uip->ui_sbsize, (long)diff);
 1424                 if (uip->ui_sbsize < 0)
 1425                         printf("negative sbsize for uid = %d\n", uip->ui_uid);
 1426         }
 1427         *hiwat = to;
 1428         return (1);
 1429 }
 1430 
 1431 /*
 1432  * Change the count associated with number of pseudo-terminals
 1433  * a given user is using.  When 'max' is 0, don't enforce a limit
 1434  */
 1435 int
 1436 chgptscnt(uip, diff, max)
 1437         struct  uidinfo *uip;
 1438         int     diff;
 1439         rlim_t  max;
 1440 {
 1441 
 1442         /* Don't allow them to exceed max, but allow subtraction. */
 1443         if (diff > 0 && max != 0) {
 1444                 if (atomic_fetchadd_long(&uip->ui_ptscnt, (long)diff) + diff > max) {
 1445                         atomic_subtract_long(&uip->ui_ptscnt, (long)diff);
 1446                         return (0);
 1447                 }
 1448         } else {
 1449                 atomic_add_long(&uip->ui_ptscnt, (long)diff);
 1450                 if (uip->ui_ptscnt < 0)
 1451                         printf("negative ptscnt for uid = %d\n", uip->ui_uid);
 1452         }
 1453         return (1);
 1454 }

Cache object: 2d6d068195ee326f9a0f22a0ff435a4a


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.