FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_resource.c


    1 /*-
    2  * Copyright (c) 1982, 1986, 1991, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * (c) UNIX System Laboratories, Inc.
    5  * All or some portions of this file are derived from material licensed
    6  * to the University of California by American Telephone and Telegraph
    7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8  * the permission of UNIX System Laboratories, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 4. Neither the name of the University nor the names of its contributors
   19  *    may be used to endorse or promote products derived from this software
   20  *    without specific prior written permission.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  *      @(#)kern_resource.c     8.5 (Berkeley) 1/21/94
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __FBSDID("$FreeBSD$");
   39 
   40 #include "opt_compat.h"
   41 
   42 #include <sys/param.h>
   43 #include <sys/systm.h>
   44 #include <sys/sysproto.h>
   45 #include <sys/file.h>
   46 #include <sys/kernel.h>
   47 #include <sys/lock.h>
   48 #include <sys/malloc.h>
   49 #include <sys/mutex.h>
   50 #include <sys/priv.h>
   51 #include <sys/proc.h>
   52 #include <sys/refcount.h>
   53 #include <sys/racct.h>
   54 #include <sys/resourcevar.h>
   55 #include <sys/rwlock.h>
   56 #include <sys/sched.h>
   57 #include <sys/sx.h>
   58 #include <sys/syscallsubr.h>
   59 #include <sys/sysctl.h>
   60 #include <sys/sysent.h>
   61 #include <sys/time.h>
   62 #include <sys/umtx.h>
   63 
   64 #include <vm/vm.h>
   65 #include <vm/vm_param.h>
   66 #include <vm/pmap.h>
   67 #include <vm/vm_map.h>
   68 
   69 
   70 static MALLOC_DEFINE(M_PLIMIT, "plimit", "plimit structures");
   71 static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures");
   72 #define UIHASH(uid)     (&uihashtbl[(uid) & uihash])
   73 static struct rwlock uihashtbl_lock;
   74 static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
   75 static u_long uihash;           /* size of hash table - 1 */
   76 
   77 static void     calcru1(struct proc *p, struct rusage_ext *ruxp,
   78                     struct timeval *up, struct timeval *sp);
   79 static int      donice(struct thread *td, struct proc *chgp, int n);
   80 static struct uidinfo *uilookup(uid_t uid);
   81 static void     ruxagg_locked(struct rusage_ext *rux, struct thread *td);
   82 
   83 /*
   84  * Resource controls and accounting.
   85  */
   86 #ifndef _SYS_SYSPROTO_H_
   87 struct getpriority_args {
   88         int     which;
   89         int     who;
   90 };
   91 #endif
   92 int
   93 sys_getpriority(struct thread *td, struct getpriority_args *uap)
   94 {
   95         struct proc *p;
   96         struct pgrp *pg;
   97         int error, low;
   98 
   99         error = 0;
  100         low = PRIO_MAX + 1;
  101         switch (uap->which) {
  102 
  103         case PRIO_PROCESS:
  104                 if (uap->who == 0)
  105                         low = td->td_proc->p_nice;
  106                 else {
  107                         p = pfind(uap->who);
  108                         if (p == NULL)
  109                                 break;
  110                         if (p_cansee(td, p) == 0)
  111                                 low = p->p_nice;
  112                         PROC_UNLOCK(p);
  113                 }
  114                 break;
  115 
  116         case PRIO_PGRP:
  117                 sx_slock(&proctree_lock);
  118                 if (uap->who == 0) {
  119                         pg = td->td_proc->p_pgrp;
  120                         PGRP_LOCK(pg);
  121                 } else {
  122                         pg = pgfind(uap->who);
  123                         if (pg == NULL) {
  124                                 sx_sunlock(&proctree_lock);
  125                                 break;
  126                         }
  127                 }
  128                 sx_sunlock(&proctree_lock);
  129                 LIST_FOREACH(p, &pg->pg_members, p_pglist) {
  130                         PROC_LOCK(p);
  131                         if (p->p_state == PRS_NORMAL &&
  132                             p_cansee(td, p) == 0) {
  133                                 if (p->p_nice < low)
  134                                         low = p->p_nice;
  135                         }
  136                         PROC_UNLOCK(p);
  137                 }
  138                 PGRP_UNLOCK(pg);
  139                 break;
  140 
  141         case PRIO_USER:
  142                 if (uap->who == 0)
  143                         uap->who = td->td_ucred->cr_uid;
  144                 sx_slock(&allproc_lock);
  145                 FOREACH_PROC_IN_SYSTEM(p) {
  146                         PROC_LOCK(p);
  147                         if (p->p_state == PRS_NORMAL &&
  148                             p_cansee(td, p) == 0 &&
  149                             p->p_ucred->cr_uid == uap->who) {
  150                                 if (p->p_nice < low)
  151                                         low = p->p_nice;
  152                         }
  153                         PROC_UNLOCK(p);
  154                 }
  155                 sx_sunlock(&allproc_lock);
  156                 break;
  157 
  158         default:
  159                 error = EINVAL;
  160                 break;
  161         }
  162         if (low == PRIO_MAX + 1 && error == 0)
  163                 error = ESRCH;
  164         td->td_retval[0] = low;
  165         return (error);
  166 }
  167 
  168 #ifndef _SYS_SYSPROTO_H_
  169 struct setpriority_args {
  170         int     which;
  171         int     who;
  172         int     prio;
  173 };
  174 #endif
  175 int
  176 sys_setpriority(struct thread *td, struct setpriority_args *uap)
  177 {
  178         struct proc *curp, *p;
  179         struct pgrp *pg;
  180         int found = 0, error = 0;
  181 
  182         curp = td->td_proc;
  183         switch (uap->which) {
  184         case PRIO_PROCESS:
  185                 if (uap->who == 0) {
  186                         PROC_LOCK(curp);
  187                         error = donice(td, curp, uap->prio);
  188                         PROC_UNLOCK(curp);
  189                 } else {
  190                         p = pfind(uap->who);
  191                         if (p == NULL)
  192                                 break;
  193                         error = p_cansee(td, p);
  194                         if (error == 0)
  195                                 error = donice(td, p, uap->prio);
  196                         PROC_UNLOCK(p);
  197                 }
  198                 found++;
  199                 break;
  200 
  201         case PRIO_PGRP:
  202                 sx_slock(&proctree_lock);
  203                 if (uap->who == 0) {
  204                         pg = curp->p_pgrp;
  205                         PGRP_LOCK(pg);
  206                 } else {
  207                         pg = pgfind(uap->who);
  208                         if (pg == NULL) {
  209                                 sx_sunlock(&proctree_lock);
  210                                 break;
  211                         }
  212                 }
  213                 sx_sunlock(&proctree_lock);
  214                 LIST_FOREACH(p, &pg->pg_members, p_pglist) {
  215                         PROC_LOCK(p);
  216                         if (p->p_state == PRS_NORMAL &&
  217                             p_cansee(td, p) == 0) {
  218                                 error = donice(td, p, uap->prio);
  219                                 found++;
  220                         }
  221                         PROC_UNLOCK(p);
  222                 }
  223                 PGRP_UNLOCK(pg);
  224                 break;
  225 
  226         case PRIO_USER:
  227                 if (uap->who == 0)
  228                         uap->who = td->td_ucred->cr_uid;
  229                 sx_slock(&allproc_lock);
  230                 FOREACH_PROC_IN_SYSTEM(p) {
  231                         PROC_LOCK(p);
  232                         if (p->p_state == PRS_NORMAL &&
  233                             p->p_ucred->cr_uid == uap->who &&
  234                             p_cansee(td, p) == 0) {
  235                                 error = donice(td, p, uap->prio);
  236                                 found++;
  237                         }
  238                         PROC_UNLOCK(p);
  239                 }
  240                 sx_sunlock(&allproc_lock);
  241                 break;
  242 
  243         default:
  244                 error = EINVAL;
  245                 break;
  246         }
  247         if (found == 0 && error == 0)
  248                 error = ESRCH;
  249         return (error);
  250 }
  251 
  252 /*
  253  * Set "nice" for a (whole) process.
  254  */
  255 static int
  256 donice(struct thread *td, struct proc *p, int n)
  257 {
  258         int error;
  259 
  260         PROC_LOCK_ASSERT(p, MA_OWNED);
  261         if ((error = p_cansched(td, p)))
  262                 return (error);
  263         if (n > PRIO_MAX)
  264                 n = PRIO_MAX;
  265         if (n < PRIO_MIN)
  266                 n = PRIO_MIN;
  267         if (n < p->p_nice && priv_check(td, PRIV_SCHED_SETPRIORITY) != 0)
  268                 return (EACCES);
  269         sched_nice(p, n);
  270         return (0);
  271 }
  272 
  273 static int unprivileged_idprio;
  274 SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_idprio, CTLFLAG_RW,
  275     &unprivileged_idprio, 0, "Allow non-root users to set an idle priority");
  276 
  277 /*
  278  * Set realtime priority for LWP.
  279  */
  280 #ifndef _SYS_SYSPROTO_H_
  281 struct rtprio_thread_args {
  282         int             function;
  283         lwpid_t         lwpid;
  284         struct rtprio   *rtp;
  285 };
  286 #endif
  287 int
  288 sys_rtprio_thread(struct thread *td, struct rtprio_thread_args *uap)
  289 {
  290         struct proc *p;
  291         struct rtprio rtp;
  292         struct thread *td1;
  293         int cierror, error;
  294 
  295         /* Perform copyin before acquiring locks if needed. */
  296         if (uap->function == RTP_SET)
  297                 cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
  298         else
  299                 cierror = 0;
  300 
  301         if (uap->lwpid == 0 || uap->lwpid == td->td_tid) {
  302                 p = td->td_proc;
  303                 td1 = td;
  304                 PROC_LOCK(p);
  305         } else {
  306                 /* Only look up thread in current process */
  307                 td1 = tdfind(uap->lwpid, curproc->p_pid);
  308                 if (td1 == NULL)
  309                         return (ESRCH);
  310                 p = td1->td_proc;
  311         }
  312 
  313         switch (uap->function) {
  314         case RTP_LOOKUP:
  315                 if ((error = p_cansee(td, p)))
  316                         break;
  317                 pri_to_rtp(td1, &rtp);
  318                 PROC_UNLOCK(p);
  319                 return (copyout(&rtp, uap->rtp, sizeof(struct rtprio)));
  320         case RTP_SET:
  321                 if ((error = p_cansched(td, p)) || (error = cierror))
  322                         break;
  323 
  324                 /* Disallow setting rtprio in most cases if not superuser. */
  325 
  326                 /*
  327                  * Realtime priority has to be restricted for reasons which
  328                  * should be obvious.  However, for idleprio processes, there is
  329                  * a potential for system deadlock if an idleprio process gains
  330                  * a lock on a resource that other processes need (and the
  331                  * idleprio process can't run due to a CPU-bound normal
  332                  * process).  Fix me!  XXX
  333                  *
   334                  * This problem is not limited to idleprio processes.
  335                  * A user level program can obtain a file lock and hold it
  336                  * indefinitely.  Additionally, without idleprio processes it is
  337                  * still conceivable that a program with low priority will never
  338                  * get to run.  In short, allowing this feature might make it
  339                  * easier to lock a resource indefinitely, but it is not the
  340                  * only thing that makes it possible.
  341                  */
  342                 if (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_REALTIME ||
  343                     (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_IDLE &&
  344                     unprivileged_idprio == 0)) {
  345                         error = priv_check(td, PRIV_SCHED_RTPRIO);
  346                         if (error)
  347                                 break;
  348                 }
  349                 error = rtp_to_pri(&rtp, td1);
  350                 break;
  351         default:
  352                 error = EINVAL;
  353                 break;
  354         }
  355         PROC_UNLOCK(p);
  356         return (error);
  357 }
  358 
  359 /*
  360  * Set realtime priority.
  361  */
  362 #ifndef _SYS_SYSPROTO_H_
  363 struct rtprio_args {
  364         int             function;
  365         pid_t           pid;
  366         struct rtprio   *rtp;
  367 };
  368 #endif
  369 int
  370 sys_rtprio(struct thread *td, struct rtprio_args *uap)
  371 {
  372         struct proc *p;
  373         struct thread *tdp;
  374         struct rtprio rtp;
  375         int cierror, error;
  376 
  377         /* Perform copyin before acquiring locks if needed. */
  378         if (uap->function == RTP_SET)
  379                 cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
  380         else
  381                 cierror = 0;
  382 
  383         if (uap->pid == 0) {
  384                 p = td->td_proc;
  385                 PROC_LOCK(p);
  386         } else {
  387                 p = pfind(uap->pid);
  388                 if (p == NULL)
  389                         return (ESRCH);
  390         }
  391 
  392         switch (uap->function) {
  393         case RTP_LOOKUP:
  394                 if ((error = p_cansee(td, p)))
  395                         break;
  396                 /*
  397                  * Return OUR priority if no pid specified,
  398                  * or if one is, report the highest priority
  399                  * in the process.  There isn't much more you can do as
  400                  * there is only room to return a single priority.
  401                  * Note: specifying our own pid is not the same
  402                  * as leaving it zero.
  403                  */
  404                 if (uap->pid == 0) {
  405                         pri_to_rtp(td, &rtp);
  406                 } else {
  407                         struct rtprio rtp2;
  408 
  409                         rtp.type = RTP_PRIO_IDLE;
  410                         rtp.prio = RTP_PRIO_MAX;
  411                         FOREACH_THREAD_IN_PROC(p, tdp) {
  412                                 pri_to_rtp(tdp, &rtp2);
  413                                 if (rtp2.type <  rtp.type ||
  414                                     (rtp2.type == rtp.type &&
  415                                     rtp2.prio < rtp.prio)) {
  416                                         rtp.type = rtp2.type;
  417                                         rtp.prio = rtp2.prio;
  418                                 }
  419                         }
  420                 }
  421                 PROC_UNLOCK(p);
  422                 return (copyout(&rtp, uap->rtp, sizeof(struct rtprio)));
  423         case RTP_SET:
  424                 if ((error = p_cansched(td, p)) || (error = cierror))
  425                         break;
  426 
  427                 /*
  428                  * Disallow setting rtprio in most cases if not superuser.
  429                  * See the comment in sys_rtprio_thread about idprio
  430                  * threads holding a lock.
  431                  */
  432                 if (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_REALTIME ||
  433                     (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_IDLE &&
  434                     !unprivileged_idprio)) {
  435                         error = priv_check(td, PRIV_SCHED_RTPRIO);
  436                         if (error)
  437                                 break;
  438                 }
  439 
  440                 /*
  441                  * If we are setting our own priority, set just our
  442                  * thread but if we are doing another process,
  443                  * do all the threads on that process. If we
  444                  * specify our own pid we do the latter.
  445                  */
  446                 if (uap->pid == 0) {
  447                         error = rtp_to_pri(&rtp, td);
  448                 } else {
  449                         FOREACH_THREAD_IN_PROC(p, td) {
  450                                 if ((error = rtp_to_pri(&rtp, td)) != 0)
  451                                         break;
  452                         }
  453                 }
  454                 break;
  455         default:
  456                 error = EINVAL;
  457                 break;
  458         }
  459         PROC_UNLOCK(p);
  460         return (error);
  461 }
  462 
  463 int
  464 rtp_to_pri(struct rtprio *rtp, struct thread *td)
  465 {
  466         u_char  newpri, oldclass, oldpri;
  467 
  468         switch (RTP_PRIO_BASE(rtp->type)) {
  469         case RTP_PRIO_REALTIME:
  470                 if (rtp->prio > RTP_PRIO_MAX)
  471                         return (EINVAL);
  472                 newpri = PRI_MIN_REALTIME + rtp->prio;
  473                 break;
  474         case RTP_PRIO_NORMAL:
  475                 if (rtp->prio > (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE))
  476                         return (EINVAL);
  477                 newpri = PRI_MIN_TIMESHARE + rtp->prio;
  478                 break;
  479         case RTP_PRIO_IDLE:
  480                 if (rtp->prio > RTP_PRIO_MAX)
  481                         return (EINVAL);
  482                 newpri = PRI_MIN_IDLE + rtp->prio;
  483                 break;
  484         default:
  485                 return (EINVAL);
  486         }
  487 
  488         thread_lock(td);
  489         oldclass = td->td_pri_class;
  490         sched_class(td, rtp->type);     /* XXX fix */
  491         oldpri = td->td_user_pri;
  492         sched_user_prio(td, newpri);
  493         if (td->td_user_pri != oldpri && (oldclass != RTP_PRIO_NORMAL ||
  494             td->td_pri_class != RTP_PRIO_NORMAL))
  495                 sched_prio(td, td->td_user_pri);
  496         if (TD_ON_UPILOCK(td) && oldpri != newpri) {
  497                 critical_enter();
  498                 thread_unlock(td);
  499                 umtx_pi_adjust(td, oldpri);
  500                 critical_exit();
  501         } else
  502                 thread_unlock(td);
  503         return (0);
  504 }
  505 
  506 void
  507 pri_to_rtp(struct thread *td, struct rtprio *rtp)
  508 {
  509 
  510         thread_lock(td);
  511         switch (PRI_BASE(td->td_pri_class)) {
  512         case PRI_REALTIME:
  513                 rtp->prio = td->td_base_user_pri - PRI_MIN_REALTIME;
  514                 break;
  515         case PRI_TIMESHARE:
  516                 rtp->prio = td->td_base_user_pri - PRI_MIN_TIMESHARE;
  517                 break;
  518         case PRI_IDLE:
  519                 rtp->prio = td->td_base_user_pri - PRI_MIN_IDLE;
  520                 break;
  521         default:
  522                 break;
  523         }
  524         rtp->type = td->td_pri_class;
  525         thread_unlock(td);
  526 }
  527 
  528 #if defined(COMPAT_43)
  529 #ifndef _SYS_SYSPROTO_H_
  530 struct osetrlimit_args {
  531         u_int   which;
  532         struct  orlimit *rlp;
  533 };
  534 #endif
  535 int
  536 osetrlimit(struct thread *td, struct osetrlimit_args *uap)
  537 {
  538         struct orlimit olim;
  539         struct rlimit lim;
  540         int error;
  541 
  542         if ((error = copyin(uap->rlp, &olim, sizeof(struct orlimit))))
  543                 return (error);
  544         lim.rlim_cur = olim.rlim_cur;
  545         lim.rlim_max = olim.rlim_max;
  546         error = kern_setrlimit(td, uap->which, &lim);
  547         return (error);
  548 }
  549 
  550 #ifndef _SYS_SYSPROTO_H_
  551 struct ogetrlimit_args {
  552         u_int   which;
  553         struct  orlimit *rlp;
  554 };
  555 #endif
  556 int
  557 ogetrlimit(struct thread *td, struct ogetrlimit_args *uap)
  558 {
  559         struct orlimit olim;
  560         struct rlimit rl;
  561         int error;
  562 
  563         if (uap->which >= RLIM_NLIMITS)
  564                 return (EINVAL);
  565         lim_rlimit(td, uap->which, &rl);
  566 
  567         /*
  568          * XXX would be more correct to convert only RLIM_INFINITY to the
  569          * old RLIM_INFINITY and fail with EOVERFLOW for other larger
  570          * values.  Most 64->32 and 32->16 conversions, including not
  571          * unimportant ones of uids are even more broken than what we
   572          * unimportant ones of uids, are even more broken than what we
  573          * here since we have little experience with EOVERFLOW yet.
  574          * Elsewhere, getuid() can't fail...
  575          */
  576         olim.rlim_cur = rl.rlim_cur > 0x7fffffff ? 0x7fffffff : rl.rlim_cur;
  577         olim.rlim_max = rl.rlim_max > 0x7fffffff ? 0x7fffffff : rl.rlim_max;
  578         error = copyout(&olim, uap->rlp, sizeof(olim));
  579         return (error);
  580 }
  581 #endif /* COMPAT_43 */
  582 
  583 #ifndef _SYS_SYSPROTO_H_
  584 struct __setrlimit_args {
  585         u_int   which;
  586         struct  rlimit *rlp;
  587 };
  588 #endif
  589 int
  590 sys_setrlimit(struct thread *td, struct __setrlimit_args *uap)
  591 {
  592         struct rlimit alim;
  593         int error;
  594 
  595         if ((error = copyin(uap->rlp, &alim, sizeof(struct rlimit))))
  596                 return (error);
  597         error = kern_setrlimit(td, uap->which, &alim);
  598         return (error);
  599 }
  600 
  601 static void
  602 lim_cb(void *arg)
  603 {
  604         struct rlimit rlim;
  605         struct thread *td;
  606         struct proc *p;
  607 
  608         p = arg;
  609         PROC_LOCK_ASSERT(p, MA_OWNED);
  610         /*
  611          * Check if the process exceeds its cpu resource allocation.  If
  612          * it reaches the max, arrange to kill the process in ast().
  613          */
  614         if (p->p_cpulimit == RLIM_INFINITY)
  615                 return;
  616         PROC_STATLOCK(p);
  617         FOREACH_THREAD_IN_PROC(p, td) {
  618                 ruxagg(p, td);
  619         }
  620         PROC_STATUNLOCK(p);
  621         if (p->p_rux.rux_runtime > p->p_cpulimit * cpu_tickrate()) {
  622                 lim_rlimit_proc(p, RLIMIT_CPU, &rlim);
  623                 if (p->p_rux.rux_runtime >= rlim.rlim_max * cpu_tickrate()) {
  624                         killproc(p, "exceeded maximum CPU limit");
  625                 } else {
  626                         if (p->p_cpulimit < rlim.rlim_max)
  627                                 p->p_cpulimit += 5;
  628                         kern_psignal(p, SIGXCPU);
  629                 }
  630         }
  631         if ((p->p_flag & P_WEXIT) == 0)
  632                 callout_reset_sbt(&p->p_limco, SBT_1S, 0,
  633                     lim_cb, p, C_PREL(1));
  634 }
  635 
  636 int
  637 kern_setrlimit(struct thread *td, u_int which, struct rlimit *limp)
  638 {
  639 
  640         return (kern_proc_setrlimit(td, td->td_proc, which, limp));
  641 }
  642 
  643 int
  644 kern_proc_setrlimit(struct thread *td, struct proc *p, u_int which,
  645     struct rlimit *limp)
  646 {
  647         struct plimit *newlim, *oldlim;
  648         struct rlimit *alimp;
  649         struct rlimit oldssiz;
  650         int error;
  651 
  652         if (which >= RLIM_NLIMITS)
  653                 return (EINVAL);
  654 
  655         /*
  656          * Preserve historical bugs by treating negative limits as unsigned.
  657          */
  658         if (limp->rlim_cur < 0)
  659                 limp->rlim_cur = RLIM_INFINITY;
  660         if (limp->rlim_max < 0)
  661                 limp->rlim_max = RLIM_INFINITY;
  662 
  663         oldssiz.rlim_cur = 0;
  664         newlim = lim_alloc();
  665         PROC_LOCK(p);
  666         oldlim = p->p_limit;
  667         alimp = &oldlim->pl_rlimit[which];
  668         if (limp->rlim_cur > alimp->rlim_max ||
  669             limp->rlim_max > alimp->rlim_max)
  670                 if ((error = priv_check(td, PRIV_PROC_SETRLIMIT))) {
  671                         PROC_UNLOCK(p);
  672                         lim_free(newlim);
  673                         return (error);
  674                 }
  675         if (limp->rlim_cur > limp->rlim_max)
  676                 limp->rlim_cur = limp->rlim_max;
  677         lim_copy(newlim, oldlim);
  678         alimp = &newlim->pl_rlimit[which];
  679 
  680         switch (which) {
  681 
  682         case RLIMIT_CPU:
  683                 if (limp->rlim_cur != RLIM_INFINITY &&
  684                     p->p_cpulimit == RLIM_INFINITY)
  685                         callout_reset_sbt(&p->p_limco, SBT_1S, 0,
  686                             lim_cb, p, C_PREL(1));
  687                 p->p_cpulimit = limp->rlim_cur;
  688                 break;
  689         case RLIMIT_DATA:
  690                 if (limp->rlim_cur > maxdsiz)
  691                         limp->rlim_cur = maxdsiz;
  692                 if (limp->rlim_max > maxdsiz)
  693                         limp->rlim_max = maxdsiz;
  694                 break;
  695 
  696         case RLIMIT_STACK:
  697                 if (limp->rlim_cur > maxssiz)
  698                         limp->rlim_cur = maxssiz;
  699                 if (limp->rlim_max > maxssiz)
  700                         limp->rlim_max = maxssiz;
  701                 oldssiz = *alimp;
  702                 if (p->p_sysent->sv_fixlimit != NULL)
  703                         p->p_sysent->sv_fixlimit(&oldssiz,
  704                             RLIMIT_STACK);
  705                 break;
  706 
  707         case RLIMIT_NOFILE:
  708                 if (limp->rlim_cur > maxfilesperproc)
  709                         limp->rlim_cur = maxfilesperproc;
  710                 if (limp->rlim_max > maxfilesperproc)
  711                         limp->rlim_max = maxfilesperproc;
  712                 break;
  713 
  714         case RLIMIT_NPROC:
  715                 if (limp->rlim_cur > maxprocperuid)
  716                         limp->rlim_cur = maxprocperuid;
  717                 if (limp->rlim_max > maxprocperuid)
  718                         limp->rlim_max = maxprocperuid;
  719                 if (limp->rlim_cur < 1)
  720                         limp->rlim_cur = 1;
  721                 if (limp->rlim_max < 1)
  722                         limp->rlim_max = 1;
  723                 break;
  724         }
  725         if (p->p_sysent->sv_fixlimit != NULL)
  726                 p->p_sysent->sv_fixlimit(limp, which);
  727         *alimp = *limp;
  728         p->p_limit = newlim;
  729         PROC_UPDATE_COW(p);
  730         PROC_UNLOCK(p);
  731         lim_free(oldlim);
  732 
  733         if (which == RLIMIT_STACK &&
  734             /*
  735              * Skip calls from exec_new_vmspace(), done when stack is
  736              * not mapped yet.
  737              */
  738             (td != curthread || (p->p_flag & P_INEXEC) == 0)) {
  739                 /*
  740                  * Stack is allocated to the max at exec time with only
   741                  * "rlim_cur" bytes accessible.  If the stack limit is going
   742                  * up, make more accessible; if going down, make inaccessible.
  743                  */
  744                 if (limp->rlim_cur != oldssiz.rlim_cur) {
  745                         vm_offset_t addr;
  746                         vm_size_t size;
  747                         vm_prot_t prot;
  748 
  749                         if (limp->rlim_cur > oldssiz.rlim_cur) {
  750                                 prot = p->p_sysent->sv_stackprot;
  751                                 size = limp->rlim_cur - oldssiz.rlim_cur;
  752                                 addr = p->p_sysent->sv_usrstack -
  753                                     limp->rlim_cur;
  754                         } else {
  755                                 prot = VM_PROT_NONE;
  756                                 size = oldssiz.rlim_cur - limp->rlim_cur;
  757                                 addr = p->p_sysent->sv_usrstack -
  758                                     oldssiz.rlim_cur;
  759                         }
  760                         addr = trunc_page(addr);
  761                         size = round_page(size);
  762                         (void)vm_map_protect(&p->p_vmspace->vm_map,
  763                             addr, addr + size, prot, FALSE);
  764                 }
  765         }
  766 
  767         return (0);
  768 }
  769 
  770 #ifndef _SYS_SYSPROTO_H_
  771 struct __getrlimit_args {
  772         u_int   which;
  773         struct  rlimit *rlp;
  774 };
  775 #endif
  776 /* ARGSUSED */
  777 int
  778 sys_getrlimit(struct thread *td, struct __getrlimit_args *uap)
  779 {
  780         struct rlimit rlim;
  781         int error;
  782 
  783         if (uap->which >= RLIM_NLIMITS)
  784                 return (EINVAL);
  785         lim_rlimit(td, uap->which, &rlim);
  786         error = copyout(&rlim, uap->rlp, sizeof(struct rlimit));
  787         return (error);
  788 }
  789 
  790 /*
  791  * Transform the running time and tick information for children of proc p
  792  * into user and system time usage.
  793  */
  794 void
  795 calccru(struct proc *p, struct timeval *up, struct timeval *sp)
  796 {
  797 
  798         PROC_LOCK_ASSERT(p, MA_OWNED);
  799         calcru1(p, &p->p_crux, up, sp);
  800 }
  801 
  802 /*
  803  * Transform the running time and tick information in proc p into user
  804  * and system time usage.  If appropriate, include the current time slice
  805  * on this CPU.
  806  */
  807 void
  808 calcru(struct proc *p, struct timeval *up, struct timeval *sp)
  809 {
  810         struct thread *td;
  811         uint64_t runtime, u;
  812 
  813         PROC_LOCK_ASSERT(p, MA_OWNED);
  814         PROC_STATLOCK_ASSERT(p, MA_OWNED);
  815         /*
  816          * If we are getting stats for the current process, then add in the
  817          * stats that this thread has accumulated in its current time slice.
  818          * We reset the thread and CPU state as if we had performed a context
  819          * switch right here.
  820          */
  821         td = curthread;
  822         if (td->td_proc == p) {
  823                 u = cpu_ticks();
  824                 runtime = u - PCPU_GET(switchtime);
  825                 td->td_runtime += runtime;
  826                 td->td_incruntime += runtime;
  827                 PCPU_SET(switchtime, u);
  828         }
  829         /* Make sure the per-thread stats are current. */
  830         FOREACH_THREAD_IN_PROC(p, td) {
  831                 if (td->td_incruntime == 0)
  832                         continue;
  833                 ruxagg(p, td);
  834         }
  835         calcru1(p, &p->p_rux, up, sp);
  836 }
  837 
  838 /* Collect resource usage for a single thread. */
  839 void
  840 rufetchtd(struct thread *td, struct rusage *ru)
  841 {
  842         struct proc *p;
  843         uint64_t runtime, u;
  844 
  845         p = td->td_proc;
  846         PROC_STATLOCK_ASSERT(p, MA_OWNED);
  847         THREAD_LOCK_ASSERT(td, MA_OWNED);
  848         /*
  849          * If we are getting stats for the current thread, then add in the
  850          * stats that this thread has accumulated in its current time slice.
  851          * We reset the thread and CPU state as if we had performed a context
  852          * switch right here.
  853          */
  854         if (td == curthread) {
  855                 u = cpu_ticks();
  856                 runtime = u - PCPU_GET(switchtime);
  857                 td->td_runtime += runtime;
  858                 td->td_incruntime += runtime;
  859                 PCPU_SET(switchtime, u);
  860         }
  861         ruxagg(p, td);
  862         *ru = td->td_ru;
  863         calcru1(p, &td->td_rux, &ru->ru_utime, &ru->ru_stime);
  864 }
  865 
  866 /* XXX: the MI version is too slow to use: */
  867 #ifndef __HAVE_INLINE_FLSLL
  868 #define flsll(x)        (fls((x) >> 32) != 0 ? fls((x) >> 32) + 32 : fls(x))
  869 #endif
  870 
  871 static uint64_t
  872 mul64_by_fraction(uint64_t a, uint64_t b, uint64_t c)
  873 {
  874         uint64_t acc, bh, bl;
  875         int i, s, sa, sb;
  876 
  877         /*
  878          * Calculate (a * b) / c accurately enough without overflowing.  c
  879          * must be nonzero, and its top bit must be 0.  a or b must be
  880          * <= c, and the implementation is tuned for b <= c.
  881          *
  882          * The comments about times are for use in calcru1() with units of
  883          * microseconds for 'a' and stathz ticks at 128 Hz for b and c.
  884          *
  885          * Let n be the number of top zero bits in c.  Each iteration
  886          * either returns, or reduces b by right shifting it by at least n.
  887          * The number of iterations is at most 1 + 64 / n, and the error is
  888          * at most the number of iterations.
  889          *
  890          * It is very unusual to need even 2 iterations.  Previous
  891          * implementations overflowed essentially by returning early in the
  892          * first iteration, with n = 38 giving overflow at 105+ hours and
   893  * n = 32 giving overflow at 388+ days despite a more careful
  894          * calculation.  388 days is a reasonable uptime, and the calculation
  895          * needs to work for the uptime times the number of CPUs since 'a'
  896          * is per-process.
  897          */
  898         if (a >= (uint64_t)1 << 63)
  899                 return (0);             /* Unsupported arg -- can't happen. */
  900         acc = 0;
  901         for (i = 0; i < 128; i++) {
  902                 sa = flsll(a);
  903                 sb = flsll(b);
  904                 if (sa + sb <= 64)
  905                         /* Up to 105 hours on first iteration. */
  906                         return (acc + (a * b) / c);
  907                 if (a >= c) {
  908                         /*
  909                          * This reduction is based on a = q * c + r, with the
  910                          * remainder r < c.  'a' may be large to start, and
  911                          * moving bits from b into 'a' at the end of the loop
  912                          * sets the top bit of 'a', so the reduction makes
  913                          * significant progress.
  914                          */
  915                         acc += (a / c) * b;
  916                         a %= c;
  917                         sa = flsll(a);
  918                         if (sa + sb <= 64)
  919                                 /* Up to 388 days on first iteration. */
  920                                 return (acc + (a * b) / c);
  921                 }
  922 
  923                 /*
  924                  * This step writes a * b as a * ((bh << s) + bl) =
  925                  * a * (bh << s) + a * bl = (a << s) * bh + a * bl.  The 2
  926                  * additive terms are handled separately.  Splitting in
  927                  * this way is linear except for rounding errors.
  928                  *
  929                  * s = 64 - sa is the maximum such that a << s fits in 64
  930                  * bits.  Since a < c and c has at least 1 zero top bit,
  931                  * sa < 64 and s > 0.  Thus this step makes progress by
  932                  * reducing b (it increases 'a', but taking remainders on
  933                  * the next iteration completes the reduction).
  934                  *
  935                  * Finally, the choice for s is just what is needed to keep
  936                  * a * bl from overflowing, so we don't need complications
  937                  * like a recursive call mul64_by_fraction(a, bl, c) to
  938                  * handle the second additive term.
  939                  */
  940                 s = 64 - sa;
  941                 bh = b >> s;
  942                 bl = b - (bh << s);
  943                 acc += (a * bl) / c;
  944                 a <<= s;
  945                 b = bh;
  946         }
  947         return (0);             /* Algorithm failure -- can't happen. */
  948 }
  949 
  950 static void
  951 calcru1(struct proc *p, struct rusage_ext *ruxp, struct timeval *up,
  952     struct timeval *sp)
  953 {
  954         /* {user, system, interrupt, total} {ticks, usec}: */
  955         uint64_t ut, uu, st, su, it, tt, tu;
  956 
  957         ut = ruxp->rux_uticks;
  958         st = ruxp->rux_sticks;
  959         it = ruxp->rux_iticks;
  960         tt = ut + st + it;
  961         if (tt == 0) {
  962                 /* Avoid divide by zero */
  963                 st = 1;
  964                 tt = 1;
  965         }
  966         tu = cputick2usec(ruxp->rux_runtime);
  967         if ((int64_t)tu < 0) {
  968                 /* XXX: this should be an assert /phk */
  969                 printf("calcru: negative runtime of %jd usec for pid %d (%s)\n",
  970                     (intmax_t)tu, p->p_pid, p->p_comm);
  971                 tu = ruxp->rux_tu;
  972         }
  973 
  974         /* Subdivide tu.  Avoid overflow in the multiplications. */
  975         if (__predict_true(tu <= ((uint64_t)1 << 38) && tt <= (1 << 26))) {
  976                 /* Up to 76 hours when stathz is 128. */
  977                 uu = (tu * ut) / tt;
  978                 su = (tu * st) / tt;
  979         } else {
  980                 uu = mul64_by_fraction(tu, ut, tt);
  981                 su = mul64_by_fraction(tu, st, tt);
  982         }
  983 
  984         if (tu >= ruxp->rux_tu) {
  985                 /*
  986                  * The normal case, time increased.
  987                  * Enforce monotonicity of bucketed numbers.
  988                  */
  989                 if (uu < ruxp->rux_uu)
  990                         uu = ruxp->rux_uu;
  991                 if (su < ruxp->rux_su)
  992                         su = ruxp->rux_su;
  993         } else if (tu + 3 > ruxp->rux_tu || 101 * tu > 100 * ruxp->rux_tu) {
  994                 /*
  995                  * When we calibrate the cputicker, it is not uncommon to
  996                  * see the presumably fixed frequency increase slightly over
  997                  * time as a result of thermal stabilization and NTP
  998                  * discipline (of the reference clock).  We therefore ignore
   999                  * a bit of backwards slop because we expect to catch up
 1000                  * shortly.  We use a 3 microsecond limit to catch low
 1001                  * counts and a 1% limit for high counts.
 1002                  */
 1003                 uu = ruxp->rux_uu;
 1004                 su = ruxp->rux_su;
 1005                 tu = ruxp->rux_tu;
 1006         } else { /* tu < ruxp->rux_tu */
 1007                 /*
 1008                  * What happened here was likely that a laptop, which ran at
 1009                  * a reduced clock frequency at boot, kicked into high gear.
 1010                  * The wisdom of spamming this message in that case is
 1011                  * dubious, but it might also be indicative of something
  1012                  * serious, so let's keep it and hope laptops can be made
 1013                  * more truthful about their CPU speed via ACPI.
 1014                  */
 1015                 printf("calcru: runtime went backwards from %ju usec "
 1016                     "to %ju usec for pid %d (%s)\n",
 1017                     (uintmax_t)ruxp->rux_tu, (uintmax_t)tu,
 1018                     p->p_pid, p->p_comm);
 1019         }
 1020 
 1021         ruxp->rux_uu = uu;
 1022         ruxp->rux_su = su;
 1023         ruxp->rux_tu = tu;
 1024 
 1025         up->tv_sec = uu / 1000000;
 1026         up->tv_usec = uu % 1000000;
 1027         sp->tv_sec = su / 1000000;
 1028         sp->tv_usec = su % 1000000;
 1029 }
 1030 
 1031 #ifndef _SYS_SYSPROTO_H_
 1032 struct getrusage_args {
 1033         int     who;
 1034         struct  rusage *rusage;
 1035 };
 1036 #endif
 1037 int
 1038 sys_getrusage(struct thread *td, struct getrusage_args *uap)
 1039 {
 1040         struct rusage ru;
 1041         int error;
 1042 
 1043         error = kern_getrusage(td, uap->who, &ru);
 1044         if (error == 0)
 1045                 error = copyout(&ru, uap->rusage, sizeof(struct rusage));
 1046         return (error);
 1047 }
 1048 
 1049 int
 1050 kern_getrusage(struct thread *td, int who, struct rusage *rup)
 1051 {
 1052         struct proc *p;
 1053         int error;
 1054 
 1055         error = 0;
 1056         p = td->td_proc;
 1057         PROC_LOCK(p);
 1058         switch (who) {
 1059         case RUSAGE_SELF:
 1060                 rufetchcalc(p, rup, &rup->ru_utime,
 1061                     &rup->ru_stime);
 1062                 break;
 1063 
 1064         case RUSAGE_CHILDREN:
 1065                 *rup = p->p_stats->p_cru;
 1066                 calccru(p, &rup->ru_utime, &rup->ru_stime);
 1067                 break;
 1068 
 1069         case RUSAGE_THREAD:
 1070                 PROC_STATLOCK(p);
 1071                 thread_lock(td);
 1072                 rufetchtd(td, rup);
 1073                 thread_unlock(td);
 1074                 PROC_STATUNLOCK(p);
 1075                 break;
 1076 
 1077         default:
 1078                 error = EINVAL;
 1079         }
 1080         PROC_UNLOCK(p);
 1081         return (error);
 1082 }
 1083 
 1084 void
 1085 rucollect(struct rusage *ru, struct rusage *ru2)
 1086 {
 1087         long *ip, *ip2;
 1088         int i;
 1089 
 1090         if (ru->ru_maxrss < ru2->ru_maxrss)
 1091                 ru->ru_maxrss = ru2->ru_maxrss;
 1092         ip = &ru->ru_first;
 1093         ip2 = &ru2->ru_first;
 1094         for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
 1095                 *ip++ += *ip2++;
 1096 }
 1097 
 1098 void
 1099 ruadd(struct rusage *ru, struct rusage_ext *rux, struct rusage *ru2,
 1100     struct rusage_ext *rux2)
 1101 {
 1102 
 1103         rux->rux_runtime += rux2->rux_runtime;
 1104         rux->rux_uticks += rux2->rux_uticks;
 1105         rux->rux_sticks += rux2->rux_sticks;
 1106         rux->rux_iticks += rux2->rux_iticks;
 1107         rux->rux_uu += rux2->rux_uu;
 1108         rux->rux_su += rux2->rux_su;
 1109         rux->rux_tu += rux2->rux_tu;
 1110         rucollect(ru, ru2);
 1111 }
 1112 
 1113 /*
 1114  * Aggregate tick counts into the proc's rusage_ext.
 1115  */
 1116 static void
 1117 ruxagg_locked(struct rusage_ext *rux, struct thread *td)
 1118 {
 1119 
 1120         THREAD_LOCK_ASSERT(td, MA_OWNED);
 1121         PROC_STATLOCK_ASSERT(td->td_proc, MA_OWNED);
 1122         rux->rux_runtime += td->td_incruntime;
 1123         rux->rux_uticks += td->td_uticks;
 1124         rux->rux_sticks += td->td_sticks;
 1125         rux->rux_iticks += td->td_iticks;
 1126 }
 1127 
 1128 void
 1129 ruxagg(struct proc *p, struct thread *td)
 1130 {
 1131 
 1132         thread_lock(td);
 1133         ruxagg_locked(&p->p_rux, td);
 1134         ruxagg_locked(&td->td_rux, td);
 1135         td->td_incruntime = 0;
 1136         td->td_uticks = 0;
 1137         td->td_iticks = 0;
 1138         td->td_sticks = 0;
 1139         thread_unlock(td);
 1140 }
 1141 
 1142 /*
 1143  * Update the rusage_ext structure and fetch a valid aggregate rusage
 1144  * for proc p if storage for one is supplied.
 1145  */
 1146 void
 1147 rufetch(struct proc *p, struct rusage *ru)
 1148 {
 1149         struct thread *td;
 1150 
 1151         PROC_STATLOCK_ASSERT(p, MA_OWNED);
 1152 
 1153         *ru = p->p_ru;
 1154         if (p->p_numthreads > 0)  {
 1155                 FOREACH_THREAD_IN_PROC(p, td) {
 1156                         ruxagg(p, td);
 1157                         rucollect(ru, &td->td_ru);
 1158                 }
 1159         }
 1160 }
 1161 
 1162 /*
 1163  * Atomically perform a rufetch and a calcru together.
  1164  * Consumers can safely assume that calcru() is executed only after
  1165  * rufetch() has completed.
 1166  */
 1167 void
 1168 rufetchcalc(struct proc *p, struct rusage *ru, struct timeval *up,
 1169     struct timeval *sp)
 1170 {
 1171 
 1172         PROC_STATLOCK(p);
 1173         rufetch(p, ru);
 1174         calcru(p, up, sp);
 1175         PROC_STATUNLOCK(p);
 1176 }
 1177 
 1178 /*
 1179  * Allocate a new resource limits structure and initialize its
 1180  * reference count and mutex pointer.
 1181  */
 1182 struct plimit *
 1183 lim_alloc()
 1184 {
 1185         struct plimit *limp;
 1186 
 1187         limp = malloc(sizeof(struct plimit), M_PLIMIT, M_WAITOK);
 1188         refcount_init(&limp->pl_refcnt, 1);
 1189         return (limp);
 1190 }
 1191 
 1192 struct plimit *
 1193 lim_hold(struct plimit *limp)
 1194 {
 1195 
 1196         refcount_acquire(&limp->pl_refcnt);
 1197         return (limp);
 1198 }
 1199 
 1200 void
 1201 lim_fork(struct proc *p1, struct proc *p2)
 1202 {
 1203 
 1204         PROC_LOCK_ASSERT(p1, MA_OWNED);
 1205         PROC_LOCK_ASSERT(p2, MA_OWNED);
 1206 
 1207         p2->p_limit = lim_hold(p1->p_limit);
 1208         callout_init_mtx(&p2->p_limco, &p2->p_mtx, 0);
 1209         if (p1->p_cpulimit != RLIM_INFINITY)
 1210                 callout_reset_sbt(&p2->p_limco, SBT_1S, 0,
 1211                     lim_cb, p2, C_PREL(1));
 1212 }
 1213 
 1214 void
 1215 lim_free(struct plimit *limp)
 1216 {
 1217 
 1218         if (refcount_release(&limp->pl_refcnt))
 1219                 free((void *)limp, M_PLIMIT);
 1220 }
 1221 
 1222 /*
 1223  * Make a copy of the plimit structure.
 1224  * We share these structures copy-on-write after fork.
 1225  */
 1226 void
 1227 lim_copy(struct plimit *dst, struct plimit *src)
 1228 {
 1229 
 1230         KASSERT(dst->pl_refcnt <= 1, ("lim_copy to shared limit"));
 1231         bcopy(src->pl_rlimit, dst->pl_rlimit, sizeof(src->pl_rlimit));
 1232 }
 1233 
 1234 /*
 1235  * Return the hard limit for a particular system resource.  The
 1236  * which parameter specifies the index into the rlimit array.
 1237  */
 1238 rlim_t
 1239 lim_max(struct thread *td, int which)
 1240 {
 1241         struct rlimit rl;
 1242 
 1243         lim_rlimit(td, which, &rl);
 1244         return (rl.rlim_max);
 1245 }
 1246 
 1247 rlim_t
 1248 lim_max_proc(struct proc *p, int which)
 1249 {
 1250         struct rlimit rl;
 1251 
 1252         lim_rlimit_proc(p, which, &rl);
 1253         return (rl.rlim_max);
 1254 }
 1255 
 1256 /*
 1257  * Return the current (soft) limit for a particular system resource.
  1258  * The which parameter specifies the index into the rlimit array.
 1259  */
 1260 rlim_t
 1261 lim_cur(struct thread *td, int which)
 1262 {
 1263         struct rlimit rl;
 1264 
 1265         lim_rlimit(td, which, &rl);
 1266         return (rl.rlim_cur);
 1267 }
 1268 
 1269 rlim_t
 1270 lim_cur_proc(struct proc *p, int which)
 1271 {
 1272         struct rlimit rl;
 1273 
 1274         lim_rlimit_proc(p, which, &rl);
 1275         return (rl.rlim_cur);
 1276 }
 1277 
 1278 /*
 1279  * Return a copy of the entire rlimit structure for the system limit
 1280  * specified by 'which' in the rlimit structure pointed to by 'rlp'.
 1281  */
 1282 void
 1283 lim_rlimit(struct thread *td, int which, struct rlimit *rlp)
 1284 {
 1285         struct proc *p = td->td_proc;
 1286 
 1287         MPASS(td == curthread);
 1288         KASSERT(which >= 0 && which < RLIM_NLIMITS,
 1289             ("request for invalid resource limit"));
 1290         *rlp = td->td_limit->pl_rlimit[which];
 1291         if (p->p_sysent->sv_fixlimit != NULL)
 1292                 p->p_sysent->sv_fixlimit(rlp, which);
 1293 }
 1294 
 1295 void
 1296 lim_rlimit_proc(struct proc *p, int which, struct rlimit *rlp)
 1297 {
 1298 
 1299         PROC_LOCK_ASSERT(p, MA_OWNED);
 1300         KASSERT(which >= 0 && which < RLIM_NLIMITS,
 1301             ("request for invalid resource limit"));
 1302         *rlp = p->p_limit->pl_rlimit[which];
 1303         if (p->p_sysent->sv_fixlimit != NULL)
 1304                 p->p_sysent->sv_fixlimit(rlp, which);
 1305 }
 1306 
 1307 void
 1308 uihashinit()
 1309 {
 1310 
 1311         uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
 1312         rw_init(&uihashtbl_lock, "uidinfo hash");
 1313 }
 1314 
 1315 /*
 1316  * Look up a uidinfo struct for the parameter uid.
 1317  * uihashtbl_lock must be locked.
 1318  * Increase refcount on uidinfo struct returned.
 1319  */
 1320 static struct uidinfo *
 1321 uilookup(uid_t uid)
 1322 {
 1323         struct uihashhead *uipp;
 1324         struct uidinfo *uip;
 1325 
 1326         rw_assert(&uihashtbl_lock, RA_LOCKED);
 1327         uipp = UIHASH(uid);
 1328         LIST_FOREACH(uip, uipp, ui_hash)
 1329                 if (uip->ui_uid == uid) {
 1330                         uihold(uip);
 1331                         break;
 1332                 }
 1333 
 1334         return (uip);
 1335 }
 1336 
 1337 /*
 1338  * Find or allocate a struct uidinfo for a particular uid.
 1339  * Returns with uidinfo struct referenced.
 1340  * uifree() should be called on a struct uidinfo when released.
 1341  */
 1342 struct uidinfo *
 1343 uifind(uid_t uid)
 1344 {
 1345         struct uidinfo *new_uip, *uip;
 1346 
 1347         rw_rlock(&uihashtbl_lock);
 1348         uip = uilookup(uid);
 1349         rw_runlock(&uihashtbl_lock);
 1350         if (uip != NULL)
 1351                 return (uip);
 1352 
 1353         new_uip = malloc(sizeof(*new_uip), M_UIDINFO, M_WAITOK | M_ZERO);
 1354         racct_create(&new_uip->ui_racct);
 1355         refcount_init(&new_uip->ui_ref, 1);
 1356         new_uip->ui_uid = uid;
 1357         mtx_init(&new_uip->ui_vmsize_mtx, "ui_vmsize", NULL, MTX_DEF);
 1358 
 1359         rw_wlock(&uihashtbl_lock);
 1360         /*
 1361          * There's a chance someone created our uidinfo while we
 1362          * were in malloc and not holding the lock, so we have to
 1363          * make sure we don't insert a duplicate uidinfo.
 1364          */
 1365         if ((uip = uilookup(uid)) == NULL) {
 1366                 LIST_INSERT_HEAD(UIHASH(uid), new_uip, ui_hash);
 1367                 rw_wunlock(&uihashtbl_lock);
 1368                 uip = new_uip;
 1369         } else {
 1370                 rw_wunlock(&uihashtbl_lock);
 1371                 racct_destroy(&new_uip->ui_racct);
 1372                 mtx_destroy(&new_uip->ui_vmsize_mtx);
 1373                 free(new_uip, M_UIDINFO);
 1374         }
 1375         return (uip);
 1376 }
 1377 
 1378 /*
 1379  * Place another refcount on a uidinfo struct.
 1380  */
 1381 void
 1382 uihold(struct uidinfo *uip)
 1383 {
 1384 
 1385         refcount_acquire(&uip->ui_ref);
 1386 }
 1387 
 1388 /*-
 1389  * Since uidinfo structs have a long lifetime, we use an
 1390  * opportunistic refcounting scheme to avoid locking the lookup hash
 1391  * for each release.
 1392  *
 1393  * If the refcount hits 0, we need to free the structure,
 1394  * which means we need to lock the hash.
 1395  * Optimal case:
 1396  *   After locking the struct and lowering the refcount, if we find
 1397  *   that we don't need to free, simply unlock and return.
 1398  * Suboptimal case:
 1399  *   If refcount lowering results in need to free, bump the count
 1400  *   back up, lose the lock and acquire the locks in the proper
 1401  *   order to try again.
 1402  */
 1403 void
 1404 uifree(struct uidinfo *uip)
 1405 {
 1406         int old;
 1407 
 1408         /* Prepare for optimal case. */
 1409         old = uip->ui_ref;
 1410         if (old > 1 && atomic_cmpset_int(&uip->ui_ref, old, old - 1))
 1411                 return;
 1412 
 1413         /* Prepare for suboptimal case. */
 1414         rw_wlock(&uihashtbl_lock);
 1415         if (refcount_release(&uip->ui_ref) == 0) {
 1416                 rw_wunlock(&uihashtbl_lock);
 1417                 return;
 1418         }
 1419 
 1420         racct_destroy(&uip->ui_racct);
 1421         LIST_REMOVE(uip, ui_hash);
 1422         rw_wunlock(&uihashtbl_lock);
 1423 
 1424         if (uip->ui_sbsize != 0)
 1425                 printf("freeing uidinfo: uid = %d, sbsize = %ld\n",
 1426                     uip->ui_uid, uip->ui_sbsize);
 1427         if (uip->ui_proccnt != 0)
 1428                 printf("freeing uidinfo: uid = %d, proccnt = %ld\n",
 1429                     uip->ui_uid, uip->ui_proccnt);
 1430         if (uip->ui_vmsize != 0)
 1431                 printf("freeing uidinfo: uid = %d, swapuse = %lld\n",
 1432                     uip->ui_uid, (unsigned long long)uip->ui_vmsize);
 1433         mtx_destroy(&uip->ui_vmsize_mtx);
 1434         free(uip, M_UIDINFO);
 1435 }
 1436 
 1437 #ifdef RACCT
 1438 void
 1439 ui_racct_foreach(void (*callback)(struct racct *racct,
 1440     void *arg2, void *arg3), void (*pre)(void), void (*post)(void),
 1441     void *arg2, void *arg3)
 1442 {
 1443         struct uidinfo *uip;
 1444         struct uihashhead *uih;
 1445 
 1446         rw_rlock(&uihashtbl_lock);
 1447         if (pre != NULL)
 1448                 (pre)();
 1449         for (uih = &uihashtbl[uihash]; uih >= uihashtbl; uih--) {
 1450                 LIST_FOREACH(uip, uih, ui_hash) {
 1451                         (callback)(uip->ui_racct, arg2, arg3);
 1452                 }
 1453         }
 1454         if (post != NULL)
 1455                 (post)();
 1456         rw_runlock(&uihashtbl_lock);
 1457 }
 1458 #endif
 1459 
 1460 static inline int
 1461 chglimit(struct uidinfo *uip, long *limit, int diff, rlim_t max, const char *name)
 1462 {
 1463 
 1464         /* Don't allow them to exceed max, but allow subtraction. */
 1465         if (diff > 0 && max != 0) {
 1466                 if (atomic_fetchadd_long(limit, (long)diff) + diff > max) {
 1467                         atomic_subtract_long(limit, (long)diff);
 1468                         return (0);
 1469                 }
 1470         } else {
 1471                 atomic_add_long(limit, (long)diff);
 1472                 if (*limit < 0)
 1473                         printf("negative %s for uid = %d\n", name, uip->ui_uid);
 1474         }
 1475         return (1);
 1476 }
 1477 
 1478 /*
  1479  * Change the count associated with the number of processes a given
  1480  * user is using.  When 'max' is 0, don't enforce a limit.
 1481  */
 1482 int
 1483 chgproccnt(struct uidinfo *uip, int diff, rlim_t max)
 1484 {
 1485 
 1486         return (chglimit(uip, &uip->ui_proccnt, diff, max, "proccnt"));
 1487 }
 1488 
 1489 /*
 1490  * Change the total socket buffer size a user has used.
 1491  */
 1492 int
 1493 chgsbsize(struct uidinfo *uip, u_int *hiwat, u_int to, rlim_t max)
 1494 {
 1495         int diff, rv;
 1496 
 1497         diff = to - *hiwat;
 1498         if (diff > 0 && max == 0) {
 1499                 rv = 0;
 1500         } else {
 1501                 rv = chglimit(uip, &uip->ui_sbsize, diff, max, "sbsize");
 1502                 if (rv != 0)
 1503                         *hiwat = to;
 1504         }
 1505         return (rv);
 1506 }
 1507 
 1508 /*
  1509  * Change the count associated with the number of pseudo-terminals a
  1510  * given user is using.  When 'max' is 0, don't enforce a limit.
 1511  */
 1512 int
 1513 chgptscnt(struct uidinfo *uip, int diff, rlim_t max)
 1514 {
 1515 
 1516         return (chglimit(uip, &uip->ui_ptscnt, diff, max, "ptscnt"));
 1517 }
 1518 
 1519 int
 1520 chgkqcnt(struct uidinfo *uip, int diff, rlim_t max)
 1521 {
 1522 
 1523         return (chglimit(uip, &uip->ui_kqcnt, diff, max, "kqcnt"));
 1524 }
 1525 
 1526 int
 1527 chgumtxcnt(struct uidinfo *uip, int diff, rlim_t max)
 1528 {
 1529 
 1530         return (chglimit(uip, &uip->ui_umtxcnt, diff, max, "umtxcnt"));
 1531 }
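
The file above implements the kernel side of getpriority(2)/setpriority(2),
rtprio(2), getrlimit(2)/setrlimit(2) and getrusage(2), together with the
per-uid accounting helpers (chgproccnt() and friends).  As a minimal
illustrative sketch (not part of kern_resource.c), a userland program
exercising a few of these interfaces could look like the following; it
assumes only the standard <sys/resource.h> API and keeps error handling
to a minimum.

    #include <sys/time.h>
    #include <sys/resource.h>

    #include <errno.h>
    #include <stdio.h>

    int
    main(void)
    {
            struct rlimit rl;
            int nice;

            /* getpriority(2) may legitimately return -1, so check errno. */
            errno = 0;
            nice = getpriority(PRIO_PROCESS, 0);
            if (nice == -1 && errno != 0) {
                    perror("getpriority");
                    return (1);
            }
            printf("current nice value: %d\n", nice);

            /* Raising the nice value (lowering priority) needs no privilege. */
            if (setpriority(PRIO_PROCESS, 0, nice + 1) == -1)
                    perror("setpriority");

            /* Lower the soft open-file limit; the hard limit is left alone. */
            if (getrlimit(RLIMIT_NOFILE, &rl) == 0) {
                    if (rl.rlim_cur > 256)
                            rl.rlim_cur = 256;
                    if (setrlimit(RLIMIT_NOFILE, &rl) == -1)
                            perror("setrlimit");
            }
            return (0);
    }

As in sys_getpriority() and sys_setpriority() above, a who argument of 0
refers to the calling process, process group, or user.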

This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.