FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_resource.c


    1 /*-
    2  * SPDX-License-Identifier: BSD-3-Clause
    3  *
    4  * Copyright (c) 1982, 1986, 1991, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  * (c) UNIX System Laboratories, Inc.
    7  * All or some portions of this file are derived from material licensed
    8  * to the University of California by American Telephone and Telegraph
    9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   10  * the permission of UNIX System Laboratories, Inc.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice, this list of conditions and the following disclaimer.
   17  * 2. Redistributions in binary form must reproduce the above copyright
   18  *    notice, this list of conditions and the following disclaimer in the
   19  *    documentation and/or other materials provided with the distribution.
   20  * 3. Neither the name of the University nor the names of its contributors
   21  *    may be used to endorse or promote products derived from this software
   22  *    without specific prior written permission.
   23  *
   24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   34  * SUCH DAMAGE.
   35  *
   36  *      @(#)kern_resource.c     8.5 (Berkeley) 1/21/94
   37  */
   38 
   39 #include <sys/cdefs.h>
   40 __FBSDID("$FreeBSD$");
   41 
   42 #include <sys/param.h>
   43 #include <sys/systm.h>
   44 #include <sys/sysproto.h>
   45 #include <sys/file.h>
   46 #include <sys/kernel.h>
   47 #include <sys/lock.h>
   48 #include <sys/malloc.h>
   49 #include <sys/mutex.h>
   50 #include <sys/priv.h>
   51 #include <sys/proc.h>
   52 #include <sys/refcount.h>
   53 #include <sys/racct.h>
   54 #include <sys/resourcevar.h>
   55 #include <sys/rwlock.h>
   56 #include <sys/sched.h>
   57 #include <sys/sx.h>
   58 #include <sys/syscallsubr.h>
   59 #include <sys/sysctl.h>
   60 #include <sys/sysent.h>
   61 #include <sys/time.h>
   62 #include <sys/umtx.h>
   63 
   64 #include <vm/vm.h>
   65 #include <vm/vm_param.h>
   66 #include <vm/pmap.h>
   67 #include <vm/vm_map.h>
   68 
   69 static MALLOC_DEFINE(M_PLIMIT, "plimit", "plimit structures");
   70 static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures");
   71 #define UIHASH(uid)     (&uihashtbl[(uid) & uihash])
   72 static struct rwlock uihashtbl_lock;
   73 static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
   74 static u_long uihash;           /* size of hash table - 1 */
   75 
   76 static void     calcru1(struct proc *p, struct rusage_ext *ruxp,
   77                     struct timeval *up, struct timeval *sp);
   78 static int      donice(struct thread *td, struct proc *chgp, int n);
   79 static struct uidinfo *uilookup(uid_t uid);
   80 static void     ruxagg_ext_locked(struct rusage_ext *rux, struct thread *td);
   81 
   82 /*
   83  * Resource controls and accounting.
   84  */
   85 #ifndef _SYS_SYSPROTO_H_
   86 struct getpriority_args {
   87         int     which;
   88         int     who;
   89 };
   90 #endif
   91 int
   92 sys_getpriority(struct thread *td, struct getpriority_args *uap)
   93 {
   94 
   95         return (kern_getpriority(td, uap->which, uap->who));
   96 }
   97 
   98 int
   99 kern_getpriority(struct thread *td, int which, int who)
  100 {
  101         struct proc *p;
  102         struct pgrp *pg;
  103         int error, low;
  104 
  105         error = 0;
  106         low = PRIO_MAX + 1;
  107         switch (which) {
  108         case PRIO_PROCESS:
  109                 if (who == 0)
  110                         low = td->td_proc->p_nice;
  111                 else {
  112                         p = pfind(who);
  113                         if (p == NULL)
  114                                 break;
  115                         if (p_cansee(td, p) == 0)
  116                                 low = p->p_nice;
  117                         PROC_UNLOCK(p);
  118                 }
  119                 break;
  120 
  121         case PRIO_PGRP:
  122                 sx_slock(&proctree_lock);
  123                 if (who == 0) {
  124                         pg = td->td_proc->p_pgrp;
  125                         PGRP_LOCK(pg);
  126                 } else {
  127                         pg = pgfind(who);
  128                         if (pg == NULL) {
  129                                 sx_sunlock(&proctree_lock);
  130                                 break;
  131                         }
  132                 }
  133                 sx_sunlock(&proctree_lock);
  134                 LIST_FOREACH(p, &pg->pg_members, p_pglist) {
  135                         PROC_LOCK(p);
  136                         if (p->p_state == PRS_NORMAL &&
  137                             p_cansee(td, p) == 0) {
  138                                 if (p->p_nice < low)
  139                                         low = p->p_nice;
  140                         }
  141                         PROC_UNLOCK(p);
  142                 }
  143                 PGRP_UNLOCK(pg);
  144                 break;
  145 
  146         case PRIO_USER:
  147                 if (who == 0)
  148                         who = td->td_ucred->cr_uid;
  149                 sx_slock(&allproc_lock);
  150                 FOREACH_PROC_IN_SYSTEM(p) {
  151                         PROC_LOCK(p);
  152                         if (p->p_state == PRS_NORMAL &&
  153                             p_cansee(td, p) == 0 &&
  154                             p->p_ucred->cr_uid == who) {
  155                                 if (p->p_nice < low)
  156                                         low = p->p_nice;
  157                         }
  158                         PROC_UNLOCK(p);
  159                 }
  160                 sx_sunlock(&allproc_lock);
  161                 break;
  162 
  163         default:
  164                 error = EINVAL;
  165                 break;
  166         }
  167         if (low == PRIO_MAX + 1 && error == 0)
  168                 error = ESRCH;
  169         td->td_retval[0] = low;
  170         return (error);
  171 }
  172 
  173 #ifndef _SYS_SYSPROTO_H_
  174 struct setpriority_args {
  175         int     which;
  176         int     who;
  177         int     prio;
  178 };
  179 #endif
  180 int
  181 sys_setpriority(struct thread *td, struct setpriority_args *uap)
  182 {
  183 
  184         return (kern_setpriority(td, uap->which, uap->who, uap->prio));
  185 }
  186 
  187 int
  188 kern_setpriority(struct thread *td, int which, int who, int prio)
  189 {
  190         struct proc *curp, *p;
  191         struct pgrp *pg;
  192         int found = 0, error = 0;
  193 
  194         curp = td->td_proc;
  195         switch (which) {
  196         case PRIO_PROCESS:
  197                 if (who == 0) {
  198                         PROC_LOCK(curp);
  199                         error = donice(td, curp, prio);
  200                         PROC_UNLOCK(curp);
  201                 } else {
  202                         p = pfind(who);
  203                         if (p == NULL)
  204                                 break;
  205                         error = p_cansee(td, p);
  206                         if (error == 0)
  207                                 error = donice(td, p, prio);
  208                         PROC_UNLOCK(p);
  209                 }
  210                 found++;
  211                 break;
  212 
  213         case PRIO_PGRP:
  214                 sx_slock(&proctree_lock);
  215                 if (who == 0) {
  216                         pg = curp->p_pgrp;
  217                         PGRP_LOCK(pg);
  218                 } else {
  219                         pg = pgfind(who);
  220                         if (pg == NULL) {
  221                                 sx_sunlock(&proctree_lock);
  222                                 break;
  223                         }
  224                 }
  225                 sx_sunlock(&proctree_lock);
  226                 LIST_FOREACH(p, &pg->pg_members, p_pglist) {
  227                         PROC_LOCK(p);
  228                         if (p->p_state == PRS_NORMAL &&
  229                             p_cansee(td, p) == 0) {
  230                                 error = donice(td, p, prio);
  231                                 found++;
  232                         }
  233                         PROC_UNLOCK(p);
  234                 }
  235                 PGRP_UNLOCK(pg);
  236                 break;
  237 
  238         case PRIO_USER:
  239                 if (who == 0)
  240                         who = td->td_ucred->cr_uid;
  241                 sx_slock(&allproc_lock);
  242                 FOREACH_PROC_IN_SYSTEM(p) {
  243                         PROC_LOCK(p);
  244                         if (p->p_state == PRS_NORMAL &&
  245                             p->p_ucred->cr_uid == who &&
  246                             p_cansee(td, p) == 0) {
  247                                 error = donice(td, p, prio);
  248                                 found++;
  249                         }
  250                         PROC_UNLOCK(p);
  251                 }
  252                 sx_sunlock(&allproc_lock);
  253                 break;
  254 
  255         default:
  256                 error = EINVAL;
  257                 break;
  258         }
  259         if (found == 0 && error == 0)
  260                 error = ESRCH;
  261         return (error);
  262 }
  263 
  264 /*
  265  * Set "nice" for a (whole) process.
  266  */
  267 static int
  268 donice(struct thread *td, struct proc *p, int n)
  269 {
  270         int error;
  271 
  272         PROC_LOCK_ASSERT(p, MA_OWNED);
  273         if ((error = p_cansched(td, p)))
  274                 return (error);
  275         if (n > PRIO_MAX)
  276                 n = PRIO_MAX;
  277         if (n < PRIO_MIN)
  278                 n = PRIO_MIN;
  279         if (n < p->p_nice && priv_check(td, PRIV_SCHED_SETPRIORITY) != 0)
  280                 return (EACCES);
  281         sched_nice(p, n);
  282         return (0);
  283 }
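
/*
 * For illustration (assuming the usual PRIO_MIN..PRIO_MAX range of
 * -20..20): with p_nice == 0, an unprivileged donice(td, p, 10) passes
 * the priv_check() test because it does not lower the nice value and
 * ends in sched_nice(p, 10).  The same caller asking for -5 fails with
 * EACCES, and a request of 99 is first clamped to 20.
 */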
  284 
  285 static int unprivileged_idprio;
  286 SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_idprio, CTLFLAG_RW,
  287     &unprivileged_idprio, 0, "Allow non-root users to set an idle priority");
  288 
  289 /*
  290  * Set realtime priority for LWP.
  291  */
  292 #ifndef _SYS_SYSPROTO_H_
  293 struct rtprio_thread_args {
  294         int             function;
  295         lwpid_t         lwpid;
  296         struct rtprio   *rtp;
  297 };
  298 #endif
  299 int
  300 sys_rtprio_thread(struct thread *td, struct rtprio_thread_args *uap)
  301 {
  302         struct proc *p;
  303         struct rtprio rtp;
  304         struct thread *td1;
  305         int cierror, error;
  306 
  307         /* Perform copyin before acquiring locks if needed. */
  308         if (uap->function == RTP_SET)
  309                 cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
  310         else
  311                 cierror = 0;
  312 
  313         if (uap->lwpid == 0 || uap->lwpid == td->td_tid) {
  314                 p = td->td_proc;
  315                 td1 = td;
  316                 PROC_LOCK(p);
  317         } else {
  318                 td1 = tdfind(uap->lwpid, -1);
  319                 if (td1 == NULL)
  320                         return (ESRCH);
  321                 p = td1->td_proc;
  322         }
  323 
  324         switch (uap->function) {
  325         case RTP_LOOKUP:
  326                 if ((error = p_cansee(td, p)))
  327                         break;
  328                 pri_to_rtp(td1, &rtp);
  329                 PROC_UNLOCK(p);
  330                 return (copyout(&rtp, uap->rtp, sizeof(struct rtprio)));
  331         case RTP_SET:
  332                 if ((error = p_cansched(td, p)) || (error = cierror))
  333                         break;
  334 
  335                 /* Disallow setting rtprio in most cases if not superuser. */
  336 
  337                 /*
  338                  * Realtime priority has to be restricted for reasons which
  339                  * should be obvious.  However, for idleprio processes, there is
  340                  * a potential for system deadlock if an idleprio process gains
  341                  * a lock on a resource that other processes need (and the
  342                  * idleprio process can't run due to a CPU-bound normal
  343                  * process).  Fix me!  XXX
  344                  *
   345  * This problem is not only related to idleprio processes.
  346                  * A user level program can obtain a file lock and hold it
  347                  * indefinitely.  Additionally, without idleprio processes it is
  348                  * still conceivable that a program with low priority will never
  349                  * get to run.  In short, allowing this feature might make it
  350                  * easier to lock a resource indefinitely, but it is not the
  351                  * only thing that makes it possible.
  352                  */
  353                 if (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_REALTIME ||
  354                     (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_IDLE &&
  355                     unprivileged_idprio == 0)) {
  356                         error = priv_check(td, PRIV_SCHED_RTPRIO);
  357                         if (error)
  358                                 break;
  359                 }
  360                 error = rtp_to_pri(&rtp, td1);
  361                 break;
  362         default:
  363                 error = EINVAL;
  364                 break;
  365         }
  366         PROC_UNLOCK(p);
  367         return (error);
  368 }
  369 
  370 /*
  371  * Set realtime priority.
  372  */
  373 #ifndef _SYS_SYSPROTO_H_
  374 struct rtprio_args {
  375         int             function;
  376         pid_t           pid;
  377         struct rtprio   *rtp;
  378 };
  379 #endif
  380 int
  381 sys_rtprio(struct thread *td, struct rtprio_args *uap)
  382 {
  383         struct proc *p;
  384         struct thread *tdp;
  385         struct rtprio rtp;
  386         int cierror, error;
  387 
  388         /* Perform copyin before acquiring locks if needed. */
  389         if (uap->function == RTP_SET)
  390                 cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
  391         else
  392                 cierror = 0;
  393 
  394         if (uap->pid == 0) {
  395                 p = td->td_proc;
  396                 PROC_LOCK(p);
  397         } else {
  398                 p = pfind(uap->pid);
  399                 if (p == NULL)
  400                         return (ESRCH);
  401         }
  402 
  403         switch (uap->function) {
  404         case RTP_LOOKUP:
  405                 if ((error = p_cansee(td, p)))
  406                         break;
  407                 /*
  408                  * Return OUR priority if no pid specified,
  409                  * or if one is, report the highest priority
  410                  * in the process.  There isn't much more you can do as
  411                  * there is only room to return a single priority.
  412                  * Note: specifying our own pid is not the same
  413                  * as leaving it zero.
  414                  */
  415                 if (uap->pid == 0) {
  416                         pri_to_rtp(td, &rtp);
  417                 } else {
  418                         struct rtprio rtp2;
  419 
  420                         rtp.type = RTP_PRIO_IDLE;
  421                         rtp.prio = RTP_PRIO_MAX;
  422                         FOREACH_THREAD_IN_PROC(p, tdp) {
  423                                 pri_to_rtp(tdp, &rtp2);
  424                                 if (rtp2.type <  rtp.type ||
  425                                     (rtp2.type == rtp.type &&
  426                                     rtp2.prio < rtp.prio)) {
  427                                         rtp.type = rtp2.type;
  428                                         rtp.prio = rtp2.prio;
  429                                 }
  430                         }
  431                 }
  432                 PROC_UNLOCK(p);
  433                 return (copyout(&rtp, uap->rtp, sizeof(struct rtprio)));
  434         case RTP_SET:
  435                 if ((error = p_cansched(td, p)) || (error = cierror))
  436                         break;
  437 
  438                 /*
  439                  * Disallow setting rtprio in most cases if not superuser.
  440                  * See the comment in sys_rtprio_thread about idprio
  441                  * threads holding a lock.
  442                  */
  443                 if (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_REALTIME ||
  444                     (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_IDLE &&
  445                     !unprivileged_idprio)) {
  446                         error = priv_check(td, PRIV_SCHED_RTPRIO);
  447                         if (error)
  448                                 break;
  449                 }
  450 
  451                 /*
  452                  * If we are setting our own priority, set just our
   453  * thread, but if we are doing another process,
  454                  * do all the threads on that process. If we
  455                  * specify our own pid we do the latter.
  456                  */
  457                 if (uap->pid == 0) {
  458                         error = rtp_to_pri(&rtp, td);
  459                 } else {
  460                         FOREACH_THREAD_IN_PROC(p, td) {
  461                                 if ((error = rtp_to_pri(&rtp, td)) != 0)
  462                                         break;
  463                         }
  464                 }
  465                 break;
  466         default:
  467                 error = EINVAL;
  468                 break;
  469         }
  470         PROC_UNLOCK(p);
  471         return (error);
  472 }
  473 
  474 int
  475 rtp_to_pri(struct rtprio *rtp, struct thread *td)
  476 {
  477         u_char  newpri, oldclass, oldpri;
  478 
  479         switch (RTP_PRIO_BASE(rtp->type)) {
  480         case RTP_PRIO_REALTIME:
  481                 if (rtp->prio > RTP_PRIO_MAX)
  482                         return (EINVAL);
  483                 newpri = PRI_MIN_REALTIME + rtp->prio;
  484                 break;
  485         case RTP_PRIO_NORMAL:
  486                 if (rtp->prio > (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE))
  487                         return (EINVAL);
  488                 newpri = PRI_MIN_TIMESHARE + rtp->prio;
  489                 break;
  490         case RTP_PRIO_IDLE:
  491                 if (rtp->prio > RTP_PRIO_MAX)
  492                         return (EINVAL);
  493                 newpri = PRI_MIN_IDLE + rtp->prio;
  494                 break;
  495         default:
  496                 return (EINVAL);
  497         }
  498 
  499         thread_lock(td);
  500         oldclass = td->td_pri_class;
  501         sched_class(td, rtp->type);     /* XXX fix */
  502         oldpri = td->td_user_pri;
  503         sched_user_prio(td, newpri);
  504         if (td->td_user_pri != oldpri && (oldclass != RTP_PRIO_NORMAL ||
  505             td->td_pri_class != RTP_PRIO_NORMAL))
  506                 sched_prio(td, td->td_user_pri);
  507         if (TD_ON_UPILOCK(td) && oldpri != newpri) {
  508                 critical_enter();
  509                 thread_unlock(td);
  510                 umtx_pi_adjust(td, oldpri);
  511                 critical_exit();
  512         } else
  513                 thread_unlock(td);
  514         return (0);
  515 }
  516 
  517 void
  518 pri_to_rtp(struct thread *td, struct rtprio *rtp)
  519 {
  520 
  521         thread_lock(td);
  522         switch (PRI_BASE(td->td_pri_class)) {
  523         case PRI_REALTIME:
  524                 rtp->prio = td->td_base_user_pri - PRI_MIN_REALTIME;
  525                 break;
  526         case PRI_TIMESHARE:
  527                 rtp->prio = td->td_base_user_pri - PRI_MIN_TIMESHARE;
  528                 break;
  529         case PRI_IDLE:
  530                 rtp->prio = td->td_base_user_pri - PRI_MIN_IDLE;
  531                 break;
  532         default:
  533                 break;
  534         }
  535         rtp->type = td->td_pri_class;
  536         thread_unlock(td);
  537 }
  538 
  539 #if defined(COMPAT_43)
  540 #ifndef _SYS_SYSPROTO_H_
  541 struct osetrlimit_args {
  542         u_int   which;
  543         struct  orlimit *rlp;
  544 };
  545 #endif
  546 int
  547 osetrlimit(struct thread *td, struct osetrlimit_args *uap)
  548 {
  549         struct orlimit olim;
  550         struct rlimit lim;
  551         int error;
  552 
  553         if ((error = copyin(uap->rlp, &olim, sizeof(struct orlimit))))
  554                 return (error);
  555         lim.rlim_cur = olim.rlim_cur;
  556         lim.rlim_max = olim.rlim_max;
  557         error = kern_setrlimit(td, uap->which, &lim);
  558         return (error);
  559 }
  560 
  561 #ifndef _SYS_SYSPROTO_H_
  562 struct ogetrlimit_args {
  563         u_int   which;
  564         struct  orlimit *rlp;
  565 };
  566 #endif
  567 int
  568 ogetrlimit(struct thread *td, struct ogetrlimit_args *uap)
  569 {
  570         struct orlimit olim;
  571         struct rlimit rl;
  572         int error;
  573 
  574         if (uap->which >= RLIM_NLIMITS)
  575                 return (EINVAL);
  576         lim_rlimit(td, uap->which, &rl);
  577 
  578         /*
  579          * XXX would be more correct to convert only RLIM_INFINITY to the
  580          * old RLIM_INFINITY and fail with EOVERFLOW for other larger
  581          * values.  Most 64->32 and 32->16 conversions, including not
  582          * unimportant ones of uids are even more broken than what we
  583          * do here (they blindly truncate).  We don't do this correctly
  584          * here since we have little experience with EOVERFLOW yet.
  585          * Elsewhere, getuid() can't fail...
  586          */
  587         olim.rlim_cur = rl.rlim_cur > 0x7fffffff ? 0x7fffffff : rl.rlim_cur;
  588         olim.rlim_max = rl.rlim_max > 0x7fffffff ? 0x7fffffff : rl.rlim_max;
  589         error = copyout(&olim, uap->rlp, sizeof(olim));
  590         return (error);
  591 }
  592 #endif /* COMPAT_43 */
  593 
  594 #ifndef _SYS_SYSPROTO_H_
  595 struct __setrlimit_args {
  596         u_int   which;
  597         struct  rlimit *rlp;
  598 };
  599 #endif
  600 int
  601 sys_setrlimit(struct thread *td, struct __setrlimit_args *uap)
  602 {
  603         struct rlimit alim;
  604         int error;
  605 
  606         if ((error = copyin(uap->rlp, &alim, sizeof(struct rlimit))))
  607                 return (error);
  608         error = kern_setrlimit(td, uap->which, &alim);
  609         return (error);
  610 }
  611 
  612 static void
  613 lim_cb(void *arg)
  614 {
  615         struct rlimit rlim;
  616         struct thread *td;
  617         struct proc *p;
  618 
  619         p = arg;
  620         PROC_LOCK_ASSERT(p, MA_OWNED);
  621         /*
  622          * Check if the process exceeds its cpu resource allocation.  If
  623          * it reaches the max, arrange to kill the process in ast().
  624          */
  625         if (p->p_cpulimit == RLIM_INFINITY)
  626                 return;
  627         PROC_STATLOCK(p);
  628         FOREACH_THREAD_IN_PROC(p, td) {
  629                 ruxagg(p, td);
  630         }
  631         PROC_STATUNLOCK(p);
  632         if (p->p_rux.rux_runtime > p->p_cpulimit * cpu_tickrate()) {
  633                 lim_rlimit_proc(p, RLIMIT_CPU, &rlim);
  634                 if (p->p_rux.rux_runtime >= rlim.rlim_max * cpu_tickrate()) {
  635                         killproc(p, "exceeded maximum CPU limit");
  636                 } else {
  637                         if (p->p_cpulimit < rlim.rlim_max)
  638                                 p->p_cpulimit += 5;
  639                         kern_psignal(p, SIGXCPU);
  640                 }
  641         }
  642         if ((p->p_flag & P_WEXIT) == 0)
  643                 callout_reset_sbt(&p->p_limco, SBT_1S, 0,
  644                     lim_cb, p, C_PREL(1));
  645 }
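
/*
 * Worked example of the policy above: with an RLIMIT_CPU soft limit of
 * 10s and a hard limit of 30s, the first callout that finds more than
 * 10s of accumulated runtime delivers SIGXCPU and advances p_cpulimit
 * to 15s; subsequent crossings deliver SIGXCPU roughly every 5
 * CPU-seconds until the 30s hard limit is reached, at which point
 * killproc() terminates the process.
 */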
  646 
  647 int
  648 kern_setrlimit(struct thread *td, u_int which, struct rlimit *limp)
  649 {
  650 
  651         return (kern_proc_setrlimit(td, td->td_proc, which, limp));
  652 }
  653 
  654 int
  655 kern_proc_setrlimit(struct thread *td, struct proc *p, u_int which,
  656     struct rlimit *limp)
  657 {
  658         struct plimit *newlim, *oldlim;
  659         struct rlimit *alimp;
  660         struct rlimit oldssiz;
  661         int error;
  662 
  663         if (which >= RLIM_NLIMITS)
  664                 return (EINVAL);
  665 
  666         /*
  667          * Preserve historical bugs by treating negative limits as unsigned.
  668          */
  669         if (limp->rlim_cur < 0)
  670                 limp->rlim_cur = RLIM_INFINITY;
  671         if (limp->rlim_max < 0)
  672                 limp->rlim_max = RLIM_INFINITY;
  673 
  674         oldssiz.rlim_cur = 0;
  675         newlim = lim_alloc();
  676         PROC_LOCK(p);
  677         oldlim = p->p_limit;
  678         alimp = &oldlim->pl_rlimit[which];
  679         if (limp->rlim_cur > alimp->rlim_max ||
  680             limp->rlim_max > alimp->rlim_max)
  681                 if ((error = priv_check(td, PRIV_PROC_SETRLIMIT))) {
  682                         PROC_UNLOCK(p);
  683                         lim_free(newlim);
  684                         return (error);
  685                 }
  686         if (limp->rlim_cur > limp->rlim_max)
  687                 limp->rlim_cur = limp->rlim_max;
  688         lim_copy(newlim, oldlim);
  689         alimp = &newlim->pl_rlimit[which];
  690 
  691         switch (which) {
  692         case RLIMIT_CPU:
  693                 if (limp->rlim_cur != RLIM_INFINITY &&
  694                     p->p_cpulimit == RLIM_INFINITY)
  695                         callout_reset_sbt(&p->p_limco, SBT_1S, 0,
  696                             lim_cb, p, C_PREL(1));
  697                 p->p_cpulimit = limp->rlim_cur;
  698                 break;
  699         case RLIMIT_DATA:
  700                 if (limp->rlim_cur > maxdsiz)
  701                         limp->rlim_cur = maxdsiz;
  702                 if (limp->rlim_max > maxdsiz)
  703                         limp->rlim_max = maxdsiz;
  704                 break;
  705 
  706         case RLIMIT_STACK:
  707                 if (limp->rlim_cur > maxssiz)
  708                         limp->rlim_cur = maxssiz;
  709                 if (limp->rlim_max > maxssiz)
  710                         limp->rlim_max = maxssiz;
  711                 oldssiz = *alimp;
  712                 if (p->p_sysent->sv_fixlimit != NULL)
  713                         p->p_sysent->sv_fixlimit(&oldssiz,
  714                             RLIMIT_STACK);
  715                 break;
  716 
  717         case RLIMIT_NOFILE:
  718                 if (limp->rlim_cur > maxfilesperproc)
  719                         limp->rlim_cur = maxfilesperproc;
  720                 if (limp->rlim_max > maxfilesperproc)
  721                         limp->rlim_max = maxfilesperproc;
  722                 break;
  723 
  724         case RLIMIT_NPROC:
  725                 if (limp->rlim_cur > maxprocperuid)
  726                         limp->rlim_cur = maxprocperuid;
  727                 if (limp->rlim_max > maxprocperuid)
  728                         limp->rlim_max = maxprocperuid;
  729                 if (limp->rlim_cur < 1)
  730                         limp->rlim_cur = 1;
  731                 if (limp->rlim_max < 1)
  732                         limp->rlim_max = 1;
  733                 break;
  734         }
  735         if (p->p_sysent->sv_fixlimit != NULL)
  736                 p->p_sysent->sv_fixlimit(limp, which);
  737         *alimp = *limp;
  738         p->p_limit = newlim;
  739         PROC_UPDATE_COW(p);
  740         PROC_UNLOCK(p);
  741         lim_free(oldlim);
  742 
  743         if (which == RLIMIT_STACK &&
  744             /*
  745              * Skip calls from exec_new_vmspace(), done when stack is
  746              * not mapped yet.
  747              */
  748             (td != curthread || (p->p_flag & P_INEXEC) == 0)) {
  749                 /*
  750                  * Stack is allocated to the max at exec time with only
  751                  * "rlim_cur" bytes accessible.  If stack limit is going
  752                  * up make more accessible, if going down make inaccessible.
  753                  */
  754                 if (limp->rlim_cur != oldssiz.rlim_cur) {
  755                         vm_offset_t addr;
  756                         vm_size_t size;
  757                         vm_prot_t prot;
  758 
  759                         if (limp->rlim_cur > oldssiz.rlim_cur) {
  760                                 prot = p->p_sysent->sv_stackprot;
  761                                 size = limp->rlim_cur - oldssiz.rlim_cur;
  762                                 addr = p->p_sysent->sv_usrstack -
  763                                     limp->rlim_cur;
  764                         } else {
  765                                 prot = VM_PROT_NONE;
  766                                 size = oldssiz.rlim_cur - limp->rlim_cur;
  767                                 addr = p->p_sysent->sv_usrstack -
  768                                     oldssiz.rlim_cur;
  769                         }
  770                         addr = trunc_page(addr);
  771                         size = round_page(size);
  772                         (void)vm_map_protect(&p->p_vmspace->vm_map,
  773                             addr, addr + size, prot, 0,
  774                             VM_MAP_PROTECT_SET_PROT);
  775                 }
  776         }
  777 
  778         return (0);
  779 }
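
/*
 * Worked example of the RLIMIT_STACK remapping above: if sv_usrstack is
 * US and the soft limit grows from 8 MB to 16 MB, then prot is the ABI
 * stack protection, size is 8 MB and addr is US - 16 MB, so the
 * previously inaccessible range [US - 16 MB, US - 8 MB) is opened up by
 * vm_map_protect().  Shrinking the limit runs the same computation with
 * VM_PROT_NONE, revoking access to the part of the stack that falls
 * outside the new soft limit.
 */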
  780 
  781 #ifndef _SYS_SYSPROTO_H_
  782 struct __getrlimit_args {
  783         u_int   which;
  784         struct  rlimit *rlp;
  785 };
  786 #endif
  787 /* ARGSUSED */
  788 int
  789 sys_getrlimit(struct thread *td, struct __getrlimit_args *uap)
  790 {
  791         struct rlimit rlim;
  792         int error;
  793 
  794         if (uap->which >= RLIM_NLIMITS)
  795                 return (EINVAL);
  796         lim_rlimit(td, uap->which, &rlim);
  797         error = copyout(&rlim, uap->rlp, sizeof(struct rlimit));
  798         return (error);
  799 }
  800 
  801 /*
  802  * Transform the running time and tick information for children of proc p
  803  * into user and system time usage.
  804  */
  805 void
  806 calccru(struct proc *p, struct timeval *up, struct timeval *sp)
  807 {
  808 
  809         PROC_LOCK_ASSERT(p, MA_OWNED);
  810         calcru1(p, &p->p_crux, up, sp);
  811 }
  812 
  813 /*
  814  * Transform the running time and tick information in proc p into user
  815  * and system time usage.  If appropriate, include the current time slice
  816  * on this CPU.
  817  */
  818 void
  819 calcru(struct proc *p, struct timeval *up, struct timeval *sp)
  820 {
  821         struct thread *td;
  822         uint64_t runtime, u;
  823 
  824         PROC_LOCK_ASSERT(p, MA_OWNED);
  825         PROC_STATLOCK_ASSERT(p, MA_OWNED);
  826         /*
  827          * If we are getting stats for the current process, then add in the
  828          * stats that this thread has accumulated in its current time slice.
  829          * We reset the thread and CPU state as if we had performed a context
  830          * switch right here.
  831          */
  832         td = curthread;
  833         if (td->td_proc == p) {
  834                 u = cpu_ticks();
  835                 runtime = u - PCPU_GET(switchtime);
  836                 td->td_runtime += runtime;
  837                 td->td_incruntime += runtime;
  838                 PCPU_SET(switchtime, u);
  839         }
  840         /* Make sure the per-thread stats are current. */
  841         FOREACH_THREAD_IN_PROC(p, td) {
  842                 if (td->td_incruntime == 0)
  843                         continue;
  844                 ruxagg(p, td);
  845         }
  846         calcru1(p, &p->p_rux, up, sp);
  847 }
  848 
  849 /* Collect resource usage for a single thread. */
  850 void
  851 rufetchtd(struct thread *td, struct rusage *ru)
  852 {
  853         struct proc *p;
  854         uint64_t runtime, u;
  855 
  856         p = td->td_proc;
  857         PROC_STATLOCK_ASSERT(p, MA_OWNED);
  858         THREAD_LOCK_ASSERT(td, MA_OWNED);
  859         /*
  860          * If we are getting stats for the current thread, then add in the
  861          * stats that this thread has accumulated in its current time slice.
  862          * We reset the thread and CPU state as if we had performed a context
  863          * switch right here.
  864          */
  865         if (td == curthread) {
  866                 u = cpu_ticks();
  867                 runtime = u - PCPU_GET(switchtime);
  868                 td->td_runtime += runtime;
  869                 td->td_incruntime += runtime;
  870                 PCPU_SET(switchtime, u);
  871         }
  872         ruxagg_locked(p, td);
  873         *ru = td->td_ru;
  874         calcru1(p, &td->td_rux, &ru->ru_utime, &ru->ru_stime);
  875 }
  876 
  877 /* XXX: the MI version is too slow to use: */
  878 #ifndef __HAVE_INLINE_FLSLL
  879 #define flsll(x)        (fls((x) >> 32) != 0 ? fls((x) >> 32) + 32 : fls(x))
  880 #endif
  881 
  882 static uint64_t
  883 mul64_by_fraction(uint64_t a, uint64_t b, uint64_t c)
  884 {
  885         uint64_t acc, bh, bl;
  886         int i, s, sa, sb;
  887 
  888         /*
  889          * Calculate (a * b) / c accurately enough without overflowing.  c
  890          * must be nonzero, and its top bit must be 0.  a or b must be
  891          * <= c, and the implementation is tuned for b <= c.
  892          *
  893          * The comments about times are for use in calcru1() with units of
  894          * microseconds for 'a' and stathz ticks at 128 Hz for b and c.
  895          *
  896          * Let n be the number of top zero bits in c.  Each iteration
  897          * either returns, or reduces b by right shifting it by at least n.
  898          * The number of iterations is at most 1 + 64 / n, and the error is
  899          * at most the number of iterations.
  900          *
  901          * It is very unusual to need even 2 iterations.  Previous
  902          * implementations overflowed essentially by returning early in the
  903          * first iteration, with n = 38 giving overflow at 105+ hours and
   904  * n = 32 giving overflow at 388+ days despite a more careful
  905          * calculation.  388 days is a reasonable uptime, and the calculation
  906          * needs to work for the uptime times the number of CPUs since 'a'
  907          * is per-process.
  908          */
  909         if (a >= (uint64_t)1 << 63)
  910                 return (0);             /* Unsupported arg -- can't happen. */
  911         acc = 0;
  912         for (i = 0; i < 128; i++) {
  913                 sa = flsll(a);
  914                 sb = flsll(b);
  915                 if (sa + sb <= 64)
  916                         /* Up to 105 hours on first iteration. */
  917                         return (acc + (a * b) / c);
  918                 if (a >= c) {
  919                         /*
  920                          * This reduction is based on a = q * c + r, with the
  921                          * remainder r < c.  'a' may be large to start, and
  922                          * moving bits from b into 'a' at the end of the loop
  923                          * sets the top bit of 'a', so the reduction makes
  924                          * significant progress.
  925                          */
  926                         acc += (a / c) * b;
  927                         a %= c;
  928                         sa = flsll(a);
  929                         if (sa + sb <= 64)
  930                                 /* Up to 388 days on first iteration. */
  931                                 return (acc + (a * b) / c);
  932                 }
  933 
  934                 /*
  935                  * This step writes a * b as a * ((bh << s) + bl) =
  936                  * a * (bh << s) + a * bl = (a << s) * bh + a * bl.  The 2
  937                  * additive terms are handled separately.  Splitting in
  938                  * this way is linear except for rounding errors.
  939                  *
  940                  * s = 64 - sa is the maximum such that a << s fits in 64
  941                  * bits.  Since a < c and c has at least 1 zero top bit,
  942                  * sa < 64 and s > 0.  Thus this step makes progress by
  943                  * reducing b (it increases 'a', but taking remainders on
  944                  * the next iteration completes the reduction).
  945                  *
  946                  * Finally, the choice for s is just what is needed to keep
  947                  * a * bl from overflowing, so we don't need complications
  948                  * like a recursive call mul64_by_fraction(a, bl, c) to
  949                  * handle the second additive term.
  950                  */
  951                 s = 64 - sa;
  952                 bh = b >> s;
  953                 bl = b - (bh << s);
  954                 acc += (a * bl) / c;
  955                 a <<= s;
  956                 b = bh;
  957         }
  958         return (0);             /* Algorithm failure -- can't happen. */
  959 }
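
/*
 * A small check of the splitting identity used above: with s = 3,
 * b = 13 decomposes as bh = 1, bl = 5 (13 = (1 << 3) + 5), and
 * a * 13 = (a << 3) * 1 + a * 5 for any a, which is the form the loop
 * relies on to keep each partial product within 64 bits.
 */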
  960 
  961 static void
  962 calcru1(struct proc *p, struct rusage_ext *ruxp, struct timeval *up,
  963     struct timeval *sp)
  964 {
  965         /* {user, system, interrupt, total} {ticks, usec}: */
  966         uint64_t ut, uu, st, su, it, tt, tu;
  967 
  968         ut = ruxp->rux_uticks;
  969         st = ruxp->rux_sticks;
  970         it = ruxp->rux_iticks;
  971         tt = ut + st + it;
  972         if (tt == 0) {
  973                 /* Avoid divide by zero */
  974                 st = 1;
  975                 tt = 1;
  976         }
  977         tu = cputick2usec(ruxp->rux_runtime);
  978         if ((int64_t)tu < 0) {
  979                 /* XXX: this should be an assert /phk */
  980                 printf("calcru: negative runtime of %jd usec for pid %d (%s)\n",
  981                     (intmax_t)tu, p->p_pid, p->p_comm);
  982                 tu = ruxp->rux_tu;
  983         }
  984 
  985         /* Subdivide tu.  Avoid overflow in the multiplications. */
  986         if (__predict_true(tu <= ((uint64_t)1 << 38) && tt <= (1 << 26))) {
  987                 /* Up to 76 hours when stathz is 128. */
  988                 uu = (tu * ut) / tt;
  989                 su = (tu * st) / tt;
  990         } else {
  991                 uu = mul64_by_fraction(tu, ut, tt);
  992                 su = mul64_by_fraction(tu, st, tt);
  993         }
  994 
  995         if (tu >= ruxp->rux_tu) {
  996                 /*
  997                  * The normal case, time increased.
  998                  * Enforce monotonicity of bucketed numbers.
  999                  */
 1000                 if (uu < ruxp->rux_uu)
 1001                         uu = ruxp->rux_uu;
 1002                 if (su < ruxp->rux_su)
 1003                         su = ruxp->rux_su;
 1004         } else if (tu + 3 > ruxp->rux_tu || 101 * tu > 100 * ruxp->rux_tu) {
 1005                 /*
 1006                  * When we calibrate the cputicker, it is not uncommon to
 1007                  * see the presumably fixed frequency increase slightly over
 1008                  * time as a result of thermal stabilization and NTP
 1009                  * discipline (of the reference clock).  We therefore ignore
  1010  * a bit of backwards slop because we expect to catch up
 1011                  * shortly.  We use a 3 microsecond limit to catch low
 1012                  * counts and a 1% limit for high counts.
 1013                  */
 1014                 uu = ruxp->rux_uu;
 1015                 su = ruxp->rux_su;
 1016                 tu = ruxp->rux_tu;
 1017         } else { /* tu < ruxp->rux_tu */
 1018                 /*
 1019                  * What happened here was likely that a laptop, which ran at
 1020                  * a reduced clock frequency at boot, kicked into high gear.
 1021                  * The wisdom of spamming this message in that case is
 1022                  * dubious, but it might also be indicative of something
  1023  * serious, so let's keep it and hope laptops can be made
 1024                  * more truthful about their CPU speed via ACPI.
 1025                  */
 1026                 printf("calcru: runtime went backwards from %ju usec "
 1027                     "to %ju usec for pid %d (%s)\n",
 1028                     (uintmax_t)ruxp->rux_tu, (uintmax_t)tu,
 1029                     p->p_pid, p->p_comm);
 1030         }
 1031 
 1032         ruxp->rux_uu = uu;
 1033         ruxp->rux_su = su;
 1034         ruxp->rux_tu = tu;
 1035 
 1036         up->tv_sec = uu / 1000000;
 1037         up->tv_usec = uu % 1000000;
 1038         sp->tv_sec = su / 1000000;
 1039         sp->tv_usec = su % 1000000;
 1040 }
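
/*
 * Worked example for the split above: with rux_uticks = 96,
 * rux_sticks = 32 and rux_iticks = 0, tt = 128; if cputick2usec()
 * reports tu = 1000000 us of total runtime, then
 * uu = 1000000 * 96 / 128 = 750000 and su = 1000000 * 32 / 128 = 250000,
 * so *up becomes { 0, 750000 } and *sp becomes { 0, 250000 }.
 */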
 1041 
 1042 #ifndef _SYS_SYSPROTO_H_
 1043 struct getrusage_args {
 1044         int     who;
 1045         struct  rusage *rusage;
 1046 };
 1047 #endif
 1048 int
 1049 sys_getrusage(struct thread *td, struct getrusage_args *uap)
 1050 {
 1051         struct rusage ru;
 1052         int error;
 1053 
 1054         error = kern_getrusage(td, uap->who, &ru);
 1055         if (error == 0)
 1056                 error = copyout(&ru, uap->rusage, sizeof(struct rusage));
 1057         return (error);
 1058 }
 1059 
 1060 int
 1061 kern_getrusage(struct thread *td, int who, struct rusage *rup)
 1062 {
 1063         struct proc *p;
 1064         int error;
 1065 
 1066         error = 0;
 1067         p = td->td_proc;
 1068         PROC_LOCK(p);
 1069         switch (who) {
 1070         case RUSAGE_SELF:
 1071                 rufetchcalc(p, rup, &rup->ru_utime,
 1072                     &rup->ru_stime);
 1073                 break;
 1074 
 1075         case RUSAGE_CHILDREN:
 1076                 *rup = p->p_stats->p_cru;
 1077                 calccru(p, &rup->ru_utime, &rup->ru_stime);
 1078                 break;
 1079 
 1080         case RUSAGE_THREAD:
 1081                 PROC_STATLOCK(p);
 1082                 thread_lock(td);
 1083                 rufetchtd(td, rup);
 1084                 thread_unlock(td);
 1085                 PROC_STATUNLOCK(p);
 1086                 break;
 1087 
 1088         default:
 1089                 error = EINVAL;
 1090         }
 1091         PROC_UNLOCK(p);
 1092         return (error);
 1093 }
 1094 
 1095 void
 1096 rucollect(struct rusage *ru, struct rusage *ru2)
 1097 {
 1098         long *ip, *ip2;
 1099         int i;
 1100 
 1101         if (ru->ru_maxrss < ru2->ru_maxrss)
 1102                 ru->ru_maxrss = ru2->ru_maxrss;
 1103         ip = &ru->ru_first;
 1104         ip2 = &ru2->ru_first;
 1105         for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
 1106                 *ip++ += *ip2++;
 1107 }
 1108 
 1109 void
 1110 ruadd(struct rusage *ru, struct rusage_ext *rux, struct rusage *ru2,
 1111     struct rusage_ext *rux2)
 1112 {
 1113 
 1114         rux->rux_runtime += rux2->rux_runtime;
 1115         rux->rux_uticks += rux2->rux_uticks;
 1116         rux->rux_sticks += rux2->rux_sticks;
 1117         rux->rux_iticks += rux2->rux_iticks;
 1118         rux->rux_uu += rux2->rux_uu;
 1119         rux->rux_su += rux2->rux_su;
 1120         rux->rux_tu += rux2->rux_tu;
 1121         rucollect(ru, ru2);
 1122 }
 1123 
 1124 /*
 1125  * Aggregate tick counts into the proc's rusage_ext.
 1126  */
 1127 static void
 1128 ruxagg_ext_locked(struct rusage_ext *rux, struct thread *td)
 1129 {
 1130 
 1131         rux->rux_runtime += td->td_incruntime;
 1132         rux->rux_uticks += td->td_uticks;
 1133         rux->rux_sticks += td->td_sticks;
 1134         rux->rux_iticks += td->td_iticks;
 1135 }
 1136 
 1137 void
 1138 ruxagg_locked(struct proc *p, struct thread *td)
 1139 {
 1140         THREAD_LOCK_ASSERT(td, MA_OWNED);
 1141         PROC_STATLOCK_ASSERT(td->td_proc, MA_OWNED);
 1142 
 1143         ruxagg_ext_locked(&p->p_rux, td);
 1144         ruxagg_ext_locked(&td->td_rux, td);
 1145         td->td_incruntime = 0;
 1146         td->td_uticks = 0;
 1147         td->td_iticks = 0;
 1148         td->td_sticks = 0;
 1149 }
 1150 
 1151 void
 1152 ruxagg(struct proc *p, struct thread *td)
 1153 {
 1154 
 1155         thread_lock(td);
 1156         ruxagg_locked(p, td);
 1157         thread_unlock(td);
 1158 }
 1159 
 1160 /*
 1161  * Update the rusage_ext structure and fetch a valid aggregate rusage
 1162  * for proc p if storage for one is supplied.
 1163  */
 1164 void
 1165 rufetch(struct proc *p, struct rusage *ru)
 1166 {
 1167         struct thread *td;
 1168 
 1169         PROC_STATLOCK_ASSERT(p, MA_OWNED);
 1170 
 1171         *ru = p->p_ru;
 1172         if (p->p_numthreads > 0)  {
 1173                 FOREACH_THREAD_IN_PROC(p, td) {
 1174                         ruxagg(p, td);
 1175                         rucollect(ru, &td->td_ru);
 1176                 }
 1177         }
 1178 }
 1179 
 1180 /*
 1181  * Atomically perform a rufetch and a calcru together.
  1182  * Consumers can safely assume the calcru is executed only once
 1183  * rufetch is completed.
 1184  */
 1185 void
 1186 rufetchcalc(struct proc *p, struct rusage *ru, struct timeval *up,
 1187     struct timeval *sp)
 1188 {
 1189 
 1190         PROC_STATLOCK(p);
 1191         rufetch(p, ru);
 1192         calcru(p, up, sp);
 1193         PROC_STATUNLOCK(p);
 1194 }
 1195 
 1196 /*
 1197  * Allocate a new resource limits structure and initialize its
 1198  * reference count and mutex pointer.
 1199  */
 1200 struct plimit *
 1201 lim_alloc()
 1202 {
 1203         struct plimit *limp;
 1204 
 1205         limp = malloc(sizeof(struct plimit), M_PLIMIT, M_WAITOK);
 1206         refcount_init(&limp->pl_refcnt, 1);
 1207         return (limp);
 1208 }
 1209 
 1210 struct plimit *
 1211 lim_hold(struct plimit *limp)
 1212 {
 1213 
 1214         refcount_acquire(&limp->pl_refcnt);
 1215         return (limp);
 1216 }
 1217 
 1218 void
 1219 lim_fork(struct proc *p1, struct proc *p2)
 1220 {
 1221 
 1222         PROC_LOCK_ASSERT(p1, MA_OWNED);
 1223         PROC_LOCK_ASSERT(p2, MA_OWNED);
 1224 
 1225         p2->p_limit = lim_hold(p1->p_limit);
 1226         callout_init_mtx(&p2->p_limco, &p2->p_mtx, 0);
 1227         if (p1->p_cpulimit != RLIM_INFINITY)
 1228                 callout_reset_sbt(&p2->p_limco, SBT_1S, 0,
 1229                     lim_cb, p2, C_PREL(1));
 1230 }
 1231 
 1232 void
 1233 lim_free(struct plimit *limp)
 1234 {
 1235 
 1236         if (refcount_release(&limp->pl_refcnt))
 1237                 free((void *)limp, M_PLIMIT);
 1238 }
 1239 
 1240 void
 1241 lim_freen(struct plimit *limp, int n)
 1242 {
 1243 
 1244         if (refcount_releasen(&limp->pl_refcnt, n))
 1245                 free((void *)limp, M_PLIMIT);
 1246 }
 1247 
 1248 /*
 1249  * Make a copy of the plimit structure.
 1250  * We share these structures copy-on-write after fork.
 1251  */
 1252 void
 1253 lim_copy(struct plimit *dst, struct plimit *src)
 1254 {
 1255 
 1256         KASSERT(dst->pl_refcnt <= 1, ("lim_copy to shared limit"));
 1257         bcopy(src->pl_rlimit, dst->pl_rlimit, sizeof(src->pl_rlimit));
 1258 }
 1259 
 1260 /*
 1261  * Return the hard limit for a particular system resource.  The
 1262  * which parameter specifies the index into the rlimit array.
 1263  */
 1264 rlim_t
 1265 lim_max(struct thread *td, int which)
 1266 {
 1267         struct rlimit rl;
 1268 
 1269         lim_rlimit(td, which, &rl);
 1270         return (rl.rlim_max);
 1271 }
 1272 
 1273 rlim_t
 1274 lim_max_proc(struct proc *p, int which)
 1275 {
 1276         struct rlimit rl;
 1277 
 1278         lim_rlimit_proc(p, which, &rl);
 1279         return (rl.rlim_max);
 1280 }
 1281 
 1282 /*
 1283  * Return the current (soft) limit for a particular system resource.
  1284  * The which parameter specifies the index into the rlimit array.
 1285  */
 1286 rlim_t
 1287 (lim_cur)(struct thread *td, int which)
 1288 {
 1289         struct rlimit rl;
 1290 
 1291         lim_rlimit(td, which, &rl);
 1292         return (rl.rlim_cur);
 1293 }
 1294 
 1295 rlim_t
 1296 lim_cur_proc(struct proc *p, int which)
 1297 {
 1298         struct rlimit rl;
 1299 
 1300         lim_rlimit_proc(p, which, &rl);
 1301         return (rl.rlim_cur);
 1302 }
 1303 
 1304 /*
 1305  * Return a copy of the entire rlimit structure for the system limit
 1306  * specified by 'which' in the rlimit structure pointed to by 'rlp'.
 1307  */
 1308 void
 1309 lim_rlimit(struct thread *td, int which, struct rlimit *rlp)
 1310 {
 1311         struct proc *p = td->td_proc;
 1312 
 1313         MPASS(td == curthread);
 1314         KASSERT(which >= 0 && which < RLIM_NLIMITS,
 1315             ("request for invalid resource limit"));
 1316         *rlp = td->td_limit->pl_rlimit[which];
 1317         if (p->p_sysent->sv_fixlimit != NULL)
 1318                 p->p_sysent->sv_fixlimit(rlp, which);
 1319 }
 1320 
 1321 void
 1322 lim_rlimit_proc(struct proc *p, int which, struct rlimit *rlp)
 1323 {
 1324 
 1325         PROC_LOCK_ASSERT(p, MA_OWNED);
 1326         KASSERT(which >= 0 && which < RLIM_NLIMITS,
 1327             ("request for invalid resource limit"));
 1328         *rlp = p->p_limit->pl_rlimit[which];
 1329         if (p->p_sysent->sv_fixlimit != NULL)
 1330                 p->p_sysent->sv_fixlimit(rlp, which);
 1331 }
 1332 
 1333 void
 1334 uihashinit()
 1335 {
 1336 
 1337         uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
 1338         rw_init(&uihashtbl_lock, "uidinfo hash");
 1339 }
 1340 
 1341 /*
 1342  * Look up a uidinfo struct for the parameter uid.
 1343  * uihashtbl_lock must be locked.
 1344  * Increase refcount on uidinfo struct returned.
 1345  */
 1346 static struct uidinfo *
 1347 uilookup(uid_t uid)
 1348 {
 1349         struct uihashhead *uipp;
 1350         struct uidinfo *uip;
 1351 
 1352         rw_assert(&uihashtbl_lock, RA_LOCKED);
 1353         uipp = UIHASH(uid);
 1354         LIST_FOREACH(uip, uipp, ui_hash)
 1355                 if (uip->ui_uid == uid) {
 1356                         uihold(uip);
 1357                         break;
 1358                 }
 1359 
 1360         return (uip);
 1361 }
 1362 
 1363 /*
 1364  * Find or allocate a struct uidinfo for a particular uid.
 1365  * Returns with uidinfo struct referenced.
 1366  * uifree() should be called on a struct uidinfo when released.
 1367  */
 1368 struct uidinfo *
 1369 uifind(uid_t uid)
 1370 {
 1371         struct uidinfo *new_uip, *uip;
 1372         struct ucred *cred;
 1373 
 1374         cred = curthread->td_ucred;
 1375         if (cred->cr_uidinfo->ui_uid == uid) {
 1376                 uip = cred->cr_uidinfo;
 1377                 uihold(uip);
 1378                 return (uip);
 1379         } else if (cred->cr_ruidinfo->ui_uid == uid) {
 1380                 uip = cred->cr_ruidinfo;
 1381                 uihold(uip);
 1382                 return (uip);
 1383         }
 1384 
 1385         rw_rlock(&uihashtbl_lock);
 1386         uip = uilookup(uid);
 1387         rw_runlock(&uihashtbl_lock);
 1388         if (uip != NULL)
 1389                 return (uip);
 1390 
 1391         new_uip = malloc(sizeof(*new_uip), M_UIDINFO, M_WAITOK | M_ZERO);
 1392         racct_create(&new_uip->ui_racct);
 1393         refcount_init(&new_uip->ui_ref, 1);
 1394         new_uip->ui_uid = uid;
 1395 
 1396         rw_wlock(&uihashtbl_lock);
 1397         /*
 1398          * There's a chance someone created our uidinfo while we
 1399          * were in malloc and not holding the lock, so we have to
 1400          * make sure we don't insert a duplicate uidinfo.
 1401          */
 1402         if ((uip = uilookup(uid)) == NULL) {
 1403                 LIST_INSERT_HEAD(UIHASH(uid), new_uip, ui_hash);
 1404                 rw_wunlock(&uihashtbl_lock);
 1405                 uip = new_uip;
 1406         } else {
 1407                 rw_wunlock(&uihashtbl_lock);
 1408                 racct_destroy(&new_uip->ui_racct);
 1409                 free(new_uip, M_UIDINFO);
 1410         }
 1411         return (uip);
 1412 }
 1413 
 1414 /*
 1415  * Place another refcount on a uidinfo struct.
 1416  */
 1417 void
 1418 uihold(struct uidinfo *uip)
 1419 {
 1420 
 1421         refcount_acquire(&uip->ui_ref);
 1422 }
 1423 
 1424 /*-
 1425  * Since uidinfo structs have a long lifetime, we use an
 1426  * opportunistic refcounting scheme to avoid locking the lookup hash
 1427  * for each release.
 1428  *
 1429  * If the refcount hits 0, we need to free the structure,
 1430  * which means we need to lock the hash.
 1431  * Optimal case:
 1432  *   After locking the struct and lowering the refcount, if we find
 1433  *   that we don't need to free, simply unlock and return.
 1434  * Suboptimal case:
 1435  *   If refcount lowering results in need to free, bump the count
 1436  *   back up, lose the lock and acquire the locks in the proper
 1437  *   order to try again.
 1438  */
 1439 void
 1440 uifree(struct uidinfo *uip)
 1441 {
 1442 
 1443         if (refcount_release_if_not_last(&uip->ui_ref))
 1444                 return;
 1445 
 1446         rw_wlock(&uihashtbl_lock);
 1447         if (refcount_release(&uip->ui_ref) == 0) {
 1448                 rw_wunlock(&uihashtbl_lock);
 1449                 return;
 1450         }
 1451 
 1452         racct_destroy(&uip->ui_racct);
 1453         LIST_REMOVE(uip, ui_hash);
 1454         rw_wunlock(&uihashtbl_lock);
 1455 
 1456         if (uip->ui_sbsize != 0)
 1457                 printf("freeing uidinfo: uid = %d, sbsize = %ld\n",
 1458                     uip->ui_uid, uip->ui_sbsize);
 1459         if (uip->ui_proccnt != 0)
 1460                 printf("freeing uidinfo: uid = %d, proccnt = %ld\n",
 1461                     uip->ui_uid, uip->ui_proccnt);
 1462         if (uip->ui_vmsize != 0)
 1463                 printf("freeing uidinfo: uid = %d, swapuse = %lld\n",
 1464                     uip->ui_uid, (unsigned long long)uip->ui_vmsize);
 1465         free(uip, M_UIDINFO);
 1466 }
 1467 
 1468 #ifdef RACCT
 1469 void
 1470 ui_racct_foreach(void (*callback)(struct racct *racct,
 1471     void *arg2, void *arg3), void (*pre)(void), void (*post)(void),
 1472     void *arg2, void *arg3)
 1473 {
 1474         struct uidinfo *uip;
 1475         struct uihashhead *uih;
 1476 
 1477         rw_rlock(&uihashtbl_lock);
 1478         if (pre != NULL)
 1479                 (pre)();
 1480         for (uih = &uihashtbl[uihash]; uih >= uihashtbl; uih--) {
 1481                 LIST_FOREACH(uip, uih, ui_hash) {
 1482                         (callback)(uip->ui_racct, arg2, arg3);
 1483                 }
 1484         }
 1485         if (post != NULL)
 1486                 (post)();
 1487         rw_runlock(&uihashtbl_lock);
 1488 }
 1489 #endif
 1490 
 1491 static inline int
 1492 chglimit(struct uidinfo *uip, long *limit, int diff, rlim_t max, const char *name)
 1493 {
 1494         long new;
 1495 
 1496         /* Don't allow them to exceed max, but allow subtraction. */
 1497         new = atomic_fetchadd_long(limit, (long)diff) + diff;
 1498         if (diff > 0 && max != 0) {
 1499                 if (new < 0 || new > max) {
 1500                         atomic_subtract_long(limit, (long)diff);
 1501                         return (0);
 1502                 }
 1503         } else if (new < 0)
 1504                 printf("negative %s for uid = %d\n", name, uip->ui_uid);
 1505         return (1);
 1506 }
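
/*
 * The optimistic update above is what keeps chgproccnt() and friends
 * lock-free: for example, with *limit == 9 and max == 10, two racing
 * callers each adding 1 see fetchadd return 9 and 10 respectively; the
 * first computes new == 10 and succeeds, the second computes new == 11,
 * backs its increment out with atomic_subtract_long() and returns 0 to
 * signal that the limit would have been exceeded.
 */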
 1507 
 1508 /*
  1509  * Change the count associated with the number of processes
  1510  * a given user is using.  When 'max' is 0, don't enforce a limit.
 1511  */
 1512 int
 1513 chgproccnt(struct uidinfo *uip, int diff, rlim_t max)
 1514 {
 1515 
 1516         return (chglimit(uip, &uip->ui_proccnt, diff, max, "proccnt"));
 1517 }
 1518 
 1519 /*
 1520  * Change the total socket buffer size a user has used.
 1521  */
 1522 int
 1523 chgsbsize(struct uidinfo *uip, u_int *hiwat, u_int to, rlim_t max)
 1524 {
 1525         int diff, rv;
 1526 
 1527         diff = to - *hiwat;
 1528         if (diff > 0 && max == 0) {
 1529                 rv = 0;
 1530         } else {
 1531                 rv = chglimit(uip, &uip->ui_sbsize, diff, max, "sbsize");
 1532                 if (rv != 0)
 1533                         *hiwat = to;
 1534         }
 1535         return (rv);
 1536 }
 1537 
 1538 /*
  1539  * Change the count associated with the number of pseudo-terminals
  1540  * a given user is using.  When 'max' is 0, don't enforce a limit.
 1541  */
 1542 int
 1543 chgptscnt(struct uidinfo *uip, int diff, rlim_t max)
 1544 {
 1545 
 1546         return (chglimit(uip, &uip->ui_ptscnt, diff, max, "ptscnt"));
 1547 }
 1548 
 1549 int
 1550 chgkqcnt(struct uidinfo *uip, int diff, rlim_t max)
 1551 {
 1552 
 1553         return (chglimit(uip, &uip->ui_kqcnt, diff, max, "kqcnt"));
 1554 }
 1555 
 1556 int
 1557 chgumtxcnt(struct uidinfo *uip, int diff, rlim_t max)
 1558 {
 1559 
 1560         return (chglimit(uip, &uip->ui_umtxcnt, diff, max, "umtxcnt"));
 1561 }
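
The syscall handlers above are reached through the ordinary libc wrappers. Below is a rough userland sketch (plain C, not kernel code, with error handling kept minimal) exercising getpriority()/setpriority(), getrlimit()/setrlimit() and getrusage(), which end up in kern_getpriority(), kern_setpriority(), kern_setrlimit() and kern_getrusage() above; the FreeBSD-specific rtprio(2)/rtprio_thread(2) interfaces implemented here are left out for brevity.

#include <sys/resource.h>
#include <sys/time.h>

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	struct rlimit rl;
	struct rusage ru;
	int prio;

	/*
	 * getpriority() can legitimately return -1, so clear errno first
	 * to tell that apart from ESRCH/EINVAL out of kern_getpriority().
	 */
	errno = 0;
	prio = getpriority(PRIO_PROCESS, 0);
	if (prio == -1 && errno != 0)
		perror("getpriority");
	else
		printf("nice: %d\n", prio);

	/* Raising nice needs no privilege; see donice() above. */
	if (setpriority(PRIO_PROCESS, 0, prio + 1) == -1)
		perror("setpriority");

	/* Lower the soft descriptor limit; the hard limit stays put. */
	if (getrlimit(RLIMIT_NOFILE, &rl) == 0) {
		printf("RLIMIT_NOFILE: cur %jd max %jd\n",
		    (intmax_t)rl.rlim_cur, (intmax_t)rl.rlim_max);
		rl.rlim_cur /= 2;
		if (setrlimit(RLIMIT_NOFILE, &rl) == -1)
			perror("setrlimit");
	}

	/* User/system times are produced by calcru() via kern_getrusage(). */
	if (getrusage(RUSAGE_SELF, &ru) == 0)
		printf("user %jd.%06lds sys %jd.%06lds\n",
		    (intmax_t)ru.ru_utime.tv_sec, (long)ru.ru_utime.tv_usec,
		    (intmax_t)ru.ru_stime.tv_sec, (long)ru.ru_stime.tv_usec);

	return (0);
}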
