The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_rctl.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2010 The FreeBSD Foundation
    5  * All rights reserved.
    6  *
    7  * This software was developed by Edward Tomasz Napierala under sponsorship
    8  * from the FreeBSD Foundation.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   29  * SUCH DAMAGE.
   30  *
   31  * $FreeBSD$
   32  */
   33 
   34 #include <sys/cdefs.h>
   35 __FBSDID("$FreeBSD$");
   36 
   37 #include <sys/param.h>
   38 #include <sys/bus.h>
   39 #include <sys/malloc.h>
   40 #include <sys/queue.h>
   41 #include <sys/refcount.h>
   42 #include <sys/jail.h>
   43 #include <sys/kernel.h>
   44 #include <sys/limits.h>
   45 #include <sys/loginclass.h>
   46 #include <sys/priv.h>
   47 #include <sys/proc.h>
   48 #include <sys/racct.h>
   49 #include <sys/rctl.h>
   50 #include <sys/resourcevar.h>
   51 #include <sys/sx.h>
   52 #include <sys/sysent.h>
   53 #include <sys/sysproto.h>
   54 #include <sys/systm.h>
   55 #include <sys/types.h>
   56 #include <sys/eventhandler.h>
   57 #include <sys/lock.h>
   58 #include <sys/mutex.h>
   59 #include <sys/rwlock.h>
   60 #include <sys/sbuf.h>
   61 #include <sys/taskqueue.h>
   62 #include <sys/tree.h>
   63 #include <vm/uma.h>
   64 
   65 #ifdef RCTL
   66 #ifndef RACCT
   67 #error "The RCTL option requires the RACCT option"
   68 #endif
   69 
   70 FEATURE(rctl, "Resource Limits");
   71 
   72 #define HRF_DEFAULT             0
   73 #define HRF_DONT_INHERIT        1
   74 #define HRF_DONT_ACCUMULATE     2
   75 
   76 #define RCTL_MAX_INBUFSIZE      4 * 1024
   77 #define RCTL_MAX_OUTBUFSIZE     16 * 1024 * 1024
   78 #define RCTL_LOG_BUFSIZE        128
   79 
   80 #define RCTL_PCPU_SHIFT         (10 * 1000000)
   81 
   82 static unsigned int rctl_maxbufsize = RCTL_MAX_OUTBUFSIZE;      /* exported as kern.racct.rctl.maxbufsize */
   83 static int rctl_log_rate_limit = 10;    /* handed to ppsratecheck() for RCTL_ACTION_LOG */
   84 static int rctl_devctl_rate_limit = 10; /* handed to ppsratecheck() for RCTL_ACTION_DEVCTL */
   85 
   86 /*
   87  * Values below are initialized in rctl_init(); the -1 initializers are
       * placeholders only.  Updates made through the sysctl handlers below
       * are stored while holding the racct lock.
   88  */
   89 static int rctl_throttle_min = -1;      /* shortest throttling duration, in hz */
   90 static int rctl_throttle_max = -1;      /* longest throttling duration, in hz */
   91 static int rctl_throttle_pct = -1;      /* penalty for process consumption, in percent */
   92 static int rctl_throttle_pct2 = -1;     /* penalty for container consumption, in percent */
   93 
      /* Validating handlers for the four kern.racct.rctl.throttle_* knobs. */
   94 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS);
   95 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS);
   96 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS);
   97 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS);
   98 
      /*
       * The kern.racct.rctl sysctl subtree.
       *
       * NOTE(review): log_rate_limit and devctl_rate_limit are declared
       * with SYSCTL_UINT although the backing variables are plain 'int'
       * -- confirm that this is intentional.
       */
   99 SYSCTL_NODE(_kern_racct, OID_AUTO, rctl, CTLFLAG_RW, 0, "Resource Limits");
  100 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, maxbufsize, CTLFLAG_RWTUN,
  101     &rctl_maxbufsize, 0, "Maximum output buffer size");
  102 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, log_rate_limit, CTLFLAG_RW,
  103     &rctl_log_rate_limit, 0, "Maximum number of log messages per second");
  104 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, devctl_rate_limit, CTLFLAG_RWTUN,
  105     &rctl_devctl_rate_limit, 0, "Maximum number of devctl messages per second");
      /*
       * The throttle knobs go through handler functions so that a new
       * value can be range-checked before it is stored.
       */
  106 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_min,
  107     CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_min_sysctl, "IU",
  108     "Shortest throttling duration, in hz");
  109 TUNABLE_INT("kern.racct.rctl.throttle_min", &rctl_throttle_min);
  110 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_max,
  111     CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_max_sysctl, "IU",
  112     "Longest throttling duration, in hz");
  113 TUNABLE_INT("kern.racct.rctl.throttle_max", &rctl_throttle_max);
  114 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct,
  115     CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_pct_sysctl, "IU",
  116     "Throttling penalty for process consumption, in percent");
  117 TUNABLE_INT("kern.racct.rctl.throttle_pct", &rctl_throttle_pct);
  118 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct2,
  119     CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_pct2_sysctl, "IU",
  120     "Throttling penalty for container consumption, in percent");
  121 TUNABLE_INT("kern.racct.rctl.throttle_pct2", &rctl_throttle_pct2);
  122 
  123 /*
  124  * 'rctl_rule_link' connects a rule with every racct it's related to.
  125  * For example, rule 'user:X:openfiles:deny=N/process' is linked
  126  * with uidinfo for user X, and to each process of that user.
       *
       * Links are chained on the racct's r_rule_links list and are walked
       * with LIST_FOREACH() by the enforcement and query functions below.
  127  */
  128 struct rctl_rule_link {
  129         LIST_ENTRY(rctl_rule_link)      rrl_next;       /* linkage on racct->r_rule_links */
  130         struct rctl_rule                *rrl_rule;      /* the rule this link refers to */
  131         int                             rrl_exceeded;   /* set by rctl_enforce() once a one-shot action fired; cleared when the rule is no longer exceeded */
  132 };
  133 
  134 struct dict {                           /* one entry of a name <-> value lookup table */
  135         const char      *d_name;        /* keyword; NULL marks the end of a table */
  136         int             d_value;        /* numeric constant the keyword stands for */
  137 };
  138 
  139 static struct dict subjectnames[] = {
  140         { "process", RCTL_SUBJECT_TYPE_PROCESS },
  141         { "user", RCTL_SUBJECT_TYPE_USER },
  142         { "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS },
  143         { "jail", RCTL_SUBJECT_TYPE_JAIL },
  144         { NULL, -1 }};
  145 
  146 static struct dict resourcenames[] = {
  147         { "cputime", RACCT_CPU },
  148         { "datasize", RACCT_DATA },
  149         { "stacksize", RACCT_STACK },
  150         { "coredumpsize", RACCT_CORE },
  151         { "memoryuse", RACCT_RSS },
  152         { "memorylocked", RACCT_MEMLOCK },
  153         { "maxproc", RACCT_NPROC },
  154         { "openfiles", RACCT_NOFILE },
  155         { "vmemoryuse", RACCT_VMEM },
  156         { "pseudoterminals", RACCT_NPTS },
  157         { "swapuse", RACCT_SWAP },
  158         { "nthr", RACCT_NTHR },
  159         { "msgqqueued", RACCT_MSGQQUEUED },
  160         { "msgqsize", RACCT_MSGQSIZE },
  161         { "nmsgq", RACCT_NMSGQ },
  162         { "nsem", RACCT_NSEM },
  163         { "nsemop", RACCT_NSEMOP },
  164         { "nshm", RACCT_NSHM },
  165         { "shmsize", RACCT_SHMSIZE },
  166         { "wallclock", RACCT_WALLCLOCK },
  167         { "pcpu", RACCT_PCTCPU },
  168         { "readbps", RACCT_READBPS },
  169         { "writebps", RACCT_WRITEBPS },
  170         { "readiops", RACCT_READIOPS },
  171         { "writeiops", RACCT_WRITEIOPS },
  172         { NULL, -1 }};
  173 
  174 static struct dict actionnames[] = {
  175         { "sighup", RCTL_ACTION_SIGHUP },
  176         { "sigint", RCTL_ACTION_SIGINT },
  177         { "sigquit", RCTL_ACTION_SIGQUIT },
  178         { "sigill", RCTL_ACTION_SIGILL },
  179         { "sigtrap", RCTL_ACTION_SIGTRAP },
  180         { "sigabrt", RCTL_ACTION_SIGABRT },
  181         { "sigemt", RCTL_ACTION_SIGEMT },
  182         { "sigfpe", RCTL_ACTION_SIGFPE },
  183         { "sigkill", RCTL_ACTION_SIGKILL },
  184         { "sigbus", RCTL_ACTION_SIGBUS },
  185         { "sigsegv", RCTL_ACTION_SIGSEGV },
  186         { "sigsys", RCTL_ACTION_SIGSYS },
  187         { "sigpipe", RCTL_ACTION_SIGPIPE },
  188         { "sigalrm", RCTL_ACTION_SIGALRM },
  189         { "sigterm", RCTL_ACTION_SIGTERM },
  190         { "sigurg", RCTL_ACTION_SIGURG },
  191         { "sigstop", RCTL_ACTION_SIGSTOP },
  192         { "sigtstp", RCTL_ACTION_SIGTSTP },
  193         { "sigchld", RCTL_ACTION_SIGCHLD },
  194         { "sigttin", RCTL_ACTION_SIGTTIN },
  195         { "sigttou", RCTL_ACTION_SIGTTOU },
  196         { "sigio", RCTL_ACTION_SIGIO },
  197         { "sigxcpu", RCTL_ACTION_SIGXCPU },
  198         { "sigxfsz", RCTL_ACTION_SIGXFSZ },
  199         { "sigvtalrm", RCTL_ACTION_SIGVTALRM },
  200         { "sigprof", RCTL_ACTION_SIGPROF },
  201         { "sigwinch", RCTL_ACTION_SIGWINCH },
  202         { "siginfo", RCTL_ACTION_SIGINFO },
  203         { "sigusr1", RCTL_ACTION_SIGUSR1 },
  204         { "sigusr2", RCTL_ACTION_SIGUSR2 },
  205         { "sigthr", RCTL_ACTION_SIGTHR },
  206         { "deny", RCTL_ACTION_DENY },
  207         { "log", RCTL_ACTION_LOG },
  208         { "devctl", RCTL_ACTION_DEVCTL },
  209         { "throttle", RCTL_ACTION_THROTTLE },
  210         { NULL, -1 }};
  211 
  212 static void rctl_init(void);
  213 SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);   /* registers rctl_init() with the SI_SUB_RACCT startup stage */
  214 
      /* NOTE(review): presumably the allocators for rules and rule links; their use is outside this part of the file -- confirm. */
  215 static uma_zone_t rctl_rule_zone;
  216 static uma_zone_t rctl_rule_link_zone;
  217 
  218 static int rctl_rule_fully_specified(const struct rctl_rule *rule);
  219 static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule);   /* used by rctl_enforce() to format a rule for log and devctl messages */
  220 
  221 static MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits");        /* malloc type; rctl_enforce() uses it for temporary message buffers */
  222 
  223 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS)
  224 {
  225         int error, val = rctl_throttle_min;
  226 
  227         error = sysctl_handle_int(oidp, &val, 0, req);
  228         if (error || !req->newptr)
  229                 return (error);
  230         if (val < 1 || val > rctl_throttle_max)
  231                 return (EINVAL);
  232 
  233         RACCT_LOCK();
  234         rctl_throttle_min = val;
  235         RACCT_UNLOCK();
  236 
  237         return (0);
  238 }
  239 
  240 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS)
  241 {
  242         int error, val = rctl_throttle_max;
  243 
  244         error = sysctl_handle_int(oidp, &val, 0, req);
  245         if (error || !req->newptr)
  246                 return (error);
  247         if (val < rctl_throttle_min)
  248                 return (EINVAL);
  249 
  250         RACCT_LOCK();
  251         rctl_throttle_max = val;
  252         RACCT_UNLOCK();
  253 
  254         return (0);
  255 }
  256 
  257 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS)
  258 {
  259         int error, val = rctl_throttle_pct;
  260 
  261         error = sysctl_handle_int(oidp, &val, 0, req);
  262         if (error || !req->newptr)
  263                 return (error);
  264         if (val < 0)
  265                 return (EINVAL);
  266 
  267         RACCT_LOCK();
  268         rctl_throttle_pct = val;
  269         RACCT_UNLOCK();
  270 
  271         return (0);
  272 }
  273 
  274 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS)
  275 {
  276         int error, val = rctl_throttle_pct2;
  277 
  278         error = sysctl_handle_int(oidp, &val, 0, req);
  279         if (error || !req->newptr)
  280                 return (error);
  281         if (val < 0)
  282                 return (EINVAL);
  283 
  284         RACCT_LOCK();
  285         rctl_throttle_pct2 = val;
  286         RACCT_UNLOCK();
  287 
  288         return (0);
  289 }
  290 
  291 static const char *
  292 rctl_subject_type_name(int subject)
  293 {
  294         int i;
  295 
  296         for (i = 0; subjectnames[i].d_name != NULL; i++) {
  297                 if (subjectnames[i].d_value == subject)
  298                         return (subjectnames[i].d_name);
  299         }
  300 
  301         panic("rctl_subject_type_name: unknown subject type %d", subject);
  302 }
  303 
  304 static const char *
  305 rctl_action_name(int action)
  306 {
  307         int i;
  308 
  309         for (i = 0; actionnames[i].d_name != NULL; i++) {
  310                 if (actionnames[i].d_value == action)
  311                         return (actionnames[i].d_name);
  312         }
  313 
  314         panic("rctl_action_name: unknown action %d", action);
  315 }
  316 
  317 const char *
  318 rctl_resource_name(int resource)
  319 {
  320         int i;
  321 
  322         for (i = 0; resourcenames[i].d_name != NULL; i++) {
  323                 if (resourcenames[i].d_value == resource)
  324                         return (resourcenames[i].d_name);
  325         }
  326 
  327         panic("rctl_resource_name: unknown resource %d", resource);
  328 }
  329 
  330 static struct racct *
  331 rctl_proc_rule_to_racct(const struct proc *p, const struct rctl_rule *rule)
  332 {
  333         struct ucred *cred = p->p_ucred;
  334 
  335         ASSERT_RACCT_ENABLED();
  336         RACCT_LOCK_ASSERT();
  337 
  338         switch (rule->rr_per) {
  339         case RCTL_SUBJECT_TYPE_PROCESS:
  340                 return (p->p_racct);
  341         case RCTL_SUBJECT_TYPE_USER:
  342                 return (cred->cr_ruidinfo->ui_racct);
  343         case RCTL_SUBJECT_TYPE_LOGINCLASS:
  344                 return (cred->cr_loginclass->lc_racct);
  345         case RCTL_SUBJECT_TYPE_JAIL:
  346                 return (cred->cr_prison->pr_prison_racct->prr_racct);
  347         default:
  348                 panic("%s: unknown per %d", __func__, rule->rr_per);
  349         }
  350 }
  351 
  352 /*
  353  * Return the amount of resource that can be allocated by 'p' before
  354  * hitting 'rule'.
  355  */
  356 static int64_t
  357 rctl_available_resource(const struct proc *p, const struct rctl_rule *rule)
  358 {
  359         const struct racct *racct;
  360         int64_t available;
  361 
  362         ASSERT_RACCT_ENABLED();
  363         RACCT_LOCK_ASSERT();
  364 
  365         racct = rctl_proc_rule_to_racct(p, rule);
  366         available = rule->rr_amount - racct->r_resources[rule->rr_resource];
  367 
  368         return (available);
  369 }
  370 
  371 /*
  372  * Called every second for proc, uidinfo, loginclass, and jail containers.
  373  * If the limit isn't exceeded, it decreases the usage amount to zero.
  374  * Otherwise, it decreases it by the value of the limit.  This way
  375  * resource consumption exceeding the limit "carries over" to the next
  376  * period.
  377  */
  378 void
  379 rctl_throttle_decay(struct racct *racct, int resource)
  380 {
  381         struct rctl_rule *rule;
  382         struct rctl_rule_link *link;
  383         int64_t minavailable;
  384 
  385         ASSERT_RACCT_ENABLED();
  386         RACCT_LOCK_ASSERT();
  387 
  388         minavailable = INT64_MAX;
  389 
  390         LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
  391                 rule = link->rrl_rule;
  392 
  393                 if (rule->rr_resource != resource)
  394                         continue;
  395                 if (rule->rr_action != RCTL_ACTION_THROTTLE)
  396                         continue;
  397 
  398                 if (rule->rr_amount < minavailable)
  399                         minavailable = rule->rr_amount;
  400         }
  401 
  402         if (racct->r_resources[resource] < minavailable) {
  403                 racct->r_resources[resource] = 0;
  404         } else {
  405                 /*
  406                  * Cap utilization counter at ten times the limit.  Otherwise,
  407                  * if we changed the rule lowering the allowed amount, it could
  408                  * take unreasonably long time for the accumulated resource
  409                  * usage to drop.
  410                  */
  411                 if (racct->r_resources[resource] > minavailable * 10)
  412                         racct->r_resources[resource] = minavailable * 10;
  413 
  414                 racct->r_resources[resource] -= minavailable;
  415         }
  416 }
  417 
  418 /*
  419  * Special version of rctl_get_available() for the %CPU resource.
  420  * We slightly cheat here and return less than we normally would.
  421  */
  422 int64_t
  423 rctl_pcpu_available(const struct proc *p) {
  424         struct rctl_rule *rule;
  425         struct rctl_rule_link *link;
  426         int64_t available, minavailable, limit;
  427 
  428         ASSERT_RACCT_ENABLED();
  429         RACCT_LOCK_ASSERT();
  430 
  431         minavailable = INT64_MAX;
  432         limit = 0;
  433 
  434         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
  435                 rule = link->rrl_rule;
  436                 if (rule->rr_resource != RACCT_PCTCPU)
  437                         continue;
  438                 if (rule->rr_action != RCTL_ACTION_DENY)
  439                         continue;
  440                 available = rctl_available_resource(p, rule);
  441                 if (available < minavailable) {
  442                         minavailable = available;
  443                         limit = rule->rr_amount;
  444                 }
  445         }
  446 
  447         /*
  448          * Return slightly less than actual value of the available
  449          * %cpu resource.  This makes %cpu throttling more aggressive
  450          * and lets us act sooner than the limits are already exceeded.
  451          */
  452         if (limit != 0) {
  453                 if (limit > 2 * RCTL_PCPU_SHIFT)
  454                         minavailable -= RCTL_PCPU_SHIFT;
  455                 else
  456                         minavailable -= (limit / 2);
  457         }
  458 
  459         return (minavailable);
  460 }
  461 
  462 static uint64_t
  463 xadd(uint64_t a, uint64_t b)
  464 {
  465         uint64_t c;
  466 
  467         c = a + b;
  468 
  469         /*
  470          * Detect overflow.
  471          */
  472         if (c < a || c < b)
  473                 return (UINT64_MAX);
  474 
  475         return (c);
  476 }
  477 
  478 static uint64_t
  479 xmul(uint64_t a, uint64_t b)
  480 {
  481 
  482         if (b != 0 && a > UINT64_MAX / b)
  483                 return (UINT64_MAX);
  484 
  485         return (a * b);
  486 }
  487 
  488 /*
  489  * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition
  490  * to what it keeps allocated now.  Returns non-zero if the allocation should
  491  * be denied, 0 otherwise.
       *
       * Every rule linked to the process' racct that concerns 'resource' and
       * would be exceeded by the request has its action carried out here:
       * logging, devctl notification, throttling, or signal delivery.  Deny
       * rules only set a flag, which is turned into the EDOOFUS return value
       * after all rules have been visited.  The racct lock must be held.
  492  */
  493 int
  494 rctl_enforce(struct proc *p, int resource, uint64_t amount)
  495 {
              /* ppsratecheck() state; being static, it is shared by all calls. */
  496         static struct timeval log_lasttime, devctl_lasttime;
  497         static int log_curtime = 0, devctl_curtime = 0;
  498         struct rctl_rule *rule;
  499         struct rctl_rule_link *link;
  500         struct sbuf sb;
  501         char *buf;
  502         int64_t available;
  503         uint64_t sleep_ms, sleep_ratio;
  504         int should_deny = 0;
  505 
  506         ASSERT_RACCT_ENABLED();
  507         RACCT_LOCK_ASSERT();
  508 
  509         /*
  510          * There may be more than one matching rule; go through all of them.
  511          * Denial should be done last, after logging and sending signals.
  512          */
  513         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
  514                 rule = link->rrl_rule;
  515                 if (rule->rr_resource != resource)
  516                         continue;
  517 
                      /*
                       * The request fits under this rule: clear the flag so
                       * that one-shot actions can fire again later.
                       */
  518                 available = rctl_available_resource(p, rule);
  519                 if (available >= (int64_t)amount) {
  520                         link->rrl_exceeded = 0;
  521                         continue;
  522                 }
  523 
  524                 switch (rule->rr_action) {
  525                 case RCTL_ACTION_DENY:
                              /* Only remember the verdict; keep processing rules. */
  526                         should_deny = 1;
  527                         continue;
  528                 case RCTL_ACTION_LOG:
  529                         /*
  530                          * If rrl_exceeded != 0, it means we've already
  531                          * logged a warning for this process.
  532                          */
  533                         if (link->rrl_exceeded != 0)
  534                                 continue;
  535 
  536                         /*
  537                          * If the process state is not fully initialized yet,
  538                          * we can't access most of the required fields, e.g.
  539                          * p->p_comm.  This happens when called from fork1().
  540                          * Ignore this rule for now; it will be processed just
  541                          * after fork, when called from racct_proc_fork_done().
  542                          */
  543                         if (p->p_state != PRS_NORMAL)
  544                                 continue;
  545 
  546                         if (!ppsratecheck(&log_lasttime, &log_curtime,
  547                             rctl_log_rate_limit))
  548                                 continue;
  549 
                              /* M_NOWAIT: the allocation may fail, in which case the message is dropped. */
  550                         buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
  551                         if (buf == NULL) {
  552                                 printf("rctl_enforce: out of memory\n");
  553                                 continue;
  554                         }
  555                         sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
  556                         rctl_rule_to_sbuf(&sb, rule);
  557                         sbuf_finish(&sb);
  558                         printf("rctl: rule \"%s\" matched by pid %d "
  559                             "(%s), uid %d, jail %s\n", sbuf_data(&sb),
  560                             p->p_pid, p->p_comm, p->p_ucred->cr_uid,
  561                             p->p_ucred->cr_prison->pr_prison_racct->prr_name);
  562                         sbuf_delete(&sb);
  563                         free(buf, M_RCTL);
  564                         link->rrl_exceeded = 1;
  565                         continue;
  566                 case RCTL_ACTION_DEVCTL:
                              /* Same one-shot, process-state and rate-limit checks as for logging. */
  567                         if (link->rrl_exceeded != 0)
  568                                 continue;
  569 
  570                         if (p->p_state != PRS_NORMAL)
  571                                 continue;
  572 
  573                         if (!ppsratecheck(&devctl_lasttime, &devctl_curtime,
  574                             rctl_devctl_rate_limit))
  575                                 continue;
  576 
  577                         buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
  578                         if (buf == NULL) {
  579                                 printf("rctl_enforce: out of memory\n");
  580                                 continue;
  581                         }
  582                         sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
  583                         sbuf_printf(&sb, "rule=");
  584                         rctl_rule_to_sbuf(&sb, rule);
  585                         sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
  586                             p->p_pid, p->p_ucred->cr_ruid,
  587                             p->p_ucred->cr_prison->pr_prison_racct->prr_name);
  588                         sbuf_finish(&sb);
  589                         devctl_notify_f("RCTL", "rule", "matched",
  590                             sbuf_data(&sb), M_NOWAIT);
  591                         sbuf_delete(&sb);
  592                         free(buf, M_RCTL);
  593                         link->rrl_exceeded = 1;
  594                         continue;
  595                 case RCTL_ACTION_THROTTLE:
  596                         if (p->p_state != PRS_NORMAL)
  597                                 continue;
  598 
                              /*
                               * The calculation below divides by rr_amount, so a
                               * zero limit is handled separately by applying the
                               * longest throttle.
                               */
  599                         if (rule->rr_amount == 0) {
  600                                 racct_proc_throttle(p, rctl_throttle_max);
  601                                 continue;
  602                         }
  603 
  604                         /*
  605                          * Make the process sleep for a fraction of second
  606                          * proportional to the ratio of process' resource
  607                          * utilization compared to the limit.  The point is
  608                          * to penalize resource hogs: processes that consume
  609                          * more of the available resources sleep for longer.
  610                          *
  611                          * We're trying to defer division until the very end,
  612                          * to minimize the rounding effects.  The following
  613                          * calculation could have been written in a clearer
  614                          * way like this:
  615                          *
  616                          * sleep_ms = hz * p->p_racct->r_resources[resource] /
  617                          *     rule->rr_amount;
  618                          * sleep_ms *= rctl_throttle_pct / 100;
  619                          * if (sleep_ms < rctl_throttle_min)
  620                          *         sleep_ms = rctl_throttle_min;
  621                          *
  622                          */
  623                         sleep_ms = xmul(hz, p->p_racct->r_resources[resource]);
  624                         sleep_ms = xmul(sleep_ms,  rctl_throttle_pct) / 100;
                              /*
                               * Both sides are still scaled by rr_amount here; the
                               * division happens at the end.
                               * NOTE(review): this product is a plain multiplication
                               * rather than xmul(), so it is not overflow-guarded
                               * for very large rr_amount -- confirm acceptable.
                               */
  625                         if (sleep_ms < rctl_throttle_min * rule->rr_amount)
  626                                 sleep_ms = rctl_throttle_min * rule->rr_amount;
  627 
  628                         /*
  629                          * Multiply that by the ratio of the resource
  630                          * consumption for the container compared to the limit,
  631                          * squared.  In other words, a process in a container
  632                          * that is two times over the limit will be throttled
  633                          * four times as much for hitting the same rule.  The
  634                          * point is to penalize processes more if the container
  635                          * itself (eg certain UID or jail) is above the limit.
  636                          */
  637                         if (available < 0)
  638                                 sleep_ratio = -available / rule->rr_amount;
  639                         else
  640                                 sleep_ratio = 0;
  641                         sleep_ratio = xmul(sleep_ratio, sleep_ratio);
  642                         sleep_ratio = xmul(sleep_ratio, rctl_throttle_pct2) / 100;
  643                         sleep_ms = xadd(sleep_ms, xmul(sleep_ms, sleep_ratio));
  644 
  645                         /*
  646                          * Finally the division.
  647                          */
  648                         sleep_ms /= rule->rr_amount;
  649 
  650                         if (sleep_ms > rctl_throttle_max)
  651                                 sleep_ms = rctl_throttle_max;
  652 #if 0
  653                         printf("%s: pid %d (%s), %jd of %jd, will sleep for %ju ms (ratio %ju, available %jd)\n",
  654                            __func__, p->p_pid, p->p_comm,
  655                            p->p_racct->r_resources[resource],
  656                            rule->rr_amount, (uintmax_t)sleep_ms,
  657                            (uintmax_t)sleep_ratio, (intmax_t)available);
  658 #endif
  659 
  660                         KASSERT(sleep_ms >= rctl_throttle_min, ("%s: %ju < %d\n",
  661                             __func__, (uintmax_t)sleep_ms, rctl_throttle_min));
  662                         racct_proc_throttle(p, sleep_ms);
  663                         continue;
  664                 default:
                              /* Every remaining action value is treated as a signal number. */
  665                         if (link->rrl_exceeded != 0)
  666                                 continue;
  667 
  668                         if (p->p_state != PRS_NORMAL)
  669                                 continue;
  670 
  671                         KASSERT(rule->rr_action > 0 &&
  672                             rule->rr_action <= RCTL_ACTION_SIGNAL_MAX,
  673                             ("rctl_enforce: unknown action %d",
  674                              rule->rr_action));
  675 
  676                         /*
  677                          * We're using the fact that RCTL_ACTION_SIG* values
  678                          * are equal to their counterparts from sys/signal.h.
  679                          */
  680                         kern_psignal(p, rule->rr_action);
  681                         link->rrl_exceeded = 1;
  682                         continue;
  683                 }
  684         }
  685 
  686         if (should_deny) {
  687                 /*
  688                  * Return fake error code; the caller should change it
  689                  * into one proper for the situation - EFSIZ, ENOMEM etc.
  690                  */
  691                 return (EDOOFUS);
  692         }
  693 
  694         return (0);
  695 }
  696 
  697 uint64_t
  698 rctl_get_limit(struct proc *p, int resource)
  699 {
  700         struct rctl_rule *rule;
  701         struct rctl_rule_link *link;
  702         uint64_t amount = UINT64_MAX;
  703 
  704         ASSERT_RACCT_ENABLED();
  705         RACCT_LOCK_ASSERT();
  706 
  707         /*
  708          * There may be more than one matching rule; go through all of them.
  709          * Denial should be done last, after logging and sending signals.
  710          */
  711         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
  712                 rule = link->rrl_rule;
  713                 if (rule->rr_resource != resource)
  714                         continue;
  715                 if (rule->rr_action != RCTL_ACTION_DENY)
  716                         continue;
  717                 if (rule->rr_amount < amount)
  718                         amount = rule->rr_amount;
  719         }
  720 
  721         return (amount);
  722 }
  723 
  724 uint64_t
  725 rctl_get_available(struct proc *p, int resource)
  726 {
  727         struct rctl_rule *rule;
  728         struct rctl_rule_link *link;
  729         int64_t available, minavailable, allocated;
  730 
  731         minavailable = INT64_MAX;
  732 
  733         ASSERT_RACCT_ENABLED();
  734         RACCT_LOCK_ASSERT();
  735 
  736         /*
  737          * There may be more than one matching rule; go through all of them.
  738          * Denial should be done last, after logging and sending signals.
  739          */
  740         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
  741                 rule = link->rrl_rule;
  742                 if (rule->rr_resource != resource)
  743                         continue;
  744                 if (rule->rr_action != RCTL_ACTION_DENY)
  745                         continue;
  746                 available = rctl_available_resource(p, rule);
  747                 if (available < minavailable)
  748                         minavailable = available;
  749         }
  750 
  751         /*
  752          * XXX: Think about this _hard_.
  753          */
  754         allocated = p->p_racct->r_resources[resource];
  755         if (minavailable < INT64_MAX - allocated)
  756                 minavailable += allocated;
  757         if (minavailable < 0)
  758                 minavailable = 0;
  759 
  760         return (minavailable);
  761 }
  762 
  763 static int
  764 rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
  765 {
  766 
  767         ASSERT_RACCT_ENABLED();
  768 
  769         if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
  770                 if (rule->rr_subject_type != filter->rr_subject_type)
  771                         return (0);
  772 
  773                 switch (filter->rr_subject_type) {
  774                 case RCTL_SUBJECT_TYPE_PROCESS:
  775                         if (filter->rr_subject.rs_proc != NULL &&
  776                             rule->rr_subject.rs_proc !=
  777                             filter->rr_subject.rs_proc)
  778                                 return (0);
  779                         break;
  780                 case RCTL_SUBJECT_TYPE_USER:
  781                         if (filter->rr_subject.rs_uip != NULL &&
  782                             rule->rr_subject.rs_uip !=
  783                             filter->rr_subject.rs_uip)
  784                                 return (0);
  785                         break;
  786                 case RCTL_SUBJECT_TYPE_LOGINCLASS:
  787                         if (filter->rr_subject.rs_loginclass != NULL &&
  788                             rule->rr_subject.rs_loginclass !=
  789                             filter->rr_subject.rs_loginclass)
  790                                 return (0);
  791                         break;
  792                 case RCTL_SUBJECT_TYPE_JAIL:
  793                         if (filter->rr_subject.rs_prison_racct != NULL &&
  794                             rule->rr_subject.rs_prison_racct !=
  795                             filter->rr_subject.rs_prison_racct)
  796                                 return (0);
  797                         break;
  798                 default:
  799                         panic("rctl_rule_matches: unknown subject type %d",
  800                             filter->rr_subject_type);
  801                 }
  802         }
  803 
  804         if (filter->rr_resource != RACCT_UNDEFINED) {
  805                 if (rule->rr_resource != filter->rr_resource)
  806                         return (0);
  807         }
  808 
  809         if (filter->rr_action != RCTL_ACTION_UNDEFINED) {
  810                 if (rule->rr_action != filter->rr_action)
  811                         return (0);
  812         }
  813 
  814         if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) {
  815                 if (rule->rr_amount != filter->rr_amount)
  816                         return (0);
  817         }
  818 
  819         if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) {
  820                 if (rule->rr_per != filter->rr_per)
  821                         return (0);
  822         }
  823 
  824         return (1);
  825 }
  826 
  827 static int
  828 str2value(const char *str, int *value, struct dict *table)
  829 {
  830         int i;
  831 
  832         if (value == NULL)
  833                 return (EINVAL);
  834 
  835         for (i = 0; table[i].d_name != NULL; i++) {
  836                 if (strcasecmp(table[i].d_name, str) == 0) {
  837                         *value =  table[i].d_value;
  838                         return (0);
  839                 }
  840         }
  841 
  842         return (EINVAL);
  843 }
  844 
  845 static int
  846 str2id(const char *str, id_t *value)
  847 {
  848         char *end;
  849 
  850         if (str == NULL)
  851                 return (EINVAL);
  852 
  853         *value = strtoul(str, &end, 10);
  854         if ((size_t)(end - str) != strlen(str))
  855                 return (EINVAL);
  856 
  857         return (0);
  858 }
  859 
  860 static int
  861 str2int64(const char *str, int64_t *value)
  862 {
  863         char *end;
  864 
  865         if (str == NULL)
  866                 return (EINVAL);
  867 
  868         *value = strtoul(str, &end, 10);
  869         if ((size_t)(end - str) != strlen(str))
  870                 return (EINVAL);
  871 
  872         if (*value < 0)
  873                 return (ERANGE);
  874 
  875         return (0);
  876 }
  877 
  878 /*
  879  * Connect the rule to the racct, increasing refcount for the rule.
  880  */
  881 static void
  882 rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule)
  883 {
  884         struct rctl_rule_link *link;
  885 
  886         ASSERT_RACCT_ENABLED();
  887         KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
  888 
  889         rctl_rule_acquire(rule);
  890         link = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
  891         link->rrl_rule = rule;
  892         link->rrl_exceeded = 0;
  893 
  894         RACCT_LOCK();
  895         LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
  896         RACCT_UNLOCK();
  897 }
  898 
  899 static int
  900 rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule)
  901 {
  902         struct rctl_rule_link *link;
  903 
  904         ASSERT_RACCT_ENABLED();
  905         KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
  906         RACCT_LOCK_ASSERT();
  907 
  908         link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT);
  909         if (link == NULL)
  910                 return (ENOMEM);
  911         rctl_rule_acquire(rule);
  912         link->rrl_rule = rule;
  913         link->rrl_exceeded = 0;
  914 
  915         LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
  916 
  917         return (0);
  918 }
  919 
  920 /*
  921  * Remove limits for a rules matching the filter and release
  922  * the refcounts for the rules, possibly freeing them.  Returns
  923  * the number of limit structures removed.
  924  */
  925 static int
  926 rctl_racct_remove_rules(struct racct *racct,
  927     const struct rctl_rule *filter)
  928 {
  929         struct rctl_rule_link *link, *linktmp;
  930         int removed = 0;
  931 
  932         ASSERT_RACCT_ENABLED();
  933         RACCT_LOCK_ASSERT();
  934 
  935         LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
  936                 if (!rctl_rule_matches(link->rrl_rule, filter))
  937                         continue;
  938 
  939                 LIST_REMOVE(link, rrl_next);
  940                 rctl_rule_release(link->rrl_rule);
  941                 uma_zfree(rctl_rule_link_zone, link);
  942                 removed++;
  943         }
  944         return (removed);
  945 }
  946 
  947 static void
  948 rctl_rule_acquire_subject(struct rctl_rule *rule)
  949 {
  950 
  951         ASSERT_RACCT_ENABLED();
  952 
  953         switch (rule->rr_subject_type) {
  954         case RCTL_SUBJECT_TYPE_UNDEFINED:
  955         case RCTL_SUBJECT_TYPE_PROCESS:
  956                 break;
  957         case RCTL_SUBJECT_TYPE_JAIL:
  958                 if (rule->rr_subject.rs_prison_racct != NULL)
  959                         prison_racct_hold(rule->rr_subject.rs_prison_racct);
  960                 break;
  961         case RCTL_SUBJECT_TYPE_USER:
  962                 if (rule->rr_subject.rs_uip != NULL)
  963                         uihold(rule->rr_subject.rs_uip);
  964                 break;
  965         case RCTL_SUBJECT_TYPE_LOGINCLASS:
  966                 if (rule->rr_subject.rs_loginclass != NULL)
  967                         loginclass_hold(rule->rr_subject.rs_loginclass);
  968                 break;
  969         default:
  970                 panic("rctl_rule_acquire_subject: unknown subject type %d",
  971                     rule->rr_subject_type);
  972         }
  973 }
  974 
  975 static void
  976 rctl_rule_release_subject(struct rctl_rule *rule)
  977 {
  978 
  979         ASSERT_RACCT_ENABLED();
  980 
  981         switch (rule->rr_subject_type) {
  982         case RCTL_SUBJECT_TYPE_UNDEFINED:
  983         case RCTL_SUBJECT_TYPE_PROCESS:
  984                 break;
  985         case RCTL_SUBJECT_TYPE_JAIL:
  986                 if (rule->rr_subject.rs_prison_racct != NULL)
  987                         prison_racct_free(rule->rr_subject.rs_prison_racct);
  988                 break;
  989         case RCTL_SUBJECT_TYPE_USER:
  990                 if (rule->rr_subject.rs_uip != NULL)
  991                         uifree(rule->rr_subject.rs_uip);
  992                 break;
  993         case RCTL_SUBJECT_TYPE_LOGINCLASS:
  994                 if (rule->rr_subject.rs_loginclass != NULL)
  995                         loginclass_free(rule->rr_subject.rs_loginclass);
  996                 break;
  997         default:
  998                 panic("rctl_rule_release_subject: unknown subject type %d",
  999                     rule->rr_subject_type);
 1000         }
 1001 }
 1002 
 1003 struct rctl_rule *
 1004 rctl_rule_alloc(int flags)
 1005 {
 1006         struct rctl_rule *rule;
 1007 
 1008         ASSERT_RACCT_ENABLED();
 1009 
 1010         rule = uma_zalloc(rctl_rule_zone, flags);
 1011         if (rule == NULL)
 1012                 return (NULL);
 1013         rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
 1014         rule->rr_subject.rs_proc = NULL;
 1015         rule->rr_subject.rs_uip = NULL;
 1016         rule->rr_subject.rs_loginclass = NULL;
 1017         rule->rr_subject.rs_prison_racct = NULL;
 1018         rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
 1019         rule->rr_resource = RACCT_UNDEFINED;
 1020         rule->rr_action = RCTL_ACTION_UNDEFINED;
 1021         rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
 1022         refcount_init(&rule->rr_refcount, 1);
 1023 
 1024         return (rule);
 1025 }
 1026 
 1027 struct rctl_rule *
 1028 rctl_rule_duplicate(const struct rctl_rule *rule, int flags)
 1029 {
 1030         struct rctl_rule *copy;
 1031 
 1032         ASSERT_RACCT_ENABLED();
 1033 
 1034         copy = uma_zalloc(rctl_rule_zone, flags);
 1035         if (copy == NULL)
 1036                 return (NULL);
 1037         copy->rr_subject_type = rule->rr_subject_type;
 1038         copy->rr_subject.rs_proc = rule->rr_subject.rs_proc;
 1039         copy->rr_subject.rs_uip = rule->rr_subject.rs_uip;
 1040         copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass;
 1041         copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct;
 1042         copy->rr_per = rule->rr_per;
 1043         copy->rr_resource = rule->rr_resource;
 1044         copy->rr_action = rule->rr_action;
 1045         copy->rr_amount = rule->rr_amount;
 1046         refcount_init(&copy->rr_refcount, 1);
 1047         rctl_rule_acquire_subject(copy);
 1048 
 1049         return (copy);
 1050 }
 1051 
 1052 void
 1053 rctl_rule_acquire(struct rctl_rule *rule)
 1054 {
 1055 
 1056         ASSERT_RACCT_ENABLED();
 1057         KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
 1058 
 1059         refcount_acquire(&rule->rr_refcount);
 1060 }
 1061 
 1062 static void
 1063 rctl_rule_free(void *context, int pending)
 1064 {
 1065         struct rctl_rule *rule;
 1066         
 1067         rule = (struct rctl_rule *)context;
 1068 
 1069         ASSERT_RACCT_ENABLED();
 1070         KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
 1071         
 1072         /*
 1073          * We don't need locking here; rule is guaranteed to be inaccessible.
 1074          */
 1075         
 1076         rctl_rule_release_subject(rule);
 1077         uma_zfree(rctl_rule_zone, rule);
 1078 }
 1079 
 1080 void
 1081 rctl_rule_release(struct rctl_rule *rule)
 1082 {
 1083 
 1084         ASSERT_RACCT_ENABLED();
 1085         KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
 1086 
 1087         if (refcount_release(&rule->rr_refcount)) {
 1088                 /*
 1089                  * rctl_rule_release() is often called when iterating
 1090                  * over all the uidinfo structures in the system,
 1091                  * holding uihashtbl_lock.  Since rctl_rule_free()
 1092                  * might end up calling uifree(), this would lead
 1093                  * to lock recursion.  Use taskqueue to avoid this.
 1094                  */
 1095                 TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
 1096                 taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
 1097         }
 1098 }
 1099 
 1100 static int
 1101 rctl_rule_fully_specified(const struct rctl_rule *rule)
 1102 {
 1103 
 1104         ASSERT_RACCT_ENABLED();
 1105 
 1106         switch (rule->rr_subject_type) {
 1107         case RCTL_SUBJECT_TYPE_UNDEFINED:
 1108                 return (0);
 1109         case RCTL_SUBJECT_TYPE_PROCESS:
 1110                 if (rule->rr_subject.rs_proc == NULL)
 1111                         return (0);
 1112                 break;
 1113         case RCTL_SUBJECT_TYPE_USER:
 1114                 if (rule->rr_subject.rs_uip == NULL)
 1115                         return (0);
 1116                 break;
 1117         case RCTL_SUBJECT_TYPE_LOGINCLASS:
 1118                 if (rule->rr_subject.rs_loginclass == NULL)
 1119                         return (0);
 1120                 break;
 1121         case RCTL_SUBJECT_TYPE_JAIL:
 1122                 if (rule->rr_subject.rs_prison_racct == NULL)
 1123                         return (0);
 1124                 break;
 1125         default:
 1126                 panic("rctl_rule_fully_specified: unknown subject type %d",
 1127                     rule->rr_subject_type);
 1128         }
 1129         if (rule->rr_resource == RACCT_UNDEFINED)
 1130                 return (0);
 1131         if (rule->rr_action == RCTL_ACTION_UNDEFINED)
 1132                 return (0);
 1133         if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
 1134                 return (0);
 1135         if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
 1136                 return (0);
 1137 
 1138         return (1);
 1139 }
 1140 
 1141 static int
 1142 rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
 1143 {
 1144         struct rctl_rule *rule;
 1145         char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
 1146              *amountstr, *perstr;
 1147         id_t id;
 1148         int error = 0;
 1149 
 1150         ASSERT_RACCT_ENABLED();
 1151 
 1152         rule = rctl_rule_alloc(M_WAITOK);
 1153 
 1154         subjectstr = strsep(&rulestr, ":");
 1155         subject_idstr = strsep(&rulestr, ":");
 1156         resourcestr = strsep(&rulestr, ":");
 1157         actionstr = strsep(&rulestr, "=/");
 1158         amountstr = strsep(&rulestr, "/");
 1159         perstr = rulestr;
 1160 
 1161         if (subjectstr == NULL || subjectstr[0] == '\0')
 1162                 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
 1163         else {
 1164                 error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
 1165                 if (error != 0)
 1166                         goto out;
 1167         }
 1168 
 1169         if (subject_idstr == NULL || subject_idstr[0] == '\0') {
 1170                 rule->rr_subject.rs_proc = NULL;
 1171                 rule->rr_subject.rs_uip = NULL;
 1172                 rule->rr_subject.rs_loginclass = NULL;
 1173                 rule->rr_subject.rs_prison_racct = NULL;
 1174         } else {
 1175                 switch (rule->rr_subject_type) {
 1176                 case RCTL_SUBJECT_TYPE_UNDEFINED:
 1177                         error = EINVAL;
 1178                         goto out;
 1179                 case RCTL_SUBJECT_TYPE_PROCESS:
 1180                         error = str2id(subject_idstr, &id);
 1181                         if (error != 0)
 1182                                 goto out;
 1183                         sx_assert(&allproc_lock, SA_LOCKED);
 1184                         rule->rr_subject.rs_proc = pfind(id);
 1185                         if (rule->rr_subject.rs_proc == NULL) {
 1186                                 error = ESRCH;
 1187                                 goto out;
 1188                         }
 1189                         PROC_UNLOCK(rule->rr_subject.rs_proc);
 1190                         break;
 1191                 case RCTL_SUBJECT_TYPE_USER:
 1192                         error = str2id(subject_idstr, &id);
 1193                         if (error != 0)
 1194                                 goto out;
 1195                         rule->rr_subject.rs_uip = uifind(id);
 1196                         break;
 1197                 case RCTL_SUBJECT_TYPE_LOGINCLASS:
 1198                         rule->rr_subject.rs_loginclass =
 1199                             loginclass_find(subject_idstr);
 1200                         if (rule->rr_subject.rs_loginclass == NULL) {
 1201                                 error = ENAMETOOLONG;
 1202                                 goto out;
 1203                         }
 1204                         break;
 1205                 case RCTL_SUBJECT_TYPE_JAIL:
 1206                         rule->rr_subject.rs_prison_racct =
 1207                             prison_racct_find(subject_idstr);
 1208                         if (rule->rr_subject.rs_prison_racct == NULL) {
 1209                                 error = ENAMETOOLONG;
 1210                                 goto out;
 1211                         }
 1212                         break;
 1213                default:
 1214                        panic("rctl_string_to_rule: unknown subject type %d",
 1215                            rule->rr_subject_type);
 1216                }
 1217         }
 1218 
 1219         if (resourcestr == NULL || resourcestr[0] == '\0')
 1220                 rule->rr_resource = RACCT_UNDEFINED;
 1221         else {
 1222                 error = str2value(resourcestr, &rule->rr_resource,
 1223                     resourcenames);
 1224                 if (error != 0)
 1225                         goto out;
 1226         }
 1227 
 1228         if (actionstr == NULL || actionstr[0] == '\0')
 1229                 rule->rr_action = RCTL_ACTION_UNDEFINED;
 1230         else {
 1231                 error = str2value(actionstr, &rule->rr_action, actionnames);
 1232                 if (error != 0)
 1233                         goto out;
 1234         }
 1235 
 1236         if (amountstr == NULL || amountstr[0] == '\0')
 1237                 rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
 1238         else {
 1239                 error = str2int64(amountstr, &rule->rr_amount);
 1240                 if (error != 0)
 1241                         goto out;
 1242                 if (RACCT_IS_IN_MILLIONS(rule->rr_resource)) {
 1243                         if (rule->rr_amount > INT64_MAX / 1000000) {
 1244                                 error = ERANGE;
 1245                                 goto out;
 1246                         }
 1247                         rule->rr_amount *= 1000000;
 1248                 }
 1249         }
 1250 
 1251         if (perstr == NULL || perstr[0] == '\0')
 1252                 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
 1253         else {
 1254                 error = str2value(perstr, &rule->rr_per, subjectnames);
 1255                 if (error != 0)
 1256                         goto out;
 1257         }
 1258 
 1259 out:
 1260         if (error == 0)
 1261                 *rulep = rule;
 1262         else
 1263                 rctl_rule_release(rule);
 1264 
 1265         return (error);
 1266 }
 1267 
 1268 /*
 1269  * Link a rule with all the subjects it applies to.
 1270  */
 1271 int
 1272 rctl_rule_add(struct rctl_rule *rule)
 1273 {
 1274         struct proc *p;
 1275         struct ucred *cred;
 1276         struct uidinfo *uip;
 1277         struct prison *pr;
 1278         struct prison_racct *prr;
 1279         struct loginclass *lc;
 1280         struct rctl_rule *rule2;
 1281         int match;
 1282 
 1283         ASSERT_RACCT_ENABLED();
 1284         KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
 1285 
 1286         /*
 1287          * Some rules just don't make sense, like "deny" rule for an undeniable
 1288          * resource.  The exception are the RSS and %CPU resources - they are
 1289          * not deniable in the racct sense, but the limit is enforced in
 1290          * a different way.
 1291          */
 1292         if (rule->rr_action == RCTL_ACTION_DENY &&
 1293             !RACCT_IS_DENIABLE(rule->rr_resource) &&
 1294             rule->rr_resource != RACCT_RSS &&
 1295             rule->rr_resource != RACCT_PCTCPU) {
 1296                 return (EOPNOTSUPP);
 1297         }
 1298 
 1299         if (rule->rr_action == RCTL_ACTION_THROTTLE &&
 1300             !RACCT_IS_DECAYING(rule->rr_resource)) {
 1301                 return (EOPNOTSUPP);
 1302         }
 1303 
 1304         if (rule->rr_action == RCTL_ACTION_THROTTLE &&
 1305             rule->rr_resource == RACCT_PCTCPU) {
 1306                 return (EOPNOTSUPP);
 1307         }
 1308 
 1309         if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS &&
 1310             RACCT_IS_SLOPPY(rule->rr_resource)) {
 1311                 return (EOPNOTSUPP);
 1312         }
 1313 
 1314         /*
 1315          * Make sure there are no duplicated rules.  Also, for the "deny"
 1316          * rules, remove ones differing only by "amount".
 1317          */
 1318         if (rule->rr_action == RCTL_ACTION_DENY) {
 1319                 rule2 = rctl_rule_duplicate(rule, M_WAITOK);
 1320                 rule2->rr_amount = RCTL_AMOUNT_UNDEFINED;
 1321                 rctl_rule_remove(rule2);
 1322                 rctl_rule_release(rule2);
 1323         } else
 1324                 rctl_rule_remove(rule);
 1325 
 1326         switch (rule->rr_subject_type) {
 1327         case RCTL_SUBJECT_TYPE_PROCESS:
 1328                 p = rule->rr_subject.rs_proc;
 1329                 KASSERT(p != NULL, ("rctl_rule_add: NULL proc"));
 1330 
 1331                 rctl_racct_add_rule(p->p_racct, rule);
 1332                 /*
 1333                  * In case of per-process rule, we don't have anything more
 1334                  * to do.
 1335                  */
 1336                 return (0);
 1337 
 1338         case RCTL_SUBJECT_TYPE_USER:
 1339                 uip = rule->rr_subject.rs_uip;
 1340                 KASSERT(uip != NULL, ("rctl_rule_add: NULL uip"));
 1341                 rctl_racct_add_rule(uip->ui_racct, rule);
 1342                 break;
 1343 
 1344         case RCTL_SUBJECT_TYPE_LOGINCLASS:
 1345                 lc = rule->rr_subject.rs_loginclass;
 1346                 KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass"));
 1347                 rctl_racct_add_rule(lc->lc_racct, rule);
 1348                 break;
 1349 
 1350         case RCTL_SUBJECT_TYPE_JAIL:
 1351                 prr = rule->rr_subject.rs_prison_racct;
 1352                 KASSERT(prr != NULL, ("rctl_rule_add: NULL pr"));
 1353                 rctl_racct_add_rule(prr->prr_racct, rule);
 1354                 break;
 1355 
 1356         default:
 1357                 panic("rctl_rule_add: unknown subject type %d",
 1358                     rule->rr_subject_type);
 1359         }
 1360 
 1361         /*
 1362          * Now go through all the processes and add the new rule to the ones
 1363          * it applies to.
 1364          */
 1365         sx_assert(&allproc_lock, SA_LOCKED);
 1366         FOREACH_PROC_IN_SYSTEM(p) {
 1367                 cred = p->p_ucred;
 1368                 switch (rule->rr_subject_type) {
 1369                 case RCTL_SUBJECT_TYPE_USER:
 1370                         if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
 1371                             cred->cr_ruidinfo == rule->rr_subject.rs_uip)
 1372                                 break;
 1373                         continue;
 1374                 case RCTL_SUBJECT_TYPE_LOGINCLASS:
 1375                         if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
 1376                                 break;
 1377                         continue;
 1378                 case RCTL_SUBJECT_TYPE_JAIL:
 1379                         match = 0;
 1380                         for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
 1381                                 if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) {
 1382                                         match = 1;
 1383                                         break;
 1384                                 }
 1385                         }
 1386                         if (match)
 1387                                 break;
 1388                         continue;
 1389                 default:
 1390                         panic("rctl_rule_add: unknown subject type %d",
 1391                             rule->rr_subject_type);
 1392                 }
 1393 
 1394                 rctl_racct_add_rule(p->p_racct, rule);
 1395         }
 1396 
 1397         return (0);
 1398 }
 1399 
 1400 static void
 1401 rctl_rule_pre_callback(void)
 1402 {
 1403 
 1404         RACCT_LOCK();
 1405 }
 1406 
 1407 static void
 1408 rctl_rule_post_callback(void)
 1409 {
 1410 
 1411         RACCT_UNLOCK();
 1412 }
 1413 
 1414 static void
 1415 rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3)
 1416 {
 1417         struct rctl_rule *filter = (struct rctl_rule *)arg2;
 1418         int found = 0;
 1419 
 1420         ASSERT_RACCT_ENABLED();
 1421         RACCT_LOCK_ASSERT();
 1422 
 1423         found += rctl_racct_remove_rules(racct, filter);
 1424 
 1425         *((int *)arg3) += found;
 1426 }
 1427 
 1428 /*
 1429  * Remove all rules that match the filter.
 1430  */
 1431 int
 1432 rctl_rule_remove(struct rctl_rule *filter)
 1433 {
 1434         struct proc *p;
 1435         int found = 0;
 1436 
 1437         ASSERT_RACCT_ENABLED();
 1438 
 1439         if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
 1440             filter->rr_subject.rs_proc != NULL) {
 1441                 p = filter->rr_subject.rs_proc;
 1442                 RACCT_LOCK();
 1443                 found = rctl_racct_remove_rules(p->p_racct, filter);
 1444                 RACCT_UNLOCK();
 1445                 if (found)
 1446                         return (0);
 1447                 return (ESRCH);
 1448         }
 1449 
 1450         loginclass_racct_foreach(rctl_rule_remove_callback,
 1451             rctl_rule_pre_callback, rctl_rule_post_callback,
 1452             filter, (void *)&found);
 1453         ui_racct_foreach(rctl_rule_remove_callback,
 1454             rctl_rule_pre_callback, rctl_rule_post_callback,
 1455             filter, (void *)&found);
 1456         prison_racct_foreach(rctl_rule_remove_callback,
 1457             rctl_rule_pre_callback, rctl_rule_post_callback,
 1458             filter, (void *)&found);
 1459 
 1460         sx_assert(&allproc_lock, SA_LOCKED);
 1461         RACCT_LOCK();
 1462         FOREACH_PROC_IN_SYSTEM(p) {
 1463                 found += rctl_racct_remove_rules(p->p_racct, filter);
 1464         }
 1465         RACCT_UNLOCK();
 1466 
 1467         if (found)
 1468                 return (0);
 1469         return (ESRCH);
 1470 }
 1471 
 1472 /*
 1473  * Appends a rule to the sbuf.
 1474  */
 1475 static void
 1476 rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule)
 1477 {
 1478         int64_t amount;
 1479 
 1480         ASSERT_RACCT_ENABLED();
 1481 
 1482         sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
 1483 
 1484         switch (rule->rr_subject_type) {
 1485         case RCTL_SUBJECT_TYPE_PROCESS:
 1486                 if (rule->rr_subject.rs_proc == NULL)
 1487                         sbuf_printf(sb, ":");
 1488                 else
 1489                         sbuf_printf(sb, "%d:",
 1490                             rule->rr_subject.rs_proc->p_pid);
 1491                 break;
 1492         case RCTL_SUBJECT_TYPE_USER:
 1493                 if (rule->rr_subject.rs_uip == NULL)
 1494                         sbuf_printf(sb, ":");
 1495                 else
 1496                         sbuf_printf(sb, "%d:",
 1497                             rule->rr_subject.rs_uip->ui_uid);
 1498                 break;
 1499         case RCTL_SUBJECT_TYPE_LOGINCLASS:
 1500                 if (rule->rr_subject.rs_loginclass == NULL)
 1501                         sbuf_printf(sb, ":");
 1502                 else
 1503                         sbuf_printf(sb, "%s:",
 1504                             rule->rr_subject.rs_loginclass->lc_name);
 1505                 break;
 1506         case RCTL_SUBJECT_TYPE_JAIL:
 1507                 if (rule->rr_subject.rs_prison_racct == NULL)
 1508                         sbuf_printf(sb, ":");
 1509                 else
 1510                         sbuf_printf(sb, "%s:",
 1511                             rule->rr_subject.rs_prison_racct->prr_name);
 1512                 break;
 1513         default:
 1514                 panic("rctl_rule_to_sbuf: unknown subject type %d",
 1515                     rule->rr_subject_type);
 1516         }
 1517 
 1518         amount = rule->rr_amount;
 1519         if (amount != RCTL_AMOUNT_UNDEFINED &&
 1520             RACCT_IS_IN_MILLIONS(rule->rr_resource))
 1521                 amount /= 1000000;
 1522 
 1523         sbuf_printf(sb, "%s:%s=%jd",
 1524             rctl_resource_name(rule->rr_resource),
 1525             rctl_action_name(rule->rr_action),
 1526             amount);
 1527 
 1528         if (rule->rr_per != rule->rr_subject_type)
 1529                 sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per));
 1530 }
 1531 
 1532 /*
 1533  * Routine used by RCTL syscalls to read in input string.
 1534  */
 1535 static int
 1536 rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen)
 1537 {
 1538         char *str;
 1539         int error;
 1540 
 1541         ASSERT_RACCT_ENABLED();
 1542 
 1543         if (inbuflen <= 0)
 1544                 return (EINVAL);
 1545         if (inbuflen > RCTL_MAX_INBUFSIZE)
 1546                 return (E2BIG);
 1547 
 1548         str = malloc(inbuflen + 1, M_RCTL, M_WAITOK);
 1549         error = copyinstr(inbufp, str, inbuflen, NULL);
 1550         if (error != 0) {
 1551                 free(str, M_RCTL);
 1552                 return (error);
 1553         }
 1554 
 1555         *inputstr = str;
 1556 
 1557         return (0);
 1558 }
 1559 
 1560 /*
 1561  * Routine used by RCTL syscalls to write out output string.
 1562  */
 1563 static int
 1564 rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen)
 1565 {
 1566         int error;
 1567 
 1568         ASSERT_RACCT_ENABLED();
 1569 
 1570         if (outputsbuf == NULL)
 1571                 return (0);
 1572 
 1573         sbuf_finish(outputsbuf);
 1574         if (outbuflen < sbuf_len(outputsbuf) + 1) {
 1575                 sbuf_delete(outputsbuf);
 1576                 return (ERANGE);
 1577         }
 1578         error = copyout(sbuf_data(outputsbuf), outbufp,
 1579             sbuf_len(outputsbuf) + 1);
 1580         sbuf_delete(outputsbuf);
 1581         return (error);
 1582 }
 1583 
 1584 static struct sbuf *
 1585 rctl_racct_to_sbuf(struct racct *racct, int sloppy)
 1586 {
 1587         struct sbuf *sb;
 1588         int64_t amount;
 1589         int i;
 1590 
 1591         ASSERT_RACCT_ENABLED();
 1592 
 1593         sb = sbuf_new_auto();
 1594         for (i = 0; i <= RACCT_MAX; i++) {
 1595                 if (sloppy == 0 && RACCT_IS_SLOPPY(i))
 1596                         continue;
 1597                 RACCT_LOCK();
 1598                 amount = racct->r_resources[i];
 1599                 RACCT_UNLOCK();
 1600                 if (RACCT_IS_IN_MILLIONS(i))
 1601                         amount /= 1000000;
 1602                 sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount);
 1603         }
 1604         sbuf_setpos(sb, sbuf_len(sb) - 1);
 1605         return (sb);
 1606 }
 1607 
 1608 int
 1609 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
 1610 {
 1611         struct rctl_rule *filter;
 1612         struct sbuf *outputsbuf = NULL;
 1613         struct proc *p;
 1614         struct uidinfo *uip;
 1615         struct loginclass *lc;
 1616         struct prison_racct *prr;
 1617         char *inputstr;
 1618         int error;
 1619 
 1620         if (!racct_enable)
 1621                 return (ENOSYS);
 1622 
 1623         error = priv_check(td, PRIV_RCTL_GET_RACCT);
 1624         if (error != 0)
 1625                 return (error);
 1626 
 1627         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
 1628         if (error != 0)
 1629                 return (error);
 1630 
 1631         sx_slock(&allproc_lock);
 1632         error = rctl_string_to_rule(inputstr, &filter);
 1633         free(inputstr, M_RCTL);
 1634         if (error != 0) {
 1635                 sx_sunlock(&allproc_lock);
 1636                 return (error);
 1637         }
 1638 
 1639         switch (filter->rr_subject_type) {
 1640         case RCTL_SUBJECT_TYPE_PROCESS:
 1641                 p = filter->rr_subject.rs_proc;
 1642                 if (p == NULL) {
 1643                         error = EINVAL;
 1644                         goto out;
 1645                 }
 1646                 outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0);
 1647                 break;
 1648         case RCTL_SUBJECT_TYPE_USER:
 1649                 uip = filter->rr_subject.rs_uip;
 1650                 if (uip == NULL) {
 1651                         error = EINVAL;
 1652                         goto out;
 1653                 }
 1654                 outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1);
 1655                 break;
 1656         case RCTL_SUBJECT_TYPE_LOGINCLASS:
 1657                 lc = filter->rr_subject.rs_loginclass;
 1658                 if (lc == NULL) {
 1659                         error = EINVAL;
 1660                         goto out;
 1661                 }
 1662                 outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1);
 1663                 break;
 1664         case RCTL_SUBJECT_TYPE_JAIL:
 1665                 prr = filter->rr_subject.rs_prison_racct;
 1666                 if (prr == NULL) {
 1667                         error = EINVAL;
 1668                         goto out;
 1669                 }
 1670                 outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1);
 1671                 break;
 1672         default:
 1673                 error = EINVAL;
 1674         }
 1675 out:
 1676         rctl_rule_release(filter);
 1677         sx_sunlock(&allproc_lock);
 1678         if (error != 0)
 1679                 return (error);
 1680 
 1681         error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen);
 1682 
 1683         return (error);
 1684 }
 1685 
 1686 static void
 1687 rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3)
 1688 {
 1689         struct rctl_rule *filter = (struct rctl_rule *)arg2;
 1690         struct rctl_rule_link *link;
 1691         struct sbuf *sb = (struct sbuf *)arg3;
 1692 
 1693         ASSERT_RACCT_ENABLED();
 1694         RACCT_LOCK_ASSERT();
 1695 
 1696         LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
 1697                 if (!rctl_rule_matches(link->rrl_rule, filter))
 1698                         continue;
 1699                 rctl_rule_to_sbuf(sb, link->rrl_rule);
 1700                 sbuf_printf(sb, ",");
 1701         }
 1702 }
 1703 
 1704 int
 1705 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
 1706 {
 1707         struct sbuf *sb;
 1708         struct rctl_rule *filter;
 1709         struct rctl_rule_link *link;
 1710         struct proc *p;
 1711         char *inputstr, *buf;
 1712         size_t bufsize;
 1713         int error;
 1714 
 1715         if (!racct_enable)
 1716                 return (ENOSYS);
 1717 
 1718         error = priv_check(td, PRIV_RCTL_GET_RULES);
 1719         if (error != 0)
 1720                 return (error);
 1721 
 1722         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
 1723         if (error != 0)
 1724                 return (error);
 1725 
 1726         sx_slock(&allproc_lock);
 1727         error = rctl_string_to_rule(inputstr, &filter);
 1728         free(inputstr, M_RCTL);
 1729         if (error != 0) {
 1730                 sx_sunlock(&allproc_lock);
 1731                 return (error);
 1732         }
 1733 
 1734         bufsize = uap->outbuflen;
 1735         if (bufsize > rctl_maxbufsize) {
 1736                 sx_sunlock(&allproc_lock);
 1737                 return (E2BIG);
 1738         }
 1739 
 1740         buf = malloc(bufsize, M_RCTL, M_WAITOK);
 1741         sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
 1742         KASSERT(sb != NULL, ("sbuf_new failed"));
 1743 
 1744         FOREACH_PROC_IN_SYSTEM(p) {
 1745                 RACCT_LOCK();
 1746                 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
 1747                         /*
 1748                          * Non-process rules will be added to the buffer later.
 1749                          * Adding them here would result in duplicated output.
 1750                          */
 1751                         if (link->rrl_rule->rr_subject_type !=
 1752                             RCTL_SUBJECT_TYPE_PROCESS)
 1753                                 continue;
 1754                         if (!rctl_rule_matches(link->rrl_rule, filter))
 1755                                 continue;
 1756                         rctl_rule_to_sbuf(sb, link->rrl_rule);
 1757                         sbuf_printf(sb, ",");
 1758                 }
 1759                 RACCT_UNLOCK();
 1760         }
 1761 
 1762         loginclass_racct_foreach(rctl_get_rules_callback,
 1763             rctl_rule_pre_callback, rctl_rule_post_callback,
 1764             filter, sb);
 1765         ui_racct_foreach(rctl_get_rules_callback,
 1766             rctl_rule_pre_callback, rctl_rule_post_callback,
 1767             filter, sb);
 1768         prison_racct_foreach(rctl_get_rules_callback,
 1769             rctl_rule_pre_callback, rctl_rule_post_callback,
 1770             filter, sb);
 1771         if (sbuf_error(sb) == ENOMEM) {
 1772                 error = ERANGE;
 1773                 goto out;
 1774         }
 1775 
 1776         /*
 1777          * Remove trailing ",".
 1778          */
 1779         if (sbuf_len(sb) > 0)
 1780                 sbuf_setpos(sb, sbuf_len(sb) - 1);
 1781 
 1782         error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
 1783 out:
 1784         rctl_rule_release(filter);
 1785         sx_sunlock(&allproc_lock);
 1786         free(buf, M_RCTL);
 1787         return (error);
 1788 }
 1789 
 1790 int
 1791 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
 1792 {
 1793         struct sbuf *sb;
 1794         struct rctl_rule *filter;
 1795         struct rctl_rule_link *link;
 1796         char *inputstr, *buf;
 1797         size_t bufsize;
 1798         int error;
 1799 
 1800         if (!racct_enable)
 1801                 return (ENOSYS);
 1802 
 1803         error = priv_check(td, PRIV_RCTL_GET_LIMITS);
 1804         if (error != 0)
 1805                 return (error);
 1806 
 1807         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
 1808         if (error != 0)
 1809                 return (error);
 1810 
 1811         sx_slock(&allproc_lock);
 1812         error = rctl_string_to_rule(inputstr, &filter);
 1813         free(inputstr, M_RCTL);
 1814         if (error != 0) {
 1815                 sx_sunlock(&allproc_lock);
 1816                 return (error);
 1817         }
 1818 
 1819         if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) {
 1820                 rctl_rule_release(filter);
 1821                 sx_sunlock(&allproc_lock);
 1822                 return (EINVAL);
 1823         }
 1824         if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) {
 1825                 rctl_rule_release(filter);
 1826                 sx_sunlock(&allproc_lock);
 1827                 return (EOPNOTSUPP);
 1828         }
 1829         if (filter->rr_subject.rs_proc == NULL) {
 1830                 rctl_rule_release(filter);
 1831                 sx_sunlock(&allproc_lock);
 1832                 return (EINVAL);
 1833         }
 1834 
 1835         bufsize = uap->outbuflen;
 1836         if (bufsize > rctl_maxbufsize) {
 1837                 rctl_rule_release(filter);
 1838                 sx_sunlock(&allproc_lock);
 1839                 return (E2BIG);
 1840         }
 1841 
 1842         buf = malloc(bufsize, M_RCTL, M_WAITOK);
 1843         sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
 1844         KASSERT(sb != NULL, ("sbuf_new failed"));
 1845 
 1846         RACCT_LOCK();
 1847         LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links,
 1848             rrl_next) {
 1849                 rctl_rule_to_sbuf(sb, link->rrl_rule);
 1850                 sbuf_printf(sb, ",");
 1851         }
 1852         RACCT_UNLOCK();
 1853         if (sbuf_error(sb) == ENOMEM) {
 1854                 error = ERANGE;
 1855                 sbuf_delete(sb);
 1856                 goto out;
 1857         }
 1858 
 1859         /*
 1860          * Remove trailing ",".
 1861          */
 1862         if (sbuf_len(sb) > 0)
 1863                 sbuf_setpos(sb, sbuf_len(sb) - 1);
 1864 
 1865         error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
 1866 out:
 1867         rctl_rule_release(filter);
 1868         sx_sunlock(&allproc_lock);
 1869         free(buf, M_RCTL);
 1870         return (error);
 1871 }
 1872 
 1873 int
 1874 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
 1875 {
 1876         struct rctl_rule *rule;
 1877         char *inputstr;
 1878         int error;
 1879 
 1880         if (!racct_enable)
 1881                 return (ENOSYS);
 1882 
 1883         error = priv_check(td, PRIV_RCTL_ADD_RULE);
 1884         if (error != 0)
 1885                 return (error);
 1886 
 1887         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
 1888         if (error != 0)
 1889                 return (error);
 1890 
 1891         sx_slock(&allproc_lock);
 1892         error = rctl_string_to_rule(inputstr, &rule);
 1893         free(inputstr, M_RCTL);
 1894         if (error != 0) {
 1895                 sx_sunlock(&allproc_lock);
 1896                 return (error);
 1897         }
 1898         /*
 1899          * The 'per' part of a rule is optional.
 1900          */
 1901         if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED &&
 1902             rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED)
 1903                 rule->rr_per = rule->rr_subject_type;
 1904 
 1905         if (!rctl_rule_fully_specified(rule)) {
 1906                 error = EINVAL;
 1907                 goto out;
 1908         }
 1909 
 1910         error = rctl_rule_add(rule);
 1911 
 1912 out:
 1913         rctl_rule_release(rule);
 1914         sx_sunlock(&allproc_lock);
 1915         return (error);
 1916 }
 1917 
 1918 int
 1919 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
 1920 {
 1921         struct rctl_rule *filter;
 1922         char *inputstr;
 1923         int error;
 1924 
 1925         if (!racct_enable)
 1926                 return (ENOSYS);
 1927 
 1928         error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
 1929         if (error != 0)
 1930                 return (error);
 1931 
 1932         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
 1933         if (error != 0)
 1934                 return (error);
 1935 
 1936         sx_slock(&allproc_lock);
 1937         error = rctl_string_to_rule(inputstr, &filter);
 1938         free(inputstr, M_RCTL);
 1939         if (error != 0) {
 1940                 sx_sunlock(&allproc_lock);
 1941                 return (error);
 1942         }
 1943 
 1944         error = rctl_rule_remove(filter);
 1945         rctl_rule_release(filter);
 1946         sx_sunlock(&allproc_lock);
 1947 
 1948         return (error);
 1949 }
 1950 
 1951 /*
 1952  * Update RCTL rule list after credential change.
 1953  */
 1954 void
 1955 rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred)
 1956 {
 1957         LIST_HEAD(, rctl_rule_link) newrules;
 1958         struct rctl_rule_link *link, *newlink;
 1959         struct uidinfo *newuip;
 1960         struct loginclass *newlc;
 1961         struct prison_racct *newprr;
 1962         int rulecnt, i;
 1963 
 1964         if (!racct_enable)
 1965                 return;
 1966 
 1967         PROC_LOCK_ASSERT(p, MA_NOTOWNED);
 1968 
 1969         newuip = newcred->cr_ruidinfo;
 1970         newlc = newcred->cr_loginclass;
 1971         newprr = newcred->cr_prison->pr_prison_racct;
 1972 
 1973         LIST_INIT(&newrules);
 1974 
 1975 again:
 1976         /*
 1977          * First, count the rules that apply to the process with new
 1978          * credentials.
 1979          */
 1980         rulecnt = 0;
 1981         RACCT_LOCK();
 1982         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
 1983                 if (link->rrl_rule->rr_subject_type ==
 1984                     RCTL_SUBJECT_TYPE_PROCESS)
 1985                         rulecnt++;
 1986         }
 1987         LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next)
 1988                 rulecnt++;
 1989         LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next)
 1990                 rulecnt++;
 1991         LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next)
 1992                 rulecnt++;
 1993         RACCT_UNLOCK();
 1994 
 1995         /*
 1996          * Create temporary list.  We've dropped the rctl_lock in order
 1997          * to use M_WAITOK.
 1998          */
 1999         for (i = 0; i < rulecnt; i++) {
 2000                 newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
 2001                 newlink->rrl_rule = NULL;
 2002                 newlink->rrl_exceeded = 0;
 2003                 LIST_INSERT_HEAD(&newrules, newlink, rrl_next);
 2004         }
 2005 
 2006         newlink = LIST_FIRST(&newrules);
 2007 
 2008         /*
 2009          * Assign rules to the newly allocated list entries.
 2010          */
 2011         RACCT_LOCK();
 2012         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
 2013                 if (link->rrl_rule->rr_subject_type ==
 2014                     RCTL_SUBJECT_TYPE_PROCESS) {
 2015                         if (newlink == NULL)
 2016                                 goto goaround;
 2017                         rctl_rule_acquire(link->rrl_rule);
 2018                         newlink->rrl_rule = link->rrl_rule;
 2019                         newlink->rrl_exceeded = link->rrl_exceeded;
 2020                         newlink = LIST_NEXT(newlink, rrl_next);
 2021                         rulecnt--;
 2022                 }
 2023         }
 2024         
 2025         LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) {
 2026                 if (newlink == NULL)
 2027                         goto goaround;
 2028                 rctl_rule_acquire(link->rrl_rule);
 2029                 newlink->rrl_rule = link->rrl_rule;
 2030                 newlink->rrl_exceeded = link->rrl_exceeded;
 2031                 newlink = LIST_NEXT(newlink, rrl_next);
 2032                 rulecnt--;
 2033         }
 2034 
 2035         LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) {
 2036                 if (newlink == NULL)
 2037                         goto goaround;
 2038                 rctl_rule_acquire(link->rrl_rule);
 2039                 newlink->rrl_rule = link->rrl_rule;
 2040                 newlink->rrl_exceeded = link->rrl_exceeded;
 2041                 newlink = LIST_NEXT(newlink, rrl_next);
 2042                 rulecnt--;
 2043         }
 2044 
 2045         LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) {
 2046                 if (newlink == NULL)
 2047                         goto goaround;
 2048                 rctl_rule_acquire(link->rrl_rule);
 2049                 newlink->rrl_rule = link->rrl_rule;
 2050                 newlink->rrl_exceeded = link->rrl_exceeded;
 2051                 newlink = LIST_NEXT(newlink, rrl_next);
 2052                 rulecnt--;
 2053         }
 2054 
 2055         if (rulecnt == 0) {
 2056                 /*
 2057                  * Free the old rule list.
 2058                  */
 2059                 while (!LIST_EMPTY(&p->p_racct->r_rule_links)) {
 2060                         link = LIST_FIRST(&p->p_racct->r_rule_links);
 2061                         LIST_REMOVE(link, rrl_next);
 2062                         rctl_rule_release(link->rrl_rule);
 2063                         uma_zfree(rctl_rule_link_zone, link);
 2064                 }
 2065 
 2066                 /*
 2067                  * Replace lists and we're done.
 2068                  *
 2069                  * XXX: Is there any way to switch list heads instead
 2070                  *      of iterating here?
 2071                  */
 2072                 while (!LIST_EMPTY(&newrules)) {
 2073                         newlink = LIST_FIRST(&newrules);
 2074                         LIST_REMOVE(newlink, rrl_next);
 2075                         LIST_INSERT_HEAD(&p->p_racct->r_rule_links,
 2076                             newlink, rrl_next);
 2077                 }
 2078 
 2079                 RACCT_UNLOCK();
 2080 
 2081                 return;
 2082         }
 2083 
 2084 goaround:
 2085         RACCT_UNLOCK();
 2086 
 2087         /*
 2088          * Rule list changed while we were not holding the rctl_lock.
 2089          * Free the new list and try again.
 2090          */
 2091         while (!LIST_EMPTY(&newrules)) {
 2092                 newlink = LIST_FIRST(&newrules);
 2093                 LIST_REMOVE(newlink, rrl_next);
 2094                 if (newlink->rrl_rule != NULL)
 2095                         rctl_rule_release(newlink->rrl_rule);
 2096                 uma_zfree(rctl_rule_link_zone, newlink);
 2097         }
 2098 
 2099         goto again;
 2100 }
 2101 
 2102 /*
 2103  * Assign RCTL rules to the newly created process.
 2104  */
 2105 int
 2106 rctl_proc_fork(struct proc *parent, struct proc *child)
 2107 {
 2108         struct rctl_rule *rule;
 2109         struct rctl_rule_link *link;
 2110         int error;
 2111 
 2112         ASSERT_RACCT_ENABLED();
 2113         RACCT_LOCK_ASSERT();
 2114         KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent));
 2115 
 2116         LIST_INIT(&child->p_racct->r_rule_links);
 2117 
 2118         /*
 2119          * Go through limits applicable to the parent and assign them
 2120          * to the child.  Rules with 'process' subject have to be duplicated
 2121          * in order to make their rr_subject point to the new process.
 2122          */
 2123         LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) {
 2124                 if (link->rrl_rule->rr_subject_type ==
 2125                     RCTL_SUBJECT_TYPE_PROCESS) {
 2126                         rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT);
 2127                         if (rule == NULL)
 2128                                 goto fail;
 2129                         KASSERT(rule->rr_subject.rs_proc == parent,
 2130                             ("rule->rr_subject.rs_proc != parent"));
 2131                         rule->rr_subject.rs_proc = child;
 2132                         error = rctl_racct_add_rule_locked(child->p_racct,
 2133                             rule);
 2134                         rctl_rule_release(rule);
 2135                         if (error != 0)
 2136                                 goto fail;
 2137                 } else {
 2138                         error = rctl_racct_add_rule_locked(child->p_racct,
 2139                             link->rrl_rule);
 2140                         if (error != 0)
 2141                                 goto fail;
 2142                 }
 2143         }
 2144 
 2145         return (0);
 2146 
 2147 fail:
 2148         while (!LIST_EMPTY(&child->p_racct->r_rule_links)) {
 2149                 link = LIST_FIRST(&child->p_racct->r_rule_links);
 2150                 LIST_REMOVE(link, rrl_next);
 2151                 rctl_rule_release(link->rrl_rule);
 2152                 uma_zfree(rctl_rule_link_zone, link);
 2153         }
 2154 
 2155         return (EAGAIN);
 2156 }
 2157 
 2158 /*
 2159  * Release rules attached to the racct.
 2160  */
 2161 void
 2162 rctl_racct_release(struct racct *racct)
 2163 {
 2164         struct rctl_rule_link *link;
 2165 
 2166         ASSERT_RACCT_ENABLED();
 2167         RACCT_LOCK_ASSERT();
 2168 
 2169         while (!LIST_EMPTY(&racct->r_rule_links)) {
 2170                 link = LIST_FIRST(&racct->r_rule_links);
 2171                 LIST_REMOVE(link, rrl_next);
 2172                 rctl_rule_release(link->rrl_rule);
 2173                 uma_zfree(rctl_rule_link_zone, link);
 2174         }
 2175 }
 2176 
 2177 static void
 2178 rctl_init(void)
 2179 {
 2180 
 2181         if (!racct_enable)
 2182                 return;
 2183 
 2184         rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule),
 2185             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 2186         rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
 2187             sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
 2188             UMA_ALIGN_PTR, 0);
 2189 
 2190         /*
 2191          * Set default values, making sure not to overwrite the ones
 2192          * fetched from tunables.  Most of those could be set at the
 2193          * declaration, except for the rctl_throttle_max - we cannot
 2194          * set it there due to hz not being compile time constant.
 2195          */
 2196         if (rctl_throttle_min < 1)
 2197                 rctl_throttle_min = 1;
 2198         if (rctl_throttle_max < rctl_throttle_min)
 2199                 rctl_throttle_max = 2 * hz;
 2200         if (rctl_throttle_pct < 0)
 2201                 rctl_throttle_pct = 100;
 2202         if (rctl_throttle_pct2 < 0)
 2203                 rctl_throttle_pct2 = 100;
 2204 }
 2205 
 2206 #else /* !RCTL */
 2207 
 2208 int
 2209 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
 2210 {
 2211         
 2212         return (ENOSYS);
 2213 }
 2214 
 2215 int
 2216 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
 2217 {
 2218         
 2219         return (ENOSYS);
 2220 }
 2221 
 2222 int
 2223 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
 2224 {
 2225         
 2226         return (ENOSYS);
 2227 }
 2228 
 2229 int
 2230 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
 2231 {
 2232         
 2233         return (ENOSYS);
 2234 }
 2235 
 2236 int
 2237 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
 2238 {
 2239         
 2240         return (ENOSYS);
 2241 }
 2242 
 2243 #endif /* !RCTL */

Cache object: d590c3a4e05c8feb66ce771f97f950fd


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.