The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_rctl.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2010 The FreeBSD Foundation
    5  * All rights reserved.
    6  *
    7  * This software was developed by Edward Tomasz Napierala under sponsorship
    8  * from the FreeBSD Foundation.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   29  * SUCH DAMAGE.
   30  *
   31  * $FreeBSD: releng/12.0/sys/kern/kern_rctl.c 332816 2018-04-20 13:08:04Z avg $
   32  */
   33 
   34 #include <sys/cdefs.h>
   35 __FBSDID("$FreeBSD: releng/12.0/sys/kern/kern_rctl.c 332816 2018-04-20 13:08:04Z avg $");
   36 
   37 #include <sys/param.h>
   38 #include <sys/bus.h>
   39 #include <sys/malloc.h>
   40 #include <sys/queue.h>
   41 #include <sys/refcount.h>
   42 #include <sys/jail.h>
   43 #include <sys/kernel.h>
   44 #include <sys/limits.h>
   45 #include <sys/loginclass.h>
   46 #include <sys/priv.h>
   47 #include <sys/proc.h>
   48 #include <sys/racct.h>
   49 #include <sys/rctl.h>
   50 #include <sys/resourcevar.h>
   51 #include <sys/sx.h>
   52 #include <sys/sysent.h>
   53 #include <sys/sysproto.h>
   54 #include <sys/systm.h>
   55 #include <sys/types.h>
   56 #include <sys/eventhandler.h>
   57 #include <sys/lock.h>
   58 #include <sys/mutex.h>
   59 #include <sys/rwlock.h>
   60 #include <sys/sbuf.h>
   61 #include <sys/taskqueue.h>
   62 #include <sys/tree.h>
   63 #include <vm/uma.h>
   64 
   65 #ifdef RCTL
   66 #ifndef RACCT
   67 #error "The RCTL option requires the RACCT option"
   68 #endif
   69 
   70 FEATURE(rctl, "Resource Limits");
   71 
   72 #define HRF_DEFAULT             0
   73 #define HRF_DONT_INHERIT        1
   74 #define HRF_DONT_ACCUMULATE     2
   75 
   76 #define RCTL_MAX_INBUFSIZE      4 * 1024
   77 #define RCTL_MAX_OUTBUFSIZE     16 * 1024 * 1024
   78 #define RCTL_LOG_BUFSIZE        128
   79 
   80 #define RCTL_PCPU_SHIFT         (10 * 1000000)
   81 
/* Exported below as the kern.racct.rctl.maxbufsize sysctl. */
static unsigned int rctl_maxbufsize = RCTL_MAX_OUTBUFSIZE;
/* Rate limits handed to ppsratecheck() by rctl_enforce(). */
static int rctl_log_rate_limit = 10;
static int rctl_devctl_rate_limit = 10;

/*
 * Values below are initialized in rctl_init().
 */
static int rctl_throttle_min = -1;
static int rctl_throttle_max = -1;
static int rctl_throttle_pct = -1;
static int rctl_throttle_pct2 = -1;

/* Validating sysctl handlers for the four throttle knobs above. */
static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS);
static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS);
static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS);
static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS);
   98 
/*
 * The kern.racct.rctl sysctl subtree.  The buffer size and the two rate
 * limits are plain variables; the four throttle knobs are routed through
 * handler functions that validate a new value before storing it.  Each
 * throttle knob additionally has a TUNABLE_INT entry under the same name.
 */
SYSCTL_NODE(_kern_racct, OID_AUTO, rctl, CTLFLAG_RW, 0, "Resource Limits");
SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, maxbufsize, CTLFLAG_RWTUN,
    &rctl_maxbufsize, 0, "Maximum output buffer size");
SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, log_rate_limit, CTLFLAG_RW,
    &rctl_log_rate_limit, 0, "Maximum number of log messages per second");
SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, devctl_rate_limit, CTLFLAG_RWTUN,
    &rctl_devctl_rate_limit, 0, "Maximum number of devctl messages per second");
SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_min,
    CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_min_sysctl, "IU",
    "Shortest throttling duration, in hz");
TUNABLE_INT("kern.racct.rctl.throttle_min", &rctl_throttle_min);
SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_max,
    CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_max_sysctl, "IU",
    "Longest throttling duration, in hz");
TUNABLE_INT("kern.racct.rctl.throttle_max", &rctl_throttle_max);
SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct,
    CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_pct_sysctl, "IU",
    "Throttling penalty for process consumption, in percent");
TUNABLE_INT("kern.racct.rctl.throttle_pct", &rctl_throttle_pct);
SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct2,
    CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_pct2_sysctl, "IU",
    "Throttling penalty for container consumption, in percent");
TUNABLE_INT("kern.racct.rctl.throttle_pct2", &rctl_throttle_pct2);
  122 
  123 /*
  124  * 'rctl_rule_link' connects a rule with every racct it's related to.
  125  * For example, rule 'user:X:openfiles:deny=N/process' is linked
  126  * with uidinfo for user X, and to each process of that user.
  127  */
struct rctl_rule_link {
        /* Linkage on a racct's r_rule_links list. */
        LIST_ENTRY(rctl_rule_link)      rrl_next;
        /* The rule this link refers to. */
        struct rctl_rule                *rrl_rule;
        /*
         * Set to 1 by rctl_enforce() once a log, devctl or signal action
         * has fired for this link, and cleared again when the request
         * fits under the rule; prevents repeating the action.
         */
        int                             rrl_exceeded;
};
  133 
/*
 * One entry of a name <-> numeric value table.  The tables below end with
 * an entry whose d_name is NULL.
 */
struct dict {
        const char      *d_name;        /* textual name */
        int             d_value;        /* matching numeric constant */
};
  138 
  139 static struct dict subjectnames[] = {
  140         { "process", RCTL_SUBJECT_TYPE_PROCESS },
  141         { "user", RCTL_SUBJECT_TYPE_USER },
  142         { "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS },
  143         { "jail", RCTL_SUBJECT_TYPE_JAIL },
  144         { NULL, -1 }};
  145 
  146 static struct dict resourcenames[] = {
  147         { "cputime", RACCT_CPU },
  148         { "datasize", RACCT_DATA },
  149         { "stacksize", RACCT_STACK },
  150         { "coredumpsize", RACCT_CORE },
  151         { "memoryuse", RACCT_RSS },
  152         { "memorylocked", RACCT_MEMLOCK },
  153         { "maxproc", RACCT_NPROC },
  154         { "openfiles", RACCT_NOFILE },
  155         { "vmemoryuse", RACCT_VMEM },
  156         { "pseudoterminals", RACCT_NPTS },
  157         { "swapuse", RACCT_SWAP },
  158         { "nthr", RACCT_NTHR },
  159         { "msgqqueued", RACCT_MSGQQUEUED },
  160         { "msgqsize", RACCT_MSGQSIZE },
  161         { "nmsgq", RACCT_NMSGQ },
  162         { "nsem", RACCT_NSEM },
  163         { "nsemop", RACCT_NSEMOP },
  164         { "nshm", RACCT_NSHM },
  165         { "shmsize", RACCT_SHMSIZE },
  166         { "wallclock", RACCT_WALLCLOCK },
  167         { "pcpu", RACCT_PCTCPU },
  168         { "readbps", RACCT_READBPS },
  169         { "writebps", RACCT_WRITEBPS },
  170         { "readiops", RACCT_READIOPS },
  171         { "writeiops", RACCT_WRITEIOPS },
  172         { NULL, -1 }};
  173 
  174 static struct dict actionnames[] = {
  175         { "sighup", RCTL_ACTION_SIGHUP },
  176         { "sigint", RCTL_ACTION_SIGINT },
  177         { "sigquit", RCTL_ACTION_SIGQUIT },
  178         { "sigill", RCTL_ACTION_SIGILL },
  179         { "sigtrap", RCTL_ACTION_SIGTRAP },
  180         { "sigabrt", RCTL_ACTION_SIGABRT },
  181         { "sigemt", RCTL_ACTION_SIGEMT },
  182         { "sigfpe", RCTL_ACTION_SIGFPE },
  183         { "sigkill", RCTL_ACTION_SIGKILL },
  184         { "sigbus", RCTL_ACTION_SIGBUS },
  185         { "sigsegv", RCTL_ACTION_SIGSEGV },
  186         { "sigsys", RCTL_ACTION_SIGSYS },
  187         { "sigpipe", RCTL_ACTION_SIGPIPE },
  188         { "sigalrm", RCTL_ACTION_SIGALRM },
  189         { "sigterm", RCTL_ACTION_SIGTERM },
  190         { "sigurg", RCTL_ACTION_SIGURG },
  191         { "sigstop", RCTL_ACTION_SIGSTOP },
  192         { "sigtstp", RCTL_ACTION_SIGTSTP },
  193         { "sigchld", RCTL_ACTION_SIGCHLD },
  194         { "sigttin", RCTL_ACTION_SIGTTIN },
  195         { "sigttou", RCTL_ACTION_SIGTTOU },
  196         { "sigio", RCTL_ACTION_SIGIO },
  197         { "sigxcpu", RCTL_ACTION_SIGXCPU },
  198         { "sigxfsz", RCTL_ACTION_SIGXFSZ },
  199         { "sigvtalrm", RCTL_ACTION_SIGVTALRM },
  200         { "sigprof", RCTL_ACTION_SIGPROF },
  201         { "sigwinch", RCTL_ACTION_SIGWINCH },
  202         { "siginfo", RCTL_ACTION_SIGINFO },
  203         { "sigusr1", RCTL_ACTION_SIGUSR1 },
  204         { "sigusr2", RCTL_ACTION_SIGUSR2 },
  205         { "sigthr", RCTL_ACTION_SIGTHR },
  206         { "deny", RCTL_ACTION_DENY },
  207         { "log", RCTL_ACTION_LOG },
  208         { "devctl", RCTL_ACTION_DEVCTL },
  209         { "throttle", RCTL_ACTION_THROTTLE },
  210         { NULL, -1 }};
  211 
/* rctl_init() is registered to run first within the SI_SUB_RACCT stage. */
static void rctl_init(void);
SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);

/*
 * UMA zones; presumably backing struct rctl_rule and struct rctl_rule_link
 * allocations -- their setup is not visible here, confirm in rctl_init().
 */
static uma_zone_t rctl_rule_zone;
static uma_zone_t rctl_rule_link_zone;

static int rctl_rule_fully_specified(const struct rctl_rule *rule);
static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule);

/* Malloc type used, among others, for the message buffers in rctl_enforce(). */
static MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits");
  222 
  223 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS)
  224 {
  225         int error, val = rctl_throttle_min;
  226 
  227         error = sysctl_handle_int(oidp, &val, 0, req);
  228         if (error || !req->newptr)
  229                 return (error);
  230         if (val < 1 || val > rctl_throttle_max)
  231                 return (EINVAL);
  232 
  233         RACCT_LOCK();
  234         rctl_throttle_min = val;
  235         RACCT_UNLOCK();
  236 
  237         return (0);
  238 }
  239 
  240 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS)
  241 {
  242         int error, val = rctl_throttle_max;
  243 
  244         error = sysctl_handle_int(oidp, &val, 0, req);
  245         if (error || !req->newptr)
  246                 return (error);
  247         if (val < rctl_throttle_min)
  248                 return (EINVAL);
  249 
  250         RACCT_LOCK();
  251         rctl_throttle_max = val;
  252         RACCT_UNLOCK();
  253 
  254         return (0);
  255 }
  256 
  257 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS)
  258 {
  259         int error, val = rctl_throttle_pct;
  260 
  261         error = sysctl_handle_int(oidp, &val, 0, req);
  262         if (error || !req->newptr)
  263                 return (error);
  264         if (val < 0)
  265                 return (EINVAL);
  266 
  267         RACCT_LOCK();
  268         rctl_throttle_pct = val;
  269         RACCT_UNLOCK();
  270 
  271         return (0);
  272 }
  273 
  274 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS)
  275 {
  276         int error, val = rctl_throttle_pct2;
  277 
  278         error = sysctl_handle_int(oidp, &val, 0, req);
  279         if (error || !req->newptr)
  280                 return (error);
  281         if (val < 0)
  282                 return (EINVAL);
  283 
  284         RACCT_LOCK();
  285         rctl_throttle_pct2 = val;
  286         RACCT_UNLOCK();
  287 
  288         return (0);
  289 }
  290 
  291 static const char *
  292 rctl_subject_type_name(int subject)
  293 {
  294         int i;
  295 
  296         for (i = 0; subjectnames[i].d_name != NULL; i++) {
  297                 if (subjectnames[i].d_value == subject)
  298                         return (subjectnames[i].d_name);
  299         }
  300 
  301         panic("rctl_subject_type_name: unknown subject type %d", subject);
  302 }
  303 
  304 static const char *
  305 rctl_action_name(int action)
  306 {
  307         int i;
  308 
  309         for (i = 0; actionnames[i].d_name != NULL; i++) {
  310                 if (actionnames[i].d_value == action)
  311                         return (actionnames[i].d_name);
  312         }
  313 
  314         panic("rctl_action_name: unknown action %d", action);
  315 }
  316 
  317 const char *
  318 rctl_resource_name(int resource)
  319 {
  320         int i;
  321 
  322         for (i = 0; resourcenames[i].d_name != NULL; i++) {
  323                 if (resourcenames[i].d_value == resource)
  324                         return (resourcenames[i].d_name);
  325         }
  326 
  327         panic("rctl_resource_name: unknown resource %d", resource);
  328 }
  329 
  330 static struct racct *
  331 rctl_proc_rule_to_racct(const struct proc *p, const struct rctl_rule *rule)
  332 {
  333         struct ucred *cred = p->p_ucred;
  334 
  335         ASSERT_RACCT_ENABLED();
  336         RACCT_LOCK_ASSERT();
  337 
  338         switch (rule->rr_per) {
  339         case RCTL_SUBJECT_TYPE_PROCESS:
  340                 return (p->p_racct);
  341         case RCTL_SUBJECT_TYPE_USER:
  342                 return (cred->cr_ruidinfo->ui_racct);
  343         case RCTL_SUBJECT_TYPE_LOGINCLASS:
  344                 return (cred->cr_loginclass->lc_racct);
  345         case RCTL_SUBJECT_TYPE_JAIL:
  346                 return (cred->cr_prison->pr_prison_racct->prr_racct);
  347         default:
  348                 panic("%s: unknown per %d", __func__, rule->rr_per);
  349         }
  350 }
  351 
  352 /*
  353  * Return the amount of resource that can be allocated by 'p' before
  354  * hitting 'rule'.
  355  */
  356 static int64_t
  357 rctl_available_resource(const struct proc *p, const struct rctl_rule *rule)
  358 {
  359         const struct racct *racct;
  360         int64_t available;
  361 
  362         ASSERT_RACCT_ENABLED();
  363         RACCT_LOCK_ASSERT();
  364 
  365         racct = rctl_proc_rule_to_racct(p, rule);
  366         available = rule->rr_amount - racct->r_resources[rule->rr_resource];
  367 
  368         return (available);
  369 }
  370 
  371 /*
  372  * Called every second for proc, uidinfo, loginclass, and jail containers.
  373  * If the limit isn't exceeded, it decreases the usage amount to zero.
  374  * Otherwise, it decreases it by the value of the limit.  This way
  375  * resource consumption exceeding the limit "carries over" to the next
  376  * period.
  377  */
  378 void
  379 rctl_throttle_decay(struct racct *racct, int resource)
  380 {
  381         struct rctl_rule *rule;
  382         struct rctl_rule_link *link;
  383         int64_t minavailable;
  384 
  385         ASSERT_RACCT_ENABLED();
  386         RACCT_LOCK_ASSERT();
  387 
  388         minavailable = INT64_MAX;
  389 
  390         LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
  391                 rule = link->rrl_rule;
  392 
  393                 if (rule->rr_resource != resource)
  394                         continue;
  395                 if (rule->rr_action != RCTL_ACTION_THROTTLE)
  396                         continue;
  397 
  398                 if (rule->rr_amount < minavailable)
  399                         minavailable = rule->rr_amount;
  400         }
  401 
  402         if (racct->r_resources[resource] < minavailable) {
  403                 racct->r_resources[resource] = 0;
  404         } else {
  405                 /*
  406                  * Cap utilization counter at ten times the limit.  Otherwise,
  407                  * if we changed the rule lowering the allowed amount, it could
  408                  * take unreasonably long time for the accumulated resource
  409                  * usage to drop.
  410                  */
  411                 if (racct->r_resources[resource] > minavailable * 10)
  412                         racct->r_resources[resource] = minavailable * 10;
  413 
  414                 racct->r_resources[resource] -= minavailable;
  415         }
  416 }
  417 
  418 /*
  419  * Special version of rctl_get_available() for the %CPU resource.
  420  * We slightly cheat here and return less than we normally would.
  421  */
  422 int64_t
  423 rctl_pcpu_available(const struct proc *p) {
  424         struct rctl_rule *rule;
  425         struct rctl_rule_link *link;
  426         int64_t available, minavailable, limit;
  427 
  428         ASSERT_RACCT_ENABLED();
  429         RACCT_LOCK_ASSERT();
  430 
  431         minavailable = INT64_MAX;
  432         limit = 0;
  433 
  434         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
  435                 rule = link->rrl_rule;
  436                 if (rule->rr_resource != RACCT_PCTCPU)
  437                         continue;
  438                 if (rule->rr_action != RCTL_ACTION_DENY)
  439                         continue;
  440                 available = rctl_available_resource(p, rule);
  441                 if (available < minavailable) {
  442                         minavailable = available;
  443                         limit = rule->rr_amount;
  444                 }
  445         }
  446 
  447         /*
  448          * Return slightly less than actual value of the available
  449          * %cpu resource.  This makes %cpu throttling more aggressive
  450          * and lets us act sooner than the limits are already exceeded.
  451          */
  452         if (limit != 0) {
  453                 if (limit > 2 * RCTL_PCPU_SHIFT)
  454                         minavailable -= RCTL_PCPU_SHIFT;
  455                 else
  456                         minavailable -= (limit / 2);
  457         }
  458 
  459         return (minavailable);
  460 }
  461 
  462 static uint64_t
  463 xadd(uint64_t a, uint64_t b)
  464 {
  465         uint64_t c;
  466 
  467         c = a + b;
  468 
  469         /*
  470          * Detect overflow.
  471          */
  472         if (c < a || c < b)
  473                 return (UINT64_MAX);
  474 
  475         return (c);
  476 }
  477 
  478 static uint64_t
  479 xmul(uint64_t a, uint64_t b)
  480 {
  481 
  482         if (b != 0 && a > UINT64_MAX / b)
  483                 return (UINT64_MAX);
  484 
  485         return (a * b);
  486 }
  487 
/*
 * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition
 * to what it keeps allocated now.  Returns non-zero if the allocation should
 * be denied, 0 otherwise.
 *
 * Every rule linked to p->p_racct for 'resource' is examined.  For each rule
 * that leaves less than 'amount' available, the rule's action is carried out:
 * logging, devctl notification, throttling or signal delivery.  Deny rules
 * only set a flag, which is turned into the EDOOFUS return value after the
 * whole list has been processed.  The racct lock must be held.
 */
int
rctl_enforce(struct proc *p, int resource, uint64_t amount)
{
        /* Rate-limiting state for ppsratecheck(); shared by all callers. */
        static struct timeval log_lasttime, devctl_lasttime;
        static int log_curtime = 0, devctl_curtime = 0;
        struct rctl_rule *rule;
        struct rctl_rule_link *link;
        struct sbuf sb;
        char *buf;
        int64_t available;
        uint64_t sleep_ms, sleep_ratio;
        int should_deny = 0;

        ASSERT_RACCT_ENABLED();
        RACCT_LOCK_ASSERT();

        /*
         * There may be more than one matching rule; go through all of them.
         * Denial should be done last, after logging and sending signals.
         */
        LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
                rule = link->rrl_rule;
                if (rule->rr_resource != resource)
                        continue;

                /*
                 * The request fits under this rule: re-arm the one-shot
                 * actions (log, devctl, signal) and move on.
                 */
                available = rctl_available_resource(p, rule);
                if (available >= (int64_t)amount) {
                        link->rrl_exceeded = 0;
                        continue;
                }

                switch (rule->rr_action) {
                case RCTL_ACTION_DENY:
                        should_deny = 1;
                        continue;
                case RCTL_ACTION_LOG:
                        /*
                         * If rrl_exceeded != 0, it means we've already
                         * logged a warning for this process.
                         */
                        if (link->rrl_exceeded != 0)
                                continue;

                        /*
                         * If the process state is not fully initialized yet,
                         * we can't access most of the required fields, e.g.
                         * p->p_comm.  This happens when called from fork1().
                         * Ignore this rule for now; it will be processed just
                         * after fork, when called from racct_proc_fork_done().
                         */
                        if (p->p_state != PRS_NORMAL)
                                continue;

                        if (!ppsratecheck(&log_lasttime, &log_curtime,
                            rctl_log_rate_limit))
                                continue;

                        /* M_NOWAIT: the allocation may fail, see below. */
                        buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
                        if (buf == NULL) {
                                printf("rctl_enforce: out of memory\n");
                                continue;
                        }
                        sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
                        rctl_rule_to_sbuf(&sb, rule);
                        sbuf_finish(&sb);
                        printf("rctl: rule \"%s\" matched by pid %d "
                            "(%s), uid %d, jail %s\n", sbuf_data(&sb),
                            p->p_pid, p->p_comm, p->p_ucred->cr_uid,
                            p->p_ucred->cr_prison->pr_prison_racct->prr_name);
                        sbuf_delete(&sb);
                        free(buf, M_RCTL);
                        link->rrl_exceeded = 1;
                        continue;
                case RCTL_ACTION_DEVCTL:
                        /*
                         * Same gating as for RCTL_ACTION_LOG above: once per
                         * excess, only for fully set up processes, and
                         * subject to its own rate limit.
                         */
                        if (link->rrl_exceeded != 0)
                                continue;

                        if (p->p_state != PRS_NORMAL)
                                continue;

                        if (!ppsratecheck(&devctl_lasttime, &devctl_curtime,
                            rctl_devctl_rate_limit))
                                continue;

                        buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
                        if (buf == NULL) {
                                printf("rctl_enforce: out of memory\n");
                                continue;
                        }
                        sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
                        sbuf_printf(&sb, "rule=");
                        rctl_rule_to_sbuf(&sb, rule);
                        sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
                            p->p_pid, p->p_ucred->cr_ruid,
                            p->p_ucred->cr_prison->pr_prison_racct->prr_name);
                        sbuf_finish(&sb);
                        devctl_notify_f("RCTL", "rule", "matched",
                            sbuf_data(&sb), M_NOWAIT);
                        sbuf_delete(&sb);
                        free(buf, M_RCTL);
                        link->rrl_exceeded = 1;
                        continue;
                case RCTL_ACTION_THROTTLE:
                        if (p->p_state != PRS_NORMAL)
                                continue;

                        /*
                         * Make the process sleep for a fraction of second
                         * proportional to the ratio of process' resource
                         * utilization compared to the limit.  The point is
                         * to penalize resource hogs: processes that consume
                         * more of the available resources sleep for longer.
                         *
                         * We're trying to defer division until the very end,
                         * to minimize the rounding effects.  The following
                         * calculation could have been written in a clearer
                         * way like this:
                         *
                         * sleep_ms = hz * p->p_racct->r_resources[resource] /
                         *     rule->rr_amount;
                         * sleep_ms *= rctl_throttle_pct / 100;
                         * if (sleep_ms < rctl_throttle_min)
                         *         sleep_ms = rctl_throttle_min;
                         *
                         * NOTE(review): despite its name, sleep_ms is scaled
                         * by hz and clamped to limits whose sysctls are
                         * described as "in hz" -- confirm the unit expected
                         * by racct_proc_throttle().
                         */
                        sleep_ms = xmul(hz, p->p_racct->r_resources[resource]);
                        sleep_ms = xmul(sleep_ms,  rctl_throttle_pct) / 100;
                        /*
                         * The lower bound is pre-multiplied by the limit,
                         * because the division by rr_amount comes later.
                         */
                        if (sleep_ms < rctl_throttle_min * rule->rr_amount)
                                sleep_ms = rctl_throttle_min * rule->rr_amount;

                        /*
                         * Multiply that by the ratio of the resource
                         * consumption for the container compared to the limit,
                         * squared.  In other words, a process in a container
                         * that is two times over the limit will be throttled
                         * four times as much for hitting the same rule.  The
                         * point is to penalize processes more if the container
                         * itself (eg certain UID or jail) is above the limit.
                         */
                        if (available < 0)
                                sleep_ratio = -available / rule->rr_amount;
                        else
                                sleep_ratio = 0;
                        sleep_ratio = xmul(sleep_ratio, sleep_ratio);
                        sleep_ratio = xmul(sleep_ratio, rctl_throttle_pct2) / 100;
                        sleep_ms = xadd(sleep_ms, xmul(sleep_ms, sleep_ratio));

                        /*
                         * Finally the division.
                         */
                        sleep_ms /= rule->rr_amount;

                        if (sleep_ms > rctl_throttle_max)
                                sleep_ms = rctl_throttle_max;
#if 0
                        printf("%s: pid %d (%s), %jd of %jd, will sleep for %ju ms (ratio %ju, available %jd)\n",
                           __func__, p->p_pid, p->p_comm,
                           p->p_racct->r_resources[resource],
                           rule->rr_amount, (uintmax_t)sleep_ms,
                           (uintmax_t)sleep_ratio, (intmax_t)available);
#endif

                        KASSERT(sleep_ms >= rctl_throttle_min, ("%s: %ju < %d\n",
                            __func__, (uintmax_t)sleep_ms, rctl_throttle_min));
                        racct_proc_throttle(p, sleep_ms);
                        continue;
                default:
                        /*
                         * Anything else is expected to be a signal action;
                         * the signal is sent once per excess.
                         */
                        if (link->rrl_exceeded != 0)
                                continue;

                        if (p->p_state != PRS_NORMAL)
                                continue;

                        KASSERT(rule->rr_action > 0 &&
                            rule->rr_action <= RCTL_ACTION_SIGNAL_MAX,
                            ("rctl_enforce: unknown action %d",
                             rule->rr_action));

                        /*
                         * We're using the fact that RCTL_ACTION_SIG* values
                         * are equal to their counterparts from sys/signal.h.
                         */
                        kern_psignal(p, rule->rr_action);
                        link->rrl_exceeded = 1;
                        continue;
                }
        }

        if (should_deny) {
                /*
                 * Return fake error code; the caller should change it
                 * into one proper for the situation - EFBIG, ENOMEM etc.
                 */
                return (EDOOFUS);
        }

        return (0);
}
  691 
  692 uint64_t
  693 rctl_get_limit(struct proc *p, int resource)
  694 {
  695         struct rctl_rule *rule;
  696         struct rctl_rule_link *link;
  697         uint64_t amount = UINT64_MAX;
  698 
  699         ASSERT_RACCT_ENABLED();
  700         RACCT_LOCK_ASSERT();
  701 
  702         /*
  703          * There may be more than one matching rule; go through all of them.
  704          * Denial should be done last, after logging and sending signals.
  705          */
  706         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
  707                 rule = link->rrl_rule;
  708                 if (rule->rr_resource != resource)
  709                         continue;
  710                 if (rule->rr_action != RCTL_ACTION_DENY)
  711                         continue;
  712                 if (rule->rr_amount < amount)
  713                         amount = rule->rr_amount;
  714         }
  715 
  716         return (amount);
  717 }
  718 
  719 uint64_t
  720 rctl_get_available(struct proc *p, int resource)
  721 {
  722         struct rctl_rule *rule;
  723         struct rctl_rule_link *link;
  724         int64_t available, minavailable, allocated;
  725 
  726         minavailable = INT64_MAX;
  727 
  728         ASSERT_RACCT_ENABLED();
  729         RACCT_LOCK_ASSERT();
  730 
  731         /*
  732          * There may be more than one matching rule; go through all of them.
  733          * Denial should be done last, after logging and sending signals.
  734          */
  735         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
  736                 rule = link->rrl_rule;
  737                 if (rule->rr_resource != resource)
  738                         continue;
  739                 if (rule->rr_action != RCTL_ACTION_DENY)
  740                         continue;
  741                 available = rctl_available_resource(p, rule);
  742                 if (available < minavailable)
  743                         minavailable = available;
  744         }
  745 
  746         /*
  747          * XXX: Think about this _hard_.
  748          */
  749         allocated = p->p_racct->r_resources[resource];
  750         if (minavailable < INT64_MAX - allocated)
  751                 minavailable += allocated;
  752         if (minavailable < 0)
  753                 minavailable = 0;
  754 
  755         return (minavailable);
  756 }
  757 
  758 static int
  759 rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
  760 {
  761 
  762         ASSERT_RACCT_ENABLED();
  763 
  764         if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
  765                 if (rule->rr_subject_type != filter->rr_subject_type)
  766                         return (0);
  767 
  768                 switch (filter->rr_subject_type) {
  769                 case RCTL_SUBJECT_TYPE_PROCESS:
  770                         if (filter->rr_subject.rs_proc != NULL &&
  771                             rule->rr_subject.rs_proc !=
  772                             filter->rr_subject.rs_proc)
  773                                 return (0);
  774                         break;
  775                 case RCTL_SUBJECT_TYPE_USER:
  776                         if (filter->rr_subject.rs_uip != NULL &&
  777                             rule->rr_subject.rs_uip !=
  778                             filter->rr_subject.rs_uip)
  779                                 return (0);
  780                         break;
  781                 case RCTL_SUBJECT_TYPE_LOGINCLASS:
  782                         if (filter->rr_subject.rs_loginclass != NULL &&
  783                             rule->rr_subject.rs_loginclass !=
  784                             filter->rr_subject.rs_loginclass)
  785                                 return (0);
  786                         break;
  787                 case RCTL_SUBJECT_TYPE_JAIL:
  788                         if (filter->rr_subject.rs_prison_racct != NULL &&
  789                             rule->rr_subject.rs_prison_racct !=
  790                             filter->rr_subject.rs_prison_racct)
  791                                 return (0);
  792                         break;
  793                 default:
  794                         panic("rctl_rule_matches: unknown subject type %d",
  795                             filter->rr_subject_type);
  796                 }
  797         }
  798 
  799         if (filter->rr_resource != RACCT_UNDEFINED) {
  800                 if (rule->rr_resource != filter->rr_resource)
  801                         return (0);
  802         }
  803 
  804         if (filter->rr_action != RCTL_ACTION_UNDEFINED) {
  805                 if (rule->rr_action != filter->rr_action)
  806                         return (0);
  807         }
  808 
  809         if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) {
  810                 if (rule->rr_amount != filter->rr_amount)
  811                         return (0);
  812         }
  813 
  814         if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) {
  815                 if (rule->rr_per != filter->rr_per)
  816                         return (0);
  817         }
  818 
  819         return (1);
  820 }
  821 
  822 static int
  823 str2value(const char *str, int *value, struct dict *table)
  824 {
  825         int i;
  826 
  827         if (value == NULL)
  828                 return (EINVAL);
  829 
  830         for (i = 0; table[i].d_name != NULL; i++) {
  831                 if (strcasecmp(table[i].d_name, str) == 0) {
  832                         *value =  table[i].d_value;
  833                         return (0);
  834                 }
  835         }
  836 
  837         return (EINVAL);
  838 }
  839 
  840 static int
  841 str2id(const char *str, id_t *value)
  842 {
  843         char *end;
  844 
  845         if (str == NULL)
  846                 return (EINVAL);
  847 
  848         *value = strtoul(str, &end, 10);
  849         if ((size_t)(end - str) != strlen(str))
  850                 return (EINVAL);
  851 
  852         return (0);
  853 }
  854 
  855 static int
  856 str2int64(const char *str, int64_t *value)
  857 {
  858         char *end;
  859 
  860         if (str == NULL)
  861                 return (EINVAL);
  862 
  863         *value = strtoul(str, &end, 10);
  864         if ((size_t)(end - str) != strlen(str))
  865                 return (EINVAL);
  866 
  867         if (*value < 0)
  868                 return (ERANGE);
  869 
  870         return (0);
  871 }
  872 
  873 /*
  874  * Connect the rule to the racct, increasing refcount for the rule.
  875  */
  876 static void
  877 rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule)
  878 {
  879         struct rctl_rule_link *link;
  880 
  881         ASSERT_RACCT_ENABLED();
  882         KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
  883 
  884         rctl_rule_acquire(rule);
  885         link = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
  886         link->rrl_rule = rule;
  887         link->rrl_exceeded = 0;
  888 
  889         RACCT_LOCK();
  890         LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
  891         RACCT_UNLOCK();
  892 }
  893 
  894 static int
  895 rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule)
  896 {
  897         struct rctl_rule_link *link;
  898 
  899         ASSERT_RACCT_ENABLED();
  900         KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
  901         RACCT_LOCK_ASSERT();
  902 
  903         link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT);
  904         if (link == NULL)
  905                 return (ENOMEM);
  906         rctl_rule_acquire(rule);
  907         link->rrl_rule = rule;
  908         link->rrl_exceeded = 0;
  909 
  910         LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
  911 
  912         return (0);
  913 }
  914 
  915 /*
  916  * Remove limits for a rules matching the filter and release
  917  * the refcounts for the rules, possibly freeing them.  Returns
  918  * the number of limit structures removed.
  919  */
  920 static int
  921 rctl_racct_remove_rules(struct racct *racct,
  922     const struct rctl_rule *filter)
  923 {
  924         struct rctl_rule_link *link, *linktmp;
  925         int removed = 0;
  926 
  927         ASSERT_RACCT_ENABLED();
  928         RACCT_LOCK_ASSERT();
  929 
  930         LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
  931                 if (!rctl_rule_matches(link->rrl_rule, filter))
  932                         continue;
  933 
  934                 LIST_REMOVE(link, rrl_next);
  935                 rctl_rule_release(link->rrl_rule);
  936                 uma_zfree(rctl_rule_link_zone, link);
  937                 removed++;
  938         }
  939         return (removed);
  940 }
  941 
  942 static void
  943 rctl_rule_acquire_subject(struct rctl_rule *rule)
  944 {
  945 
  946         ASSERT_RACCT_ENABLED();
  947 
  948         switch (rule->rr_subject_type) {
  949         case RCTL_SUBJECT_TYPE_UNDEFINED:
  950         case RCTL_SUBJECT_TYPE_PROCESS:
  951                 break;
  952         case RCTL_SUBJECT_TYPE_JAIL:
  953                 if (rule->rr_subject.rs_prison_racct != NULL)
  954                         prison_racct_hold(rule->rr_subject.rs_prison_racct);
  955                 break;
  956         case RCTL_SUBJECT_TYPE_USER:
  957                 if (rule->rr_subject.rs_uip != NULL)
  958                         uihold(rule->rr_subject.rs_uip);
  959                 break;
  960         case RCTL_SUBJECT_TYPE_LOGINCLASS:
  961                 if (rule->rr_subject.rs_loginclass != NULL)
  962                         loginclass_hold(rule->rr_subject.rs_loginclass);
  963                 break;
  964         default:
  965                 panic("rctl_rule_acquire_subject: unknown subject type %d",
  966                     rule->rr_subject_type);
  967         }
  968 }
  969 
  970 static void
  971 rctl_rule_release_subject(struct rctl_rule *rule)
  972 {
  973 
  974         ASSERT_RACCT_ENABLED();
  975 
  976         switch (rule->rr_subject_type) {
  977         case RCTL_SUBJECT_TYPE_UNDEFINED:
  978         case RCTL_SUBJECT_TYPE_PROCESS:
  979                 break;
  980         case RCTL_SUBJECT_TYPE_JAIL:
  981                 if (rule->rr_subject.rs_prison_racct != NULL)
  982                         prison_racct_free(rule->rr_subject.rs_prison_racct);
  983                 break;
  984         case RCTL_SUBJECT_TYPE_USER:
  985                 if (rule->rr_subject.rs_uip != NULL)
  986                         uifree(rule->rr_subject.rs_uip);
  987                 break;
  988         case RCTL_SUBJECT_TYPE_LOGINCLASS:
  989                 if (rule->rr_subject.rs_loginclass != NULL)
  990                         loginclass_free(rule->rr_subject.rs_loginclass);
  991                 break;
  992         default:
  993                 panic("rctl_rule_release_subject: unknown subject type %d",
  994                     rule->rr_subject_type);
  995         }
  996 }
  997 
  998 struct rctl_rule *
  999 rctl_rule_alloc(int flags)
 1000 {
 1001         struct rctl_rule *rule;
 1002 
 1003         ASSERT_RACCT_ENABLED();
 1004 
 1005         rule = uma_zalloc(rctl_rule_zone, flags);
 1006         if (rule == NULL)
 1007                 return (NULL);
 1008         rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
 1009         rule->rr_subject.rs_proc = NULL;
 1010         rule->rr_subject.rs_uip = NULL;
 1011         rule->rr_subject.rs_loginclass = NULL;
 1012         rule->rr_subject.rs_prison_racct = NULL;
 1013         rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
 1014         rule->rr_resource = RACCT_UNDEFINED;
 1015         rule->rr_action = RCTL_ACTION_UNDEFINED;
 1016         rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
 1017         refcount_init(&rule->rr_refcount, 1);
 1018 
 1019         return (rule);
 1020 }
 1021 
 1022 struct rctl_rule *
 1023 rctl_rule_duplicate(const struct rctl_rule *rule, int flags)
 1024 {
 1025         struct rctl_rule *copy;
 1026 
 1027         ASSERT_RACCT_ENABLED();
 1028 
 1029         copy = uma_zalloc(rctl_rule_zone, flags);
 1030         if (copy == NULL)
 1031                 return (NULL);
 1032         copy->rr_subject_type = rule->rr_subject_type;
 1033         copy->rr_subject.rs_proc = rule->rr_subject.rs_proc;
 1034         copy->rr_subject.rs_uip = rule->rr_subject.rs_uip;
 1035         copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass;
 1036         copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct;
 1037         copy->rr_per = rule->rr_per;
 1038         copy->rr_resource = rule->rr_resource;
 1039         copy->rr_action = rule->rr_action;
 1040         copy->rr_amount = rule->rr_amount;
 1041         refcount_init(&copy->rr_refcount, 1);
 1042         rctl_rule_acquire_subject(copy);
 1043 
 1044         return (copy);
 1045 }
 1046 
 1047 void
 1048 rctl_rule_acquire(struct rctl_rule *rule)
 1049 {
 1050 
 1051         ASSERT_RACCT_ENABLED();
 1052         KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
 1053 
 1054         refcount_acquire(&rule->rr_refcount);
 1055 }
 1056 
 1057 static void
 1058 rctl_rule_free(void *context, int pending)
 1059 {
 1060         struct rctl_rule *rule;
 1061         
 1062         rule = (struct rctl_rule *)context;
 1063 
 1064         ASSERT_RACCT_ENABLED();
 1065         KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
 1066         
 1067         /*
 1068          * We don't need locking here; rule is guaranteed to be inaccessible.
 1069          */
 1070         
 1071         rctl_rule_release_subject(rule);
 1072         uma_zfree(rctl_rule_zone, rule);
 1073 }
 1074 
 1075 void
 1076 rctl_rule_release(struct rctl_rule *rule)
 1077 {
 1078 
 1079         ASSERT_RACCT_ENABLED();
 1080         KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
 1081 
 1082         if (refcount_release(&rule->rr_refcount)) {
 1083                 /*
 1084                  * rctl_rule_release() is often called when iterating
 1085                  * over all the uidinfo structures in the system,
 1086                  * holding uihashtbl_lock.  Since rctl_rule_free()
 1087                  * might end up calling uifree(), this would lead
 1088                  * to lock recursion.  Use taskqueue to avoid this.
 1089                  */
 1090                 TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
 1091                 taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
 1092         }
 1093 }
 1094 
 1095 static int
 1096 rctl_rule_fully_specified(const struct rctl_rule *rule)
 1097 {
 1098 
 1099         ASSERT_RACCT_ENABLED();
 1100 
 1101         switch (rule->rr_subject_type) {
 1102         case RCTL_SUBJECT_TYPE_UNDEFINED:
 1103                 return (0);
 1104         case RCTL_SUBJECT_TYPE_PROCESS:
 1105                 if (rule->rr_subject.rs_proc == NULL)
 1106                         return (0);
 1107                 break;
 1108         case RCTL_SUBJECT_TYPE_USER:
 1109                 if (rule->rr_subject.rs_uip == NULL)
 1110                         return (0);
 1111                 break;
 1112         case RCTL_SUBJECT_TYPE_LOGINCLASS:
 1113                 if (rule->rr_subject.rs_loginclass == NULL)
 1114                         return (0);
 1115                 break;
 1116         case RCTL_SUBJECT_TYPE_JAIL:
 1117                 if (rule->rr_subject.rs_prison_racct == NULL)
 1118                         return (0);
 1119                 break;
 1120         default:
 1121                 panic("rctl_rule_fully_specified: unknown subject type %d",
 1122                     rule->rr_subject_type);
 1123         }
 1124         if (rule->rr_resource == RACCT_UNDEFINED)
 1125                 return (0);
 1126         if (rule->rr_action == RCTL_ACTION_UNDEFINED)
 1127                 return (0);
 1128         if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
 1129                 return (0);
 1130         if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
 1131                 return (0);
 1132 
 1133         return (1);
 1134 }
 1135 
 1136 static int
 1137 rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
 1138 {
 1139         struct rctl_rule *rule;
 1140         char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
 1141              *amountstr, *perstr;
 1142         id_t id;
 1143         int error = 0;
 1144 
 1145         ASSERT_RACCT_ENABLED();
 1146 
 1147         rule = rctl_rule_alloc(M_WAITOK);
 1148 
 1149         subjectstr = strsep(&rulestr, ":");
 1150         subject_idstr = strsep(&rulestr, ":");
 1151         resourcestr = strsep(&rulestr, ":");
 1152         actionstr = strsep(&rulestr, "=/");
 1153         amountstr = strsep(&rulestr, "/");
 1154         perstr = rulestr;
 1155 
 1156         if (subjectstr == NULL || subjectstr[0] == '\0')
 1157                 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
 1158         else {
 1159                 error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
 1160                 if (error != 0)
 1161                         goto out;
 1162         }
 1163 
 1164         if (subject_idstr == NULL || subject_idstr[0] == '\0') {
 1165                 rule->rr_subject.rs_proc = NULL;
 1166                 rule->rr_subject.rs_uip = NULL;
 1167                 rule->rr_subject.rs_loginclass = NULL;
 1168                 rule->rr_subject.rs_prison_racct = NULL;
 1169         } else {
 1170                 switch (rule->rr_subject_type) {
 1171                 case RCTL_SUBJECT_TYPE_UNDEFINED:
 1172                         error = EINVAL;
 1173                         goto out;
 1174                 case RCTL_SUBJECT_TYPE_PROCESS:
 1175                         error = str2id(subject_idstr, &id);
 1176                         if (error != 0)
 1177                                 goto out;
 1178                         sx_assert(&allproc_lock, SA_LOCKED);
 1179                         rule->rr_subject.rs_proc = pfind(id);
 1180                         if (rule->rr_subject.rs_proc == NULL) {
 1181                                 error = ESRCH;
 1182                                 goto out;
 1183                         }
 1184                         PROC_UNLOCK(rule->rr_subject.rs_proc);
 1185                         break;
 1186                 case RCTL_SUBJECT_TYPE_USER:
 1187                         error = str2id(subject_idstr, &id);
 1188                         if (error != 0)
 1189                                 goto out;
 1190                         rule->rr_subject.rs_uip = uifind(id);
 1191                         break;
 1192                 case RCTL_SUBJECT_TYPE_LOGINCLASS:
 1193                         rule->rr_subject.rs_loginclass =
 1194                             loginclass_find(subject_idstr);
 1195                         if (rule->rr_subject.rs_loginclass == NULL) {
 1196                                 error = ENAMETOOLONG;
 1197                                 goto out;
 1198                         }
 1199                         break;
 1200                 case RCTL_SUBJECT_TYPE_JAIL:
 1201                         rule->rr_subject.rs_prison_racct =
 1202                             prison_racct_find(subject_idstr);
 1203                         if (rule->rr_subject.rs_prison_racct == NULL) {
 1204                                 error = ENAMETOOLONG;
 1205                                 goto out;
 1206                         }
 1207                         break;
 1208                default:
 1209                        panic("rctl_string_to_rule: unknown subject type %d",
 1210                            rule->rr_subject_type);
 1211                }
 1212         }
 1213 
 1214         if (resourcestr == NULL || resourcestr[0] == '\0')
 1215                 rule->rr_resource = RACCT_UNDEFINED;
 1216         else {
 1217                 error = str2value(resourcestr, &rule->rr_resource,
 1218                     resourcenames);
 1219                 if (error != 0)
 1220                         goto out;
 1221         }
 1222 
 1223         if (actionstr == NULL || actionstr[0] == '\0')
 1224                 rule->rr_action = RCTL_ACTION_UNDEFINED;
 1225         else {
 1226                 error = str2value(actionstr, &rule->rr_action, actionnames);
 1227                 if (error != 0)
 1228                         goto out;
 1229         }
 1230 
 1231         if (amountstr == NULL || amountstr[0] == '\0')
 1232                 rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
 1233         else {
 1234                 error = str2int64(amountstr, &rule->rr_amount);
 1235                 if (error != 0)
 1236                         goto out;
 1237                 if (RACCT_IS_IN_MILLIONS(rule->rr_resource)) {
 1238                         if (rule->rr_amount > INT64_MAX / 1000000) {
 1239                                 error = ERANGE;
 1240                                 goto out;
 1241                         }
 1242                         rule->rr_amount *= 1000000;
 1243                 }
 1244         }
 1245 
 1246         if (perstr == NULL || perstr[0] == '\0')
 1247                 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
 1248         else {
 1249                 error = str2value(perstr, &rule->rr_per, subjectnames);
 1250                 if (error != 0)
 1251                         goto out;
 1252         }
 1253 
 1254 out:
 1255         if (error == 0)
 1256                 *rulep = rule;
 1257         else
 1258                 rctl_rule_release(rule);
 1259 
 1260         return (error);
 1261 }
 1262 
 1263 /*
 1264  * Link a rule with all the subjects it applies to.
 1265  */
 1266 int
 1267 rctl_rule_add(struct rctl_rule *rule)
 1268 {
 1269         struct proc *p;
 1270         struct ucred *cred;
 1271         struct uidinfo *uip;
 1272         struct prison *pr;
 1273         struct prison_racct *prr;
 1274         struct loginclass *lc;
 1275         struct rctl_rule *rule2;
 1276         int match;
 1277 
 1278         ASSERT_RACCT_ENABLED();
 1279         KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
 1280 
 1281         /*
 1282          * Some rules just don't make sense, like "deny" rule for an undeniable
 1283          * resource.  The exception are the RSS and %CPU resources - they are
 1284          * not deniable in the racct sense, but the limit is enforced in
 1285          * a different way.
 1286          */
 1287         if (rule->rr_action == RCTL_ACTION_DENY &&
 1288             !RACCT_IS_DENIABLE(rule->rr_resource) &&
 1289             rule->rr_resource != RACCT_RSS &&
 1290             rule->rr_resource != RACCT_PCTCPU) {
 1291                 return (EOPNOTSUPP);
 1292         }
 1293 
 1294         if (rule->rr_action == RCTL_ACTION_THROTTLE &&
 1295             !RACCT_IS_DECAYING(rule->rr_resource)) {
 1296                 return (EOPNOTSUPP);
 1297         }
 1298 
 1299         if (rule->rr_action == RCTL_ACTION_THROTTLE &&
 1300             rule->rr_resource == RACCT_PCTCPU) {
 1301                 return (EOPNOTSUPP);
 1302         }
 1303 
 1304         if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS &&
 1305             RACCT_IS_SLOPPY(rule->rr_resource)) {
 1306                 return (EOPNOTSUPP);
 1307         }
 1308 
 1309         /*
 1310          * Make sure there are no duplicated rules.  Also, for the "deny"
 1311          * rules, remove ones differing only by "amount".
 1312          */
 1313         if (rule->rr_action == RCTL_ACTION_DENY) {
 1314                 rule2 = rctl_rule_duplicate(rule, M_WAITOK);
 1315                 rule2->rr_amount = RCTL_AMOUNT_UNDEFINED;
 1316                 rctl_rule_remove(rule2);
 1317                 rctl_rule_release(rule2);
 1318         } else
 1319                 rctl_rule_remove(rule);
 1320 
 1321         switch (rule->rr_subject_type) {
 1322         case RCTL_SUBJECT_TYPE_PROCESS:
 1323                 p = rule->rr_subject.rs_proc;
 1324                 KASSERT(p != NULL, ("rctl_rule_add: NULL proc"));
 1325 
 1326                 rctl_racct_add_rule(p->p_racct, rule);
 1327                 /*
 1328                  * In case of per-process rule, we don't have anything more
 1329                  * to do.
 1330                  */
 1331                 return (0);
 1332 
 1333         case RCTL_SUBJECT_TYPE_USER:
 1334                 uip = rule->rr_subject.rs_uip;
 1335                 KASSERT(uip != NULL, ("rctl_rule_add: NULL uip"));
 1336                 rctl_racct_add_rule(uip->ui_racct, rule);
 1337                 break;
 1338 
 1339         case RCTL_SUBJECT_TYPE_LOGINCLASS:
 1340                 lc = rule->rr_subject.rs_loginclass;
 1341                 KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass"));
 1342                 rctl_racct_add_rule(lc->lc_racct, rule);
 1343                 break;
 1344 
 1345         case RCTL_SUBJECT_TYPE_JAIL:
 1346                 prr = rule->rr_subject.rs_prison_racct;
 1347                 KASSERT(prr != NULL, ("rctl_rule_add: NULL pr"));
 1348                 rctl_racct_add_rule(prr->prr_racct, rule);
 1349                 break;
 1350 
 1351         default:
 1352                 panic("rctl_rule_add: unknown subject type %d",
 1353                     rule->rr_subject_type);
 1354         }
 1355 
 1356         /*
 1357          * Now go through all the processes and add the new rule to the ones
 1358          * it applies to.
 1359          */
 1360         sx_assert(&allproc_lock, SA_LOCKED);
 1361         FOREACH_PROC_IN_SYSTEM(p) {
 1362                 cred = p->p_ucred;
 1363                 switch (rule->rr_subject_type) {
 1364                 case RCTL_SUBJECT_TYPE_USER:
 1365                         if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
 1366                             cred->cr_ruidinfo == rule->rr_subject.rs_uip)
 1367                                 break;
 1368                         continue;
 1369                 case RCTL_SUBJECT_TYPE_LOGINCLASS:
 1370                         if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
 1371                                 break;
 1372                         continue;
 1373                 case RCTL_SUBJECT_TYPE_JAIL:
 1374                         match = 0;
 1375                         for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
 1376                                 if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) {
 1377                                         match = 1;
 1378                                         break;
 1379                                 }
 1380                         }
 1381                         if (match)
 1382                                 break;
 1383                         continue;
 1384                 default:
 1385                         panic("rctl_rule_add: unknown subject type %d",
 1386                             rule->rr_subject_type);
 1387                 }
 1388 
 1389                 rctl_racct_add_rule(p->p_racct, rule);
 1390         }
 1391 
 1392         return (0);
 1393 }
 1394 
 1395 static void
 1396 rctl_rule_pre_callback(void)
 1397 {
 1398 
 1399         RACCT_LOCK();
 1400 }
 1401 
 1402 static void
 1403 rctl_rule_post_callback(void)
 1404 {
 1405 
 1406         RACCT_UNLOCK();
 1407 }
 1408 
 1409 static void
 1410 rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3)
 1411 {
 1412         struct rctl_rule *filter = (struct rctl_rule *)arg2;
 1413         int found = 0;
 1414 
 1415         ASSERT_RACCT_ENABLED();
 1416         RACCT_LOCK_ASSERT();
 1417 
 1418         found += rctl_racct_remove_rules(racct, filter);
 1419 
 1420         *((int *)arg3) += found;
 1421 }
 1422 
 1423 /*
 1424  * Remove all rules that match the filter.
 1425  */
 1426 int
 1427 rctl_rule_remove(struct rctl_rule *filter)
 1428 {
 1429         struct proc *p;
 1430         int found = 0;
 1431 
 1432         ASSERT_RACCT_ENABLED();
 1433 
 1434         if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
 1435             filter->rr_subject.rs_proc != NULL) {
 1436                 p = filter->rr_subject.rs_proc;
 1437                 RACCT_LOCK();
 1438                 found = rctl_racct_remove_rules(p->p_racct, filter);
 1439                 RACCT_UNLOCK();
 1440                 if (found)
 1441                         return (0);
 1442                 return (ESRCH);
 1443         }
 1444 
 1445         loginclass_racct_foreach(rctl_rule_remove_callback,
 1446             rctl_rule_pre_callback, rctl_rule_post_callback,
 1447             filter, (void *)&found);
 1448         ui_racct_foreach(rctl_rule_remove_callback,
 1449             rctl_rule_pre_callback, rctl_rule_post_callback,
 1450             filter, (void *)&found);
 1451         prison_racct_foreach(rctl_rule_remove_callback,
 1452             rctl_rule_pre_callback, rctl_rule_post_callback,
 1453             filter, (void *)&found);
 1454 
 1455         sx_assert(&allproc_lock, SA_LOCKED);
 1456         RACCT_LOCK();
 1457         FOREACH_PROC_IN_SYSTEM(p) {
 1458                 found += rctl_racct_remove_rules(p->p_racct, filter);
 1459         }
 1460         RACCT_UNLOCK();
 1461 
 1462         if (found)
 1463                 return (0);
 1464         return (ESRCH);
 1465 }
 1466 
 1467 /*
 1468  * Appends a rule to the sbuf.
 1469  */
 1470 static void
 1471 rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule)
 1472 {
 1473         int64_t amount;
 1474 
 1475         ASSERT_RACCT_ENABLED();
 1476 
 1477         sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
 1478 
 1479         switch (rule->rr_subject_type) {
 1480         case RCTL_SUBJECT_TYPE_PROCESS:
 1481                 if (rule->rr_subject.rs_proc == NULL)
 1482                         sbuf_printf(sb, ":");
 1483                 else
 1484                         sbuf_printf(sb, "%d:",
 1485                             rule->rr_subject.rs_proc->p_pid);
 1486                 break;
 1487         case RCTL_SUBJECT_TYPE_USER:
 1488                 if (rule->rr_subject.rs_uip == NULL)
 1489                         sbuf_printf(sb, ":");
 1490                 else
 1491                         sbuf_printf(sb, "%d:",
 1492                             rule->rr_subject.rs_uip->ui_uid);
 1493                 break;
 1494         case RCTL_SUBJECT_TYPE_LOGINCLASS:
 1495                 if (rule->rr_subject.rs_loginclass == NULL)
 1496                         sbuf_printf(sb, ":");
 1497                 else
 1498                         sbuf_printf(sb, "%s:",
 1499                             rule->rr_subject.rs_loginclass->lc_name);
 1500                 break;
 1501         case RCTL_SUBJECT_TYPE_JAIL:
 1502                 if (rule->rr_subject.rs_prison_racct == NULL)
 1503                         sbuf_printf(sb, ":");
 1504                 else
 1505                         sbuf_printf(sb, "%s:",
 1506                             rule->rr_subject.rs_prison_racct->prr_name);
 1507                 break;
 1508         default:
 1509                 panic("rctl_rule_to_sbuf: unknown subject type %d",
 1510                     rule->rr_subject_type);
 1511         }
 1512 
 1513         amount = rule->rr_amount;
 1514         if (amount != RCTL_AMOUNT_UNDEFINED &&
 1515             RACCT_IS_IN_MILLIONS(rule->rr_resource))
 1516                 amount /= 1000000;
 1517 
 1518         sbuf_printf(sb, "%s:%s=%jd",
 1519             rctl_resource_name(rule->rr_resource),
 1520             rctl_action_name(rule->rr_action),
 1521             amount);
 1522 
 1523         if (rule->rr_per != rule->rr_subject_type)
 1524                 sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per));
 1525 }
 1526 
 1527 /*
 1528  * Routine used by RCTL syscalls to read in input string.
 1529  */
 1530 static int
 1531 rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen)
 1532 {
 1533         char *str;
 1534         int error;
 1535 
 1536         ASSERT_RACCT_ENABLED();
 1537 
 1538         if (inbuflen <= 0)
 1539                 return (EINVAL);
 1540         if (inbuflen > RCTL_MAX_INBUFSIZE)
 1541                 return (E2BIG);
 1542 
 1543         str = malloc(inbuflen + 1, M_RCTL, M_WAITOK);
 1544         error = copyinstr(inbufp, str, inbuflen, NULL);
 1545         if (error != 0) {
 1546                 free(str, M_RCTL);
 1547                 return (error);
 1548         }
 1549 
 1550         *inputstr = str;
 1551 
 1552         return (0);
 1553 }
 1554 
 1555 /*
 1556  * Routine used by RCTL syscalls to write out output string.
 1557  */
 1558 static int
 1559 rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen)
 1560 {
 1561         int error;
 1562 
 1563         ASSERT_RACCT_ENABLED();
 1564 
 1565         if (outputsbuf == NULL)
 1566                 return (0);
 1567 
 1568         sbuf_finish(outputsbuf);
 1569         if (outbuflen < sbuf_len(outputsbuf) + 1) {
 1570                 sbuf_delete(outputsbuf);
 1571                 return (ERANGE);
 1572         }
 1573         error = copyout(sbuf_data(outputsbuf), outbufp,
 1574             sbuf_len(outputsbuf) + 1);
 1575         sbuf_delete(outputsbuf);
 1576         return (error);
 1577 }
 1578 
 1579 static struct sbuf *
 1580 rctl_racct_to_sbuf(struct racct *racct, int sloppy)
 1581 {
 1582         struct sbuf *sb;
 1583         int64_t amount;
 1584         int i;
 1585 
 1586         ASSERT_RACCT_ENABLED();
 1587 
 1588         sb = sbuf_new_auto();
 1589         for (i = 0; i <= RACCT_MAX; i++) {
 1590                 if (sloppy == 0 && RACCT_IS_SLOPPY(i))
 1591                         continue;
 1592                 RACCT_LOCK();
 1593                 amount = racct->r_resources[i];
 1594                 RACCT_UNLOCK();
 1595                 if (RACCT_IS_IN_MILLIONS(i))
 1596                         amount /= 1000000;
 1597                 sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount);
 1598         }
 1599         sbuf_setpos(sb, sbuf_len(sb) - 1);
 1600         return (sb);
 1601 }
 1602 
 1603 int
 1604 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
 1605 {
 1606         struct rctl_rule *filter;
 1607         struct sbuf *outputsbuf = NULL;
 1608         struct proc *p;
 1609         struct uidinfo *uip;
 1610         struct loginclass *lc;
 1611         struct prison_racct *prr;
 1612         char *inputstr;
 1613         int error;
 1614 
 1615         if (!racct_enable)
 1616                 return (ENOSYS);
 1617 
 1618         error = priv_check(td, PRIV_RCTL_GET_RACCT);
 1619         if (error != 0)
 1620                 return (error);
 1621 
 1622         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
 1623         if (error != 0)
 1624                 return (error);
 1625 
 1626         sx_slock(&allproc_lock);
 1627         error = rctl_string_to_rule(inputstr, &filter);
 1628         free(inputstr, M_RCTL);
 1629         if (error != 0) {
 1630                 sx_sunlock(&allproc_lock);
 1631                 return (error);
 1632         }
 1633 
 1634         switch (filter->rr_subject_type) {
 1635         case RCTL_SUBJECT_TYPE_PROCESS:
 1636                 p = filter->rr_subject.rs_proc;
 1637                 if (p == NULL) {
 1638                         error = EINVAL;
 1639                         goto out;
 1640                 }
 1641                 outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0);
 1642                 break;
 1643         case RCTL_SUBJECT_TYPE_USER:
 1644                 uip = filter->rr_subject.rs_uip;
 1645                 if (uip == NULL) {
 1646                         error = EINVAL;
 1647                         goto out;
 1648                 }
 1649                 outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1);
 1650                 break;
 1651         case RCTL_SUBJECT_TYPE_LOGINCLASS:
 1652                 lc = filter->rr_subject.rs_loginclass;
 1653                 if (lc == NULL) {
 1654                         error = EINVAL;
 1655                         goto out;
 1656                 }
 1657                 outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1);
 1658                 break;
 1659         case RCTL_SUBJECT_TYPE_JAIL:
 1660                 prr = filter->rr_subject.rs_prison_racct;
 1661                 if (prr == NULL) {
 1662                         error = EINVAL;
 1663                         goto out;
 1664                 }
 1665                 outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1);
 1666                 break;
 1667         default:
 1668                 error = EINVAL;
 1669         }
 1670 out:
 1671         rctl_rule_release(filter);
 1672         sx_sunlock(&allproc_lock);
 1673         if (error != 0)
 1674                 return (error);
 1675 
 1676         error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen);
 1677 
 1678         return (error);
 1679 }
 1680 
 1681 static void
 1682 rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3)
 1683 {
 1684         struct rctl_rule *filter = (struct rctl_rule *)arg2;
 1685         struct rctl_rule_link *link;
 1686         struct sbuf *sb = (struct sbuf *)arg3;
 1687 
 1688         ASSERT_RACCT_ENABLED();
 1689         RACCT_LOCK_ASSERT();
 1690 
 1691         LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
 1692                 if (!rctl_rule_matches(link->rrl_rule, filter))
 1693                         continue;
 1694                 rctl_rule_to_sbuf(sb, link->rrl_rule);
 1695                 sbuf_printf(sb, ",");
 1696         }
 1697 }
 1698 
 1699 int
 1700 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
 1701 {
 1702         struct sbuf *sb;
 1703         struct rctl_rule *filter;
 1704         struct rctl_rule_link *link;
 1705         struct proc *p;
 1706         char *inputstr, *buf;
 1707         size_t bufsize;
 1708         int error;
 1709 
 1710         if (!racct_enable)
 1711                 return (ENOSYS);
 1712 
 1713         error = priv_check(td, PRIV_RCTL_GET_RULES);
 1714         if (error != 0)
 1715                 return (error);
 1716 
 1717         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
 1718         if (error != 0)
 1719                 return (error);
 1720 
 1721         sx_slock(&allproc_lock);
 1722         error = rctl_string_to_rule(inputstr, &filter);
 1723         free(inputstr, M_RCTL);
 1724         if (error != 0) {
 1725                 sx_sunlock(&allproc_lock);
 1726                 return (error);
 1727         }
 1728 
 1729         bufsize = uap->outbuflen;
 1730         if (bufsize > rctl_maxbufsize) {
 1731                 sx_sunlock(&allproc_lock);
 1732                 return (E2BIG);
 1733         }
 1734 
 1735         buf = malloc(bufsize, M_RCTL, M_WAITOK);
 1736         sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
 1737         KASSERT(sb != NULL, ("sbuf_new failed"));
 1738 
 1739         FOREACH_PROC_IN_SYSTEM(p) {
 1740                 RACCT_LOCK();
 1741                 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
 1742                         /*
 1743                          * Non-process rules will be added to the buffer later.
 1744                          * Adding them here would result in duplicated output.
 1745                          */
 1746                         if (link->rrl_rule->rr_subject_type !=
 1747                             RCTL_SUBJECT_TYPE_PROCESS)
 1748                                 continue;
 1749                         if (!rctl_rule_matches(link->rrl_rule, filter))
 1750                                 continue;
 1751                         rctl_rule_to_sbuf(sb, link->rrl_rule);
 1752                         sbuf_printf(sb, ",");
 1753                 }
 1754                 RACCT_UNLOCK();
 1755         }
 1756 
 1757         loginclass_racct_foreach(rctl_get_rules_callback,
 1758             rctl_rule_pre_callback, rctl_rule_post_callback,
 1759             filter, sb);
 1760         ui_racct_foreach(rctl_get_rules_callback,
 1761             rctl_rule_pre_callback, rctl_rule_post_callback,
 1762             filter, sb);
 1763         prison_racct_foreach(rctl_get_rules_callback,
 1764             rctl_rule_pre_callback, rctl_rule_post_callback,
 1765             filter, sb);
 1766         if (sbuf_error(sb) == ENOMEM) {
 1767                 error = ERANGE;
 1768                 goto out;
 1769         }
 1770 
 1771         /*
 1772          * Remove trailing ",".
 1773          */
 1774         if (sbuf_len(sb) > 0)
 1775                 sbuf_setpos(sb, sbuf_len(sb) - 1);
 1776 
 1777         error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
 1778 out:
 1779         rctl_rule_release(filter);
 1780         sx_sunlock(&allproc_lock);
 1781         free(buf, M_RCTL);
 1782         return (error);
 1783 }
 1784 
 1785 int
 1786 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
 1787 {
 1788         struct sbuf *sb;
 1789         struct rctl_rule *filter;
 1790         struct rctl_rule_link *link;
 1791         char *inputstr, *buf;
 1792         size_t bufsize;
 1793         int error;
 1794 
 1795         if (!racct_enable)
 1796                 return (ENOSYS);
 1797 
 1798         error = priv_check(td, PRIV_RCTL_GET_LIMITS);
 1799         if (error != 0)
 1800                 return (error);
 1801 
 1802         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
 1803         if (error != 0)
 1804                 return (error);
 1805 
 1806         sx_slock(&allproc_lock);
 1807         error = rctl_string_to_rule(inputstr, &filter);
 1808         free(inputstr, M_RCTL);
 1809         if (error != 0) {
 1810                 sx_sunlock(&allproc_lock);
 1811                 return (error);
 1812         }
 1813 
 1814         if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) {
 1815                 rctl_rule_release(filter);
 1816                 sx_sunlock(&allproc_lock);
 1817                 return (EINVAL);
 1818         }
 1819         if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) {
 1820                 rctl_rule_release(filter);
 1821                 sx_sunlock(&allproc_lock);
 1822                 return (EOPNOTSUPP);
 1823         }
 1824         if (filter->rr_subject.rs_proc == NULL) {
 1825                 rctl_rule_release(filter);
 1826                 sx_sunlock(&allproc_lock);
 1827                 return (EINVAL);
 1828         }
 1829 
 1830         bufsize = uap->outbuflen;
 1831         if (bufsize > rctl_maxbufsize) {
 1832                 rctl_rule_release(filter);
 1833                 sx_sunlock(&allproc_lock);
 1834                 return (E2BIG);
 1835         }
 1836 
 1837         buf = malloc(bufsize, M_RCTL, M_WAITOK);
 1838         sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
 1839         KASSERT(sb != NULL, ("sbuf_new failed"));
 1840 
 1841         RACCT_LOCK();
 1842         LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links,
 1843             rrl_next) {
 1844                 rctl_rule_to_sbuf(sb, link->rrl_rule);
 1845                 sbuf_printf(sb, ",");
 1846         }
 1847         RACCT_UNLOCK();
 1848         if (sbuf_error(sb) == ENOMEM) {
 1849                 error = ERANGE;
 1850                 sbuf_delete(sb);
 1851                 goto out;
 1852         }
 1853 
 1854         /*
 1855          * Remove trailing ",".
 1856          */
 1857         if (sbuf_len(sb) > 0)
 1858                 sbuf_setpos(sb, sbuf_len(sb) - 1);
 1859 
 1860         error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
 1861 out:
 1862         rctl_rule_release(filter);
 1863         sx_sunlock(&allproc_lock);
 1864         free(buf, M_RCTL);
 1865         return (error);
 1866 }
 1867 
 1868 int
 1869 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
 1870 {
 1871         struct rctl_rule *rule;
 1872         char *inputstr;
 1873         int error;
 1874 
 1875         if (!racct_enable)
 1876                 return (ENOSYS);
 1877 
 1878         error = priv_check(td, PRIV_RCTL_ADD_RULE);
 1879         if (error != 0)
 1880                 return (error);
 1881 
 1882         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
 1883         if (error != 0)
 1884                 return (error);
 1885 
 1886         sx_slock(&allproc_lock);
 1887         error = rctl_string_to_rule(inputstr, &rule);
 1888         free(inputstr, M_RCTL);
 1889         if (error != 0) {
 1890                 sx_sunlock(&allproc_lock);
 1891                 return (error);
 1892         }
 1893         /*
 1894          * The 'per' part of a rule is optional.
 1895          */
 1896         if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED &&
 1897             rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED)
 1898                 rule->rr_per = rule->rr_subject_type;
 1899 
 1900         if (!rctl_rule_fully_specified(rule)) {
 1901                 error = EINVAL;
 1902                 goto out;
 1903         }
 1904 
 1905         error = rctl_rule_add(rule);
 1906 
 1907 out:
 1908         rctl_rule_release(rule);
 1909         sx_sunlock(&allproc_lock);
 1910         return (error);
 1911 }
 1912 
 1913 int
 1914 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
 1915 {
 1916         struct rctl_rule *filter;
 1917         char *inputstr;
 1918         int error;
 1919 
 1920         if (!racct_enable)
 1921                 return (ENOSYS);
 1922 
 1923         error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
 1924         if (error != 0)
 1925                 return (error);
 1926 
 1927         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
 1928         if (error != 0)
 1929                 return (error);
 1930 
 1931         sx_slock(&allproc_lock);
 1932         error = rctl_string_to_rule(inputstr, &filter);
 1933         free(inputstr, M_RCTL);
 1934         if (error != 0) {
 1935                 sx_sunlock(&allproc_lock);
 1936                 return (error);
 1937         }
 1938 
 1939         error = rctl_rule_remove(filter);
 1940         rctl_rule_release(filter);
 1941         sx_sunlock(&allproc_lock);
 1942 
 1943         return (error);
 1944 }
 1945 
 1946 /*
 1947  * Update RCTL rule list after credential change.
       *
       * Rebuilds the rule-link list of p's racct so that it holds p's own
       * process-subject rules plus every rule currently linked to the racct
       * of the new credential's cr_ruidinfo, cr_loginclass and prison.  Each
       * new link takes its own reference on the rule and inherits
       * rrl_exceeded from the link it was copied from.
       *
       * The new list entries are allocated with M_WAITOK while the racct
       * lock is dropped, so the source lists may change between counting
       * and filling.  When the number of rules found on the second pass
       * differs from the number counted, the partial list is thrown away
       * and the whole procedure is retried.
       *
       * Does nothing when racct is disabled.  Asserts that the process lock
       * of p is not held by the caller.
 1948  */
 1949 void
 1950 rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred)
 1951 {
 1952         LIST_HEAD(, rctl_rule_link) newrules;
 1953         struct rctl_rule_link *link, *newlink;
 1954         struct uidinfo *newuip;
 1955         struct loginclass *newlc;
 1956         struct prison_racct *newprr;
 1957         int rulecnt, i;
 1958 
 1959         if (!racct_enable)
 1960                 return;
 1961 
 1962         PROC_LOCK_ASSERT(p, MA_NOTOWNED);
 1963 
 1964         newuip = newcred->cr_ruidinfo;
 1965         newlc = newcred->cr_loginclass;
 1966         newprr = newcred->cr_prison->pr_prison_racct;
 1967 
 1968         LIST_INIT(&newrules);
 1969 
 1970 again:
 1971         /*
 1972          * First, count the rules that apply to the process with new
 1973          * credentials.
 1974          */
 1975         rulecnt = 0;
 1976         RACCT_LOCK();
               /* From the process itself only process-subject rules are kept. */
 1977         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
 1978                 if (link->rrl_rule->rr_subject_type ==
 1979                     RCTL_SUBJECT_TYPE_PROCESS)
 1980                         rulecnt++;
 1981         }
               /* Rules linked to the new user, login class and jail all count. */
 1982         LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next)
 1983                 rulecnt++;
 1984         LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next)
 1985                 rulecnt++;
 1986         LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next)
 1987                 rulecnt++;
 1988         RACCT_UNLOCK();
 1989 
 1990         /*
 1991          * Create temporary list.  We've dropped the rctl_lock in order
 1992          * to use M_WAITOK.
 1993          */
 1994         for (i = 0; i < rulecnt; i++) {
 1995                 newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
                       /* A NULL rrl_rule marks an entry that holds no reference yet. */
 1996                 newlink->rrl_rule = NULL;
 1997                 newlink->rrl_exceeded = 0;
 1998                 LIST_INSERT_HEAD(&newrules, newlink, rrl_next);
 1999         }
 2000 
               /* newlink now walks the preallocated entries as they are filled in. */
 2001         newlink = LIST_FIRST(&newrules);
 2002 
 2003         /*
 2004          * Assign rules to the newly allocated list entries.
 2005          */
 2006         RACCT_LOCK();
 2007         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
 2008                 if (link->rrl_rule->rr_subject_type ==
 2009                     RCTL_SUBJECT_TYPE_PROCESS) {
                               /* Out of entries: more rules exist now than were counted. */
 2010                         if (newlink == NULL)
 2011                                 goto goaround;
 2012                         rctl_rule_acquire(link->rrl_rule);
 2013                         newlink->rrl_rule = link->rrl_rule;
 2014                         newlink->rrl_exceeded = link->rrl_exceeded;
 2015                         newlink = LIST_NEXT(newlink, rrl_next);
                               /* rulecnt tracks how many counted rules are still unassigned. */
 2016                         rulecnt--;
 2017                 }
 2018         }
 2019         
 2020         LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) {
 2021                 if (newlink == NULL)
 2022                         goto goaround;
 2023                 rctl_rule_acquire(link->rrl_rule);
 2024                 newlink->rrl_rule = link->rrl_rule;
 2025                 newlink->rrl_exceeded = link->rrl_exceeded;
 2026                 newlink = LIST_NEXT(newlink, rrl_next);
 2027                 rulecnt--;
 2028         }
 2029 
 2030         LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) {
 2031                 if (newlink == NULL)
 2032                         goto goaround;
 2033                 rctl_rule_acquire(link->rrl_rule);
 2034                 newlink->rrl_rule = link->rrl_rule;
 2035                 newlink->rrl_exceeded = link->rrl_exceeded;
 2036                 newlink = LIST_NEXT(newlink, rrl_next);
 2037                 rulecnt--;
 2038         }
 2039 
 2040         LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) {
 2041                 if (newlink == NULL)
 2042                         goto goaround;
 2043                 rctl_rule_acquire(link->rrl_rule);
 2044                 newlink->rrl_rule = link->rrl_rule;
 2045                 newlink->rrl_exceeded = link->rrl_exceeded;
 2046                 newlink = LIST_NEXT(newlink, rrl_next);
 2047                 rulecnt--;
 2048         }
 2049 
               /*
                * rulecnt is zero only when exactly as many rules were assigned
                * as were counted, i.e. every preallocated entry has been filled.
                * In that case commit the new list while still holding the lock.
                */
 2050         if (rulecnt == 0) {
 2051                 /*
 2052                  * Free the old rule list.
 2053                  */
 2054                 while (!LIST_EMPTY(&p->p_racct->r_rule_links)) {
 2055                         link = LIST_FIRST(&p->p_racct->r_rule_links);
 2056                         LIST_REMOVE(link, rrl_next);
 2057                         rctl_rule_release(link->rrl_rule);
 2058                         uma_zfree(rctl_rule_link_zone, link);
 2059                 }
 2060 
 2061                 /*
 2062                  * Replace lists and we're done.
 2063                  *
 2064                  * XXX: Is there any way to switch list heads instead
 2065                  *      of iterating here?
 2066                  */
 2067                 while (!LIST_EMPTY(&newrules)) {
 2068                         newlink = LIST_FIRST(&newrules);
 2069                         LIST_REMOVE(newlink, rrl_next);
 2070                         LIST_INSERT_HEAD(&p->p_racct->r_rule_links,
 2071                             newlink, rrl_next);
 2072                 }
 2073 
 2074                 RACCT_UNLOCK();
 2075 
 2076                 return;
 2077         }
 2078 
               /*
                * Reached by goto when the preallocated entries ran out, or by
                * falling through when fewer rules were found than were counted
                * (rulecnt != 0).
                */
 2079 goaround:
 2080         RACCT_UNLOCK();
 2081 
 2082         /*
 2083          * Rule list changed while we were not holding the rctl_lock.
 2084          * Free the new list and try again.
 2085          */
 2086         while (!LIST_EMPTY(&newrules)) {
 2087                 newlink = LIST_FIRST(&newrules);
 2088                 LIST_REMOVE(newlink, rrl_next);
                       /* Only entries that were filled in hold a rule reference. */
 2089                 if (newlink->rrl_rule != NULL)
 2090                         rctl_rule_release(newlink->rrl_rule);
 2091                 uma_zfree(rctl_rule_link_zone, newlink);
 2092         }
 2093 
 2094         goto again;
 2095 }
 2096 
 2097 /*
 2098  * Assign RCTL rules to the newly created process.
 2099  */
 2100 int
 2101 rctl_proc_fork(struct proc *parent, struct proc *child)
 2102 {
 2103         struct rctl_rule *rule;
 2104         struct rctl_rule_link *link;
 2105         int error;
 2106 
 2107         ASSERT_RACCT_ENABLED();
 2108         RACCT_LOCK_ASSERT();
 2109         KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent));
 2110 
 2111         LIST_INIT(&child->p_racct->r_rule_links);
 2112 
 2113         /*
 2114          * Go through limits applicable to the parent and assign them
 2115          * to the child.  Rules with 'process' subject have to be duplicated
 2116          * in order to make their rr_subject point to the new process.
 2117          */
 2118         LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) {
 2119                 if (link->rrl_rule->rr_subject_type ==
 2120                     RCTL_SUBJECT_TYPE_PROCESS) {
 2121                         rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT);
 2122                         if (rule == NULL)
 2123                                 goto fail;
 2124                         KASSERT(rule->rr_subject.rs_proc == parent,
 2125                             ("rule->rr_subject.rs_proc != parent"));
 2126                         rule->rr_subject.rs_proc = child;
 2127                         error = rctl_racct_add_rule_locked(child->p_racct,
 2128                             rule);
 2129                         rctl_rule_release(rule);
 2130                         if (error != 0)
 2131                                 goto fail;
 2132                 } else {
 2133                         error = rctl_racct_add_rule_locked(child->p_racct,
 2134                             link->rrl_rule);
 2135                         if (error != 0)
 2136                                 goto fail;
 2137                 }
 2138         }
 2139 
 2140         return (0);
 2141 
 2142 fail:
 2143         while (!LIST_EMPTY(&child->p_racct->r_rule_links)) {
 2144                 link = LIST_FIRST(&child->p_racct->r_rule_links);
 2145                 LIST_REMOVE(link, rrl_next);
 2146                 rctl_rule_release(link->rrl_rule);
 2147                 uma_zfree(rctl_rule_link_zone, link);
 2148         }
 2149 
 2150         return (EAGAIN);
 2151 }
 2152 
 2153 /*
 2154  * Release rules attached to the racct.
 2155  */
 2156 void
 2157 rctl_racct_release(struct racct *racct)
 2158 {
 2159         struct rctl_rule_link *link;
 2160 
 2161         ASSERT_RACCT_ENABLED();
 2162         RACCT_LOCK_ASSERT();
 2163 
 2164         while (!LIST_EMPTY(&racct->r_rule_links)) {
 2165                 link = LIST_FIRST(&racct->r_rule_links);
 2166                 LIST_REMOVE(link, rrl_next);
 2167                 rctl_rule_release(link->rrl_rule);
 2168                 uma_zfree(rctl_rule_link_zone, link);
 2169         }
 2170 }
 2171 
 2172 static void
 2173 rctl_init(void)
 2174 {
 2175 
 2176         if (!racct_enable)
 2177                 return;
 2178 
 2179         rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule),
 2180             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 2181         rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
 2182             sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
 2183             UMA_ALIGN_PTR, 0);
 2184 
 2185         /*
 2186          * Set default values, making sure not to overwrite the ones
 2187          * fetched from tunables.  Most of those could be set at the
 2188          * declaration, except for the rctl_throttle_max - we cannot
 2189          * set it there due to hz not being compile time constant.
 2190          */
 2191         if (rctl_throttle_min < 1)
 2192                 rctl_throttle_min = 1;
 2193         if (rctl_throttle_max < rctl_throttle_min)
 2194                 rctl_throttle_max = 2 * hz;
 2195         if (rctl_throttle_pct < 0)
 2196                 rctl_throttle_pct = 100;
 2197         if (rctl_throttle_pct2 < 0)
 2198                 rctl_throttle_pct2 = 100;
 2199 }
 2200 
 2201 #else /* !RCTL */
 2202 
 2203 int
 2204 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
 2205 {
 2206         
 2207         return (ENOSYS);
 2208 }
 2209 
 2210 int
 2211 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
 2212 {
 2213         
 2214         return (ENOSYS);
 2215 }
 2216 
 2217 int
 2218 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
 2219 {
 2220         
 2221         return (ENOSYS);
 2222 }
 2223 
 2224 int
 2225 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
 2226 {
 2227         
 2228         return (ENOSYS);
 2229 }
 2230 
 2231 int
 2232 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
 2233 {
 2234         
 2235         return (ENOSYS);
 2236 }
 2237 
 2238 #endif /* !RCTL */

Cache object: 77f35f979d2db22d328d309814a9a5c7


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.