The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_rctl.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2010 The FreeBSD Foundation
    5  *
    6  * This software was developed by Edward Tomasz Napierala under sponsorship
    7  * from the FreeBSD Foundation.
    8  *
    9  * Redistribution and use in source and binary forms, with or without
   10  * modification, are permitted provided that the following conditions
   11  * are met:
   12  * 1. Redistributions of source code must retain the above copyright
   13  *    notice, this list of conditions and the following disclaimer.
   14  * 2. Redistributions in binary form must reproduce the above copyright
   15  *    notice, this list of conditions and the following disclaimer in the
   16  *    documentation and/or other materials provided with the distribution.
   17  *
   18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   28  * SUCH DAMAGE.
   29  *
   30  * $FreeBSD$
   31  */
   32 
   33 #include <sys/cdefs.h>
   34 __FBSDID("$FreeBSD$");
   35 
   36 #include <sys/param.h>
   37 #include <sys/devctl.h>
   38 #include <sys/malloc.h>
   39 #include <sys/queue.h>
   40 #include <sys/refcount.h>
   41 #include <sys/jail.h>
   42 #include <sys/kernel.h>
   43 #include <sys/limits.h>
   44 #include <sys/loginclass.h>
   45 #include <sys/priv.h>
   46 #include <sys/proc.h>
   47 #include <sys/racct.h>
   48 #include <sys/rctl.h>
   49 #include <sys/resourcevar.h>
   50 #include <sys/sx.h>
   51 #include <sys/sysproto.h>
   52 #include <sys/systm.h>
   53 #include <sys/types.h>
   54 #include <sys/eventhandler.h>
   55 #include <sys/lock.h>
   56 #include <sys/mutex.h>
   57 #include <sys/rwlock.h>
   58 #include <sys/sbuf.h>
   59 #include <sys/taskqueue.h>
   60 #include <sys/tree.h>
   61 #include <vm/uma.h>
   62 
   63 #ifdef RCTL
   64 #ifndef RACCT
   65 #error "The RCTL option requires the RACCT option"
   66 #endif
   67 
   68 FEATURE(rctl, "Resource Limits");
   69 
   70 #define HRF_DEFAULT             0
   71 #define HRF_DONT_INHERIT        1
   72 #define HRF_DONT_ACCUMULATE     2
   73 
   74 #define RCTL_MAX_INBUFSIZE      4 * 1024
   75 #define RCTL_MAX_OUTBUFSIZE     16 * 1024 * 1024
   76 #define RCTL_LOG_BUFSIZE        128
   77 
   78 #define RCTL_PCPU_SHIFT         (10 * 1000000)
   79 
   80 static unsigned int rctl_maxbufsize = RCTL_MAX_OUTBUFSIZE;
   81 static int rctl_log_rate_limit = 10;
   82 static int rctl_devctl_rate_limit = 10;
   83 
   84 /*
   85  * Values below are initialized in rctl_init().
   86  */
   87 static int rctl_throttle_min = -1;
   88 static int rctl_throttle_max = -1;
   89 static int rctl_throttle_pct = -1;
   90 static int rctl_throttle_pct2 = -1;
   91 
   92 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS);
   93 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS);
   94 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS);
   95 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS);
   96 
   97 SYSCTL_NODE(_kern_racct, OID_AUTO, rctl, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
   98     "Resource Limits");
   99 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, maxbufsize, CTLFLAG_RWTUN,
  100     &rctl_maxbufsize, 0, "Maximum output buffer size");
  101 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, log_rate_limit, CTLFLAG_RW,
  102     &rctl_log_rate_limit, 0, "Maximum number of log messages per second");
  103 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, devctl_rate_limit, CTLFLAG_RWTUN,
  104     &rctl_devctl_rate_limit, 0, "Maximum number of devctl messages per second");
  105 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_min,
  106     CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0,
  107     &rctl_throttle_min_sysctl, "IU",
  108     "Shortest throttling duration, in hz");
  109 TUNABLE_INT("kern.racct.rctl.throttle_min", &rctl_throttle_min);
  110 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_max,
  111     CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0,
  112     &rctl_throttle_max_sysctl, "IU",
  113     "Longest throttling duration, in hz");
  114 TUNABLE_INT("kern.racct.rctl.throttle_max", &rctl_throttle_max);
  115 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct,
  116     CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0,
  117     &rctl_throttle_pct_sysctl, "IU",
  118     "Throttling penalty for process consumption, in percent");
  119 TUNABLE_INT("kern.racct.rctl.throttle_pct", &rctl_throttle_pct);
  120 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct2,
  121     CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0,
  122     &rctl_throttle_pct2_sysctl, "IU",
  123     "Throttling penalty for container consumption, in percent");
  124 TUNABLE_INT("kern.racct.rctl.throttle_pct2", &rctl_throttle_pct2);
  125 
  126 /*
  127  * 'rctl_rule_link' connects a rule with every racct it's related to.
  128  * For example, rule 'user:X:openfiles:deny=N/process' is linked
  129  * with uidinfo for user X, and to each process of that user.
  130  */
  131 struct rctl_rule_link {
  132         LIST_ENTRY(rctl_rule_link)      rrl_next;
  133         struct rctl_rule                *rrl_rule;
  134         int                             rrl_exceeded;
  135 };
  136 
  137 struct dict {
  138         const char      *d_name;
  139         int             d_value;
  140 };
  141 
  142 static struct dict subjectnames[] = {
  143         { "process", RCTL_SUBJECT_TYPE_PROCESS },
  144         { "user", RCTL_SUBJECT_TYPE_USER },
  145         { "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS },
  146         { "jail", RCTL_SUBJECT_TYPE_JAIL },
  147         { NULL, -1 }};
  148 
  149 static struct dict resourcenames[] = {
  150         { "cputime", RACCT_CPU },
  151         { "datasize", RACCT_DATA },
  152         { "stacksize", RACCT_STACK },
  153         { "coredumpsize", RACCT_CORE },
  154         { "memoryuse", RACCT_RSS },
  155         { "memorylocked", RACCT_MEMLOCK },
  156         { "maxproc", RACCT_NPROC },
  157         { "openfiles", RACCT_NOFILE },
  158         { "vmemoryuse", RACCT_VMEM },
  159         { "pseudoterminals", RACCT_NPTS },
  160         { "swapuse", RACCT_SWAP },
  161         { "nthr", RACCT_NTHR },
  162         { "msgqqueued", RACCT_MSGQQUEUED },
  163         { "msgqsize", RACCT_MSGQSIZE },
  164         { "nmsgq", RACCT_NMSGQ },
  165         { "nsem", RACCT_NSEM },
  166         { "nsemop", RACCT_NSEMOP },
  167         { "nshm", RACCT_NSHM },
  168         { "shmsize", RACCT_SHMSIZE },
  169         { "wallclock", RACCT_WALLCLOCK },
  170         { "pcpu", RACCT_PCTCPU },
  171         { "readbps", RACCT_READBPS },
  172         { "writebps", RACCT_WRITEBPS },
  173         { "readiops", RACCT_READIOPS },
  174         { "writeiops", RACCT_WRITEIOPS },
  175         { NULL, -1 }};
  176 
  177 static struct dict actionnames[] = {
  178         { "sighup", RCTL_ACTION_SIGHUP },
  179         { "sigint", RCTL_ACTION_SIGINT },
  180         { "sigquit", RCTL_ACTION_SIGQUIT },
  181         { "sigill", RCTL_ACTION_SIGILL },
  182         { "sigtrap", RCTL_ACTION_SIGTRAP },
  183         { "sigabrt", RCTL_ACTION_SIGABRT },
  184         { "sigemt", RCTL_ACTION_SIGEMT },
  185         { "sigfpe", RCTL_ACTION_SIGFPE },
  186         { "sigkill", RCTL_ACTION_SIGKILL },
  187         { "sigbus", RCTL_ACTION_SIGBUS },
  188         { "sigsegv", RCTL_ACTION_SIGSEGV },
  189         { "sigsys", RCTL_ACTION_SIGSYS },
  190         { "sigpipe", RCTL_ACTION_SIGPIPE },
  191         { "sigalrm", RCTL_ACTION_SIGALRM },
  192         { "sigterm", RCTL_ACTION_SIGTERM },
  193         { "sigurg", RCTL_ACTION_SIGURG },
  194         { "sigstop", RCTL_ACTION_SIGSTOP },
  195         { "sigtstp", RCTL_ACTION_SIGTSTP },
  196         { "sigchld", RCTL_ACTION_SIGCHLD },
  197         { "sigttin", RCTL_ACTION_SIGTTIN },
  198         { "sigttou", RCTL_ACTION_SIGTTOU },
  199         { "sigio", RCTL_ACTION_SIGIO },
  200         { "sigxcpu", RCTL_ACTION_SIGXCPU },
  201         { "sigxfsz", RCTL_ACTION_SIGXFSZ },
  202         { "sigvtalrm", RCTL_ACTION_SIGVTALRM },
  203         { "sigprof", RCTL_ACTION_SIGPROF },
  204         { "sigwinch", RCTL_ACTION_SIGWINCH },
  205         { "siginfo", RCTL_ACTION_SIGINFO },
  206         { "sigusr1", RCTL_ACTION_SIGUSR1 },
  207         { "sigusr2", RCTL_ACTION_SIGUSR2 },
  208         { "sigthr", RCTL_ACTION_SIGTHR },
  209         { "deny", RCTL_ACTION_DENY },
  210         { "log", RCTL_ACTION_LOG },
  211         { "devctl", RCTL_ACTION_DEVCTL },
  212         { "throttle", RCTL_ACTION_THROTTLE },
  213         { NULL, -1 }};
  214 
  215 static void rctl_init(void);
  216 SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);
  217 
  218 static uma_zone_t rctl_rule_zone;
  219 static uma_zone_t rctl_rule_link_zone;
  220 
  221 static int rctl_rule_fully_specified(const struct rctl_rule *rule);
  222 static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule);
  223 
  224 static MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits");
  225 
  226 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS)
  227 {
  228         int error, val = rctl_throttle_min;
  229 
  230         error = sysctl_handle_int(oidp, &val, 0, req);
  231         if (error || !req->newptr)
  232                 return (error);
  233         if (val < 1 || val > rctl_throttle_max)
  234                 return (EINVAL);
  235 
  236         RACCT_LOCK();
  237         rctl_throttle_min = val;
  238         RACCT_UNLOCK();
  239 
  240         return (0);
  241 }
  242 
  243 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS)
  244 {
  245         int error, val = rctl_throttle_max;
  246 
  247         error = sysctl_handle_int(oidp, &val, 0, req);
  248         if (error || !req->newptr)
  249                 return (error);
  250         if (val < rctl_throttle_min)
  251                 return (EINVAL);
  252 
  253         RACCT_LOCK();
  254         rctl_throttle_max = val;
  255         RACCT_UNLOCK();
  256 
  257         return (0);
  258 }
  259 
  260 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS)
  261 {
  262         int error, val = rctl_throttle_pct;
  263 
  264         error = sysctl_handle_int(oidp, &val, 0, req);
  265         if (error || !req->newptr)
  266                 return (error);
  267         if (val < 0)
  268                 return (EINVAL);
  269 
  270         RACCT_LOCK();
  271         rctl_throttle_pct = val;
  272         RACCT_UNLOCK();
  273 
  274         return (0);
  275 }
  276 
  277 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS)
  278 {
  279         int error, val = rctl_throttle_pct2;
  280 
  281         error = sysctl_handle_int(oidp, &val, 0, req);
  282         if (error || !req->newptr)
  283                 return (error);
  284         if (val < 0)
  285                 return (EINVAL);
  286 
  287         RACCT_LOCK();
  288         rctl_throttle_pct2 = val;
  289         RACCT_UNLOCK();
  290 
  291         return (0);
  292 }
  293 
  294 static const char *
  295 rctl_subject_type_name(int subject)
  296 {
  297         int i;
  298 
  299         for (i = 0; subjectnames[i].d_name != NULL; i++) {
  300                 if (subjectnames[i].d_value == subject)
  301                         return (subjectnames[i].d_name);
  302         }
  303 
  304         panic("rctl_subject_type_name: unknown subject type %d", subject);
  305 }
  306 
  307 static const char *
  308 rctl_action_name(int action)
  309 {
  310         int i;
  311 
  312         for (i = 0; actionnames[i].d_name != NULL; i++) {
  313                 if (actionnames[i].d_value == action)
  314                         return (actionnames[i].d_name);
  315         }
  316 
  317         panic("rctl_action_name: unknown action %d", action);
  318 }
  319 
  320 const char *
  321 rctl_resource_name(int resource)
  322 {
  323         int i;
  324 
  325         for (i = 0; resourcenames[i].d_name != NULL; i++) {
  326                 if (resourcenames[i].d_value == resource)
  327                         return (resourcenames[i].d_name);
  328         }
  329 
  330         panic("rctl_resource_name: unknown resource %d", resource);
  331 }
  332 
  333 static struct racct *
  334 rctl_proc_rule_to_racct(const struct proc *p, const struct rctl_rule *rule)
  335 {
  336         struct ucred *cred = p->p_ucred;
  337 
  338         ASSERT_RACCT_ENABLED();
  339         RACCT_LOCK_ASSERT();
  340 
  341         switch (rule->rr_per) {
  342         case RCTL_SUBJECT_TYPE_PROCESS:
  343                 return (p->p_racct);
  344         case RCTL_SUBJECT_TYPE_USER:
  345                 return (cred->cr_ruidinfo->ui_racct);
  346         case RCTL_SUBJECT_TYPE_LOGINCLASS:
  347                 return (cred->cr_loginclass->lc_racct);
  348         case RCTL_SUBJECT_TYPE_JAIL:
  349                 return (cred->cr_prison->pr_prison_racct->prr_racct);
  350         default:
  351                 panic("%s: unknown per %d", __func__, rule->rr_per);
  352         }
  353 }
  354 
  355 /*
  356  * Return the amount of resource that can be allocated by 'p' before
  357  * hitting 'rule'.
  358  */
  359 static int64_t
  360 rctl_available_resource(const struct proc *p, const struct rctl_rule *rule)
  361 {
  362         const struct racct *racct;
  363         int64_t available;
  364 
  365         ASSERT_RACCT_ENABLED();
  366         RACCT_LOCK_ASSERT();
  367 
  368         racct = rctl_proc_rule_to_racct(p, rule);
  369         available = rule->rr_amount - racct->r_resources[rule->rr_resource];
  370 
  371         return (available);
  372 }
  373 
  374 /*
  375  * Called every second for proc, uidinfo, loginclass, and jail containers.
  376  * If the limit isn't exceeded, it decreases the usage amount to zero.
  377  * Otherwise, it decreases it by the value of the limit.  This way
  378  * resource consumption exceeding the limit "carries over" to the next
  379  * period.
  380  */
  381 void
  382 rctl_throttle_decay(struct racct *racct, int resource)
  383 {
  384         struct rctl_rule *rule;
  385         struct rctl_rule_link *link;
  386         int64_t minavailable;
  387 
  388         ASSERT_RACCT_ENABLED();
  389         RACCT_LOCK_ASSERT();
  390 
  391         minavailable = INT64_MAX;
  392 
  393         LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
  394                 rule = link->rrl_rule;
  395 
  396                 if (rule->rr_resource != resource)
  397                         continue;
  398                 if (rule->rr_action != RCTL_ACTION_THROTTLE)
  399                         continue;
  400 
  401                 if (rule->rr_amount < minavailable)
  402                         minavailable = rule->rr_amount;
  403         }
  404 
  405         if (racct->r_resources[resource] < minavailable) {
  406                 racct->r_resources[resource] = 0;
  407         } else {
  408                 /*
  409                  * Cap utilization counter at ten times the limit.  Otherwise,
  410                  * if we changed the rule lowering the allowed amount, it could
  411                  * take unreasonably long time for the accumulated resource
  412                  * usage to drop.
  413                  */
  414                 if (racct->r_resources[resource] > minavailable * 10)
  415                         racct->r_resources[resource] = minavailable * 10;
  416 
  417                 racct->r_resources[resource] -= minavailable;
  418         }
  419 }
  420 
  421 /*
  422  * Special version of rctl_get_available() for the %CPU resource.
  423  * We slightly cheat here and return less than we normally would.
  424  */
  425 int64_t
  426 rctl_pcpu_available(const struct proc *p) {
  427         struct rctl_rule *rule;
  428         struct rctl_rule_link *link;
  429         int64_t available, minavailable, limit;
  430 
  431         ASSERT_RACCT_ENABLED();
  432         RACCT_LOCK_ASSERT();
  433 
  434         minavailable = INT64_MAX;
  435         limit = 0;
  436 
  437         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
  438                 rule = link->rrl_rule;
  439                 if (rule->rr_resource != RACCT_PCTCPU)
  440                         continue;
  441                 if (rule->rr_action != RCTL_ACTION_DENY)
  442                         continue;
  443                 available = rctl_available_resource(p, rule);
  444                 if (available < minavailable) {
  445                         minavailable = available;
  446                         limit = rule->rr_amount;
  447                 }
  448         }
  449 
  450         /*
  451          * Return slightly less than actual value of the available
  452          * %cpu resource.  This makes %cpu throttling more aggressive
  453          * and lets us act sooner than the limits are already exceeded.
  454          */
  455         if (limit != 0) {
  456                 if (limit > 2 * RCTL_PCPU_SHIFT)
  457                         minavailable -= RCTL_PCPU_SHIFT;
  458                 else
  459                         minavailable -= (limit / 2);
  460         }
  461 
  462         return (minavailable);
  463 }
  464 
  465 static uint64_t
  466 xadd(uint64_t a, uint64_t b)
  467 {
  468         uint64_t c;
  469 
  470         c = a + b;
  471 
  472         /*
  473          * Detect overflow.
  474          */
  475         if (c < a || c < b)
  476                 return (UINT64_MAX);
  477 
  478         return (c);
  479 }
  480 
  481 static uint64_t
  482 xmul(uint64_t a, uint64_t b)
  483 {
  484 
  485         if (b != 0 && a > UINT64_MAX / b)
  486                 return (UINT64_MAX);
  487 
  488         return (a * b);
  489 }
  490 
  491 /*
  492  * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition
  493  * to what it keeps allocated now.  Returns non-zero if the allocation should
  494  * be denied, 0 otherwise.
  495  */
  496 int
  497 rctl_enforce(struct proc *p, int resource, uint64_t amount)
  498 {
  499         static struct timeval log_lasttime, devctl_lasttime;
  500         static int log_curtime = 0, devctl_curtime = 0;
  501         struct rctl_rule *rule;
  502         struct rctl_rule_link *link;
  503         struct sbuf sb;
  504         char *buf;
  505         int64_t available;
  506         uint64_t sleep_ms, sleep_ratio;
  507         int should_deny = 0;
  508 
  509         ASSERT_RACCT_ENABLED();
  510         RACCT_LOCK_ASSERT();
  511 
  512         /*
  513          * There may be more than one matching rule; go through all of them.
  514          * Denial should be done last, after logging and sending signals.
  515          */
  516         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
  517                 rule = link->rrl_rule;
  518                 if (rule->rr_resource != resource)
  519                         continue;
  520 
  521                 available = rctl_available_resource(p, rule);
  522                 if (available >= (int64_t)amount) {
  523                         link->rrl_exceeded = 0;
  524                         continue;
  525                 }
  526 
  527                 switch (rule->rr_action) {
  528                 case RCTL_ACTION_DENY:
  529                         should_deny = 1;
  530                         continue;
  531                 case RCTL_ACTION_LOG:
  532                         /*
  533                          * If rrl_exceeded != 0, it means we've already
  534                          * logged a warning for this process.
  535                          */
  536                         if (link->rrl_exceeded != 0)
  537                                 continue;
  538 
  539                         /*
  540                          * If the process state is not fully initialized yet,
  541                          * we can't access most of the required fields, e.g.
  542                          * p->p_comm.  This happens when called from fork1().
  543                          * Ignore this rule for now; it will be processed just
  544                          * after fork, when called from racct_proc_fork_done().
  545                          */
  546                         if (p->p_state != PRS_NORMAL)
  547                                 continue;
  548 
  549                         if (!ppsratecheck(&log_lasttime, &log_curtime,
  550                             rctl_log_rate_limit))
  551                                 continue;
  552 
  553                         buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
  554                         if (buf == NULL) {
  555                                 printf("rctl_enforce: out of memory\n");
  556                                 continue;
  557                         }
  558                         sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
  559                         rctl_rule_to_sbuf(&sb, rule);
  560                         sbuf_finish(&sb);
  561                         printf("rctl: rule \"%s\" matched by pid %d "
  562                             "(%s), uid %d, jail %s\n", sbuf_data(&sb),
  563                             p->p_pid, p->p_comm, p->p_ucred->cr_uid,
  564                             p->p_ucred->cr_prison->pr_prison_racct->prr_name);
  565                         sbuf_delete(&sb);
  566                         free(buf, M_RCTL);
  567                         link->rrl_exceeded = 1;
  568                         continue;
  569                 case RCTL_ACTION_DEVCTL:
  570                         if (link->rrl_exceeded != 0)
  571                                 continue;
  572 
  573                         if (p->p_state != PRS_NORMAL)
  574                                 continue;
  575 
  576                         if (!ppsratecheck(&devctl_lasttime, &devctl_curtime,
  577                             rctl_devctl_rate_limit))
  578                                 continue;
  579 
  580                         buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
  581                         if (buf == NULL) {
  582                                 printf("rctl_enforce: out of memory\n");
  583                                 continue;
  584                         }
  585                         sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
  586                         sbuf_printf(&sb, "rule=");
  587                         rctl_rule_to_sbuf(&sb, rule);
  588                         sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
  589                             p->p_pid, p->p_ucred->cr_ruid,
  590                             p->p_ucred->cr_prison->pr_prison_racct->prr_name);
  591                         sbuf_finish(&sb);
  592                         devctl_notify("RCTL", "rule", "matched",
  593                             sbuf_data(&sb));
  594                         sbuf_delete(&sb);
  595                         free(buf, M_RCTL);
  596                         link->rrl_exceeded = 1;
  597                         continue;
  598                 case RCTL_ACTION_THROTTLE:
  599                         if (p->p_state != PRS_NORMAL)
  600                                 continue;
  601 
  602                         if (rule->rr_amount == 0) {
  603                                 racct_proc_throttle(p, rctl_throttle_max);
  604                                 continue;
  605                         }
  606 
  607                         /*
  608                          * Make the process sleep for a fraction of second
  609                          * proportional to the ratio of process' resource
  610                          * utilization compared to the limit.  The point is
  611                          * to penalize resource hogs: processes that consume
  612                          * more of the available resources sleep for longer.
  613                          *
  614                          * We're trying to defer division until the very end,
  615                          * to minimize the rounding effects.  The following
  616                          * calculation could have been written in a clearer
  617                          * way like this:
  618                          *
  619                          * sleep_ms = hz * p->p_racct->r_resources[resource] /
  620                          *     rule->rr_amount;
  621                          * sleep_ms *= rctl_throttle_pct / 100;
  622                          * if (sleep_ms < rctl_throttle_min)
  623                          *         sleep_ms = rctl_throttle_min;
  624                          *
  625                          */
  626                         sleep_ms = xmul(hz, p->p_racct->r_resources[resource]);
  627                         sleep_ms = xmul(sleep_ms,  rctl_throttle_pct) / 100;
  628                         if (sleep_ms < rctl_throttle_min * rule->rr_amount)
  629                                 sleep_ms = rctl_throttle_min * rule->rr_amount;
  630 
  631                         /*
  632                          * Multiply that by the ratio of the resource
  633                          * consumption for the container compared to the limit,
  634                          * squared.  In other words, a process in a container
  635                          * that is two times over the limit will be throttled
  636                          * four times as much for hitting the same rule.  The
  637                          * point is to penalize processes more if the container
  638                          * itself (eg certain UID or jail) is above the limit.
  639                          */
  640                         if (available < 0)
  641                                 sleep_ratio = -available / rule->rr_amount;
  642                         else
  643                                 sleep_ratio = 0;
  644                         sleep_ratio = xmul(sleep_ratio, sleep_ratio);
  645                         sleep_ratio = xmul(sleep_ratio, rctl_throttle_pct2) / 100;
  646                         sleep_ms = xadd(sleep_ms, xmul(sleep_ms, sleep_ratio));
  647 
  648                         /*
  649                          * Finally the division.
  650                          */
  651                         sleep_ms /= rule->rr_amount;
  652 
  653                         if (sleep_ms > rctl_throttle_max)
  654                                 sleep_ms = rctl_throttle_max;
  655 #if 0
  656                         printf("%s: pid %d (%s), %jd of %jd, will sleep for %ju ms (ratio %ju, available %jd)\n",
  657                            __func__, p->p_pid, p->p_comm,
  658                            p->p_racct->r_resources[resource],
  659                            rule->rr_amount, (uintmax_t)sleep_ms,
  660                            (uintmax_t)sleep_ratio, (intmax_t)available);
  661 #endif
  662 
  663                         KASSERT(sleep_ms >= rctl_throttle_min, ("%s: %ju < %d\n",
  664                             __func__, (uintmax_t)sleep_ms, rctl_throttle_min));
  665                         racct_proc_throttle(p, sleep_ms);
  666                         continue;
  667                 default:
  668                         if (link->rrl_exceeded != 0)
  669                                 continue;
  670 
  671                         if (p->p_state != PRS_NORMAL)
  672                                 continue;
  673 
  674                         KASSERT(rule->rr_action > 0 &&
  675                             rule->rr_action <= RCTL_ACTION_SIGNAL_MAX,
  676                             ("rctl_enforce: unknown action %d",
  677                              rule->rr_action));
  678 
  679                         /*
  680                          * We're using the fact that RCTL_ACTION_SIG* values
  681                          * are equal to their counterparts from sys/signal.h.
  682                          */
  683                         kern_psignal(p, rule->rr_action);
  684                         link->rrl_exceeded = 1;
  685                         continue;
  686                 }
  687         }
  688 
  689         if (should_deny) {
  690                 /*
  691                  * Return fake error code; the caller should change it
  692                  * into one proper for the situation - EFSIZ, ENOMEM etc.
  693                  */
  694                 return (EDOOFUS);
  695         }
  696 
  697         return (0);
  698 }
  699 
  700 uint64_t
  701 rctl_get_limit(struct proc *p, int resource)
  702 {
  703         struct rctl_rule *rule;
  704         struct rctl_rule_link *link;
  705         uint64_t amount = UINT64_MAX;
  706 
  707         ASSERT_RACCT_ENABLED();
  708         RACCT_LOCK_ASSERT();
  709 
  710         /*
  711          * There may be more than one matching rule; go through all of them.
  712          * Denial should be done last, after logging and sending signals.
  713          */
  714         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
  715                 rule = link->rrl_rule;
  716                 if (rule->rr_resource != resource)
  717                         continue;
  718                 if (rule->rr_action != RCTL_ACTION_DENY)
  719                         continue;
  720                 if (rule->rr_amount < amount)
  721                         amount = rule->rr_amount;
  722         }
  723 
  724         return (amount);
  725 }
  726 
  727 uint64_t
  728 rctl_get_available(struct proc *p, int resource)
  729 {
  730         struct rctl_rule *rule;
  731         struct rctl_rule_link *link;
  732         int64_t available, minavailable, allocated;
  733 
  734         minavailable = INT64_MAX;
  735 
  736         ASSERT_RACCT_ENABLED();
  737         RACCT_LOCK_ASSERT();
  738 
  739         /*
  740          * There may be more than one matching rule; go through all of them.
  741          * Denial should be done last, after logging and sending signals.
  742          */
  743         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
  744                 rule = link->rrl_rule;
  745                 if (rule->rr_resource != resource)
  746                         continue;
  747                 if (rule->rr_action != RCTL_ACTION_DENY)
  748                         continue;
  749                 available = rctl_available_resource(p, rule);
  750                 if (available < minavailable)
  751                         minavailable = available;
  752         }
  753 
  754         /*
  755          * XXX: Think about this _hard_.
  756          */
  757         allocated = p->p_racct->r_resources[resource];
  758         if (minavailable < INT64_MAX - allocated)
  759                 minavailable += allocated;
  760         if (minavailable < 0)
  761                 minavailable = 0;
  762 
  763         return (minavailable);
  764 }
  765 
  766 static int
  767 rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
  768 {
  769 
  770         ASSERT_RACCT_ENABLED();
  771 
  772         if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
  773                 if (rule->rr_subject_type != filter->rr_subject_type)
  774                         return (0);
  775 
  776                 switch (filter->rr_subject_type) {
  777                 case RCTL_SUBJECT_TYPE_PROCESS:
  778                         if (filter->rr_subject.rs_proc != NULL &&
  779                             rule->rr_subject.rs_proc !=
  780                             filter->rr_subject.rs_proc)
  781                                 return (0);
  782                         break;
  783                 case RCTL_SUBJECT_TYPE_USER:
  784                         if (filter->rr_subject.rs_uip != NULL &&
  785                             rule->rr_subject.rs_uip !=
  786                             filter->rr_subject.rs_uip)
  787                                 return (0);
  788                         break;
  789                 case RCTL_SUBJECT_TYPE_LOGINCLASS:
  790                         if (filter->rr_subject.rs_loginclass != NULL &&
  791                             rule->rr_subject.rs_loginclass !=
  792                             filter->rr_subject.rs_loginclass)
  793                                 return (0);
  794                         break;
  795                 case RCTL_SUBJECT_TYPE_JAIL:
  796                         if (filter->rr_subject.rs_prison_racct != NULL &&
  797                             rule->rr_subject.rs_prison_racct !=
  798                             filter->rr_subject.rs_prison_racct)
  799                                 return (0);
  800                         break;
  801                 default:
  802                         panic("rctl_rule_matches: unknown subject type %d",
  803                             filter->rr_subject_type);
  804                 }
  805         }
  806 
  807         if (filter->rr_resource != RACCT_UNDEFINED) {
  808                 if (rule->rr_resource != filter->rr_resource)
  809                         return (0);
  810         }
  811 
  812         if (filter->rr_action != RCTL_ACTION_UNDEFINED) {
  813                 if (rule->rr_action != filter->rr_action)
  814                         return (0);
  815         }
  816 
  817         if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) {
  818                 if (rule->rr_amount != filter->rr_amount)
  819                         return (0);
  820         }
  821 
  822         if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) {
  823                 if (rule->rr_per != filter->rr_per)
  824                         return (0);
  825         }
  826 
  827         return (1);
  828 }
  829 
  830 static int
  831 str2value(const char *str, int *value, struct dict *table)
  832 {
  833         int i;
  834 
  835         if (value == NULL)
  836                 return (EINVAL);
  837 
  838         for (i = 0; table[i].d_name != NULL; i++) {
  839                 if (strcasecmp(table[i].d_name, str) == 0) {
  840                         *value =  table[i].d_value;
  841                         return (0);
  842                 }
  843         }
  844 
  845         return (EINVAL);
  846 }
  847 
  848 static int
  849 str2id(const char *str, id_t *value)
  850 {
  851         char *end;
  852 
  853         if (str == NULL)
  854                 return (EINVAL);
  855 
  856         *value = strtoul(str, &end, 10);
  857         if ((size_t)(end - str) != strlen(str))
  858                 return (EINVAL);
  859 
  860         return (0);
  861 }
  862 
  863 static int
  864 str2int64(const char *str, int64_t *value)
  865 {
  866         char *end;
  867 
  868         if (str == NULL)
  869                 return (EINVAL);
  870 
  871         *value = strtoul(str, &end, 10);
  872         if ((size_t)(end - str) != strlen(str))
  873                 return (EINVAL);
  874 
  875         if (*value < 0)
  876                 return (ERANGE);
  877 
  878         return (0);
  879 }
  880 
  881 /*
  882  * Connect the rule to the racct, increasing refcount for the rule.
  883  */
  884 static void
  885 rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule)
  886 {
  887         struct rctl_rule_link *link;
  888 
  889         ASSERT_RACCT_ENABLED();
  890         KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
  891 
  892         rctl_rule_acquire(rule);
  893         link = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
  894         link->rrl_rule = rule;
  895         link->rrl_exceeded = 0;
  896 
  897         RACCT_LOCK();
  898         LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
  899         RACCT_UNLOCK();
  900 }
  901 
  902 static int
  903 rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule)
  904 {
  905         struct rctl_rule_link *link;
  906 
  907         ASSERT_RACCT_ENABLED();
  908         KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
  909         RACCT_LOCK_ASSERT();
  910 
  911         link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT);
  912         if (link == NULL)
  913                 return (ENOMEM);
  914         rctl_rule_acquire(rule);
  915         link->rrl_rule = rule;
  916         link->rrl_exceeded = 0;
  917 
  918         LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
  919 
  920         return (0);
  921 }
  922 
  923 /*
  924  * Remove limits for a rules matching the filter and release
  925  * the refcounts for the rules, possibly freeing them.  Returns
  926  * the number of limit structures removed.
  927  */
  928 static int
  929 rctl_racct_remove_rules(struct racct *racct,
  930     const struct rctl_rule *filter)
  931 {
  932         struct rctl_rule_link *link, *linktmp;
  933         int removed = 0;
  934 
  935         ASSERT_RACCT_ENABLED();
  936         RACCT_LOCK_ASSERT();
  937 
  938         LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
  939                 if (!rctl_rule_matches(link->rrl_rule, filter))
  940                         continue;
  941 
  942                 LIST_REMOVE(link, rrl_next);
  943                 rctl_rule_release(link->rrl_rule);
  944                 uma_zfree(rctl_rule_link_zone, link);
  945                 removed++;
  946         }
  947         return (removed);
  948 }
  949 
  950 static void
  951 rctl_rule_acquire_subject(struct rctl_rule *rule)
  952 {
  953 
  954         ASSERT_RACCT_ENABLED();
  955 
  956         switch (rule->rr_subject_type) {
  957         case RCTL_SUBJECT_TYPE_UNDEFINED:
  958         case RCTL_SUBJECT_TYPE_PROCESS:
  959                 break;
  960         case RCTL_SUBJECT_TYPE_JAIL:
  961                 if (rule->rr_subject.rs_prison_racct != NULL)
  962                         prison_racct_hold(rule->rr_subject.rs_prison_racct);
  963                 break;
  964         case RCTL_SUBJECT_TYPE_USER:
  965                 if (rule->rr_subject.rs_uip != NULL)
  966                         uihold(rule->rr_subject.rs_uip);
  967                 break;
  968         case RCTL_SUBJECT_TYPE_LOGINCLASS:
  969                 if (rule->rr_subject.rs_loginclass != NULL)
  970                         loginclass_hold(rule->rr_subject.rs_loginclass);
  971                 break;
  972         default:
  973                 panic("rctl_rule_acquire_subject: unknown subject type %d",
  974                     rule->rr_subject_type);
  975         }
  976 }
  977 
  978 static void
  979 rctl_rule_release_subject(struct rctl_rule *rule)
  980 {
  981 
  982         ASSERT_RACCT_ENABLED();
  983 
  984         switch (rule->rr_subject_type) {
  985         case RCTL_SUBJECT_TYPE_UNDEFINED:
  986         case RCTL_SUBJECT_TYPE_PROCESS:
  987                 break;
  988         case RCTL_SUBJECT_TYPE_JAIL:
  989                 if (rule->rr_subject.rs_prison_racct != NULL)
  990                         prison_racct_free(rule->rr_subject.rs_prison_racct);
  991                 break;
  992         case RCTL_SUBJECT_TYPE_USER:
  993                 if (rule->rr_subject.rs_uip != NULL)
  994                         uifree(rule->rr_subject.rs_uip);
  995                 break;
  996         case RCTL_SUBJECT_TYPE_LOGINCLASS:
  997                 if (rule->rr_subject.rs_loginclass != NULL)
  998                         loginclass_free(rule->rr_subject.rs_loginclass);
  999                 break;
 1000         default:
 1001                 panic("rctl_rule_release_subject: unknown subject type %d",
 1002                     rule->rr_subject_type);
 1003         }
 1004 }
 1005 
 1006 struct rctl_rule *
 1007 rctl_rule_alloc(int flags)
 1008 {
 1009         struct rctl_rule *rule;
 1010 
 1011         ASSERT_RACCT_ENABLED();
 1012 
 1013         rule = uma_zalloc(rctl_rule_zone, flags);
 1014         if (rule == NULL)
 1015                 return (NULL);
 1016         rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
 1017         rule->rr_subject.rs_proc = NULL;
 1018         rule->rr_subject.rs_uip = NULL;
 1019         rule->rr_subject.rs_loginclass = NULL;
 1020         rule->rr_subject.rs_prison_racct = NULL;
 1021         rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
 1022         rule->rr_resource = RACCT_UNDEFINED;
 1023         rule->rr_action = RCTL_ACTION_UNDEFINED;
 1024         rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
 1025         refcount_init(&rule->rr_refcount, 1);
 1026 
 1027         return (rule);
 1028 }
 1029 
 1030 struct rctl_rule *
 1031 rctl_rule_duplicate(const struct rctl_rule *rule, int flags)
 1032 {
 1033         struct rctl_rule *copy;
 1034 
 1035         ASSERT_RACCT_ENABLED();
 1036 
 1037         copy = uma_zalloc(rctl_rule_zone, flags);
 1038         if (copy == NULL)
 1039                 return (NULL);
 1040         copy->rr_subject_type = rule->rr_subject_type;
 1041         copy->rr_subject.rs_proc = rule->rr_subject.rs_proc;
 1042         copy->rr_subject.rs_uip = rule->rr_subject.rs_uip;
 1043         copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass;
 1044         copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct;
 1045         copy->rr_per = rule->rr_per;
 1046         copy->rr_resource = rule->rr_resource;
 1047         copy->rr_action = rule->rr_action;
 1048         copy->rr_amount = rule->rr_amount;
 1049         refcount_init(&copy->rr_refcount, 1);
 1050         rctl_rule_acquire_subject(copy);
 1051 
 1052         return (copy);
 1053 }
 1054 
/*
 * Take an additional reference on the rule.  The rule must already be
 * referenced: the KASSERT below checks that the count is positive on
 * entry.
 */
void
rctl_rule_acquire(struct rctl_rule *rule)
{

        ASSERT_RACCT_ENABLED();
        KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));

        refcount_acquire(&rule->rr_refcount);
}
 1064 
 1065 static void
 1066 rctl_rule_free(void *context, int pending)
 1067 {
 1068         struct rctl_rule *rule;
 1069 
 1070         rule = (struct rctl_rule *)context;
 1071 
 1072         ASSERT_RACCT_ENABLED();
 1073         KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
 1074 
 1075         /*
 1076          * We don't need locking here; rule is guaranteed to be inaccessible.
 1077          */
 1078 
 1079         rctl_rule_release_subject(rule);
 1080         uma_zfree(rctl_rule_zone, rule);
 1081 }
 1082 
 1083 void
 1084 rctl_rule_release(struct rctl_rule *rule)
 1085 {
 1086 
 1087         ASSERT_RACCT_ENABLED();
 1088         KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
 1089 
 1090         if (refcount_release(&rule->rr_refcount)) {
 1091                 /*
 1092                  * rctl_rule_release() is often called when iterating
 1093                  * over all the uidinfo structures in the system,
 1094                  * holding uihashtbl_lock.  Since rctl_rule_free()
 1095                  * might end up calling uifree(), this would lead
 1096                  * to lock recursion.  Use taskqueue to avoid this.
 1097                  */
 1098                 TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
 1099                 taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
 1100         }
 1101 }
 1102 
 1103 static int
 1104 rctl_rule_fully_specified(const struct rctl_rule *rule)
 1105 {
 1106 
 1107         ASSERT_RACCT_ENABLED();
 1108 
 1109         switch (rule->rr_subject_type) {
 1110         case RCTL_SUBJECT_TYPE_UNDEFINED:
 1111                 return (0);
 1112         case RCTL_SUBJECT_TYPE_PROCESS:
 1113                 if (rule->rr_subject.rs_proc == NULL)
 1114                         return (0);
 1115                 break;
 1116         case RCTL_SUBJECT_TYPE_USER:
 1117                 if (rule->rr_subject.rs_uip == NULL)
 1118                         return (0);
 1119                 break;
 1120         case RCTL_SUBJECT_TYPE_LOGINCLASS:
 1121                 if (rule->rr_subject.rs_loginclass == NULL)
 1122                         return (0);
 1123                 break;
 1124         case RCTL_SUBJECT_TYPE_JAIL:
 1125                 if (rule->rr_subject.rs_prison_racct == NULL)
 1126                         return (0);
 1127                 break;
 1128         default:
 1129                 panic("rctl_rule_fully_specified: unknown subject type %d",
 1130                     rule->rr_subject_type);
 1131         }
 1132         if (rule->rr_resource == RACCT_UNDEFINED)
 1133                 return (0);
 1134         if (rule->rr_action == RCTL_ACTION_UNDEFINED)
 1135                 return (0);
 1136         if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
 1137                 return (0);
 1138         if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
 1139                 return (0);
 1140 
 1141         return (1);
 1142 }
 1143 
/*
 * Parse the textual form of a rule (or filter) into a newly allocated
 * struct rctl_rule.
 *
 * The string is split, in order, into: subject type, subject id and
 * resource (each terminated by ':'), action (terminated by '=' or '/'),
 * amount (terminated by '/'), and finally the "per" subject type, which
 * is whatever remains.  strsep() is used for the splitting, so the
 * buffer passed in as 'rulestr' is modified in place.  A field that is
 * missing or empty leaves the corresponding rule member at its
 * "undefined" value, so the result is not necessarily fully specified.
 *
 * On success 0 is returned and the rule is stored through 'rulep'.  On
 * failure the partially built rule is released, '*rulep' is left
 * untouched and an errno value is returned: whatever str2value(),
 * str2id() or str2int64() reported, EINVAL for a subject id given
 * without a subject type, ESRCH when pfind() finds no process,
 * ENAMETOOLONG when the login class or jail lookup returns NULL, or
 * ERANGE when scaling the amount would overflow.
 *
 * When the subject is a process, allproc_lock is asserted to be held.
 */
static int
rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
{
        struct rctl_rule *rule;
        char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
             *amountstr, *perstr;
        id_t id;
        int error = 0;

        ASSERT_RACCT_ENABLED();

        rule = rctl_rule_alloc(M_WAITOK);

        /* Carve the string into its fields; later fields may be NULL. */
        subjectstr = strsep(&rulestr, ":");
        subject_idstr = strsep(&rulestr, ":");
        resourcestr = strsep(&rulestr, ":");
        actionstr = strsep(&rulestr, "=/");
        amountstr = strsep(&rulestr, "/");
        perstr = rulestr;

        if (subjectstr == NULL || subjectstr[0] == '\0')
                rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
        else {
                error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
                if (error != 0)
                        goto out;
        }

        if (subject_idstr == NULL || subject_idstr[0] == '\0') {
                rule->rr_subject.rs_proc = NULL;
                rule->rr_subject.rs_uip = NULL;
                rule->rr_subject.rs_loginclass = NULL;
                rule->rr_subject.rs_prison_racct = NULL;
        } else {
                /* How the id is resolved depends on the subject type. */
                switch (rule->rr_subject_type) {
                case RCTL_SUBJECT_TYPE_UNDEFINED:
                        /* An id without a subject type is meaningless. */
                        error = EINVAL;
                        goto out;
                case RCTL_SUBJECT_TYPE_PROCESS:
                        error = str2id(subject_idstr, &id);
                        if (error != 0)
                                goto out;
                        sx_assert(&allproc_lock, SA_LOCKED);
                        rule->rr_subject.rs_proc = pfind(id);
                        if (rule->rr_subject.rs_proc == NULL) {
                                error = ESRCH;
                                goto out;
                        }
                        /*
                         * The process found by pfind() is unlocked right
                         * away; only the pointer is kept in the rule.
                         */
                        PROC_UNLOCK(rule->rr_subject.rs_proc);
                        break;
                case RCTL_SUBJECT_TYPE_USER:
                        error = str2id(subject_idstr, &id);
                        if (error != 0)
                                goto out;
                        rule->rr_subject.rs_uip = uifind(id);
                        break;
                case RCTL_SUBJECT_TYPE_LOGINCLASS:
                        rule->rr_subject.rs_loginclass =
                            loginclass_find(subject_idstr);
                        if (rule->rr_subject.rs_loginclass == NULL) {
                                error = ENAMETOOLONG;
                                goto out;
                        }
                        break;
                case RCTL_SUBJECT_TYPE_JAIL:
                        rule->rr_subject.rs_prison_racct =
                            prison_racct_find(subject_idstr);
                        if (rule->rr_subject.rs_prison_racct == NULL) {
                                error = ENAMETOOLONG;
                                goto out;
                        }
                        break;
               default:
                       panic("rctl_string_to_rule: unknown subject type %d",
                           rule->rr_subject_type);
               }
        }

        if (resourcestr == NULL || resourcestr[0] == '\0')
                rule->rr_resource = RACCT_UNDEFINED;
        else {
                error = str2value(resourcestr, &rule->rr_resource,
                    resourcenames);
                if (error != 0)
                        goto out;
        }

        if (actionstr == NULL || actionstr[0] == '\0')
                rule->rr_action = RCTL_ACTION_UNDEFINED;
        else {
                error = str2value(actionstr, &rule->rr_action, actionnames);
                if (error != 0)
                        goto out;
        }

        if (amountstr == NULL || amountstr[0] == '\0')
                rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
        else {
                error = str2int64(amountstr, &rule->rr_amount);
                if (error != 0)
                        goto out;
                /*
                 * Amounts of resources flagged by RACCT_IS_IN_MILLIONS()
                 * are multiplied by 1000000 here; refuse values for
                 * which that multiplication would overflow.
                 *
                 * NOTE(review): rr_resource may still be RACCT_UNDEFINED
                 * at this point if the resource field was empty -
                 * confirm the macro tolerates that.
                 */
                if (RACCT_IS_IN_MILLIONS(rule->rr_resource)) {
                        if (rule->rr_amount > INT64_MAX / 1000000) {
                                error = ERANGE;
                                goto out;
                        }
                        rule->rr_amount *= 1000000;
                }
        }

        if (perstr == NULL || perstr[0] == '\0')
                rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
        else {
                error = str2value(perstr, &rule->rr_per, subjectnames);
                if (error != 0)
                        goto out;
        }

out:
        /* Hand the rule over on success; drop our reference on failure. */
        if (error == 0)
                *rulep = rule;
        else
                rctl_rule_release(rule);

        return (error);
}
 1270 
/*
 * Link a rule with all the subjects it applies to.
 *
 * The rule must be fully specified.  Combinations of action, resource
 * and "per" type that are not supported are rejected with EOPNOTSUPP
 * before anything is changed.  Otherwise any previously installed rule
 * that the new one supersedes is removed, the rule is linked to the
 * racct of its subject and, for user, login class and jail subjects,
 * also to the racct of every process the subject covers.  Returns 0 on
 * success.
 *
 * For subjects other than a single process, allproc_lock is asserted to
 * be held while the process list is walked.
 */
int
rctl_rule_add(struct rctl_rule *rule)
{
        struct proc *p;
        struct ucred *cred;
        struct uidinfo *uip;
        struct prison *pr;
        struct prison_racct *prr;
        struct loginclass *lc;
        struct rctl_rule *rule2;
        int match;

        ASSERT_RACCT_ENABLED();
        KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));

        /*
         * Some rules just don't make sense, like "deny" rule for an undeniable
         * resource.  The exception are the RSS and %CPU resources - they are
         * not deniable in the racct sense, but the limit is enforced in
         * a different way.
         */
        if (rule->rr_action == RCTL_ACTION_DENY &&
            !RACCT_IS_DENIABLE(rule->rr_resource) &&
            rule->rr_resource != RACCT_RSS &&
            rule->rr_resource != RACCT_PCTCPU) {
                return (EOPNOTSUPP);
        }

        /* "throttle" is accepted only for decaying resources... */
        if (rule->rr_action == RCTL_ACTION_THROTTLE &&
            !RACCT_IS_DECAYING(rule->rr_resource)) {
                return (EOPNOTSUPP);
        }

        /* ...and never for %CPU. */
        if (rule->rr_action == RCTL_ACTION_THROTTLE &&
            rule->rr_resource == RACCT_PCTCPU) {
                return (EOPNOTSUPP);
        }

        /* Per-process rules are refused for sloppy resources. */
        if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS &&
            RACCT_IS_SLOPPY(rule->rr_resource)) {
                return (EOPNOTSUPP);
        }

        /*
         * Make sure there are no duplicated rules.  Also, for the "deny"
         * rules, remove ones differing only by "amount".
         *
         * The return value of rctl_rule_remove() is deliberately not
         * checked: finding nothing to remove is not an error here.
         */
        if (rule->rr_action == RCTL_ACTION_DENY) {
                /* Filter that matches any amount. */
                rule2 = rctl_rule_duplicate(rule, M_WAITOK);
                rule2->rr_amount = RCTL_AMOUNT_UNDEFINED;
                rctl_rule_remove(rule2);
                rctl_rule_release(rule2);
        } else
                rctl_rule_remove(rule);

        /* First, link the rule to the racct of the subject itself. */
        switch (rule->rr_subject_type) {
        case RCTL_SUBJECT_TYPE_PROCESS:
                p = rule->rr_subject.rs_proc;
                KASSERT(p != NULL, ("rctl_rule_add: NULL proc"));

                rctl_racct_add_rule(p->p_racct, rule);
                /*
                 * In case of per-process rule, we don't have anything more
                 * to do.
                 */
                return (0);

        case RCTL_SUBJECT_TYPE_USER:
                uip = rule->rr_subject.rs_uip;
                KASSERT(uip != NULL, ("rctl_rule_add: NULL uip"));
                rctl_racct_add_rule(uip->ui_racct, rule);
                break;

        case RCTL_SUBJECT_TYPE_LOGINCLASS:
                lc = rule->rr_subject.rs_loginclass;
                KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass"));
                rctl_racct_add_rule(lc->lc_racct, rule);
                break;

        case RCTL_SUBJECT_TYPE_JAIL:
                prr = rule->rr_subject.rs_prison_racct;
                KASSERT(prr != NULL, ("rctl_rule_add: NULL pr"));
                rctl_racct_add_rule(prr->prr_racct, rule);
                break;

        default:
                panic("rctl_rule_add: unknown subject type %d",
                    rule->rr_subject_type);
        }

        /*
         * Now go through all the processes and add the new rule to the ones
         * it applies to.
         */
        sx_assert(&allproc_lock, SA_LOCKED);
        FOREACH_PROC_IN_SYSTEM(p) {
                cred = p->p_ucred;
                /*
                 * In this switch "break" means the process is covered by
                 * the rule and falls through to the add below, while
                 * "continue" skips to the next process.
                 */
                switch (rule->rr_subject_type) {
                case RCTL_SUBJECT_TYPE_USER:
                        /* Either the effective or the real uid may match. */
                        if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
                            cred->cr_ruidinfo == rule->rr_subject.rs_uip)
                                break;
                        continue;
                case RCTL_SUBJECT_TYPE_LOGINCLASS:
                        if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
                                break;
                        continue;
                case RCTL_SUBJECT_TYPE_JAIL:
                        /*
                         * Walk up the pr_parent chain, so a rule on a jail
                         * also covers processes in its descendant jails.
                         */
                        match = 0;
                        for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
                                if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) {
                                        match = 1;
                                        break;
                                }
                        }
                        if (match)
                                break;
                        continue;
                default:
                        panic("rctl_rule_add: unknown subject type %d",
                            rule->rr_subject_type);
                }

                rctl_racct_add_rule(p->p_racct, rule);
        }

        return (0);
}
 1402 
/*
 * "Pre" hook handed to the *_racct_foreach() iterators by
 * rctl_rule_remove(): takes the racct lock.
 */
static void
rctl_rule_pre_callback(void)
{

        RACCT_LOCK();
}
 1409 
/*
 * "Post" hook handed to the *_racct_foreach() iterators by
 * rctl_rule_remove(): drops the racct lock.
 */
static void
rctl_rule_post_callback(void)
{

        RACCT_UNLOCK();
}
 1416 
 1417 static void
 1418 rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3)
 1419 {
 1420         struct rctl_rule *filter = (struct rctl_rule *)arg2;
 1421         int found = 0;
 1422 
 1423         ASSERT_RACCT_ENABLED();
 1424         RACCT_LOCK_ASSERT();
 1425 
 1426         found += rctl_racct_remove_rules(racct, filter);
 1427 
 1428         *((int *)arg3) += found;
 1429 }
 1430 
 1431 /*
 1432  * Remove all rules that match the filter.
 1433  */
 1434 int
 1435 rctl_rule_remove(struct rctl_rule *filter)
 1436 {
 1437         struct proc *p;
 1438         int found = 0;
 1439 
 1440         ASSERT_RACCT_ENABLED();
 1441 
 1442         if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
 1443             filter->rr_subject.rs_proc != NULL) {
 1444                 p = filter->rr_subject.rs_proc;
 1445                 RACCT_LOCK();
 1446                 found = rctl_racct_remove_rules(p->p_racct, filter);
 1447                 RACCT_UNLOCK();
 1448                 if (found)
 1449                         return (0);
 1450                 return (ESRCH);
 1451         }
 1452 
 1453         loginclass_racct_foreach(rctl_rule_remove_callback,
 1454             rctl_rule_pre_callback, rctl_rule_post_callback,
 1455             filter, (void *)&found);
 1456         ui_racct_foreach(rctl_rule_remove_callback,
 1457             rctl_rule_pre_callback, rctl_rule_post_callback,
 1458             filter, (void *)&found);
 1459         prison_racct_foreach(rctl_rule_remove_callback,
 1460             rctl_rule_pre_callback, rctl_rule_post_callback,
 1461             filter, (void *)&found);
 1462 
 1463         sx_assert(&allproc_lock, SA_LOCKED);
 1464         RACCT_LOCK();
 1465         FOREACH_PROC_IN_SYSTEM(p) {
 1466                 found += rctl_racct_remove_rules(p->p_racct, filter);
 1467         }
 1468         RACCT_UNLOCK();
 1469 
 1470         if (found)
 1471                 return (0);
 1472         return (ESRCH);
 1473 }
 1474 
 1475 /*
 1476  * Appends a rule to the sbuf.
 1477  */
 1478 static void
 1479 rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule)
 1480 {
 1481         int64_t amount;
 1482 
 1483         ASSERT_RACCT_ENABLED();
 1484 
 1485         sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
 1486 
 1487         switch (rule->rr_subject_type) {
 1488         case RCTL_SUBJECT_TYPE_PROCESS:
 1489                 if (rule->rr_subject.rs_proc == NULL)
 1490                         sbuf_printf(sb, ":");
 1491                 else
 1492                         sbuf_printf(sb, "%d:",
 1493                             rule->rr_subject.rs_proc->p_pid);
 1494                 break;
 1495         case RCTL_SUBJECT_TYPE_USER:
 1496                 if (rule->rr_subject.rs_uip == NULL)
 1497                         sbuf_printf(sb, ":");
 1498                 else
 1499                         sbuf_printf(sb, "%d:",
 1500                             rule->rr_subject.rs_uip->ui_uid);
 1501                 break;
 1502         case RCTL_SUBJECT_TYPE_LOGINCLASS:
 1503                 if (rule->rr_subject.rs_loginclass == NULL)
 1504                         sbuf_printf(sb, ":");
 1505                 else
 1506                         sbuf_printf(sb, "%s:",
 1507                             rule->rr_subject.rs_loginclass->lc_name);
 1508                 break;
 1509         case RCTL_SUBJECT_TYPE_JAIL:
 1510                 if (rule->rr_subject.rs_prison_racct == NULL)
 1511                         sbuf_printf(sb, ":");
 1512                 else
 1513                         sbuf_printf(sb, "%s:",
 1514                             rule->rr_subject.rs_prison_racct->prr_name);
 1515                 break;
 1516         default:
 1517                 panic("rctl_rule_to_sbuf: unknown subject type %d",
 1518                     rule->rr_subject_type);
 1519         }
 1520 
 1521         amount = rule->rr_amount;
 1522         if (amount != RCTL_AMOUNT_UNDEFINED &&
 1523             RACCT_IS_IN_MILLIONS(rule->rr_resource))
 1524                 amount /= 1000000;
 1525 
 1526         sbuf_printf(sb, "%s:%s=%jd",
 1527             rctl_resource_name(rule->rr_resource),
 1528             rctl_action_name(rule->rr_action),
 1529             amount);
 1530 
 1531         if (rule->rr_per != rule->rr_subject_type)
 1532                 sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per));
 1533 }
 1534 
 1535 /*
 1536  * Routine used by RCTL syscalls to read in input string.
 1537  */
 1538 static int
 1539 rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen)
 1540 {
 1541         char *str;
 1542         int error;
 1543 
 1544         ASSERT_RACCT_ENABLED();
 1545 
 1546         if (inbuflen <= 0)
 1547                 return (EINVAL);
 1548         if (inbuflen > RCTL_MAX_INBUFSIZE)
 1549                 return (E2BIG);
 1550 
 1551         str = malloc(inbuflen + 1, M_RCTL, M_WAITOK);
 1552         error = copyinstr(inbufp, str, inbuflen, NULL);
 1553         if (error != 0) {
 1554                 free(str, M_RCTL);
 1555                 return (error);
 1556         }
 1557 
 1558         *inputstr = str;
 1559 
 1560         return (0);
 1561 }
 1562 
 1563 /*
 1564  * Routine used by RCTL syscalls to write out output string.
 1565  */
 1566 static int
 1567 rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen)
 1568 {
 1569         int error;
 1570 
 1571         ASSERT_RACCT_ENABLED();
 1572 
 1573         if (outputsbuf == NULL)
 1574                 return (0);
 1575 
 1576         sbuf_finish(outputsbuf);
 1577         if (outbuflen < sbuf_len(outputsbuf) + 1) {
 1578                 sbuf_delete(outputsbuf);
 1579                 return (ERANGE);
 1580         }
 1581         error = copyout(sbuf_data(outputsbuf), outbufp,
 1582             sbuf_len(outputsbuf) + 1);
 1583         sbuf_delete(outputsbuf);
 1584         return (error);
 1585 }
 1586 
 1587 static struct sbuf *
 1588 rctl_racct_to_sbuf(struct racct *racct, int sloppy)
 1589 {
 1590         struct sbuf *sb;
 1591         int64_t amount;
 1592         int i;
 1593 
 1594         ASSERT_RACCT_ENABLED();
 1595 
 1596         sb = sbuf_new_auto();
 1597         for (i = 0; i <= RACCT_MAX; i++) {
 1598                 if (sloppy == 0 && RACCT_IS_SLOPPY(i))
 1599                         continue;
 1600                 RACCT_LOCK();
 1601                 amount = racct->r_resources[i];
 1602                 RACCT_UNLOCK();
 1603                 if (RACCT_IS_IN_MILLIONS(i))
 1604                         amount /= 1000000;
 1605                 sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount);
 1606         }
 1607         sbuf_setpos(sb, sbuf_len(sb) - 1);
 1608         return (sb);
 1609 }
 1610 
 1611 int
 1612 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
 1613 {
 1614         struct rctl_rule *filter;
 1615         struct sbuf *outputsbuf = NULL;
 1616         struct proc *p;
 1617         struct uidinfo *uip;
 1618         struct loginclass *lc;
 1619         struct prison_racct *prr;
 1620         char *inputstr;
 1621         int error;
 1622 
 1623         if (!racct_enable)
 1624                 return (ENOSYS);
 1625 
 1626         error = priv_check(td, PRIV_RCTL_GET_RACCT);
 1627         if (error != 0)
 1628                 return (error);
 1629 
 1630         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
 1631         if (error != 0)
 1632                 return (error);
 1633 
 1634         sx_slock(&allproc_lock);
 1635         error = rctl_string_to_rule(inputstr, &filter);
 1636         free(inputstr, M_RCTL);
 1637         if (error != 0) {
 1638                 sx_sunlock(&allproc_lock);
 1639                 return (error);
 1640         }
 1641 
 1642         switch (filter->rr_subject_type) {
 1643         case RCTL_SUBJECT_TYPE_PROCESS:
 1644                 p = filter->rr_subject.rs_proc;
 1645                 if (p == NULL) {
 1646                         error = EINVAL;
 1647                         goto out;
 1648                 }
 1649                 outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0);
 1650                 break;
 1651         case RCTL_SUBJECT_TYPE_USER:
 1652                 uip = filter->rr_subject.rs_uip;
 1653                 if (uip == NULL) {
 1654                         error = EINVAL;
 1655                         goto out;
 1656                 }
 1657                 outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1);
 1658                 break;
 1659         case RCTL_SUBJECT_TYPE_LOGINCLASS:
 1660                 lc = filter->rr_subject.rs_loginclass;
 1661                 if (lc == NULL) {
 1662                         error = EINVAL;
 1663                         goto out;
 1664                 }
 1665                 outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1);
 1666                 break;
 1667         case RCTL_SUBJECT_TYPE_JAIL:
 1668                 prr = filter->rr_subject.rs_prison_racct;
 1669                 if (prr == NULL) {
 1670                         error = EINVAL;
 1671                         goto out;
 1672                 }
 1673                 outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1);
 1674                 break;
 1675         default:
 1676                 error = EINVAL;
 1677         }
 1678 out:
 1679         rctl_rule_release(filter);
 1680         sx_sunlock(&allproc_lock);
 1681         if (error != 0)
 1682                 return (error);
 1683 
 1684         error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen);
 1685 
 1686         return (error);
 1687 }
 1688 
 1689 static void
 1690 rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3)
 1691 {
 1692         struct rctl_rule *filter = (struct rctl_rule *)arg2;
 1693         struct rctl_rule_link *link;
 1694         struct sbuf *sb = (struct sbuf *)arg3;
 1695 
 1696         ASSERT_RACCT_ENABLED();
 1697         RACCT_LOCK_ASSERT();
 1698 
 1699         LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
 1700                 if (!rctl_rule_matches(link->rrl_rule, filter))
 1701                         continue;
 1702                 rctl_rule_to_sbuf(sb, link->rrl_rule);
 1703                 sbuf_printf(sb, ",");
 1704         }
 1705 }
 1706 
 1707 int
 1708 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
 1709 {
 1710         struct sbuf *sb;
 1711         struct rctl_rule *filter;
 1712         struct rctl_rule_link *link;
 1713         struct proc *p;
 1714         char *inputstr, *buf;
 1715         size_t bufsize;
 1716         int error;
 1717 
 1718         if (!racct_enable)
 1719                 return (ENOSYS);
 1720 
 1721         error = priv_check(td, PRIV_RCTL_GET_RULES);
 1722         if (error != 0)
 1723                 return (error);
 1724 
 1725         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
 1726         if (error != 0)
 1727                 return (error);
 1728 
 1729         sx_slock(&allproc_lock);
 1730         error = rctl_string_to_rule(inputstr, &filter);
 1731         free(inputstr, M_RCTL);
 1732         if (error != 0) {
 1733                 sx_sunlock(&allproc_lock);
 1734                 return (error);
 1735         }
 1736 
 1737         bufsize = uap->outbuflen;
 1738         if (bufsize > rctl_maxbufsize) {
 1739                 sx_sunlock(&allproc_lock);
 1740                 return (E2BIG);
 1741         }
 1742 
 1743         buf = malloc(bufsize, M_RCTL, M_WAITOK);
 1744         sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
 1745         KASSERT(sb != NULL, ("sbuf_new failed"));
 1746 
 1747         FOREACH_PROC_IN_SYSTEM(p) {
 1748                 RACCT_LOCK();
 1749                 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
 1750                         /*
 1751                          * Non-process rules will be added to the buffer later.
 1752                          * Adding them here would result in duplicated output.
 1753                          */
 1754                         if (link->rrl_rule->rr_subject_type !=
 1755                             RCTL_SUBJECT_TYPE_PROCESS)
 1756                                 continue;
 1757                         if (!rctl_rule_matches(link->rrl_rule, filter))
 1758                                 continue;
 1759                         rctl_rule_to_sbuf(sb, link->rrl_rule);
 1760                         sbuf_printf(sb, ",");
 1761                 }
 1762                 RACCT_UNLOCK();
 1763         }
 1764 
 1765         loginclass_racct_foreach(rctl_get_rules_callback,
 1766             rctl_rule_pre_callback, rctl_rule_post_callback,
 1767             filter, sb);
 1768         ui_racct_foreach(rctl_get_rules_callback,
 1769             rctl_rule_pre_callback, rctl_rule_post_callback,
 1770             filter, sb);
 1771         prison_racct_foreach(rctl_get_rules_callback,
 1772             rctl_rule_pre_callback, rctl_rule_post_callback,
 1773             filter, sb);
 1774         if (sbuf_error(sb) == ENOMEM) {
 1775                 error = ERANGE;
 1776                 goto out;
 1777         }
 1778 
 1779         /*
 1780          * Remove trailing ",".
 1781          */
 1782         if (sbuf_len(sb) > 0)
 1783                 sbuf_setpos(sb, sbuf_len(sb) - 1);
 1784 
 1785         error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
 1786 out:
 1787         rctl_rule_release(filter);
 1788         sx_sunlock(&allproc_lock);
 1789         free(buf, M_RCTL);
 1790         return (error);
 1791 }
 1792 
 1793 int
 1794 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
 1795 {
 1796         struct sbuf *sb;
 1797         struct rctl_rule *filter;
 1798         struct rctl_rule_link *link;
 1799         char *inputstr, *buf;
 1800         size_t bufsize;
 1801         int error;
 1802 
 1803         if (!racct_enable)
 1804                 return (ENOSYS);
 1805 
 1806         error = priv_check(td, PRIV_RCTL_GET_LIMITS);
 1807         if (error != 0)
 1808                 return (error);
 1809 
 1810         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
 1811         if (error != 0)
 1812                 return (error);
 1813 
 1814         sx_slock(&allproc_lock);
 1815         error = rctl_string_to_rule(inputstr, &filter);
 1816         free(inputstr, M_RCTL);
 1817         if (error != 0) {
 1818                 sx_sunlock(&allproc_lock);
 1819                 return (error);
 1820         }
 1821 
 1822         if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) {
 1823                 rctl_rule_release(filter);
 1824                 sx_sunlock(&allproc_lock);
 1825                 return (EINVAL);
 1826         }
 1827         if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) {
 1828                 rctl_rule_release(filter);
 1829                 sx_sunlock(&allproc_lock);
 1830                 return (EOPNOTSUPP);
 1831         }
 1832         if (filter->rr_subject.rs_proc == NULL) {
 1833                 rctl_rule_release(filter);
 1834                 sx_sunlock(&allproc_lock);
 1835                 return (EINVAL);
 1836         }
 1837 
 1838         bufsize = uap->outbuflen;
 1839         if (bufsize > rctl_maxbufsize) {
 1840                 rctl_rule_release(filter);
 1841                 sx_sunlock(&allproc_lock);
 1842                 return (E2BIG);
 1843         }
 1844 
 1845         buf = malloc(bufsize, M_RCTL, M_WAITOK);
 1846         sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
 1847         KASSERT(sb != NULL, ("sbuf_new failed"));
 1848 
 1849         RACCT_LOCK();
 1850         LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links,
 1851             rrl_next) {
 1852                 rctl_rule_to_sbuf(sb, link->rrl_rule);
 1853                 sbuf_printf(sb, ",");
 1854         }
 1855         RACCT_UNLOCK();
 1856         if (sbuf_error(sb) == ENOMEM) {
 1857                 error = ERANGE;
 1858                 sbuf_delete(sb);
 1859                 goto out;
 1860         }
 1861 
 1862         /*
 1863          * Remove trailing ",".
 1864          */
 1865         if (sbuf_len(sb) > 0)
 1866                 sbuf_setpos(sb, sbuf_len(sb) - 1);
 1867 
 1868         error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
 1869 out:
 1870         rctl_rule_release(filter);
 1871         sx_sunlock(&allproc_lock);
 1872         free(buf, M_RCTL);
 1873         return (error);
 1874 }
 1875 
 1876 int
 1877 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
 1878 {
 1879         struct rctl_rule *rule;
 1880         char *inputstr;
 1881         int error;
 1882 
 1883         if (!racct_enable)
 1884                 return (ENOSYS);
 1885 
 1886         error = priv_check(td, PRIV_RCTL_ADD_RULE);
 1887         if (error != 0)
 1888                 return (error);
 1889 
 1890         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
 1891         if (error != 0)
 1892                 return (error);
 1893 
 1894         sx_slock(&allproc_lock);
 1895         error = rctl_string_to_rule(inputstr, &rule);
 1896         free(inputstr, M_RCTL);
 1897         if (error != 0) {
 1898                 sx_sunlock(&allproc_lock);
 1899                 return (error);
 1900         }
 1901         /*
 1902          * The 'per' part of a rule is optional.
 1903          */
 1904         if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED &&
 1905             rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED)
 1906                 rule->rr_per = rule->rr_subject_type;
 1907 
 1908         if (!rctl_rule_fully_specified(rule)) {
 1909                 error = EINVAL;
 1910                 goto out;
 1911         }
 1912 
 1913         error = rctl_rule_add(rule);
 1914 
 1915 out:
 1916         rctl_rule_release(rule);
 1917         sx_sunlock(&allproc_lock);
 1918         return (error);
 1919 }
 1920 
 1921 int
 1922 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
 1923 {
 1924         struct rctl_rule *filter;
 1925         char *inputstr;
 1926         int error;
 1927 
 1928         if (!racct_enable)
 1929                 return (ENOSYS);
 1930 
 1931         error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
 1932         if (error != 0)
 1933                 return (error);
 1934 
 1935         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
 1936         if (error != 0)
 1937                 return (error);
 1938 
 1939         sx_slock(&allproc_lock);
 1940         error = rctl_string_to_rule(inputstr, &filter);
 1941         free(inputstr, M_RCTL);
 1942         if (error != 0) {
 1943                 sx_sunlock(&allproc_lock);
 1944                 return (error);
 1945         }
 1946 
 1947         error = rctl_rule_remove(filter);
 1948         rctl_rule_release(filter);
 1949         sx_sunlock(&allproc_lock);
 1950 
 1951         return (error);
 1952 }
 1953 
 1954 /*
 1955  * Update RCTL rule list after credential change.
       *
       * Rebuilds the rule list of p->p_racct from the process-subject rules
       * already linked to p plus every rule linked to the racct of the new
       * credential's real uidinfo, loginclass and prison.
       *
       * Link allocation uses M_WAITOK and is therefore done with the racct lock
       * dropped: rules are counted under the lock, that many links are
       * allocated without it, and the links are filled in after retaking it.
       * If the lists changed in between, so that the links run out or some are
       * left over, the temporary list is freed and the whole sequence restarts.
       *
       * Does nothing when racct is disabled.  Asserts that the process lock of
       * p is not owned by the caller.
 1956  */
 1957 void
 1958 rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred)
 1959 {
 1960         LIST_HEAD(, rctl_rule_link) newrules;
 1961         struct rctl_rule_link *link, *newlink;
 1962         struct uidinfo *newuip;
 1963         struct loginclass *newlc;
 1964         struct prison_racct *newprr;
 1965         int rulecnt, i;
 1966 
 1967         if (!racct_enable)
 1968                 return;
 1969 
 1970         PROC_LOCK_ASSERT(p, MA_NOTOWNED);
 1971 
 1972         newuip = newcred->cr_ruidinfo;
 1973         newlc = newcred->cr_loginclass;
 1974         newprr = newcred->cr_prison->pr_prison_racct;
 1975 
 1976         LIST_INIT(&newrules);
 1977 
 1978 again:
 1979         /*
 1980          * First, count the rules that apply to the process with new
 1981          * credentials.
 1982          */
 1983         rulecnt = 0;
 1984         RACCT_LOCK();
 1985         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
 1986                 if (link->rrl_rule->rr_subject_type ==
 1987                     RCTL_SUBJECT_TYPE_PROCESS)
 1988                         rulecnt++;
 1989         }
 1990         LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next)
 1991                 rulecnt++;
 1992         LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next)
 1993                 rulecnt++;
 1994         LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next)
 1995                 rulecnt++;
 1996         RACCT_UNLOCK();
 1997 
 1998         /*
 1999          * Create temporary list.  We've dropped the rctl_lock in order
 2000          * to use M_WAITOK.
 2001          */
 2002         for (i = 0; i < rulecnt; i++) {
 2003                 newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
                      /* A NULL rrl_rule marks the link as not yet filled in. */
 2004                 newlink->rrl_rule = NULL;
 2005                 newlink->rrl_exceeded = 0;
 2006                 LIST_INSERT_HEAD(&newrules, newlink, rrl_next);
 2007         }
 2008 
              /* Cursor over the preallocated links, advanced as each is filled. */
 2009         newlink = LIST_FIRST(&newrules);
 2010 
 2011         /*
 2012          * Assign rules to the newly allocated list entries.
               *
               * Each assignment takes a reference on the rule and decrements
               * rulecnt.  Running out of links (newlink == NULL) means more rules
               * exist now than were counted above, so the attempt is abandoned.
 2013          */
 2014         RACCT_LOCK();
 2015         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
 2016                 if (link->rrl_rule->rr_subject_type ==
 2017                     RCTL_SUBJECT_TYPE_PROCESS) {
 2018                         if (newlink == NULL)
 2019                                 goto goaround;
 2020                         rctl_rule_acquire(link->rrl_rule);
 2021                         newlink->rrl_rule = link->rrl_rule;
 2022                         newlink->rrl_exceeded = link->rrl_exceeded;
 2023                         newlink = LIST_NEXT(newlink, rrl_next);
 2024                         rulecnt--;
 2025                 }
 2026         }
 2027 
 2028         LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) {
 2029                 if (newlink == NULL)
 2030                         goto goaround;
 2031                 rctl_rule_acquire(link->rrl_rule);
 2032                 newlink->rrl_rule = link->rrl_rule;
 2033                 newlink->rrl_exceeded = link->rrl_exceeded;
 2034                 newlink = LIST_NEXT(newlink, rrl_next);
 2035                 rulecnt--;
 2036         }
 2037 
 2038         LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) {
 2039                 if (newlink == NULL)
 2040                         goto goaround;
 2041                 rctl_rule_acquire(link->rrl_rule);
 2042                 newlink->rrl_rule = link->rrl_rule;
 2043                 newlink->rrl_exceeded = link->rrl_exceeded;
 2044                 newlink = LIST_NEXT(newlink, rrl_next);
 2045                 rulecnt--;
 2046         }
 2047 
 2048         LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) {
 2049                 if (newlink == NULL)
 2050                         goto goaround;
 2051                 rctl_rule_acquire(link->rrl_rule);
 2052                 newlink->rrl_rule = link->rrl_rule;
 2053                 newlink->rrl_exceeded = link->rrl_exceeded;
 2054                 newlink = LIST_NEXT(newlink, rrl_next);
 2055                 rulecnt--;
 2056         }
 2057 
              /*
               * rulecnt is zero only if every preallocated link was filled in,
               * i.e. the number of rules still equals the count taken earlier.
               * A non-zero value means links are left over; fall through to
               * goaround and retry.
               */
 2058         if (rulecnt == 0) {
 2059                 /*
 2060                  * Free the old rule list.
 2061                  */
 2062                 while (!LIST_EMPTY(&p->p_racct->r_rule_links)) {
 2063                         link = LIST_FIRST(&p->p_racct->r_rule_links);
 2064                         LIST_REMOVE(link, rrl_next);
 2065                         rctl_rule_release(link->rrl_rule);
 2066                         uma_zfree(rctl_rule_link_zone, link);
 2067                 }
 2068 
 2069                 /*
 2070                  * Replace lists and we're done.
 2071                  *
 2072                  * XXX: Is there any way to switch list heads instead
 2073                  *      of iterating here?
 2074                  */
 2075                 while (!LIST_EMPTY(&newrules)) {
 2076                         newlink = LIST_FIRST(&newrules);
 2077                         LIST_REMOVE(newlink, rrl_next);
 2078                         LIST_INSERT_HEAD(&p->p_racct->r_rule_links,
 2079                             newlink, rrl_next);
 2080                 }
 2081 
 2082                 RACCT_UNLOCK();
 2083 
 2084                 return;
 2085         }
 2086 
 2087 goaround:
 2088         RACCT_UNLOCK();
 2089 
 2090         /*
 2091          * Rule list changed while we were not holding the rctl_lock.
 2092          * Free the new list and try again.
               *
               * Links that were already filled in hold a rule reference, which is
               * released here; links still carrying a NULL rrl_rule are just freed.
 2093          */
 2094         while (!LIST_EMPTY(&newrules)) {
 2095                 newlink = LIST_FIRST(&newrules);
 2096                 LIST_REMOVE(newlink, rrl_next);
 2097                 if (newlink->rrl_rule != NULL)
 2098                         rctl_rule_release(newlink->rrl_rule);
 2099                 uma_zfree(rctl_rule_link_zone, newlink);
 2100         }
 2101 
 2102         goto again;
 2103 }
 2104 
 2105 /*
 2106  * Assign RCTL rules to the newly created process.
 2107  */
 2108 int
 2109 rctl_proc_fork(struct proc *parent, struct proc *child)
 2110 {
 2111         struct rctl_rule *rule;
 2112         struct rctl_rule_link *link;
 2113         int error;
 2114 
 2115         ASSERT_RACCT_ENABLED();
 2116         RACCT_LOCK_ASSERT();
 2117         KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent));
 2118 
 2119         LIST_INIT(&child->p_racct->r_rule_links);
 2120 
 2121         /*
 2122          * Go through limits applicable to the parent and assign them
 2123          * to the child.  Rules with 'process' subject have to be duplicated
 2124          * in order to make their rr_subject point to the new process.
 2125          */
 2126         LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) {
 2127                 if (link->rrl_rule->rr_subject_type ==
 2128                     RCTL_SUBJECT_TYPE_PROCESS) {
 2129                         rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT);
 2130                         if (rule == NULL)
 2131                                 goto fail;
 2132                         KASSERT(rule->rr_subject.rs_proc == parent,
 2133                             ("rule->rr_subject.rs_proc != parent"));
 2134                         rule->rr_subject.rs_proc = child;
 2135                         error = rctl_racct_add_rule_locked(child->p_racct,
 2136                             rule);
 2137                         rctl_rule_release(rule);
 2138                         if (error != 0)
 2139                                 goto fail;
 2140                 } else {
 2141                         error = rctl_racct_add_rule_locked(child->p_racct,
 2142                             link->rrl_rule);
 2143                         if (error != 0)
 2144                                 goto fail;
 2145                 }
 2146         }
 2147 
 2148         return (0);
 2149 
 2150 fail:
 2151         while (!LIST_EMPTY(&child->p_racct->r_rule_links)) {
 2152                 link = LIST_FIRST(&child->p_racct->r_rule_links);
 2153                 LIST_REMOVE(link, rrl_next);
 2154                 rctl_rule_release(link->rrl_rule);
 2155                 uma_zfree(rctl_rule_link_zone, link);
 2156         }
 2157 
 2158         return (EAGAIN);
 2159 }
 2160 
 2161 /*
 2162  * Release rules attached to the racct.
 2163  */
 2164 void
 2165 rctl_racct_release(struct racct *racct)
 2166 {
 2167         struct rctl_rule_link *link;
 2168 
 2169         ASSERT_RACCT_ENABLED();
 2170         RACCT_LOCK_ASSERT();
 2171 
 2172         while (!LIST_EMPTY(&racct->r_rule_links)) {
 2173                 link = LIST_FIRST(&racct->r_rule_links);
 2174                 LIST_REMOVE(link, rrl_next);
 2175                 rctl_rule_release(link->rrl_rule);
 2176                 uma_zfree(rctl_rule_link_zone, link);
 2177         }
 2178 }
 2179 
 2180 static void
 2181 rctl_init(void)
 2182 {
 2183 
 2184         if (!racct_enable)
 2185                 return;
 2186 
 2187         rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule),
 2188             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 2189         rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
 2190             sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
 2191             UMA_ALIGN_PTR, 0);
 2192 
 2193         /*
 2194          * Set default values, making sure not to overwrite the ones
 2195          * fetched from tunables.  Most of those could be set at the
 2196          * declaration, except for the rctl_throttle_max - we cannot
 2197          * set it there due to hz not being compile time constant.
 2198          */
 2199         if (rctl_throttle_min < 1)
 2200                 rctl_throttle_min = 1;
 2201         if (rctl_throttle_max < rctl_throttle_min)
 2202                 rctl_throttle_max = 2 * hz;
 2203         if (rctl_throttle_pct < 0)
 2204                 rctl_throttle_pct = 100;
 2205         if (rctl_throttle_pct2 < 0)
 2206                 rctl_throttle_pct2 = 100;
 2207 }
 2208 
 2209 #else /* !RCTL */
 2210 
 /*
  * Stub for the !RCTL build: the syscall always fails with ENOSYS.
  */
 int
 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
 {

         (void)td;
         (void)uap;
         return (ENOSYS);
 }
 2217 
 /*
  * Stub for the !RCTL build: the syscall always fails with ENOSYS.
  */
 int
 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
 {

         (void)td;
         (void)uap;
         return (ENOSYS);
 }
 2224 
 /*
  * Stub for the !RCTL build: the syscall always fails with ENOSYS.
  */
 int
 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
 {

         (void)td;
         (void)uap;
         return (ENOSYS);
 }
 2231 
 /*
  * Stub for the !RCTL build: the syscall always fails with ENOSYS.
  */
 int
 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
 {

         (void)td;
         (void)uap;
         return (ENOSYS);
 }
 2238 
 /*
  * Stub for the !RCTL build: the syscall always fails with ENOSYS.
  */
 int
 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
 {

         (void)td;
         (void)uap;
         return (ENOSYS);
 }
 2245 
 2246 #endif /* !RCTL */

Cache object: 9276034ee1e8b684fbbc2dcf4e7375dc


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.