The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_rctl.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2010 The FreeBSD Foundation
    3  * All rights reserved.
    4  *
    5  * This software was developed by Edward Tomasz Napierala under sponsorship
    6  * from the FreeBSD Foundation.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   27  * SUCH DAMAGE.
   28  *
   29  * $FreeBSD: releng/11.2/sys/kern/kern_rctl.c 298819 2016-04-29 22:15:33Z pfg $
   30  */
   31 
   32 #include <sys/cdefs.h>
   33 __FBSDID("$FreeBSD: releng/11.2/sys/kern/kern_rctl.c 298819 2016-04-29 22:15:33Z pfg $");
   34 
   35 #include <sys/param.h>
   36 #include <sys/bus.h>
   37 #include <sys/malloc.h>
   38 #include <sys/queue.h>
   39 #include <sys/refcount.h>
   40 #include <sys/jail.h>
   41 #include <sys/kernel.h>
   42 #include <sys/limits.h>
   43 #include <sys/loginclass.h>
   44 #include <sys/priv.h>
   45 #include <sys/proc.h>
   46 #include <sys/racct.h>
   47 #include <sys/rctl.h>
   48 #include <sys/resourcevar.h>
   49 #include <sys/sx.h>
   50 #include <sys/sysent.h>
   51 #include <sys/sysproto.h>
   52 #include <sys/systm.h>
   53 #include <sys/types.h>
   54 #include <sys/eventhandler.h>
   55 #include <sys/lock.h>
   56 #include <sys/mutex.h>
   57 #include <sys/rwlock.h>
   58 #include <sys/sbuf.h>
   59 #include <sys/taskqueue.h>
   60 #include <sys/tree.h>
   61 #include <vm/uma.h>
   62 
   63 #ifdef RCTL
   64 #ifndef RACCT
   65 #error "The RCTL option requires the RACCT option"
   66 #endif
   67 
   68 FEATURE(rctl, "Resource Limits");
   69 
   70 #define HRF_DEFAULT             0
   71 #define HRF_DONT_INHERIT        1
   72 #define HRF_DONT_ACCUMULATE     2
   73 
   74 #define RCTL_MAX_INBUFSIZE      4 * 1024
   75 #define RCTL_MAX_OUTBUFSIZE     16 * 1024 * 1024
   76 #define RCTL_LOG_BUFSIZE        128
   77 
   78 #define RCTL_PCPU_SHIFT         (10 * 1000000)
   79 
   80 static unsigned int rctl_maxbufsize = RCTL_MAX_OUTBUFSIZE;
   81 static int rctl_log_rate_limit = 10;
   82 static int rctl_devctl_rate_limit = 10;
   83 
   84 /*
   85  * Values below are initialized in rctl_init().
   86  */
   87 static int rctl_throttle_min = -1;
   88 static int rctl_throttle_max = -1;
   89 static int rctl_throttle_pct = -1;
   90 static int rctl_throttle_pct2 = -1;
   91 
   92 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS);
   93 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS);
   94 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS);
   95 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS);
   96 
   97 SYSCTL_NODE(_kern_racct, OID_AUTO, rctl, CTLFLAG_RW, 0, "Resource Limits");
   98 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, maxbufsize, CTLFLAG_RWTUN,
   99     &rctl_maxbufsize, 0, "Maximum output buffer size");
  100 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, log_rate_limit, CTLFLAG_RW,
  101     &rctl_log_rate_limit, 0, "Maximum number of log messages per second");
  102 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, devctl_rate_limit, CTLFLAG_RWTUN,
  103     &rctl_devctl_rate_limit, 0, "Maximum number of devctl messages per second");
  104 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_min,
  105     CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_min_sysctl, "IU",
  106     "Shortest throttling duration, in hz");
  107 TUNABLE_INT("kern.racct.rctl.throttle_min", &rctl_throttle_min);
  108 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_max,
  109     CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_max_sysctl, "IU",
  110     "Longest throttling duration, in hz");
  111 TUNABLE_INT("kern.racct.rctl.throttle_max", &rctl_throttle_max);
  112 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct,
  113     CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_pct_sysctl, "IU",
  114     "Throttling penalty for process consumption, in percent");
  115 TUNABLE_INT("kern.racct.rctl.throttle_pct", &rctl_throttle_pct);
  116 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct2,
  117     CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_pct2_sysctl, "IU",
  118     "Throttling penalty for container consumption, in percent");
  119 TUNABLE_INT("kern.racct.rctl.throttle_pct2", &rctl_throttle_pct2);
  120 
  121 /*
  122  * 'rctl_rule_link' connects a rule with every racct it's related to.
  123  * For example, rule 'user:X:openfiles:deny=N/process' is linked
  124  * with uidinfo for user X, and to each process of that user.
  125  */
  126 struct rctl_rule_link {
  127         LIST_ENTRY(rctl_rule_link)      rrl_next;
  128         struct rctl_rule                *rrl_rule;
  129         int                             rrl_exceeded;
  130 };
  131 
  132 struct dict {
  133         const char      *d_name;
  134         int             d_value;
  135 };
  136 
  137 static struct dict subjectnames[] = {
  138         { "process", RCTL_SUBJECT_TYPE_PROCESS },
  139         { "user", RCTL_SUBJECT_TYPE_USER },
  140         { "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS },
  141         { "jail", RCTL_SUBJECT_TYPE_JAIL },
  142         { NULL, -1 }};
  143 
  144 static struct dict resourcenames[] = {
  145         { "cputime", RACCT_CPU },
  146         { "datasize", RACCT_DATA },
  147         { "stacksize", RACCT_STACK },
  148         { "coredumpsize", RACCT_CORE },
  149         { "memoryuse", RACCT_RSS },
  150         { "memorylocked", RACCT_MEMLOCK },
  151         { "maxproc", RACCT_NPROC },
  152         { "openfiles", RACCT_NOFILE },
  153         { "vmemoryuse", RACCT_VMEM },
  154         { "pseudoterminals", RACCT_NPTS },
  155         { "swapuse", RACCT_SWAP },
  156         { "nthr", RACCT_NTHR },
  157         { "msgqqueued", RACCT_MSGQQUEUED },
  158         { "msgqsize", RACCT_MSGQSIZE },
  159         { "nmsgq", RACCT_NMSGQ },
  160         { "nsem", RACCT_NSEM },
  161         { "nsemop", RACCT_NSEMOP },
  162         { "nshm", RACCT_NSHM },
  163         { "shmsize", RACCT_SHMSIZE },
  164         { "wallclock", RACCT_WALLCLOCK },
  165         { "pcpu", RACCT_PCTCPU },
  166         { "readbps", RACCT_READBPS },
  167         { "writebps", RACCT_WRITEBPS },
  168         { "readiops", RACCT_READIOPS },
  169         { "writeiops", RACCT_WRITEIOPS },
  170         { NULL, -1 }};
  171 
  172 static struct dict actionnames[] = {
  173         { "sighup", RCTL_ACTION_SIGHUP },
  174         { "sigint", RCTL_ACTION_SIGINT },
  175         { "sigquit", RCTL_ACTION_SIGQUIT },
  176         { "sigill", RCTL_ACTION_SIGILL },
  177         { "sigtrap", RCTL_ACTION_SIGTRAP },
  178         { "sigabrt", RCTL_ACTION_SIGABRT },
  179         { "sigemt", RCTL_ACTION_SIGEMT },
  180         { "sigfpe", RCTL_ACTION_SIGFPE },
  181         { "sigkill", RCTL_ACTION_SIGKILL },
  182         { "sigbus", RCTL_ACTION_SIGBUS },
  183         { "sigsegv", RCTL_ACTION_SIGSEGV },
  184         { "sigsys", RCTL_ACTION_SIGSYS },
  185         { "sigpipe", RCTL_ACTION_SIGPIPE },
  186         { "sigalrm", RCTL_ACTION_SIGALRM },
  187         { "sigterm", RCTL_ACTION_SIGTERM },
  188         { "sigurg", RCTL_ACTION_SIGURG },
  189         { "sigstop", RCTL_ACTION_SIGSTOP },
  190         { "sigtstp", RCTL_ACTION_SIGTSTP },
  191         { "sigchld", RCTL_ACTION_SIGCHLD },
  192         { "sigttin", RCTL_ACTION_SIGTTIN },
  193         { "sigttou", RCTL_ACTION_SIGTTOU },
  194         { "sigio", RCTL_ACTION_SIGIO },
  195         { "sigxcpu", RCTL_ACTION_SIGXCPU },
  196         { "sigxfsz", RCTL_ACTION_SIGXFSZ },
  197         { "sigvtalrm", RCTL_ACTION_SIGVTALRM },
  198         { "sigprof", RCTL_ACTION_SIGPROF },
  199         { "sigwinch", RCTL_ACTION_SIGWINCH },
  200         { "siginfo", RCTL_ACTION_SIGINFO },
  201         { "sigusr1", RCTL_ACTION_SIGUSR1 },
  202         { "sigusr2", RCTL_ACTION_SIGUSR2 },
  203         { "sigthr", RCTL_ACTION_SIGTHR },
  204         { "deny", RCTL_ACTION_DENY },
  205         { "log", RCTL_ACTION_LOG },
  206         { "devctl", RCTL_ACTION_DEVCTL },
  207         { "throttle", RCTL_ACTION_THROTTLE },
  208         { NULL, -1 }};
  209 
  210 static void rctl_init(void);
  211 SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);
  212 
  213 static uma_zone_t rctl_rule_zone;
  214 static uma_zone_t rctl_rule_link_zone;
  215 
  216 static int rctl_rule_fully_specified(const struct rctl_rule *rule);
  217 static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule);
  218 
  219 static MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits");
  220 
  221 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS)
  222 {
  223         int error, val = rctl_throttle_min;
  224 
  225         error = sysctl_handle_int(oidp, &val, 0, req);
  226         if (error || !req->newptr)
  227                 return (error);
  228         if (val < 1 || val > rctl_throttle_max)
  229                 return (EINVAL);
  230 
  231         RACCT_LOCK();
  232         rctl_throttle_min = val;
  233         RACCT_UNLOCK();
  234 
  235         return (0);
  236 }
  237 
  238 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS)
  239 {
  240         int error, val = rctl_throttle_max;
  241 
  242         error = sysctl_handle_int(oidp, &val, 0, req);
  243         if (error || !req->newptr)
  244                 return (error);
  245         if (val < rctl_throttle_min)
  246                 return (EINVAL);
  247 
  248         RACCT_LOCK();
  249         rctl_throttle_max = val;
  250         RACCT_UNLOCK();
  251 
  252         return (0);
  253 }
  254 
  255 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS)
  256 {
  257         int error, val = rctl_throttle_pct;
  258 
  259         error = sysctl_handle_int(oidp, &val, 0, req);
  260         if (error || !req->newptr)
  261                 return (error);
  262         if (val < 0)
  263                 return (EINVAL);
  264 
  265         RACCT_LOCK();
  266         rctl_throttle_pct = val;
  267         RACCT_UNLOCK();
  268 
  269         return (0);
  270 }
  271 
  272 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS)
  273 {
  274         int error, val = rctl_throttle_pct2;
  275 
  276         error = sysctl_handle_int(oidp, &val, 0, req);
  277         if (error || !req->newptr)
  278                 return (error);
  279         if (val < 0)
  280                 return (EINVAL);
  281 
  282         RACCT_LOCK();
  283         rctl_throttle_pct2 = val;
  284         RACCT_UNLOCK();
  285 
  286         return (0);
  287 }
  288 
  289 static const char *
  290 rctl_subject_type_name(int subject)
  291 {
  292         int i;
  293 
  294         for (i = 0; subjectnames[i].d_name != NULL; i++) {
  295                 if (subjectnames[i].d_value == subject)
  296                         return (subjectnames[i].d_name);
  297         }
  298 
  299         panic("rctl_subject_type_name: unknown subject type %d", subject);
  300 }
  301 
  302 static const char *
  303 rctl_action_name(int action)
  304 {
  305         int i;
  306 
  307         for (i = 0; actionnames[i].d_name != NULL; i++) {
  308                 if (actionnames[i].d_value == action)
  309                         return (actionnames[i].d_name);
  310         }
  311 
  312         panic("rctl_action_name: unknown action %d", action);
  313 }
  314 
  315 const char *
  316 rctl_resource_name(int resource)
  317 {
  318         int i;
  319 
  320         for (i = 0; resourcenames[i].d_name != NULL; i++) {
  321                 if (resourcenames[i].d_value == resource)
  322                         return (resourcenames[i].d_name);
  323         }
  324 
  325         panic("rctl_resource_name: unknown resource %d", resource);
  326 }
  327 
  328 static struct racct *
  329 rctl_proc_rule_to_racct(const struct proc *p, const struct rctl_rule *rule)
  330 {
  331         struct ucred *cred = p->p_ucred;
  332 
  333         ASSERT_RACCT_ENABLED();
  334         RACCT_LOCK_ASSERT();
  335 
  336         switch (rule->rr_per) {
  337         case RCTL_SUBJECT_TYPE_PROCESS:
  338                 return (p->p_racct);
  339         case RCTL_SUBJECT_TYPE_USER:
  340                 return (cred->cr_ruidinfo->ui_racct);
  341         case RCTL_SUBJECT_TYPE_LOGINCLASS:
  342                 return (cred->cr_loginclass->lc_racct);
  343         case RCTL_SUBJECT_TYPE_JAIL:
  344                 return (cred->cr_prison->pr_prison_racct->prr_racct);
  345         default:
  346                 panic("%s: unknown per %d", __func__, rule->rr_per);
  347         }
  348 }
  349 
  350 /*
  351  * Return the amount of resource that can be allocated by 'p' before
  352  * hitting 'rule'.
  353  */
  354 static int64_t
  355 rctl_available_resource(const struct proc *p, const struct rctl_rule *rule)
  356 {
  357         const struct racct *racct;
  358         int64_t available;
  359 
  360         ASSERT_RACCT_ENABLED();
  361         RACCT_LOCK_ASSERT();
  362 
  363         racct = rctl_proc_rule_to_racct(p, rule);
  364         available = rule->rr_amount - racct->r_resources[rule->rr_resource];
  365 
  366         return (available);
  367 }
  368 
  369 /*
  370  * Called every second for proc, uidinfo, loginclass, and jail containers.
  371  * If the limit isn't exceeded, it decreases the usage amount to zero.
  372  * Otherwise, it decreases it by the value of the limit.  This way
  373  * resource consumption exceeding the limit "carries over" to the next
  374  * period.
  375  */
  376 void
  377 rctl_throttle_decay(struct racct *racct, int resource)
  378 {
  379         struct rctl_rule *rule;
  380         struct rctl_rule_link *link;
  381         int64_t minavailable;
  382 
  383         ASSERT_RACCT_ENABLED();
  384         RACCT_LOCK_ASSERT();
  385 
  386         minavailable = INT64_MAX;
  387 
  388         LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
  389                 rule = link->rrl_rule;
  390 
  391                 if (rule->rr_resource != resource)
  392                         continue;
  393                 if (rule->rr_action != RCTL_ACTION_THROTTLE)
  394                         continue;
  395 
  396                 if (rule->rr_amount < minavailable)
  397                         minavailable = rule->rr_amount;
  398         }
  399 
  400         if (racct->r_resources[resource] < minavailable) {
  401                 racct->r_resources[resource] = 0;
  402         } else {
  403                 /*
  404                  * Cap utilization counter at ten times the limit.  Otherwise,
  405                  * if we changed the rule lowering the allowed amount, it could
  406                  * take unreasonably long time for the accumulated resource
  407                  * usage to drop.
  408                  */
  409                 if (racct->r_resources[resource] > minavailable * 10)
  410                         racct->r_resources[resource] = minavailable * 10;
  411 
  412                 racct->r_resources[resource] -= minavailable;
  413         }
  414 }
  415 
  416 /*
  417  * Special version of rctl_get_available() for the %CPU resource.
  418  * We slightly cheat here and return less than we normally would.
  419  */
  420 int64_t
  421 rctl_pcpu_available(const struct proc *p) {
  422         struct rctl_rule *rule;
  423         struct rctl_rule_link *link;
  424         int64_t available, minavailable, limit;
  425 
  426         ASSERT_RACCT_ENABLED();
  427         RACCT_LOCK_ASSERT();
  428 
  429         minavailable = INT64_MAX;
  430         limit = 0;
  431 
  432         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
  433                 rule = link->rrl_rule;
  434                 if (rule->rr_resource != RACCT_PCTCPU)
  435                         continue;
  436                 if (rule->rr_action != RCTL_ACTION_DENY)
  437                         continue;
  438                 available = rctl_available_resource(p, rule);
  439                 if (available < minavailable) {
  440                         minavailable = available;
  441                         limit = rule->rr_amount;
  442                 }
  443         }
  444 
  445         /*
  446          * Return slightly less than actual value of the available
  447          * %cpu resource.  This makes %cpu throttling more aggressive
  448          * and lets us act sooner than the limits are already exceeded.
  449          */
  450         if (limit != 0) {
  451                 if (limit > 2 * RCTL_PCPU_SHIFT)
  452                         minavailable -= RCTL_PCPU_SHIFT;
  453                 else
  454                         minavailable -= (limit / 2);
  455         }
  456 
  457         return (minavailable);
  458 }
  459 
  460 static uint64_t
  461 xadd(uint64_t a, uint64_t b)
  462 {
  463         uint64_t c;
  464 
  465         c = a + b;
  466 
  467         /*
  468          * Detect overflow.
  469          */
  470         if (c < a || c < b)
  471                 return (UINT64_MAX);
  472 
  473         return (c);
  474 }
  475 
  476 static uint64_t
  477 xmul(uint64_t a, uint64_t b)
  478 {
  479 
  480         if (b != 0 && a > UINT64_MAX / b)
  481                 return (UINT64_MAX);
  482 
  483         return (a * b);
  484 }
  485 
  486 /*
  487  * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition
  488  * to what it keeps allocated now.  Returns non-zero if the allocation should
  489  * be denied, 0 otherwise.
  490  */
  491 int
  492 rctl_enforce(struct proc *p, int resource, uint64_t amount)
  493 {
  494         static struct timeval log_lasttime, devctl_lasttime;
  495         static int log_curtime = 0, devctl_curtime = 0;
  496         struct rctl_rule *rule;
  497         struct rctl_rule_link *link;
  498         struct sbuf sb;
  499         char *buf;
  500         int64_t available;
  501         uint64_t sleep_ms, sleep_ratio;
  502         int should_deny = 0;
  503 
  504         ASSERT_RACCT_ENABLED();
  505         RACCT_LOCK_ASSERT();
  506 
  507         /*
  508          * There may be more than one matching rule; go through all of them.
  509          * Denial should be done last, after logging and sending signals.
  510          */
  511         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
  512                 rule = link->rrl_rule;
  513                 if (rule->rr_resource != resource)
  514                         continue;
  515 
  516                 available = rctl_available_resource(p, rule);
  517                 if (available >= (int64_t)amount) {
  518                         link->rrl_exceeded = 0;
  519                         continue;
  520                 }
  521 
  522                 switch (rule->rr_action) {
  523                 case RCTL_ACTION_DENY:
  524                         should_deny = 1;
  525                         continue;
  526                 case RCTL_ACTION_LOG:
  527                         /*
  528                          * If rrl_exceeded != 0, it means we've already
  529                          * logged a warning for this process.
  530                          */
  531                         if (link->rrl_exceeded != 0)
  532                                 continue;
  533 
  534                         /*
  535                          * If the process state is not fully initialized yet,
  536                          * we can't access most of the required fields, e.g.
  537                          * p->p_comm.  This happens when called from fork1().
  538                          * Ignore this rule for now; it will be processed just
  539                          * after fork, when called from racct_proc_fork_done().
  540                          */
  541                         if (p->p_state != PRS_NORMAL)
  542                                 continue;
  543 
  544                         if (!ppsratecheck(&log_lasttime, &log_curtime,
  545                             rctl_log_rate_limit))
  546                                 continue;
  547 
  548                         buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
  549                         if (buf == NULL) {
  550                                 printf("rctl_enforce: out of memory\n");
  551                                 continue;
  552                         }
  553                         sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
  554                         rctl_rule_to_sbuf(&sb, rule);
  555                         sbuf_finish(&sb);
  556                         printf("rctl: rule \"%s\" matched by pid %d "
  557                             "(%s), uid %d, jail %s\n", sbuf_data(&sb),
  558                             p->p_pid, p->p_comm, p->p_ucred->cr_uid,
  559                             p->p_ucred->cr_prison->pr_prison_racct->prr_name);
  560                         sbuf_delete(&sb);
  561                         free(buf, M_RCTL);
  562                         link->rrl_exceeded = 1;
  563                         continue;
  564                 case RCTL_ACTION_DEVCTL:
  565                         if (link->rrl_exceeded != 0)
  566                                 continue;
  567 
  568                         if (p->p_state != PRS_NORMAL)
  569                                 continue;
  570 
  571                         if (!ppsratecheck(&devctl_lasttime, &devctl_curtime,
  572                             rctl_devctl_rate_limit))
  573                                 continue;
  574 
  575                         buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
  576                         if (buf == NULL) {
  577                                 printf("rctl_enforce: out of memory\n");
  578                                 continue;
  579                         }
  580                         sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
  581                         sbuf_printf(&sb, "rule=");
  582                         rctl_rule_to_sbuf(&sb, rule);
  583                         sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
  584                             p->p_pid, p->p_ucred->cr_ruid,
  585                             p->p_ucred->cr_prison->pr_prison_racct->prr_name);
  586                         sbuf_finish(&sb);
  587                         devctl_notify_f("RCTL", "rule", "matched",
  588                             sbuf_data(&sb), M_NOWAIT);
  589                         sbuf_delete(&sb);
  590                         free(buf, M_RCTL);
  591                         link->rrl_exceeded = 1;
  592                         continue;
  593                 case RCTL_ACTION_THROTTLE:
  594                         if (p->p_state != PRS_NORMAL)
  595                                 continue;
  596 
  597                         /*
  598                          * Make the process sleep for a fraction of second
  599                          * proportional to the ratio of process' resource
  600                          * utilization compared to the limit.  The point is
  601                          * to penalize resource hogs: processes that consume
  602                          * more of the available resources sleep for longer.
  603                          *
  604                          * We're trying to defer division until the very end,
  605                          * to minimize the rounding effects.  The following
  606                          * calculation could have been written in a clearer
  607                          * way like this:
  608                          *
  609                          * sleep_ms = hz * p->p_racct->r_resources[resource] /
  610                          *     rule->rr_amount;
  611                          * sleep_ms *= rctl_throttle_pct / 100;
  612                          * if (sleep_ms < rctl_throttle_min)
  613                          *         sleep_ms = rctl_throttle_min;
  614                          *
  615                          */
  616                         sleep_ms = xmul(hz, p->p_racct->r_resources[resource]);
  617                         sleep_ms = xmul(sleep_ms,  rctl_throttle_pct) / 100;
  618                         if (sleep_ms < rctl_throttle_min * rule->rr_amount)
  619                                 sleep_ms = rctl_throttle_min * rule->rr_amount;
  620 
  621                         /*
  622                          * Multiply that by the ratio of the resource
  623                          * consumption for the container compared to the limit,
  624                          * squared.  In other words, a process in a container
  625                          * that is two times over the limit will be throttled
  626                          * four times as much for hitting the same rule.  The
  627                          * point is to penalize processes more if the container
  628                          * itself (eg certain UID or jail) is above the limit.
  629                          */
  630                         if (available < 0)
  631                                 sleep_ratio = -available / rule->rr_amount;
  632                         else
  633                                 sleep_ratio = 0;
  634                         sleep_ratio = xmul(sleep_ratio, sleep_ratio);
  635                         sleep_ratio = xmul(sleep_ratio, rctl_throttle_pct2) / 100;
  636                         sleep_ms = xadd(sleep_ms, xmul(sleep_ms, sleep_ratio));
  637 
  638                         /*
  639                          * Finally the division.
  640                          */
  641                         sleep_ms /= rule->rr_amount;
  642 
  643                         if (sleep_ms > rctl_throttle_max)
  644                                 sleep_ms = rctl_throttle_max;
  645 #if 0
  646                         printf("%s: pid %d (%s), %jd of %jd, will sleep for %ju ms (ratio %ju, available %jd)\n",
  647                            __func__, p->p_pid, p->p_comm,
  648                            p->p_racct->r_resources[resource],
  649                            rule->rr_amount, (uintmax_t)sleep_ms,
  650                            (uintmax_t)sleep_ratio, (intmax_t)available);
  651 #endif
  652 
  653                         KASSERT(sleep_ms >= rctl_throttle_min, ("%s: %ju < %d\n",
  654                             __func__, (uintmax_t)sleep_ms, rctl_throttle_min));
  655                         racct_proc_throttle(p, sleep_ms);
  656                         continue;
  657                 default:
  658                         if (link->rrl_exceeded != 0)
  659                                 continue;
  660 
  661                         if (p->p_state != PRS_NORMAL)
  662                                 continue;
  663 
  664                         KASSERT(rule->rr_action > 0 &&
  665                             rule->rr_action <= RCTL_ACTION_SIGNAL_MAX,
  666                             ("rctl_enforce: unknown action %d",
  667                              rule->rr_action));
  668 
  669                         /*
  670                          * We're using the fact that RCTL_ACTION_SIG* values
  671                          * are equal to their counterparts from sys/signal.h.
  672                          */
  673                         kern_psignal(p, rule->rr_action);
  674                         link->rrl_exceeded = 1;
  675                         continue;
  676                 }
  677         }
  678 
  679         if (should_deny) {
  680                 /*
  681                  * Return fake error code; the caller should change it
  682                  * into one proper for the situation - EFSIZ, ENOMEM etc.
  683                  */
  684                 return (EDOOFUS);
  685         }
  686 
  687         return (0);
  688 }
  689 
  690 uint64_t
  691 rctl_get_limit(struct proc *p, int resource)
  692 {
  693         struct rctl_rule *rule;
  694         struct rctl_rule_link *link;
  695         uint64_t amount = UINT64_MAX;
  696 
  697         ASSERT_RACCT_ENABLED();
  698         RACCT_LOCK_ASSERT();
  699 
  700         /*
  701          * There may be more than one matching rule; go through all of them.
  702          * Denial should be done last, after logging and sending signals.
  703          */
  704         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
  705                 rule = link->rrl_rule;
  706                 if (rule->rr_resource != resource)
  707                         continue;
  708                 if (rule->rr_action != RCTL_ACTION_DENY)
  709                         continue;
  710                 if (rule->rr_amount < amount)
  711                         amount = rule->rr_amount;
  712         }
  713 
  714         return (amount);
  715 }
  716 
  717 uint64_t
  718 rctl_get_available(struct proc *p, int resource)
  719 {
  720         struct rctl_rule *rule;
  721         struct rctl_rule_link *link;
  722         int64_t available, minavailable, allocated;
  723 
  724         minavailable = INT64_MAX;
  725 
  726         ASSERT_RACCT_ENABLED();
  727         RACCT_LOCK_ASSERT();
  728 
  729         /*
  730          * There may be more than one matching rule; go through all of them.
  731          * Denial should be done last, after logging and sending signals.
  732          */
  733         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
  734                 rule = link->rrl_rule;
  735                 if (rule->rr_resource != resource)
  736                         continue;
  737                 if (rule->rr_action != RCTL_ACTION_DENY)
  738                         continue;
  739                 available = rctl_available_resource(p, rule);
  740                 if (available < minavailable)
  741                         minavailable = available;
  742         }
  743 
  744         /*
  745          * XXX: Think about this _hard_.
  746          */
  747         allocated = p->p_racct->r_resources[resource];
  748         if (minavailable < INT64_MAX - allocated)
  749                 minavailable += allocated;
  750         if (minavailable < 0)
  751                 minavailable = 0;
  752 
  753         return (minavailable);
  754 }
  755 
  756 static int
  757 rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
  758 {
  759 
  760         ASSERT_RACCT_ENABLED();
  761 
  762         if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
  763                 if (rule->rr_subject_type != filter->rr_subject_type)
  764                         return (0);
  765 
  766                 switch (filter->rr_subject_type) {
  767                 case RCTL_SUBJECT_TYPE_PROCESS:
  768                         if (filter->rr_subject.rs_proc != NULL &&
  769                             rule->rr_subject.rs_proc !=
  770                             filter->rr_subject.rs_proc)
  771                                 return (0);
  772                         break;
  773                 case RCTL_SUBJECT_TYPE_USER:
  774                         if (filter->rr_subject.rs_uip != NULL &&
  775                             rule->rr_subject.rs_uip !=
  776                             filter->rr_subject.rs_uip)
  777                                 return (0);
  778                         break;
  779                 case RCTL_SUBJECT_TYPE_LOGINCLASS:
  780                         if (filter->rr_subject.rs_loginclass != NULL &&
  781                             rule->rr_subject.rs_loginclass !=
  782                             filter->rr_subject.rs_loginclass)
  783                                 return (0);
  784                         break;
  785                 case RCTL_SUBJECT_TYPE_JAIL:
  786                         if (filter->rr_subject.rs_prison_racct != NULL &&
  787                             rule->rr_subject.rs_prison_racct !=
  788                             filter->rr_subject.rs_prison_racct)
  789                                 return (0);
  790                         break;
  791                 default:
  792                         panic("rctl_rule_matches: unknown subject type %d",
  793                             filter->rr_subject_type);
  794                 }
  795         }
  796 
  797         if (filter->rr_resource != RACCT_UNDEFINED) {
  798                 if (rule->rr_resource != filter->rr_resource)
  799                         return (0);
  800         }
  801 
  802         if (filter->rr_action != RCTL_ACTION_UNDEFINED) {
  803                 if (rule->rr_action != filter->rr_action)
  804                         return (0);
  805         }
  806 
  807         if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) {
  808                 if (rule->rr_amount != filter->rr_amount)
  809                         return (0);
  810         }
  811 
  812         if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) {
  813                 if (rule->rr_per != filter->rr_per)
  814                         return (0);
  815         }
  816 
  817         return (1);
  818 }
  819 
  820 static int
  821 str2value(const char *str, int *value, struct dict *table)
  822 {
  823         int i;
  824 
  825         if (value == NULL)
  826                 return (EINVAL);
  827 
  828         for (i = 0; table[i].d_name != NULL; i++) {
  829                 if (strcasecmp(table[i].d_name, str) == 0) {
  830                         *value =  table[i].d_value;
  831                         return (0);
  832                 }
  833         }
  834 
  835         return (EINVAL);
  836 }
  837 
  838 static int
  839 str2id(const char *str, id_t *value)
  840 {
  841         char *end;
  842 
  843         if (str == NULL)
  844                 return (EINVAL);
  845 
  846         *value = strtoul(str, &end, 10);
  847         if ((size_t)(end - str) != strlen(str))
  848                 return (EINVAL);
  849 
  850         return (0);
  851 }
  852 
  853 static int
  854 str2int64(const char *str, int64_t *value)
  855 {
  856         char *end;
  857 
  858         if (str == NULL)
  859                 return (EINVAL);
  860 
  861         *value = strtoul(str, &end, 10);
  862         if ((size_t)(end - str) != strlen(str))
  863                 return (EINVAL);
  864 
  865         if (*value < 0)
  866                 return (ERANGE);
  867 
  868         return (0);
  869 }
  870 
  871 /*
  872  * Connect the rule to the racct, increasing refcount for the rule.
  873  */
  874 static void
  875 rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule)
  876 {
  877         struct rctl_rule_link *link;
  878 
  879         ASSERT_RACCT_ENABLED();
  880         KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
  881 
  882         rctl_rule_acquire(rule);
  883         link = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
  884         link->rrl_rule = rule;
  885         link->rrl_exceeded = 0;
  886 
  887         RACCT_LOCK();
  888         LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
  889         RACCT_UNLOCK();
  890 }
  891 
  892 static int
  893 rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule)
  894 {
  895         struct rctl_rule_link *link;
  896 
  897         ASSERT_RACCT_ENABLED();
  898         KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
  899         RACCT_LOCK_ASSERT();
  900 
  901         link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT);
  902         if (link == NULL)
  903                 return (ENOMEM);
  904         rctl_rule_acquire(rule);
  905         link->rrl_rule = rule;
  906         link->rrl_exceeded = 0;
  907 
  908         LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
  909 
  910         return (0);
  911 }
  912 
  913 /*
  914  * Remove limits for a rules matching the filter and release
  915  * the refcounts for the rules, possibly freeing them.  Returns
  916  * the number of limit structures removed.
  917  */
  918 static int
  919 rctl_racct_remove_rules(struct racct *racct,
  920     const struct rctl_rule *filter)
  921 {
  922         struct rctl_rule_link *link, *linktmp;
  923         int removed = 0;
  924 
  925         ASSERT_RACCT_ENABLED();
  926         RACCT_LOCK_ASSERT();
  927 
  928         LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
  929                 if (!rctl_rule_matches(link->rrl_rule, filter))
  930                         continue;
  931 
  932                 LIST_REMOVE(link, rrl_next);
  933                 rctl_rule_release(link->rrl_rule);
  934                 uma_zfree(rctl_rule_link_zone, link);
  935                 removed++;
  936         }
  937         return (removed);
  938 }
  939 
  940 static void
  941 rctl_rule_acquire_subject(struct rctl_rule *rule)
  942 {
  943 
  944         ASSERT_RACCT_ENABLED();
  945 
  946         switch (rule->rr_subject_type) {
  947         case RCTL_SUBJECT_TYPE_UNDEFINED:
  948         case RCTL_SUBJECT_TYPE_PROCESS:
  949                 break;
  950         case RCTL_SUBJECT_TYPE_JAIL:
  951                 if (rule->rr_subject.rs_prison_racct != NULL)
  952                         prison_racct_hold(rule->rr_subject.rs_prison_racct);
  953                 break;
  954         case RCTL_SUBJECT_TYPE_USER:
  955                 if (rule->rr_subject.rs_uip != NULL)
  956                         uihold(rule->rr_subject.rs_uip);
  957                 break;
  958         case RCTL_SUBJECT_TYPE_LOGINCLASS:
  959                 if (rule->rr_subject.rs_loginclass != NULL)
  960                         loginclass_hold(rule->rr_subject.rs_loginclass);
  961                 break;
  962         default:
  963                 panic("rctl_rule_acquire_subject: unknown subject type %d",
  964                     rule->rr_subject_type);
  965         }
  966 }
  967 
  968 static void
  969 rctl_rule_release_subject(struct rctl_rule *rule)
  970 {
  971 
  972         ASSERT_RACCT_ENABLED();
  973 
  974         switch (rule->rr_subject_type) {
  975         case RCTL_SUBJECT_TYPE_UNDEFINED:
  976         case RCTL_SUBJECT_TYPE_PROCESS:
  977                 break;
  978         case RCTL_SUBJECT_TYPE_JAIL:
  979                 if (rule->rr_subject.rs_prison_racct != NULL)
  980                         prison_racct_free(rule->rr_subject.rs_prison_racct);
  981                 break;
  982         case RCTL_SUBJECT_TYPE_USER:
  983                 if (rule->rr_subject.rs_uip != NULL)
  984                         uifree(rule->rr_subject.rs_uip);
  985                 break;
  986         case RCTL_SUBJECT_TYPE_LOGINCLASS:
  987                 if (rule->rr_subject.rs_loginclass != NULL)
  988                         loginclass_free(rule->rr_subject.rs_loginclass);
  989                 break;
  990         default:
  991                 panic("rctl_rule_release_subject: unknown subject type %d",
  992                     rule->rr_subject_type);
  993         }
  994 }
  995 
  996 struct rctl_rule *
  997 rctl_rule_alloc(int flags)
  998 {
  999         struct rctl_rule *rule;
 1000 
 1001         ASSERT_RACCT_ENABLED();
 1002 
 1003         rule = uma_zalloc(rctl_rule_zone, flags);
 1004         if (rule == NULL)
 1005                 return (NULL);
 1006         rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
 1007         rule->rr_subject.rs_proc = NULL;
 1008         rule->rr_subject.rs_uip = NULL;
 1009         rule->rr_subject.rs_loginclass = NULL;
 1010         rule->rr_subject.rs_prison_racct = NULL;
 1011         rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
 1012         rule->rr_resource = RACCT_UNDEFINED;
 1013         rule->rr_action = RCTL_ACTION_UNDEFINED;
 1014         rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
 1015         refcount_init(&rule->rr_refcount, 1);
 1016 
 1017         return (rule);
 1018 }
 1019 
 1020 struct rctl_rule *
 1021 rctl_rule_duplicate(const struct rctl_rule *rule, int flags)
 1022 {
 1023         struct rctl_rule *copy;
 1024 
 1025         ASSERT_RACCT_ENABLED();
 1026 
 1027         copy = uma_zalloc(rctl_rule_zone, flags);
 1028         if (copy == NULL)
 1029                 return (NULL);
 1030         copy->rr_subject_type = rule->rr_subject_type;
 1031         copy->rr_subject.rs_proc = rule->rr_subject.rs_proc;
 1032         copy->rr_subject.rs_uip = rule->rr_subject.rs_uip;
 1033         copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass;
 1034         copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct;
 1035         copy->rr_per = rule->rr_per;
 1036         copy->rr_resource = rule->rr_resource;
 1037         copy->rr_action = rule->rr_action;
 1038         copy->rr_amount = rule->rr_amount;
 1039         refcount_init(&copy->rr_refcount, 1);
 1040         rctl_rule_acquire_subject(copy);
 1041 
 1042         return (copy);
 1043 }
 1044 
 1045 void
 1046 rctl_rule_acquire(struct rctl_rule *rule)
 1047 {
 1048 
 1049         ASSERT_RACCT_ENABLED();
 1050         KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
 1051 
 1052         refcount_acquire(&rule->rr_refcount);
 1053 }
 1054 
 1055 static void
 1056 rctl_rule_free(void *context, int pending)
 1057 {
 1058         struct rctl_rule *rule;
 1059         
 1060         rule = (struct rctl_rule *)context;
 1061 
 1062         ASSERT_RACCT_ENABLED();
 1063         KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
 1064         
 1065         /*
 1066          * We don't need locking here; rule is guaranteed to be inaccessible.
 1067          */
 1068         
 1069         rctl_rule_release_subject(rule);
 1070         uma_zfree(rctl_rule_zone, rule);
 1071 }
 1072 
 1073 void
 1074 rctl_rule_release(struct rctl_rule *rule)
 1075 {
 1076 
 1077         ASSERT_RACCT_ENABLED();
 1078         KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
 1079 
 1080         if (refcount_release(&rule->rr_refcount)) {
 1081                 /*
 1082                  * rctl_rule_release() is often called when iterating
 1083                  * over all the uidinfo structures in the system,
 1084                  * holding uihashtbl_lock.  Since rctl_rule_free()
 1085                  * might end up calling uifree(), this would lead
 1086                  * to lock recursion.  Use taskqueue to avoid this.
 1087                  */
 1088                 TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
 1089                 taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
 1090         }
 1091 }
 1092 
 1093 static int
 1094 rctl_rule_fully_specified(const struct rctl_rule *rule)
 1095 {
 1096 
 1097         ASSERT_RACCT_ENABLED();
 1098 
 1099         switch (rule->rr_subject_type) {
 1100         case RCTL_SUBJECT_TYPE_UNDEFINED:
 1101                 return (0);
 1102         case RCTL_SUBJECT_TYPE_PROCESS:
 1103                 if (rule->rr_subject.rs_proc == NULL)
 1104                         return (0);
 1105                 break;
 1106         case RCTL_SUBJECT_TYPE_USER:
 1107                 if (rule->rr_subject.rs_uip == NULL)
 1108                         return (0);
 1109                 break;
 1110         case RCTL_SUBJECT_TYPE_LOGINCLASS:
 1111                 if (rule->rr_subject.rs_loginclass == NULL)
 1112                         return (0);
 1113                 break;
 1114         case RCTL_SUBJECT_TYPE_JAIL:
 1115                 if (rule->rr_subject.rs_prison_racct == NULL)
 1116                         return (0);
 1117                 break;
 1118         default:
 1119                 panic("rctl_rule_fully_specified: unknown subject type %d",
 1120                     rule->rr_subject_type);
 1121         }
 1122         if (rule->rr_resource == RACCT_UNDEFINED)
 1123                 return (0);
 1124         if (rule->rr_action == RCTL_ACTION_UNDEFINED)
 1125                 return (0);
 1126         if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
 1127                 return (0);
 1128         if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
 1129                 return (0);
 1130 
 1131         return (1);
 1132 }
 1133 
 1134 static int
 1135 rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
 1136 {
 1137         struct rctl_rule *rule;
 1138         char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
 1139              *amountstr, *perstr;
 1140         id_t id;
 1141         int error = 0;
 1142 
 1143         ASSERT_RACCT_ENABLED();
 1144 
 1145         rule = rctl_rule_alloc(M_WAITOK);
 1146 
 1147         subjectstr = strsep(&rulestr, ":");
 1148         subject_idstr = strsep(&rulestr, ":");
 1149         resourcestr = strsep(&rulestr, ":");
 1150         actionstr = strsep(&rulestr, "=/");
 1151         amountstr = strsep(&rulestr, "/");
 1152         perstr = rulestr;
 1153 
 1154         if (subjectstr == NULL || subjectstr[0] == '\0')
 1155                 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
 1156         else {
 1157                 error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
 1158                 if (error != 0)
 1159                         goto out;
 1160         }
 1161 
 1162         if (subject_idstr == NULL || subject_idstr[0] == '\0') {
 1163                 rule->rr_subject.rs_proc = NULL;
 1164                 rule->rr_subject.rs_uip = NULL;
 1165                 rule->rr_subject.rs_loginclass = NULL;
 1166                 rule->rr_subject.rs_prison_racct = NULL;
 1167         } else {
 1168                 switch (rule->rr_subject_type) {
 1169                 case RCTL_SUBJECT_TYPE_UNDEFINED:
 1170                         error = EINVAL;
 1171                         goto out;
 1172                 case RCTL_SUBJECT_TYPE_PROCESS:
 1173                         error = str2id(subject_idstr, &id);
 1174                         if (error != 0)
 1175                                 goto out;
 1176                         sx_assert(&allproc_lock, SA_LOCKED);
 1177                         rule->rr_subject.rs_proc = pfind(id);
 1178                         if (rule->rr_subject.rs_proc == NULL) {
 1179                                 error = ESRCH;
 1180                                 goto out;
 1181                         }
 1182                         PROC_UNLOCK(rule->rr_subject.rs_proc);
 1183                         break;
 1184                 case RCTL_SUBJECT_TYPE_USER:
 1185                         error = str2id(subject_idstr, &id);
 1186                         if (error != 0)
 1187                                 goto out;
 1188                         rule->rr_subject.rs_uip = uifind(id);
 1189                         break;
 1190                 case RCTL_SUBJECT_TYPE_LOGINCLASS:
 1191                         rule->rr_subject.rs_loginclass =
 1192                             loginclass_find(subject_idstr);
 1193                         if (rule->rr_subject.rs_loginclass == NULL) {
 1194                                 error = ENAMETOOLONG;
 1195                                 goto out;
 1196                         }
 1197                         break;
 1198                 case RCTL_SUBJECT_TYPE_JAIL:
 1199                         rule->rr_subject.rs_prison_racct =
 1200                             prison_racct_find(subject_idstr);
 1201                         if (rule->rr_subject.rs_prison_racct == NULL) {
 1202                                 error = ENAMETOOLONG;
 1203                                 goto out;
 1204                         }
 1205                         break;
 1206                default:
 1207                        panic("rctl_string_to_rule: unknown subject type %d",
 1208                            rule->rr_subject_type);
 1209                }
 1210         }
 1211 
 1212         if (resourcestr == NULL || resourcestr[0] == '\0')
 1213                 rule->rr_resource = RACCT_UNDEFINED;
 1214         else {
 1215                 error = str2value(resourcestr, &rule->rr_resource,
 1216                     resourcenames);
 1217                 if (error != 0)
 1218                         goto out;
 1219         }
 1220 
 1221         if (actionstr == NULL || actionstr[0] == '\0')
 1222                 rule->rr_action = RCTL_ACTION_UNDEFINED;
 1223         else {
 1224                 error = str2value(actionstr, &rule->rr_action, actionnames);
 1225                 if (error != 0)
 1226                         goto out;
 1227         }
 1228 
 1229         if (amountstr == NULL || amountstr[0] == '\0')
 1230                 rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
 1231         else {
 1232                 error = str2int64(amountstr, &rule->rr_amount);
 1233                 if (error != 0)
 1234                         goto out;
 1235                 if (RACCT_IS_IN_MILLIONS(rule->rr_resource)) {
 1236                         if (rule->rr_amount > INT64_MAX / 1000000) {
 1237                                 error = ERANGE;
 1238                                 goto out;
 1239                         }
 1240                         rule->rr_amount *= 1000000;
 1241                 }
 1242         }
 1243 
 1244         if (perstr == NULL || perstr[0] == '\0')
 1245                 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
 1246         else {
 1247                 error = str2value(perstr, &rule->rr_per, subjectnames);
 1248                 if (error != 0)
 1249                         goto out;
 1250         }
 1251 
 1252 out:
 1253         if (error == 0)
 1254                 *rulep = rule;
 1255         else
 1256                 rctl_rule_release(rule);
 1257 
 1258         return (error);
 1259 }
 1260 
 1261 /*
 1262  * Link a rule with all the subjects it applies to.
 1263  */
 1264 int
 1265 rctl_rule_add(struct rctl_rule *rule)
 1266 {
 1267         struct proc *p;
 1268         struct ucred *cred;
 1269         struct uidinfo *uip;
 1270         struct prison *pr;
 1271         struct prison_racct *prr;
 1272         struct loginclass *lc;
 1273         struct rctl_rule *rule2;
 1274         int match;
 1275 
 1276         ASSERT_RACCT_ENABLED();
 1277         KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
 1278 
 1279         /*
 1280          * Some rules just don't make sense, like "deny" rule for an undeniable
 1281          * resource.  The exception are the RSS and %CPU resources - they are
 1282          * not deniable in the racct sense, but the limit is enforced in
 1283          * a different way.
 1284          */
 1285         if (rule->rr_action == RCTL_ACTION_DENY &&
 1286             !RACCT_IS_DENIABLE(rule->rr_resource) &&
 1287             rule->rr_resource != RACCT_RSS &&
 1288             rule->rr_resource != RACCT_PCTCPU) {
 1289                 return (EOPNOTSUPP);
 1290         }
 1291 
 1292         if (rule->rr_action == RCTL_ACTION_THROTTLE &&
 1293             !RACCT_IS_DECAYING(rule->rr_resource)) {
 1294                 return (EOPNOTSUPP);
 1295         }
 1296 
 1297         if (rule->rr_action == RCTL_ACTION_THROTTLE &&
 1298             rule->rr_resource == RACCT_PCTCPU) {
 1299                 return (EOPNOTSUPP);
 1300         }
 1301 
 1302         if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS &&
 1303             RACCT_IS_SLOPPY(rule->rr_resource)) {
 1304                 return (EOPNOTSUPP);
 1305         }
 1306 
 1307         /*
 1308          * Make sure there are no duplicated rules.  Also, for the "deny"
 1309          * rules, remove ones differing only by "amount".
 1310          */
 1311         if (rule->rr_action == RCTL_ACTION_DENY) {
 1312                 rule2 = rctl_rule_duplicate(rule, M_WAITOK);
 1313                 rule2->rr_amount = RCTL_AMOUNT_UNDEFINED;
 1314                 rctl_rule_remove(rule2);
 1315                 rctl_rule_release(rule2);
 1316         } else
 1317                 rctl_rule_remove(rule);
 1318 
 1319         switch (rule->rr_subject_type) {
 1320         case RCTL_SUBJECT_TYPE_PROCESS:
 1321                 p = rule->rr_subject.rs_proc;
 1322                 KASSERT(p != NULL, ("rctl_rule_add: NULL proc"));
 1323 
 1324                 rctl_racct_add_rule(p->p_racct, rule);
 1325                 /*
 1326                  * In case of per-process rule, we don't have anything more
 1327                  * to do.
 1328                  */
 1329                 return (0);
 1330 
 1331         case RCTL_SUBJECT_TYPE_USER:
 1332                 uip = rule->rr_subject.rs_uip;
 1333                 KASSERT(uip != NULL, ("rctl_rule_add: NULL uip"));
 1334                 rctl_racct_add_rule(uip->ui_racct, rule);
 1335                 break;
 1336 
 1337         case RCTL_SUBJECT_TYPE_LOGINCLASS:
 1338                 lc = rule->rr_subject.rs_loginclass;
 1339                 KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass"));
 1340                 rctl_racct_add_rule(lc->lc_racct, rule);
 1341                 break;
 1342 
 1343         case RCTL_SUBJECT_TYPE_JAIL:
 1344                 prr = rule->rr_subject.rs_prison_racct;
 1345                 KASSERT(prr != NULL, ("rctl_rule_add: NULL pr"));
 1346                 rctl_racct_add_rule(prr->prr_racct, rule);
 1347                 break;
 1348 
 1349         default:
 1350                 panic("rctl_rule_add: unknown subject type %d",
 1351                     rule->rr_subject_type);
 1352         }
 1353 
 1354         /*
 1355          * Now go through all the processes and add the new rule to the ones
 1356          * it applies to.
 1357          */
 1358         sx_assert(&allproc_lock, SA_LOCKED);
 1359         FOREACH_PROC_IN_SYSTEM(p) {
 1360                 cred = p->p_ucred;
 1361                 switch (rule->rr_subject_type) {
 1362                 case RCTL_SUBJECT_TYPE_USER:
 1363                         if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
 1364                             cred->cr_ruidinfo == rule->rr_subject.rs_uip)
 1365                                 break;
 1366                         continue;
 1367                 case RCTL_SUBJECT_TYPE_LOGINCLASS:
 1368                         if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
 1369                                 break;
 1370                         continue;
 1371                 case RCTL_SUBJECT_TYPE_JAIL:
 1372                         match = 0;
 1373                         for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
 1374                                 if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) {
 1375                                         match = 1;
 1376                                         break;
 1377                                 }
 1378                         }
 1379                         if (match)
 1380                                 break;
 1381                         continue;
 1382                 default:
 1383                         panic("rctl_rule_add: unknown subject type %d",
 1384                             rule->rr_subject_type);
 1385                 }
 1386 
 1387                 rctl_racct_add_rule(p->p_racct, rule);
 1388         }
 1389 
 1390         return (0);
 1391 }
 1392 
 1393 static void
 1394 rctl_rule_pre_callback(void)
 1395 {
 1396 
 1397         RACCT_LOCK();
 1398 }
 1399 
 1400 static void
 1401 rctl_rule_post_callback(void)
 1402 {
 1403 
 1404         RACCT_UNLOCK();
 1405 }
 1406 
 1407 static void
 1408 rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3)
 1409 {
 1410         struct rctl_rule *filter = (struct rctl_rule *)arg2;
 1411         int found = 0;
 1412 
 1413         ASSERT_RACCT_ENABLED();
 1414         RACCT_LOCK_ASSERT();
 1415 
 1416         found += rctl_racct_remove_rules(racct, filter);
 1417 
 1418         *((int *)arg3) += found;
 1419 }
 1420 
 1421 /*
 1422  * Remove all rules that match the filter.
 1423  */
 1424 int
 1425 rctl_rule_remove(struct rctl_rule *filter)
 1426 {
 1427         struct proc *p;
 1428         int found = 0;
 1429 
 1430         ASSERT_RACCT_ENABLED();
 1431 
 1432         if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
 1433             filter->rr_subject.rs_proc != NULL) {
 1434                 p = filter->rr_subject.rs_proc;
 1435                 RACCT_LOCK();
 1436                 found = rctl_racct_remove_rules(p->p_racct, filter);
 1437                 RACCT_UNLOCK();
 1438                 if (found)
 1439                         return (0);
 1440                 return (ESRCH);
 1441         }
 1442 
 1443         loginclass_racct_foreach(rctl_rule_remove_callback,
 1444             rctl_rule_pre_callback, rctl_rule_post_callback,
 1445             filter, (void *)&found);
 1446         ui_racct_foreach(rctl_rule_remove_callback,
 1447             rctl_rule_pre_callback, rctl_rule_post_callback,
 1448             filter, (void *)&found);
 1449         prison_racct_foreach(rctl_rule_remove_callback,
 1450             rctl_rule_pre_callback, rctl_rule_post_callback,
 1451             filter, (void *)&found);
 1452 
 1453         sx_assert(&allproc_lock, SA_LOCKED);
 1454         RACCT_LOCK();
 1455         FOREACH_PROC_IN_SYSTEM(p) {
 1456                 found += rctl_racct_remove_rules(p->p_racct, filter);
 1457         }
 1458         RACCT_UNLOCK();
 1459 
 1460         if (found)
 1461                 return (0);
 1462         return (ESRCH);
 1463 }
 1464 
 1465 /*
 1466  * Appends a rule to the sbuf.
 1467  */
 1468 static void
 1469 rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule)
 1470 {
 1471         int64_t amount;
 1472 
 1473         ASSERT_RACCT_ENABLED();
 1474 
 1475         sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
 1476 
 1477         switch (rule->rr_subject_type) {
 1478         case RCTL_SUBJECT_TYPE_PROCESS:
 1479                 if (rule->rr_subject.rs_proc == NULL)
 1480                         sbuf_printf(sb, ":");
 1481                 else
 1482                         sbuf_printf(sb, "%d:",
 1483                             rule->rr_subject.rs_proc->p_pid);
 1484                 break;
 1485         case RCTL_SUBJECT_TYPE_USER:
 1486                 if (rule->rr_subject.rs_uip == NULL)
 1487                         sbuf_printf(sb, ":");
 1488                 else
 1489                         sbuf_printf(sb, "%d:",
 1490                             rule->rr_subject.rs_uip->ui_uid);
 1491                 break;
 1492         case RCTL_SUBJECT_TYPE_LOGINCLASS:
 1493                 if (rule->rr_subject.rs_loginclass == NULL)
 1494                         sbuf_printf(sb, ":");
 1495                 else
 1496                         sbuf_printf(sb, "%s:",
 1497                             rule->rr_subject.rs_loginclass->lc_name);
 1498                 break;
 1499         case RCTL_SUBJECT_TYPE_JAIL:
 1500                 if (rule->rr_subject.rs_prison_racct == NULL)
 1501                         sbuf_printf(sb, ":");
 1502                 else
 1503                         sbuf_printf(sb, "%s:",
 1504                             rule->rr_subject.rs_prison_racct->prr_name);
 1505                 break;
 1506         default:
 1507                 panic("rctl_rule_to_sbuf: unknown subject type %d",
 1508                     rule->rr_subject_type);
 1509         }
 1510 
 1511         amount = rule->rr_amount;
 1512         if (amount != RCTL_AMOUNT_UNDEFINED &&
 1513             RACCT_IS_IN_MILLIONS(rule->rr_resource))
 1514                 amount /= 1000000;
 1515 
 1516         sbuf_printf(sb, "%s:%s=%jd",
 1517             rctl_resource_name(rule->rr_resource),
 1518             rctl_action_name(rule->rr_action),
 1519             amount);
 1520 
 1521         if (rule->rr_per != rule->rr_subject_type)
 1522                 sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per));
 1523 }
 1524 
 1525 /*
 1526  * Routine used by RCTL syscalls to read in input string.
 1527  */
 1528 static int
 1529 rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen)
 1530 {
 1531         char *str;
 1532         int error;
 1533 
 1534         ASSERT_RACCT_ENABLED();
 1535 
 1536         if (inbuflen <= 0)
 1537                 return (EINVAL);
 1538         if (inbuflen > RCTL_MAX_INBUFSIZE)
 1539                 return (E2BIG);
 1540 
 1541         str = malloc(inbuflen + 1, M_RCTL, M_WAITOK);
 1542         error = copyinstr(inbufp, str, inbuflen, NULL);
 1543         if (error != 0) {
 1544                 free(str, M_RCTL);
 1545                 return (error);
 1546         }
 1547 
 1548         *inputstr = str;
 1549 
 1550         return (0);
 1551 }
 1552 
 1553 /*
 1554  * Routine used by RCTL syscalls to write out output string.
 1555  */
 1556 static int
 1557 rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen)
 1558 {
 1559         int error;
 1560 
 1561         ASSERT_RACCT_ENABLED();
 1562 
 1563         if (outputsbuf == NULL)
 1564                 return (0);
 1565 
 1566         sbuf_finish(outputsbuf);
 1567         if (outbuflen < sbuf_len(outputsbuf) + 1) {
 1568                 sbuf_delete(outputsbuf);
 1569                 return (ERANGE);
 1570         }
 1571         error = copyout(sbuf_data(outputsbuf), outbufp,
 1572             sbuf_len(outputsbuf) + 1);
 1573         sbuf_delete(outputsbuf);
 1574         return (error);
 1575 }
 1576 
 1577 static struct sbuf *
 1578 rctl_racct_to_sbuf(struct racct *racct, int sloppy)
 1579 {
 1580         struct sbuf *sb;
 1581         int64_t amount;
 1582         int i;
 1583 
 1584         ASSERT_RACCT_ENABLED();
 1585 
 1586         sb = sbuf_new_auto();
 1587         for (i = 0; i <= RACCT_MAX; i++) {
 1588                 if (sloppy == 0 && RACCT_IS_SLOPPY(i))
 1589                         continue;
 1590                 RACCT_LOCK();
 1591                 amount = racct->r_resources[i];
 1592                 RACCT_UNLOCK();
 1593                 if (RACCT_IS_IN_MILLIONS(i))
 1594                         amount /= 1000000;
 1595                 sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount);
 1596         }
 1597         sbuf_setpos(sb, sbuf_len(sb) - 1);
 1598         return (sb);
 1599 }
 1600 
 1601 int
 1602 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
 1603 {
 1604         struct rctl_rule *filter;
 1605         struct sbuf *outputsbuf = NULL;
 1606         struct proc *p;
 1607         struct uidinfo *uip;
 1608         struct loginclass *lc;
 1609         struct prison_racct *prr;
 1610         char *inputstr;
 1611         int error;
 1612 
 1613         if (!racct_enable)
 1614                 return (ENOSYS);
 1615 
 1616         error = priv_check(td, PRIV_RCTL_GET_RACCT);
 1617         if (error != 0)
 1618                 return (error);
 1619 
 1620         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
 1621         if (error != 0)
 1622                 return (error);
 1623 
 1624         sx_slock(&allproc_lock);
 1625         error = rctl_string_to_rule(inputstr, &filter);
 1626         free(inputstr, M_RCTL);
 1627         if (error != 0) {
 1628                 sx_sunlock(&allproc_lock);
 1629                 return (error);
 1630         }
 1631 
 1632         switch (filter->rr_subject_type) {
 1633         case RCTL_SUBJECT_TYPE_PROCESS:
 1634                 p = filter->rr_subject.rs_proc;
 1635                 if (p == NULL) {
 1636                         error = EINVAL;
 1637                         goto out;
 1638                 }
 1639                 outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0);
 1640                 break;
 1641         case RCTL_SUBJECT_TYPE_USER:
 1642                 uip = filter->rr_subject.rs_uip;
 1643                 if (uip == NULL) {
 1644                         error = EINVAL;
 1645                         goto out;
 1646                 }
 1647                 outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1);
 1648                 break;
 1649         case RCTL_SUBJECT_TYPE_LOGINCLASS:
 1650                 lc = filter->rr_subject.rs_loginclass;
 1651                 if (lc == NULL) {
 1652                         error = EINVAL;
 1653                         goto out;
 1654                 }
 1655                 outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1);
 1656                 break;
 1657         case RCTL_SUBJECT_TYPE_JAIL:
 1658                 prr = filter->rr_subject.rs_prison_racct;
 1659                 if (prr == NULL) {
 1660                         error = EINVAL;
 1661                         goto out;
 1662                 }
 1663                 outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1);
 1664                 break;
 1665         default:
 1666                 error = EINVAL;
 1667         }
 1668 out:
 1669         rctl_rule_release(filter);
 1670         sx_sunlock(&allproc_lock);
 1671         if (error != 0)
 1672                 return (error);
 1673 
 1674         error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen);
 1675 
 1676         return (error);
 1677 }
 1678 
 1679 static void
 1680 rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3)
 1681 {
 1682         struct rctl_rule *filter = (struct rctl_rule *)arg2;
 1683         struct rctl_rule_link *link;
 1684         struct sbuf *sb = (struct sbuf *)arg3;
 1685 
 1686         ASSERT_RACCT_ENABLED();
 1687         RACCT_LOCK_ASSERT();
 1688 
 1689         LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
 1690                 if (!rctl_rule_matches(link->rrl_rule, filter))
 1691                         continue;
 1692                 rctl_rule_to_sbuf(sb, link->rrl_rule);
 1693                 sbuf_printf(sb, ",");
 1694         }
 1695 }
 1696 
 1697 int
 1698 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
 1699 {
 1700         struct sbuf *sb;
 1701         struct rctl_rule *filter;
 1702         struct rctl_rule_link *link;
 1703         struct proc *p;
 1704         char *inputstr, *buf;
 1705         size_t bufsize;
 1706         int error;
 1707 
 1708         if (!racct_enable)
 1709                 return (ENOSYS);
 1710 
 1711         error = priv_check(td, PRIV_RCTL_GET_RULES);
 1712         if (error != 0)
 1713                 return (error);
 1714 
 1715         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
 1716         if (error != 0)
 1717                 return (error);
 1718 
 1719         sx_slock(&allproc_lock);
 1720         error = rctl_string_to_rule(inputstr, &filter);
 1721         free(inputstr, M_RCTL);
 1722         if (error != 0) {
 1723                 sx_sunlock(&allproc_lock);
 1724                 return (error);
 1725         }
 1726 
 1727         bufsize = uap->outbuflen;
 1728         if (bufsize > rctl_maxbufsize) {
 1729                 sx_sunlock(&allproc_lock);
 1730                 return (E2BIG);
 1731         }
 1732 
 1733         buf = malloc(bufsize, M_RCTL, M_WAITOK);
 1734         sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
 1735         KASSERT(sb != NULL, ("sbuf_new failed"));
 1736 
 1737         FOREACH_PROC_IN_SYSTEM(p) {
 1738                 RACCT_LOCK();
 1739                 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
 1740                         /*
 1741                          * Non-process rules will be added to the buffer later.
 1742                          * Adding them here would result in duplicated output.
 1743                          */
 1744                         if (link->rrl_rule->rr_subject_type !=
 1745                             RCTL_SUBJECT_TYPE_PROCESS)
 1746                                 continue;
 1747                         if (!rctl_rule_matches(link->rrl_rule, filter))
 1748                                 continue;
 1749                         rctl_rule_to_sbuf(sb, link->rrl_rule);
 1750                         sbuf_printf(sb, ",");
 1751                 }
 1752                 RACCT_UNLOCK();
 1753         }
 1754 
 1755         loginclass_racct_foreach(rctl_get_rules_callback,
 1756             rctl_rule_pre_callback, rctl_rule_post_callback,
 1757             filter, sb);
 1758         ui_racct_foreach(rctl_get_rules_callback,
 1759             rctl_rule_pre_callback, rctl_rule_post_callback,
 1760             filter, sb);
 1761         prison_racct_foreach(rctl_get_rules_callback,
 1762             rctl_rule_pre_callback, rctl_rule_post_callback,
 1763             filter, sb);
 1764         if (sbuf_error(sb) == ENOMEM) {
 1765                 error = ERANGE;
 1766                 goto out;
 1767         }
 1768 
 1769         /*
 1770          * Remove trailing ",".
 1771          */
 1772         if (sbuf_len(sb) > 0)
 1773                 sbuf_setpos(sb, sbuf_len(sb) - 1);
 1774 
 1775         error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
 1776 out:
 1777         rctl_rule_release(filter);
 1778         sx_sunlock(&allproc_lock);
 1779         free(buf, M_RCTL);
 1780         return (error);
 1781 }
 1782 
 1783 int
 1784 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
 1785 {
 1786         struct sbuf *sb;
 1787         struct rctl_rule *filter;
 1788         struct rctl_rule_link *link;
 1789         char *inputstr, *buf;
 1790         size_t bufsize;
 1791         int error;
 1792 
 1793         if (!racct_enable)
 1794                 return (ENOSYS);
 1795 
 1796         error = priv_check(td, PRIV_RCTL_GET_LIMITS);
 1797         if (error != 0)
 1798                 return (error);
 1799 
 1800         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
 1801         if (error != 0)
 1802                 return (error);
 1803 
 1804         sx_slock(&allproc_lock);
 1805         error = rctl_string_to_rule(inputstr, &filter);
 1806         free(inputstr, M_RCTL);
 1807         if (error != 0) {
 1808                 sx_sunlock(&allproc_lock);
 1809                 return (error);
 1810         }
 1811 
 1812         if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) {
 1813                 rctl_rule_release(filter);
 1814                 sx_sunlock(&allproc_lock);
 1815                 return (EINVAL);
 1816         }
 1817         if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) {
 1818                 rctl_rule_release(filter);
 1819                 sx_sunlock(&allproc_lock);
 1820                 return (EOPNOTSUPP);
 1821         }
 1822         if (filter->rr_subject.rs_proc == NULL) {
 1823                 rctl_rule_release(filter);
 1824                 sx_sunlock(&allproc_lock);
 1825                 return (EINVAL);
 1826         }
 1827 
 1828         bufsize = uap->outbuflen;
 1829         if (bufsize > rctl_maxbufsize) {
 1830                 rctl_rule_release(filter);
 1831                 sx_sunlock(&allproc_lock);
 1832                 return (E2BIG);
 1833         }
 1834 
 1835         buf = malloc(bufsize, M_RCTL, M_WAITOK);
 1836         sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
 1837         KASSERT(sb != NULL, ("sbuf_new failed"));
 1838 
 1839         RACCT_LOCK();
 1840         LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links,
 1841             rrl_next) {
 1842                 rctl_rule_to_sbuf(sb, link->rrl_rule);
 1843                 sbuf_printf(sb, ",");
 1844         }
 1845         RACCT_UNLOCK();
 1846         if (sbuf_error(sb) == ENOMEM) {
 1847                 error = ERANGE;
 1848                 sbuf_delete(sb);
 1849                 goto out;
 1850         }
 1851 
 1852         /*
 1853          * Remove trailing ",".
 1854          */
 1855         if (sbuf_len(sb) > 0)
 1856                 sbuf_setpos(sb, sbuf_len(sb) - 1);
 1857 
 1858         error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
 1859 out:
 1860         rctl_rule_release(filter);
 1861         sx_sunlock(&allproc_lock);
 1862         free(buf, M_RCTL);
 1863         return (error);
 1864 }
 1865 
 1866 int
 1867 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
 1868 {
 1869         struct rctl_rule *rule;
 1870         char *inputstr;
 1871         int error;
 1872 
 1873         if (!racct_enable)
 1874                 return (ENOSYS);
 1875 
 1876         error = priv_check(td, PRIV_RCTL_ADD_RULE);
 1877         if (error != 0)
 1878                 return (error);
 1879 
 1880         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
 1881         if (error != 0)
 1882                 return (error);
 1883 
 1884         sx_slock(&allproc_lock);
 1885         error = rctl_string_to_rule(inputstr, &rule);
 1886         free(inputstr, M_RCTL);
 1887         if (error != 0) {
 1888                 sx_sunlock(&allproc_lock);
 1889                 return (error);
 1890         }
 1891         /*
 1892          * The 'per' part of a rule is optional.
 1893          */
 1894         if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED &&
 1895             rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED)
 1896                 rule->rr_per = rule->rr_subject_type;
 1897 
 1898         if (!rctl_rule_fully_specified(rule)) {
 1899                 error = EINVAL;
 1900                 goto out;
 1901         }
 1902 
 1903         error = rctl_rule_add(rule);
 1904 
 1905 out:
 1906         rctl_rule_release(rule);
 1907         sx_sunlock(&allproc_lock);
 1908         return (error);
 1909 }
 1910 
 1911 int
 1912 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
 1913 {
 1914         struct rctl_rule *filter;
 1915         char *inputstr;
 1916         int error;
 1917 
 1918         if (!racct_enable)
 1919                 return (ENOSYS);
 1920 
 1921         error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
 1922         if (error != 0)
 1923                 return (error);
 1924 
 1925         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
 1926         if (error != 0)
 1927                 return (error);
 1928 
 1929         sx_slock(&allproc_lock);
 1930         error = rctl_string_to_rule(inputstr, &filter);
 1931         free(inputstr, M_RCTL);
 1932         if (error != 0) {
 1933                 sx_sunlock(&allproc_lock);
 1934                 return (error);
 1935         }
 1936 
 1937         error = rctl_rule_remove(filter);
 1938         rctl_rule_release(filter);
 1939         sx_sunlock(&allproc_lock);
 1940 
 1941         return (error);
 1942 }
 1943 
 1944 /*
 1945  * Update RCTL rule list after credential change.
 1946  */
 1947 void
 1948 rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred)
 1949 {
 1950         LIST_HEAD(, rctl_rule_link) newrules;
 1951         struct rctl_rule_link *link, *newlink;
 1952         struct uidinfo *newuip;
 1953         struct loginclass *newlc;
 1954         struct prison_racct *newprr;
 1955         int rulecnt, i;
 1956 
 1957         ASSERT_RACCT_ENABLED();
 1958 
 1959         newuip = newcred->cr_ruidinfo;
 1960         newlc = newcred->cr_loginclass;
 1961         newprr = newcred->cr_prison->pr_prison_racct;
 1962         
 1963         LIST_INIT(&newrules);
 1964 
 1965 again:
 1966         /*
 1967          * First, count the rules that apply to the process with new
 1968          * credentials.
 1969          */
 1970         rulecnt = 0;
 1971         RACCT_LOCK();
 1972         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
 1973                 if (link->rrl_rule->rr_subject_type ==
 1974                     RCTL_SUBJECT_TYPE_PROCESS)
 1975                         rulecnt++;
 1976         }
 1977         LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next)
 1978                 rulecnt++;
 1979         LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next)
 1980                 rulecnt++;
 1981         LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next)
 1982                 rulecnt++;
 1983         RACCT_UNLOCK();
 1984 
 1985         /*
 1986          * Create temporary list.  We've dropped the rctl_lock in order
 1987          * to use M_WAITOK.
 1988          */
 1989         for (i = 0; i < rulecnt; i++) {
 1990                 newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
 1991                 newlink->rrl_rule = NULL;
 1992                 newlink->rrl_exceeded = 0;
 1993                 LIST_INSERT_HEAD(&newrules, newlink, rrl_next);
 1994         }
 1995 
 1996         newlink = LIST_FIRST(&newrules);
 1997 
 1998         /*
 1999          * Assign rules to the newly allocated list entries.
 2000          */
 2001         RACCT_LOCK();
 2002         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
 2003                 if (link->rrl_rule->rr_subject_type ==
 2004                     RCTL_SUBJECT_TYPE_PROCESS) {
 2005                         if (newlink == NULL)
 2006                                 goto goaround;
 2007                         rctl_rule_acquire(link->rrl_rule);
 2008                         newlink->rrl_rule = link->rrl_rule;
 2009                         newlink->rrl_exceeded = link->rrl_exceeded;
 2010                         newlink = LIST_NEXT(newlink, rrl_next);
 2011                         rulecnt--;
 2012                 }
 2013         }
 2014         
 2015         LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) {
 2016                 if (newlink == NULL)
 2017                         goto goaround;
 2018                 rctl_rule_acquire(link->rrl_rule);
 2019                 newlink->rrl_rule = link->rrl_rule;
 2020                 newlink->rrl_exceeded = link->rrl_exceeded;
 2021                 newlink = LIST_NEXT(newlink, rrl_next);
 2022                 rulecnt--;
 2023         }
 2024 
 2025         LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) {
 2026                 if (newlink == NULL)
 2027                         goto goaround;
 2028                 rctl_rule_acquire(link->rrl_rule);
 2029                 newlink->rrl_rule = link->rrl_rule;
 2030                 newlink->rrl_exceeded = link->rrl_exceeded;
 2031                 newlink = LIST_NEXT(newlink, rrl_next);
 2032                 rulecnt--;
 2033         }
 2034 
 2035         LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) {
 2036                 if (newlink == NULL)
 2037                         goto goaround;
 2038                 rctl_rule_acquire(link->rrl_rule);
 2039                 newlink->rrl_rule = link->rrl_rule;
 2040                 newlink->rrl_exceeded = link->rrl_exceeded;
 2041                 newlink = LIST_NEXT(newlink, rrl_next);
 2042                 rulecnt--;
 2043         }
 2044 
 2045         if (rulecnt == 0) {
 2046                 /*
 2047                  * Free the old rule list.
 2048                  */
 2049                 while (!LIST_EMPTY(&p->p_racct->r_rule_links)) {
 2050                         link = LIST_FIRST(&p->p_racct->r_rule_links);
 2051                         LIST_REMOVE(link, rrl_next);
 2052                         rctl_rule_release(link->rrl_rule);
 2053                         uma_zfree(rctl_rule_link_zone, link);
 2054                 }
 2055 
 2056                 /*
 2057                  * Replace lists and we're done.
 2058                  *
 2059                  * XXX: Is there any way to switch list heads instead
 2060                  *      of iterating here?
 2061                  */
 2062                 while (!LIST_EMPTY(&newrules)) {
 2063                         newlink = LIST_FIRST(&newrules);
 2064                         LIST_REMOVE(newlink, rrl_next);
 2065                         LIST_INSERT_HEAD(&p->p_racct->r_rule_links,
 2066                             newlink, rrl_next);
 2067                 }
 2068 
 2069                 RACCT_UNLOCK();
 2070 
 2071                 return;
 2072         }
 2073 
 2074 goaround:
 2075         RACCT_UNLOCK();
 2076 
 2077         /*
 2078          * Rule list changed while we were not holding the rctl_lock.
 2079          * Free the new list and try again.
 2080          */
 2081         while (!LIST_EMPTY(&newrules)) {
 2082                 newlink = LIST_FIRST(&newrules);
 2083                 LIST_REMOVE(newlink, rrl_next);
 2084                 if (newlink->rrl_rule != NULL)
 2085                         rctl_rule_release(newlink->rrl_rule);
 2086                 uma_zfree(rctl_rule_link_zone, newlink);
 2087         }
 2088 
 2089         goto again;
 2090 }
 2091 
 2092 /*
 2093  * Assign RCTL rules to the newly created process.
 2094  */
 2095 int
 2096 rctl_proc_fork(struct proc *parent, struct proc *child)
 2097 {
 2098         struct rctl_rule *rule;
 2099         struct rctl_rule_link *link;
 2100         int error;
 2101 
 2102         ASSERT_RACCT_ENABLED();
 2103         RACCT_LOCK_ASSERT();
 2104         KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent));
 2105 
 2106         LIST_INIT(&child->p_racct->r_rule_links);
 2107 
 2108         /*
 2109          * Go through limits applicable to the parent and assign them
 2110          * to the child.  Rules with 'process' subject have to be duplicated
 2111          * in order to make their rr_subject point to the new process.
 2112          */
 2113         LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) {
 2114                 if (link->rrl_rule->rr_subject_type ==
 2115                     RCTL_SUBJECT_TYPE_PROCESS) {
 2116                         rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT);
 2117                         if (rule == NULL)
 2118                                 goto fail;
 2119                         KASSERT(rule->rr_subject.rs_proc == parent,
 2120                             ("rule->rr_subject.rs_proc != parent"));
 2121                         rule->rr_subject.rs_proc = child;
 2122                         error = rctl_racct_add_rule_locked(child->p_racct,
 2123                             rule);
 2124                         rctl_rule_release(rule);
 2125                         if (error != 0)
 2126                                 goto fail;
 2127                 } else {
 2128                         error = rctl_racct_add_rule_locked(child->p_racct,
 2129                             link->rrl_rule);
 2130                         if (error != 0)
 2131                                 goto fail;
 2132                 }
 2133         }
 2134 
 2135         return (0);
 2136 
 2137 fail:
 2138         while (!LIST_EMPTY(&child->p_racct->r_rule_links)) {
 2139                 link = LIST_FIRST(&child->p_racct->r_rule_links);
 2140                 LIST_REMOVE(link, rrl_next);
 2141                 rctl_rule_release(link->rrl_rule);
 2142                 uma_zfree(rctl_rule_link_zone, link);
 2143         }
 2144 
 2145         return (EAGAIN);
 2146 }
 2147 
 2148 /*
 2149  * Release rules attached to the racct.
 2150  */
 2151 void
 2152 rctl_racct_release(struct racct *racct)
 2153 {
 2154         struct rctl_rule_link *link;
 2155 
 2156         ASSERT_RACCT_ENABLED();
 2157         RACCT_LOCK_ASSERT();
 2158 
 2159         while (!LIST_EMPTY(&racct->r_rule_links)) {
 2160                 link = LIST_FIRST(&racct->r_rule_links);
 2161                 LIST_REMOVE(link, rrl_next);
 2162                 rctl_rule_release(link->rrl_rule);
 2163                 uma_zfree(rctl_rule_link_zone, link);
 2164         }
 2165 }
 2166 
 2167 static void
 2168 rctl_init(void)
 2169 {
 2170 
 2171         if (!racct_enable)
 2172                 return;
 2173 
 2174         rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule),
 2175             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 2176         rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
 2177             sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
 2178             UMA_ALIGN_PTR, 0);
 2179 
 2180         /*
 2181          * Set default values, making sure not to overwrite the ones
 2182          * fetched from tunables.  Most of those could be set at the
 2183          * declaration, except for the rctl_throttle_max - we cannot
 2184          * set it there due to hz not being compile time constant.
 2185          */
 2186         if (rctl_throttle_min < 1)
 2187                 rctl_throttle_min = 1;
 2188         if (rctl_throttle_max < rctl_throttle_min)
 2189                 rctl_throttle_max = 2 * hz;
 2190         if (rctl_throttle_pct < 0)
 2191                 rctl_throttle_pct = 100;
 2192         if (rctl_throttle_pct2 < 0)
 2193                 rctl_throttle_pct2 = 100;
 2194 }
 2195 
 2196 #else /* !RCTL */
 2197 
 2198 int
 2199 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
 2200 {
 2201         
 2202         return (ENOSYS);
 2203 }
 2204 
 2205 int
 2206 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
 2207 {
 2208         
 2209         return (ENOSYS);
 2210 }
 2211 
 2212 int
 2213 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
 2214 {
 2215         
 2216         return (ENOSYS);
 2217 }
 2218 
 2219 int
 2220 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
 2221 {
 2222         
 2223         return (ENOSYS);
 2224 }
 2225 
 2226 int
 2227 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
 2228 {
 2229         
 2230         return (ENOSYS);
 2231 }
 2232 
 2233 #endif /* !RCTL */

Cache object: ac527342eb40f3db1df9b63155390c74


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.