FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_rctl.c
1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2010 The FreeBSD Foundation
5 * All rights reserved.
6 *
7 * This software was developed by Edward Tomasz Napierala under sponsorship
8 * from the FreeBSD Foundation.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 * $FreeBSD: releng/12.0/sys/kern/kern_rctl.c 332816 2018-04-20 13:08:04Z avg $
32 */
33
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD: releng/12.0/sys/kern/kern_rctl.c 332816 2018-04-20 13:08:04Z avg $");
36
37 #include <sys/param.h>
38 #include <sys/bus.h>
39 #include <sys/malloc.h>
40 #include <sys/queue.h>
41 #include <sys/refcount.h>
42 #include <sys/jail.h>
43 #include <sys/kernel.h>
44 #include <sys/limits.h>
45 #include <sys/loginclass.h>
46 #include <sys/priv.h>
47 #include <sys/proc.h>
48 #include <sys/racct.h>
49 #include <sys/rctl.h>
50 #include <sys/resourcevar.h>
51 #include <sys/sx.h>
52 #include <sys/sysent.h>
53 #include <sys/sysproto.h>
54 #include <sys/systm.h>
55 #include <sys/types.h>
56 #include <sys/eventhandler.h>
57 #include <sys/lock.h>
58 #include <sys/mutex.h>
59 #include <sys/rwlock.h>
60 #include <sys/sbuf.h>
61 #include <sys/taskqueue.h>
62 #include <sys/tree.h>
63 #include <vm/uma.h>
64
65 #ifdef RCTL
66 #ifndef RACCT
67 #error "The RCTL option requires the RACCT option"
68 #endif
69
70 FEATURE(rctl, "Resource Limits");
71
/*
 * NOTE(review): the HRF_* values are not referenced in the visible part
 * of this file; their consumers need to be confirmed elsewhere.
 */
#define	HRF_DEFAULT		0
#define	HRF_DONT_INHERIT	1
#define	HRF_DONT_ACCUMULATE	2

/*
 * Buffer size limits.  The replacement lists are parenthesized so that
 * the macros expand correctly inside larger expressions (for example
 * "x % RCTL_MAX_INBUFSIZE" or "n / RCTL_MAX_OUTBUFSIZE").
 */
#define	RCTL_MAX_INBUFSIZE	(4 * 1024)
#define	RCTL_MAX_OUTBUFSIZE	(16 * 1024 * 1024)
#define	RCTL_LOG_BUFSIZE	128

/*
 * Upper bound on the safety margin subtracted from the available %CPU
 * in rctl_pcpu_available().
 */
#define	RCTL_PCPU_SHIFT		(10 * 1000000)
81
82 static unsigned int rctl_maxbufsize = RCTL_MAX_OUTBUFSIZE;
83 static int rctl_log_rate_limit = 10;
84 static int rctl_devctl_rate_limit = 10;
85
86 /*
87 * Values below are initialized in rctl_init().
88 */
89 static int rctl_throttle_min = -1;
90 static int rctl_throttle_max = -1;
91 static int rctl_throttle_pct = -1;
92 static int rctl_throttle_pct2 = -1;
93
94 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS);
95 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS);
96 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS);
97 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS);
98
99 SYSCTL_NODE(_kern_racct, OID_AUTO, rctl, CTLFLAG_RW, 0, "Resource Limits");
100 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, maxbufsize, CTLFLAG_RWTUN,
101 &rctl_maxbufsize, 0, "Maximum output buffer size");
102 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, log_rate_limit, CTLFLAG_RW,
103 &rctl_log_rate_limit, 0, "Maximum number of log messages per second");
104 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, devctl_rate_limit, CTLFLAG_RWTUN,
105 &rctl_devctl_rate_limit, 0, "Maximum number of devctl messages per second");
106 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_min,
107 CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_min_sysctl, "IU",
108 "Shortest throttling duration, in hz");
109 TUNABLE_INT("kern.racct.rctl.throttle_min", &rctl_throttle_min);
110 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_max,
111 CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_max_sysctl, "IU",
112 "Longest throttling duration, in hz");
113 TUNABLE_INT("kern.racct.rctl.throttle_max", &rctl_throttle_max);
114 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct,
115 CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_pct_sysctl, "IU",
116 "Throttling penalty for process consumption, in percent");
117 TUNABLE_INT("kern.racct.rctl.throttle_pct", &rctl_throttle_pct);
118 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct2,
119 CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_pct2_sysctl, "IU",
120 "Throttling penalty for container consumption, in percent");
121 TUNABLE_INT("kern.racct.rctl.throttle_pct2", &rctl_throttle_pct2);
122
/*
 * A 'rctl_rule_link' ties a rule to one of the raccts it applies to;
 * every racct keeps a list of such links.  For example, the rule
 * 'user:X:openfiles:deny=N/process' is linked with the uidinfo for
 * user X, and with each process of that user.
 */
struct rctl_rule_link {
	/* Linkage on the owning racct's r_rule_links list. */
	LIST_ENTRY(rctl_rule_link)	rrl_next;
	/* The rule this link refers to. */
	struct rctl_rule		*rrl_rule;
	/*
	 * Non-zero once a one-shot action (log, devctl, signal) has
	 * fired for this link; cleared when usage fits the rule again.
	 */
	int				rrl_exceeded;
};
133
/*
 * One entry of a name <-> value translation table.  Tables are arrays of
 * these terminated by an entry whose d_name is NULL.
 */
struct dict {
	const char	*d_name;	/* textual name; NULL ends a table */
	int		d_value;	/* numeric constant the name maps to */
};
138
139 static struct dict subjectnames[] = {
140 { "process", RCTL_SUBJECT_TYPE_PROCESS },
141 { "user", RCTL_SUBJECT_TYPE_USER },
142 { "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS },
143 { "jail", RCTL_SUBJECT_TYPE_JAIL },
144 { NULL, -1 }};
145
146 static struct dict resourcenames[] = {
147 { "cputime", RACCT_CPU },
148 { "datasize", RACCT_DATA },
149 { "stacksize", RACCT_STACK },
150 { "coredumpsize", RACCT_CORE },
151 { "memoryuse", RACCT_RSS },
152 { "memorylocked", RACCT_MEMLOCK },
153 { "maxproc", RACCT_NPROC },
154 { "openfiles", RACCT_NOFILE },
155 { "vmemoryuse", RACCT_VMEM },
156 { "pseudoterminals", RACCT_NPTS },
157 { "swapuse", RACCT_SWAP },
158 { "nthr", RACCT_NTHR },
159 { "msgqqueued", RACCT_MSGQQUEUED },
160 { "msgqsize", RACCT_MSGQSIZE },
161 { "nmsgq", RACCT_NMSGQ },
162 { "nsem", RACCT_NSEM },
163 { "nsemop", RACCT_NSEMOP },
164 { "nshm", RACCT_NSHM },
165 { "shmsize", RACCT_SHMSIZE },
166 { "wallclock", RACCT_WALLCLOCK },
167 { "pcpu", RACCT_PCTCPU },
168 { "readbps", RACCT_READBPS },
169 { "writebps", RACCT_WRITEBPS },
170 { "readiops", RACCT_READIOPS },
171 { "writeiops", RACCT_WRITEIOPS },
172 { NULL, -1 }};
173
174 static struct dict actionnames[] = {
175 { "sighup", RCTL_ACTION_SIGHUP },
176 { "sigint", RCTL_ACTION_SIGINT },
177 { "sigquit", RCTL_ACTION_SIGQUIT },
178 { "sigill", RCTL_ACTION_SIGILL },
179 { "sigtrap", RCTL_ACTION_SIGTRAP },
180 { "sigabrt", RCTL_ACTION_SIGABRT },
181 { "sigemt", RCTL_ACTION_SIGEMT },
182 { "sigfpe", RCTL_ACTION_SIGFPE },
183 { "sigkill", RCTL_ACTION_SIGKILL },
184 { "sigbus", RCTL_ACTION_SIGBUS },
185 { "sigsegv", RCTL_ACTION_SIGSEGV },
186 { "sigsys", RCTL_ACTION_SIGSYS },
187 { "sigpipe", RCTL_ACTION_SIGPIPE },
188 { "sigalrm", RCTL_ACTION_SIGALRM },
189 { "sigterm", RCTL_ACTION_SIGTERM },
190 { "sigurg", RCTL_ACTION_SIGURG },
191 { "sigstop", RCTL_ACTION_SIGSTOP },
192 { "sigtstp", RCTL_ACTION_SIGTSTP },
193 { "sigchld", RCTL_ACTION_SIGCHLD },
194 { "sigttin", RCTL_ACTION_SIGTTIN },
195 { "sigttou", RCTL_ACTION_SIGTTOU },
196 { "sigio", RCTL_ACTION_SIGIO },
197 { "sigxcpu", RCTL_ACTION_SIGXCPU },
198 { "sigxfsz", RCTL_ACTION_SIGXFSZ },
199 { "sigvtalrm", RCTL_ACTION_SIGVTALRM },
200 { "sigprof", RCTL_ACTION_SIGPROF },
201 { "sigwinch", RCTL_ACTION_SIGWINCH },
202 { "siginfo", RCTL_ACTION_SIGINFO },
203 { "sigusr1", RCTL_ACTION_SIGUSR1 },
204 { "sigusr2", RCTL_ACTION_SIGUSR2 },
205 { "sigthr", RCTL_ACTION_SIGTHR },
206 { "deny", RCTL_ACTION_DENY },
207 { "log", RCTL_ACTION_LOG },
208 { "devctl", RCTL_ACTION_DEVCTL },
209 { "throttle", RCTL_ACTION_THROTTLE },
210 { NULL, -1 }};
211
212 static void rctl_init(void);
213 SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);
214
215 static uma_zone_t rctl_rule_zone;
216 static uma_zone_t rctl_rule_link_zone;
217
218 static int rctl_rule_fully_specified(const struct rctl_rule *rule);
219 static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule);
220
221 static MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits");
222
223 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS)
224 {
225 int error, val = rctl_throttle_min;
226
227 error = sysctl_handle_int(oidp, &val, 0, req);
228 if (error || !req->newptr)
229 return (error);
230 if (val < 1 || val > rctl_throttle_max)
231 return (EINVAL);
232
233 RACCT_LOCK();
234 rctl_throttle_min = val;
235 RACCT_UNLOCK();
236
237 return (0);
238 }
239
240 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS)
241 {
242 int error, val = rctl_throttle_max;
243
244 error = sysctl_handle_int(oidp, &val, 0, req);
245 if (error || !req->newptr)
246 return (error);
247 if (val < rctl_throttle_min)
248 return (EINVAL);
249
250 RACCT_LOCK();
251 rctl_throttle_max = val;
252 RACCT_UNLOCK();
253
254 return (0);
255 }
256
257 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS)
258 {
259 int error, val = rctl_throttle_pct;
260
261 error = sysctl_handle_int(oidp, &val, 0, req);
262 if (error || !req->newptr)
263 return (error);
264 if (val < 0)
265 return (EINVAL);
266
267 RACCT_LOCK();
268 rctl_throttle_pct = val;
269 RACCT_UNLOCK();
270
271 return (0);
272 }
273
274 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS)
275 {
276 int error, val = rctl_throttle_pct2;
277
278 error = sysctl_handle_int(oidp, &val, 0, req);
279 if (error || !req->newptr)
280 return (error);
281 if (val < 0)
282 return (EINVAL);
283
284 RACCT_LOCK();
285 rctl_throttle_pct2 = val;
286 RACCT_UNLOCK();
287
288 return (0);
289 }
290
291 static const char *
292 rctl_subject_type_name(int subject)
293 {
294 int i;
295
296 for (i = 0; subjectnames[i].d_name != NULL; i++) {
297 if (subjectnames[i].d_value == subject)
298 return (subjectnames[i].d_name);
299 }
300
301 panic("rctl_subject_type_name: unknown subject type %d", subject);
302 }
303
304 static const char *
305 rctl_action_name(int action)
306 {
307 int i;
308
309 for (i = 0; actionnames[i].d_name != NULL; i++) {
310 if (actionnames[i].d_value == action)
311 return (actionnames[i].d_name);
312 }
313
314 panic("rctl_action_name: unknown action %d", action);
315 }
316
317 const char *
318 rctl_resource_name(int resource)
319 {
320 int i;
321
322 for (i = 0; resourcenames[i].d_name != NULL; i++) {
323 if (resourcenames[i].d_value == resource)
324 return (resourcenames[i].d_name);
325 }
326
327 panic("rctl_resource_name: unknown resource %d", resource);
328 }
329
330 static struct racct *
331 rctl_proc_rule_to_racct(const struct proc *p, const struct rctl_rule *rule)
332 {
333 struct ucred *cred = p->p_ucred;
334
335 ASSERT_RACCT_ENABLED();
336 RACCT_LOCK_ASSERT();
337
338 switch (rule->rr_per) {
339 case RCTL_SUBJECT_TYPE_PROCESS:
340 return (p->p_racct);
341 case RCTL_SUBJECT_TYPE_USER:
342 return (cred->cr_ruidinfo->ui_racct);
343 case RCTL_SUBJECT_TYPE_LOGINCLASS:
344 return (cred->cr_loginclass->lc_racct);
345 case RCTL_SUBJECT_TYPE_JAIL:
346 return (cred->cr_prison->pr_prison_racct->prr_racct);
347 default:
348 panic("%s: unknown per %d", __func__, rule->rr_per);
349 }
350 }
351
352 /*
353 * Return the amount of resource that can be allocated by 'p' before
354 * hitting 'rule'.
355 */
356 static int64_t
357 rctl_available_resource(const struct proc *p, const struct rctl_rule *rule)
358 {
359 const struct racct *racct;
360 int64_t available;
361
362 ASSERT_RACCT_ENABLED();
363 RACCT_LOCK_ASSERT();
364
365 racct = rctl_proc_rule_to_racct(p, rule);
366 available = rule->rr_amount - racct->r_resources[rule->rr_resource];
367
368 return (available);
369 }
370
371 /*
372 * Called every second for proc, uidinfo, loginclass, and jail containers.
373 * If the limit isn't exceeded, it decreases the usage amount to zero.
374 * Otherwise, it decreases it by the value of the limit. This way
375 * resource consumption exceeding the limit "carries over" to the next
376 * period.
377 */
378 void
379 rctl_throttle_decay(struct racct *racct, int resource)
380 {
381 struct rctl_rule *rule;
382 struct rctl_rule_link *link;
383 int64_t minavailable;
384
385 ASSERT_RACCT_ENABLED();
386 RACCT_LOCK_ASSERT();
387
388 minavailable = INT64_MAX;
389
390 LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
391 rule = link->rrl_rule;
392
393 if (rule->rr_resource != resource)
394 continue;
395 if (rule->rr_action != RCTL_ACTION_THROTTLE)
396 continue;
397
398 if (rule->rr_amount < minavailable)
399 minavailable = rule->rr_amount;
400 }
401
402 if (racct->r_resources[resource] < minavailable) {
403 racct->r_resources[resource] = 0;
404 } else {
405 /*
406 * Cap utilization counter at ten times the limit. Otherwise,
407 * if we changed the rule lowering the allowed amount, it could
408 * take unreasonably long time for the accumulated resource
409 * usage to drop.
410 */
411 if (racct->r_resources[resource] > minavailable * 10)
412 racct->r_resources[resource] = minavailable * 10;
413
414 racct->r_resources[resource] -= minavailable;
415 }
416 }
417
418 /*
419 * Special version of rctl_get_available() for the %CPU resource.
420 * We slightly cheat here and return less than we normally would.
421 */
422 int64_t
423 rctl_pcpu_available(const struct proc *p) {
424 struct rctl_rule *rule;
425 struct rctl_rule_link *link;
426 int64_t available, minavailable, limit;
427
428 ASSERT_RACCT_ENABLED();
429 RACCT_LOCK_ASSERT();
430
431 minavailable = INT64_MAX;
432 limit = 0;
433
434 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
435 rule = link->rrl_rule;
436 if (rule->rr_resource != RACCT_PCTCPU)
437 continue;
438 if (rule->rr_action != RCTL_ACTION_DENY)
439 continue;
440 available = rctl_available_resource(p, rule);
441 if (available < minavailable) {
442 minavailable = available;
443 limit = rule->rr_amount;
444 }
445 }
446
447 /*
448 * Return slightly less than actual value of the available
449 * %cpu resource. This makes %cpu throttling more aggressive
450 * and lets us act sooner than the limits are already exceeded.
451 */
452 if (limit != 0) {
453 if (limit > 2 * RCTL_PCPU_SHIFT)
454 minavailable -= RCTL_PCPU_SHIFT;
455 else
456 minavailable -= (limit / 2);
457 }
458
459 return (minavailable);
460 }
461
/*
 * Saturating unsigned addition: returns a + b, or UINT64_MAX if the sum
 * does not fit in 64 bits.
 */
static uint64_t
xadd(uint64_t a, uint64_t b)
{
	uint64_t sum = a + b;

	/* A wrapped sum is smaller than at least one of the operands. */
	return ((sum >= a && sum >= b) ? sum : UINT64_MAX);
}
477
/*
 * Saturating unsigned multiplication: returns a * b, or UINT64_MAX if
 * the product does not fit in 64 bits.
 */
static uint64_t
xmul(uint64_t a, uint64_t b)
{

	/* With b == 0 the product is 0 and the division must be skipped. */
	if (b == 0 || a <= UINT64_MAX / b)
		return (a * b);

	return (UINT64_MAX);
}
487
488 /*
489 * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition
490 * to what it keeps allocated now. Returns non-zero if the allocation should
491 * be denied, 0 otherwise.
492 */
493 int
494 rctl_enforce(struct proc *p, int resource, uint64_t amount)
495 {
496 static struct timeval log_lasttime, devctl_lasttime;
497 static int log_curtime = 0, devctl_curtime = 0;
498 struct rctl_rule *rule;
499 struct rctl_rule_link *link;
500 struct sbuf sb;
501 char *buf;
502 int64_t available;
503 uint64_t sleep_ms, sleep_ratio;
504 int should_deny = 0;
505
506 ASSERT_RACCT_ENABLED();
507 RACCT_LOCK_ASSERT();
508
509 /*
510 * There may be more than one matching rule; go through all of them.
511 * Denial should be done last, after logging and sending signals.
512 */
513 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
514 rule = link->rrl_rule;
515 if (rule->rr_resource != resource)
516 continue;
517
518 available = rctl_available_resource(p, rule);
519 if (available >= (int64_t)amount) {
520 link->rrl_exceeded = 0;
521 continue;
522 }
523
524 switch (rule->rr_action) {
525 case RCTL_ACTION_DENY:
526 should_deny = 1;
527 continue;
528 case RCTL_ACTION_LOG:
529 /*
530 * If rrl_exceeded != 0, it means we've already
531 * logged a warning for this process.
532 */
533 if (link->rrl_exceeded != 0)
534 continue;
535
536 /*
537 * If the process state is not fully initialized yet,
538 * we can't access most of the required fields, e.g.
539 * p->p_comm. This happens when called from fork1().
540 * Ignore this rule for now; it will be processed just
541 * after fork, when called from racct_proc_fork_done().
542 */
543 if (p->p_state != PRS_NORMAL)
544 continue;
545
546 if (!ppsratecheck(&log_lasttime, &log_curtime,
547 rctl_log_rate_limit))
548 continue;
549
550 buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
551 if (buf == NULL) {
552 printf("rctl_enforce: out of memory\n");
553 continue;
554 }
555 sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
556 rctl_rule_to_sbuf(&sb, rule);
557 sbuf_finish(&sb);
558 printf("rctl: rule \"%s\" matched by pid %d "
559 "(%s), uid %d, jail %s\n", sbuf_data(&sb),
560 p->p_pid, p->p_comm, p->p_ucred->cr_uid,
561 p->p_ucred->cr_prison->pr_prison_racct->prr_name);
562 sbuf_delete(&sb);
563 free(buf, M_RCTL);
564 link->rrl_exceeded = 1;
565 continue;
566 case RCTL_ACTION_DEVCTL:
567 if (link->rrl_exceeded != 0)
568 continue;
569
570 if (p->p_state != PRS_NORMAL)
571 continue;
572
573 if (!ppsratecheck(&devctl_lasttime, &devctl_curtime,
574 rctl_devctl_rate_limit))
575 continue;
576
577 buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
578 if (buf == NULL) {
579 printf("rctl_enforce: out of memory\n");
580 continue;
581 }
582 sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
583 sbuf_printf(&sb, "rule=");
584 rctl_rule_to_sbuf(&sb, rule);
585 sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
586 p->p_pid, p->p_ucred->cr_ruid,
587 p->p_ucred->cr_prison->pr_prison_racct->prr_name);
588 sbuf_finish(&sb);
589 devctl_notify_f("RCTL", "rule", "matched",
590 sbuf_data(&sb), M_NOWAIT);
591 sbuf_delete(&sb);
592 free(buf, M_RCTL);
593 link->rrl_exceeded = 1;
594 continue;
595 case RCTL_ACTION_THROTTLE:
596 if (p->p_state != PRS_NORMAL)
597 continue;
598
599 /*
600 * Make the process sleep for a fraction of second
601 * proportional to the ratio of process' resource
602 * utilization compared to the limit. The point is
603 * to penalize resource hogs: processes that consume
604 * more of the available resources sleep for longer.
605 *
606 * We're trying to defer division until the very end,
607 * to minimize the rounding effects. The following
608 * calculation could have been written in a clearer
609 * way like this:
610 *
611 * sleep_ms = hz * p->p_racct->r_resources[resource] /
612 * rule->rr_amount;
613 * sleep_ms *= rctl_throttle_pct / 100;
614 * if (sleep_ms < rctl_throttle_min)
615 * sleep_ms = rctl_throttle_min;
616 *
617 */
618 sleep_ms = xmul(hz, p->p_racct->r_resources[resource]);
619 sleep_ms = xmul(sleep_ms, rctl_throttle_pct) / 100;
620 if (sleep_ms < rctl_throttle_min * rule->rr_amount)
621 sleep_ms = rctl_throttle_min * rule->rr_amount;
622
623 /*
624 * Multiply that by the ratio of the resource
625 * consumption for the container compared to the limit,
626 * squared. In other words, a process in a container
627 * that is two times over the limit will be throttled
628 * four times as much for hitting the same rule. The
629 * point is to penalize processes more if the container
630 * itself (eg certain UID or jail) is above the limit.
631 */
632 if (available < 0)
633 sleep_ratio = -available / rule->rr_amount;
634 else
635 sleep_ratio = 0;
636 sleep_ratio = xmul(sleep_ratio, sleep_ratio);
637 sleep_ratio = xmul(sleep_ratio, rctl_throttle_pct2) / 100;
638 sleep_ms = xadd(sleep_ms, xmul(sleep_ms, sleep_ratio));
639
640 /*
641 * Finally the division.
642 */
643 sleep_ms /= rule->rr_amount;
644
645 if (sleep_ms > rctl_throttle_max)
646 sleep_ms = rctl_throttle_max;
647 #if 0
648 printf("%s: pid %d (%s), %jd of %jd, will sleep for %ju ms (ratio %ju, available %jd)\n",
649 __func__, p->p_pid, p->p_comm,
650 p->p_racct->r_resources[resource],
651 rule->rr_amount, (uintmax_t)sleep_ms,
652 (uintmax_t)sleep_ratio, (intmax_t)available);
653 #endif
654
655 KASSERT(sleep_ms >= rctl_throttle_min, ("%s: %ju < %d\n",
656 __func__, (uintmax_t)sleep_ms, rctl_throttle_min));
657 racct_proc_throttle(p, sleep_ms);
658 continue;
659 default:
660 if (link->rrl_exceeded != 0)
661 continue;
662
663 if (p->p_state != PRS_NORMAL)
664 continue;
665
666 KASSERT(rule->rr_action > 0 &&
667 rule->rr_action <= RCTL_ACTION_SIGNAL_MAX,
668 ("rctl_enforce: unknown action %d",
669 rule->rr_action));
670
671 /*
672 * We're using the fact that RCTL_ACTION_SIG* values
673 * are equal to their counterparts from sys/signal.h.
674 */
675 kern_psignal(p, rule->rr_action);
676 link->rrl_exceeded = 1;
677 continue;
678 }
679 }
680
681 if (should_deny) {
682 /*
683 * Return fake error code; the caller should change it
684 * into one proper for the situation - EFSIZ, ENOMEM etc.
685 */
686 return (EDOOFUS);
687 }
688
689 return (0);
690 }
691
692 uint64_t
693 rctl_get_limit(struct proc *p, int resource)
694 {
695 struct rctl_rule *rule;
696 struct rctl_rule_link *link;
697 uint64_t amount = UINT64_MAX;
698
699 ASSERT_RACCT_ENABLED();
700 RACCT_LOCK_ASSERT();
701
702 /*
703 * There may be more than one matching rule; go through all of them.
704 * Denial should be done last, after logging and sending signals.
705 */
706 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
707 rule = link->rrl_rule;
708 if (rule->rr_resource != resource)
709 continue;
710 if (rule->rr_action != RCTL_ACTION_DENY)
711 continue;
712 if (rule->rr_amount < amount)
713 amount = rule->rr_amount;
714 }
715
716 return (amount);
717 }
718
719 uint64_t
720 rctl_get_available(struct proc *p, int resource)
721 {
722 struct rctl_rule *rule;
723 struct rctl_rule_link *link;
724 int64_t available, minavailable, allocated;
725
726 minavailable = INT64_MAX;
727
728 ASSERT_RACCT_ENABLED();
729 RACCT_LOCK_ASSERT();
730
731 /*
732 * There may be more than one matching rule; go through all of them.
733 * Denial should be done last, after logging and sending signals.
734 */
735 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
736 rule = link->rrl_rule;
737 if (rule->rr_resource != resource)
738 continue;
739 if (rule->rr_action != RCTL_ACTION_DENY)
740 continue;
741 available = rctl_available_resource(p, rule);
742 if (available < minavailable)
743 minavailable = available;
744 }
745
746 /*
747 * XXX: Think about this _hard_.
748 */
749 allocated = p->p_racct->r_resources[resource];
750 if (minavailable < INT64_MAX - allocated)
751 minavailable += allocated;
752 if (minavailable < 0)
753 minavailable = 0;
754
755 return (minavailable);
756 }
757
758 static int
759 rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
760 {
761
762 ASSERT_RACCT_ENABLED();
763
764 if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
765 if (rule->rr_subject_type != filter->rr_subject_type)
766 return (0);
767
768 switch (filter->rr_subject_type) {
769 case RCTL_SUBJECT_TYPE_PROCESS:
770 if (filter->rr_subject.rs_proc != NULL &&
771 rule->rr_subject.rs_proc !=
772 filter->rr_subject.rs_proc)
773 return (0);
774 break;
775 case RCTL_SUBJECT_TYPE_USER:
776 if (filter->rr_subject.rs_uip != NULL &&
777 rule->rr_subject.rs_uip !=
778 filter->rr_subject.rs_uip)
779 return (0);
780 break;
781 case RCTL_SUBJECT_TYPE_LOGINCLASS:
782 if (filter->rr_subject.rs_loginclass != NULL &&
783 rule->rr_subject.rs_loginclass !=
784 filter->rr_subject.rs_loginclass)
785 return (0);
786 break;
787 case RCTL_SUBJECT_TYPE_JAIL:
788 if (filter->rr_subject.rs_prison_racct != NULL &&
789 rule->rr_subject.rs_prison_racct !=
790 filter->rr_subject.rs_prison_racct)
791 return (0);
792 break;
793 default:
794 panic("rctl_rule_matches: unknown subject type %d",
795 filter->rr_subject_type);
796 }
797 }
798
799 if (filter->rr_resource != RACCT_UNDEFINED) {
800 if (rule->rr_resource != filter->rr_resource)
801 return (0);
802 }
803
804 if (filter->rr_action != RCTL_ACTION_UNDEFINED) {
805 if (rule->rr_action != filter->rr_action)
806 return (0);
807 }
808
809 if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) {
810 if (rule->rr_amount != filter->rr_amount)
811 return (0);
812 }
813
814 if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) {
815 if (rule->rr_per != filter->rr_per)
816 return (0);
817 }
818
819 return (1);
820 }
821
822 static int
823 str2value(const char *str, int *value, struct dict *table)
824 {
825 int i;
826
827 if (value == NULL)
828 return (EINVAL);
829
830 for (i = 0; table[i].d_name != NULL; i++) {
831 if (strcasecmp(table[i].d_name, str) == 0) {
832 *value = table[i].d_value;
833 return (0);
834 }
835 }
836
837 return (EINVAL);
838 }
839
840 static int
841 str2id(const char *str, id_t *value)
842 {
843 char *end;
844
845 if (str == NULL)
846 return (EINVAL);
847
848 *value = strtoul(str, &end, 10);
849 if ((size_t)(end - str) != strlen(str))
850 return (EINVAL);
851
852 return (0);
853 }
854
/*
 * Parse the decimal string 'str' into '*value'.
 *
 * Returns 0 on success, EINVAL if 'str' is NULL, contains no digits
 * (e.g. is empty), or is followed by anything other than the number,
 * and ERANGE if the parsed value is negative once stored as int64_t.
 * '*value' is written even when a parse error is returned.
 */
static int
str2int64(const char *str, int64_t *value)
{
	char *end;

	if (str == NULL)
		return (EINVAL);

	*value = strtoul(str, &end, 10);
	/* Nothing was converted, or there is trailing garbage. */
	if (end == str || *end != '\0')
		return (EINVAL);

	if (*value < 0)
		return (ERANGE);

	return (0);
}
872
873 /*
874 * Connect the rule to the racct, increasing refcount for the rule.
875 */
876 static void
877 rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule)
878 {
879 struct rctl_rule_link *link;
880
881 ASSERT_RACCT_ENABLED();
882 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
883
884 rctl_rule_acquire(rule);
885 link = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
886 link->rrl_rule = rule;
887 link->rrl_exceeded = 0;
888
889 RACCT_LOCK();
890 LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
891 RACCT_UNLOCK();
892 }
893
894 static int
895 rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule)
896 {
897 struct rctl_rule_link *link;
898
899 ASSERT_RACCT_ENABLED();
900 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
901 RACCT_LOCK_ASSERT();
902
903 link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT);
904 if (link == NULL)
905 return (ENOMEM);
906 rctl_rule_acquire(rule);
907 link->rrl_rule = rule;
908 link->rrl_exceeded = 0;
909
910 LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
911
912 return (0);
913 }
914
915 /*
916 * Remove limits for a rules matching the filter and release
917 * the refcounts for the rules, possibly freeing them. Returns
918 * the number of limit structures removed.
919 */
920 static int
921 rctl_racct_remove_rules(struct racct *racct,
922 const struct rctl_rule *filter)
923 {
924 struct rctl_rule_link *link, *linktmp;
925 int removed = 0;
926
927 ASSERT_RACCT_ENABLED();
928 RACCT_LOCK_ASSERT();
929
930 LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
931 if (!rctl_rule_matches(link->rrl_rule, filter))
932 continue;
933
934 LIST_REMOVE(link, rrl_next);
935 rctl_rule_release(link->rrl_rule);
936 uma_zfree(rctl_rule_link_zone, link);
937 removed++;
938 }
939 return (removed);
940 }
941
942 static void
943 rctl_rule_acquire_subject(struct rctl_rule *rule)
944 {
945
946 ASSERT_RACCT_ENABLED();
947
948 switch (rule->rr_subject_type) {
949 case RCTL_SUBJECT_TYPE_UNDEFINED:
950 case RCTL_SUBJECT_TYPE_PROCESS:
951 break;
952 case RCTL_SUBJECT_TYPE_JAIL:
953 if (rule->rr_subject.rs_prison_racct != NULL)
954 prison_racct_hold(rule->rr_subject.rs_prison_racct);
955 break;
956 case RCTL_SUBJECT_TYPE_USER:
957 if (rule->rr_subject.rs_uip != NULL)
958 uihold(rule->rr_subject.rs_uip);
959 break;
960 case RCTL_SUBJECT_TYPE_LOGINCLASS:
961 if (rule->rr_subject.rs_loginclass != NULL)
962 loginclass_hold(rule->rr_subject.rs_loginclass);
963 break;
964 default:
965 panic("rctl_rule_acquire_subject: unknown subject type %d",
966 rule->rr_subject_type);
967 }
968 }
969
970 static void
971 rctl_rule_release_subject(struct rctl_rule *rule)
972 {
973
974 ASSERT_RACCT_ENABLED();
975
976 switch (rule->rr_subject_type) {
977 case RCTL_SUBJECT_TYPE_UNDEFINED:
978 case RCTL_SUBJECT_TYPE_PROCESS:
979 break;
980 case RCTL_SUBJECT_TYPE_JAIL:
981 if (rule->rr_subject.rs_prison_racct != NULL)
982 prison_racct_free(rule->rr_subject.rs_prison_racct);
983 break;
984 case RCTL_SUBJECT_TYPE_USER:
985 if (rule->rr_subject.rs_uip != NULL)
986 uifree(rule->rr_subject.rs_uip);
987 break;
988 case RCTL_SUBJECT_TYPE_LOGINCLASS:
989 if (rule->rr_subject.rs_loginclass != NULL)
990 loginclass_free(rule->rr_subject.rs_loginclass);
991 break;
992 default:
993 panic("rctl_rule_release_subject: unknown subject type %d",
994 rule->rr_subject_type);
995 }
996 }
997
998 struct rctl_rule *
999 rctl_rule_alloc(int flags)
1000 {
1001 struct rctl_rule *rule;
1002
1003 ASSERT_RACCT_ENABLED();
1004
1005 rule = uma_zalloc(rctl_rule_zone, flags);
1006 if (rule == NULL)
1007 return (NULL);
1008 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
1009 rule->rr_subject.rs_proc = NULL;
1010 rule->rr_subject.rs_uip = NULL;
1011 rule->rr_subject.rs_loginclass = NULL;
1012 rule->rr_subject.rs_prison_racct = NULL;
1013 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
1014 rule->rr_resource = RACCT_UNDEFINED;
1015 rule->rr_action = RCTL_ACTION_UNDEFINED;
1016 rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
1017 refcount_init(&rule->rr_refcount, 1);
1018
1019 return (rule);
1020 }
1021
1022 struct rctl_rule *
1023 rctl_rule_duplicate(const struct rctl_rule *rule, int flags)
1024 {
1025 struct rctl_rule *copy;
1026
1027 ASSERT_RACCT_ENABLED();
1028
1029 copy = uma_zalloc(rctl_rule_zone, flags);
1030 if (copy == NULL)
1031 return (NULL);
1032 copy->rr_subject_type = rule->rr_subject_type;
1033 copy->rr_subject.rs_proc = rule->rr_subject.rs_proc;
1034 copy->rr_subject.rs_uip = rule->rr_subject.rs_uip;
1035 copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass;
1036 copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct;
1037 copy->rr_per = rule->rr_per;
1038 copy->rr_resource = rule->rr_resource;
1039 copy->rr_action = rule->rr_action;
1040 copy->rr_amount = rule->rr_amount;
1041 refcount_init(©->rr_refcount, 1);
1042 rctl_rule_acquire_subject(copy);
1043
1044 return (copy);
1045 }
1046
1047 void
1048 rctl_rule_acquire(struct rctl_rule *rule)
1049 {
1050
1051 ASSERT_RACCT_ENABLED();
1052 KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
1053
1054 refcount_acquire(&rule->rr_refcount);
1055 }
1056
1057 static void
1058 rctl_rule_free(void *context, int pending)
1059 {
1060 struct rctl_rule *rule;
1061
1062 rule = (struct rctl_rule *)context;
1063
1064 ASSERT_RACCT_ENABLED();
1065 KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
1066
1067 /*
1068 * We don't need locking here; rule is guaranteed to be inaccessible.
1069 */
1070
1071 rctl_rule_release_subject(rule);
1072 uma_zfree(rctl_rule_zone, rule);
1073 }
1074
1075 void
1076 rctl_rule_release(struct rctl_rule *rule)
1077 {
1078
1079 ASSERT_RACCT_ENABLED();
1080 KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
1081
1082 if (refcount_release(&rule->rr_refcount)) {
1083 /*
1084 * rctl_rule_release() is often called when iterating
1085 * over all the uidinfo structures in the system,
1086 * holding uihashtbl_lock. Since rctl_rule_free()
1087 * might end up calling uifree(), this would lead
1088 * to lock recursion. Use taskqueue to avoid this.
1089 */
1090 TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
1091 taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
1092 }
1093 }
1094
1095 static int
1096 rctl_rule_fully_specified(const struct rctl_rule *rule)
1097 {
1098
1099 ASSERT_RACCT_ENABLED();
1100
1101 switch (rule->rr_subject_type) {
1102 case RCTL_SUBJECT_TYPE_UNDEFINED:
1103 return (0);
1104 case RCTL_SUBJECT_TYPE_PROCESS:
1105 if (rule->rr_subject.rs_proc == NULL)
1106 return (0);
1107 break;
1108 case RCTL_SUBJECT_TYPE_USER:
1109 if (rule->rr_subject.rs_uip == NULL)
1110 return (0);
1111 break;
1112 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1113 if (rule->rr_subject.rs_loginclass == NULL)
1114 return (0);
1115 break;
1116 case RCTL_SUBJECT_TYPE_JAIL:
1117 if (rule->rr_subject.rs_prison_racct == NULL)
1118 return (0);
1119 break;
1120 default:
1121 panic("rctl_rule_fully_specified: unknown subject type %d",
1122 rule->rr_subject_type);
1123 }
1124 if (rule->rr_resource == RACCT_UNDEFINED)
1125 return (0);
1126 if (rule->rr_action == RCTL_ACTION_UNDEFINED)
1127 return (0);
1128 if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
1129 return (0);
1130 if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
1131 return (0);
1132
1133 return (1);
1134 }
1135
1136 static int
1137 rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
1138 {
1139 struct rctl_rule *rule;
1140 char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
1141 *amountstr, *perstr;
1142 id_t id;
1143 int error = 0;
1144
1145 ASSERT_RACCT_ENABLED();
1146
1147 rule = rctl_rule_alloc(M_WAITOK);
1148
1149 subjectstr = strsep(&rulestr, ":");
1150 subject_idstr = strsep(&rulestr, ":");
1151 resourcestr = strsep(&rulestr, ":");
1152 actionstr = strsep(&rulestr, "=/");
1153 amountstr = strsep(&rulestr, "/");
1154 perstr = rulestr;
1155
1156 if (subjectstr == NULL || subjectstr[0] == '\0')
1157 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
1158 else {
1159 error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
1160 if (error != 0)
1161 goto out;
1162 }
1163
1164 if (subject_idstr == NULL || subject_idstr[0] == '\0') {
1165 rule->rr_subject.rs_proc = NULL;
1166 rule->rr_subject.rs_uip = NULL;
1167 rule->rr_subject.rs_loginclass = NULL;
1168 rule->rr_subject.rs_prison_racct = NULL;
1169 } else {
1170 switch (rule->rr_subject_type) {
1171 case RCTL_SUBJECT_TYPE_UNDEFINED:
1172 error = EINVAL;
1173 goto out;
1174 case RCTL_SUBJECT_TYPE_PROCESS:
1175 error = str2id(subject_idstr, &id);
1176 if (error != 0)
1177 goto out;
1178 sx_assert(&allproc_lock, SA_LOCKED);
1179 rule->rr_subject.rs_proc = pfind(id);
1180 if (rule->rr_subject.rs_proc == NULL) {
1181 error = ESRCH;
1182 goto out;
1183 }
1184 PROC_UNLOCK(rule->rr_subject.rs_proc);
1185 break;
1186 case RCTL_SUBJECT_TYPE_USER:
1187 error = str2id(subject_idstr, &id);
1188 if (error != 0)
1189 goto out;
1190 rule->rr_subject.rs_uip = uifind(id);
1191 break;
1192 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1193 rule->rr_subject.rs_loginclass =
1194 loginclass_find(subject_idstr);
1195 if (rule->rr_subject.rs_loginclass == NULL) {
1196 error = ENAMETOOLONG;
1197 goto out;
1198 }
1199 break;
1200 case RCTL_SUBJECT_TYPE_JAIL:
1201 rule->rr_subject.rs_prison_racct =
1202 prison_racct_find(subject_idstr);
1203 if (rule->rr_subject.rs_prison_racct == NULL) {
1204 error = ENAMETOOLONG;
1205 goto out;
1206 }
1207 break;
1208 default:
1209 panic("rctl_string_to_rule: unknown subject type %d",
1210 rule->rr_subject_type);
1211 }
1212 }
1213
1214 if (resourcestr == NULL || resourcestr[0] == '\0')
1215 rule->rr_resource = RACCT_UNDEFINED;
1216 else {
1217 error = str2value(resourcestr, &rule->rr_resource,
1218 resourcenames);
1219 if (error != 0)
1220 goto out;
1221 }
1222
1223 if (actionstr == NULL || actionstr[0] == '\0')
1224 rule->rr_action = RCTL_ACTION_UNDEFINED;
1225 else {
1226 error = str2value(actionstr, &rule->rr_action, actionnames);
1227 if (error != 0)
1228 goto out;
1229 }
1230
1231 if (amountstr == NULL || amountstr[0] == '\0')
1232 rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
1233 else {
1234 error = str2int64(amountstr, &rule->rr_amount);
1235 if (error != 0)
1236 goto out;
1237 if (RACCT_IS_IN_MILLIONS(rule->rr_resource)) {
1238 if (rule->rr_amount > INT64_MAX / 1000000) {
1239 error = ERANGE;
1240 goto out;
1241 }
1242 rule->rr_amount *= 1000000;
1243 }
1244 }
1245
1246 if (perstr == NULL || perstr[0] == '\0')
1247 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
1248 else {
1249 error = str2value(perstr, &rule->rr_per, subjectnames);
1250 if (error != 0)
1251 goto out;
1252 }
1253
1254 out:
1255 if (error == 0)
1256 *rulep = rule;
1257 else
1258 rctl_rule_release(rule);
1259
1260 return (error);
1261 }
1262
1263 /*
1264 * Link a rule with all the subjects it applies to.
1265 */
1266 int
1267 rctl_rule_add(struct rctl_rule *rule)
1268 {
1269 struct proc *p;
1270 struct ucred *cred;
1271 struct uidinfo *uip;
1272 struct prison *pr;
1273 struct prison_racct *prr;
1274 struct loginclass *lc;
1275 struct rctl_rule *rule2;
1276 int match;
1277
1278 ASSERT_RACCT_ENABLED();
1279 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
1280
1281 /*
1282 * Some rules just don't make sense, like "deny" rule for an undeniable
1283 * resource. The exception are the RSS and %CPU resources - they are
1284 * not deniable in the racct sense, but the limit is enforced in
1285 * a different way.
1286 */
1287 if (rule->rr_action == RCTL_ACTION_DENY &&
1288 !RACCT_IS_DENIABLE(rule->rr_resource) &&
1289 rule->rr_resource != RACCT_RSS &&
1290 rule->rr_resource != RACCT_PCTCPU) {
1291 return (EOPNOTSUPP);
1292 }
1293
1294 if (rule->rr_action == RCTL_ACTION_THROTTLE &&
1295 !RACCT_IS_DECAYING(rule->rr_resource)) {
1296 return (EOPNOTSUPP);
1297 }
1298
1299 if (rule->rr_action == RCTL_ACTION_THROTTLE &&
1300 rule->rr_resource == RACCT_PCTCPU) {
1301 return (EOPNOTSUPP);
1302 }
1303
1304 if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS &&
1305 RACCT_IS_SLOPPY(rule->rr_resource)) {
1306 return (EOPNOTSUPP);
1307 }
1308
1309 /*
1310 * Make sure there are no duplicated rules. Also, for the "deny"
1311 * rules, remove ones differing only by "amount".
1312 */
1313 if (rule->rr_action == RCTL_ACTION_DENY) {
1314 rule2 = rctl_rule_duplicate(rule, M_WAITOK);
1315 rule2->rr_amount = RCTL_AMOUNT_UNDEFINED;
1316 rctl_rule_remove(rule2);
1317 rctl_rule_release(rule2);
1318 } else
1319 rctl_rule_remove(rule);
1320
1321 switch (rule->rr_subject_type) {
1322 case RCTL_SUBJECT_TYPE_PROCESS:
1323 p = rule->rr_subject.rs_proc;
1324 KASSERT(p != NULL, ("rctl_rule_add: NULL proc"));
1325
1326 rctl_racct_add_rule(p->p_racct, rule);
1327 /*
1328 * In case of per-process rule, we don't have anything more
1329 * to do.
1330 */
1331 return (0);
1332
1333 case RCTL_SUBJECT_TYPE_USER:
1334 uip = rule->rr_subject.rs_uip;
1335 KASSERT(uip != NULL, ("rctl_rule_add: NULL uip"));
1336 rctl_racct_add_rule(uip->ui_racct, rule);
1337 break;
1338
1339 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1340 lc = rule->rr_subject.rs_loginclass;
1341 KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass"));
1342 rctl_racct_add_rule(lc->lc_racct, rule);
1343 break;
1344
1345 case RCTL_SUBJECT_TYPE_JAIL:
1346 prr = rule->rr_subject.rs_prison_racct;
1347 KASSERT(prr != NULL, ("rctl_rule_add: NULL pr"));
1348 rctl_racct_add_rule(prr->prr_racct, rule);
1349 break;
1350
1351 default:
1352 panic("rctl_rule_add: unknown subject type %d",
1353 rule->rr_subject_type);
1354 }
1355
1356 /*
1357 * Now go through all the processes and add the new rule to the ones
1358 * it applies to.
1359 */
1360 sx_assert(&allproc_lock, SA_LOCKED);
1361 FOREACH_PROC_IN_SYSTEM(p) {
1362 cred = p->p_ucred;
1363 switch (rule->rr_subject_type) {
1364 case RCTL_SUBJECT_TYPE_USER:
1365 if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
1366 cred->cr_ruidinfo == rule->rr_subject.rs_uip)
1367 break;
1368 continue;
1369 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1370 if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
1371 break;
1372 continue;
1373 case RCTL_SUBJECT_TYPE_JAIL:
1374 match = 0;
1375 for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
1376 if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) {
1377 match = 1;
1378 break;
1379 }
1380 }
1381 if (match)
1382 break;
1383 continue;
1384 default:
1385 panic("rctl_rule_add: unknown subject type %d",
1386 rule->rr_subject_type);
1387 }
1388
1389 rctl_racct_add_rule(p->p_racct, rule);
1390 }
1391
1392 return (0);
1393 }
1394
/*
 * Hook handed to the *_racct_foreach() iterators together with
 * rctl_rule_post_callback(); acquires the racct lock.
 */
static void
rctl_rule_pre_callback(void)
{

	RACCT_LOCK();
}
1401
/*
 * Hook handed to the *_racct_foreach() iterators together with
 * rctl_rule_pre_callback(); drops the racct lock.
 */
static void
rctl_rule_post_callback(void)
{

	RACCT_UNLOCK();
}
1408
/*
 * Per-racct callback used by rctl_rule_remove().  'arg2' is the filter
 * rule; 'arg3' points to an int that accumulates the value returned by
 * rctl_racct_remove_rules().  The racct lock must be held.
 */
static void
rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3)
{
	struct rctl_rule *filter = arg2;
	int *foundp = arg3;

	ASSERT_RACCT_ENABLED();
	RACCT_LOCK_ASSERT();

	*foundp += rctl_racct_remove_rules(racct, filter);
}
1422
1423 /*
1424 * Remove all rules that match the filter.
1425 */
1426 int
1427 rctl_rule_remove(struct rctl_rule *filter)
1428 {
1429 struct proc *p;
1430 int found = 0;
1431
1432 ASSERT_RACCT_ENABLED();
1433
1434 if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
1435 filter->rr_subject.rs_proc != NULL) {
1436 p = filter->rr_subject.rs_proc;
1437 RACCT_LOCK();
1438 found = rctl_racct_remove_rules(p->p_racct, filter);
1439 RACCT_UNLOCK();
1440 if (found)
1441 return (0);
1442 return (ESRCH);
1443 }
1444
1445 loginclass_racct_foreach(rctl_rule_remove_callback,
1446 rctl_rule_pre_callback, rctl_rule_post_callback,
1447 filter, (void *)&found);
1448 ui_racct_foreach(rctl_rule_remove_callback,
1449 rctl_rule_pre_callback, rctl_rule_post_callback,
1450 filter, (void *)&found);
1451 prison_racct_foreach(rctl_rule_remove_callback,
1452 rctl_rule_pre_callback, rctl_rule_post_callback,
1453 filter, (void *)&found);
1454
1455 sx_assert(&allproc_lock, SA_LOCKED);
1456 RACCT_LOCK();
1457 FOREACH_PROC_IN_SYSTEM(p) {
1458 found += rctl_racct_remove_rules(p->p_racct, filter);
1459 }
1460 RACCT_UNLOCK();
1461
1462 if (found)
1463 return (0);
1464 return (ESRCH);
1465 }
1466
1467 /*
1468 * Appends a rule to the sbuf.
1469 */
1470 static void
1471 rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule)
1472 {
1473 int64_t amount;
1474
1475 ASSERT_RACCT_ENABLED();
1476
1477 sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
1478
1479 switch (rule->rr_subject_type) {
1480 case RCTL_SUBJECT_TYPE_PROCESS:
1481 if (rule->rr_subject.rs_proc == NULL)
1482 sbuf_printf(sb, ":");
1483 else
1484 sbuf_printf(sb, "%d:",
1485 rule->rr_subject.rs_proc->p_pid);
1486 break;
1487 case RCTL_SUBJECT_TYPE_USER:
1488 if (rule->rr_subject.rs_uip == NULL)
1489 sbuf_printf(sb, ":");
1490 else
1491 sbuf_printf(sb, "%d:",
1492 rule->rr_subject.rs_uip->ui_uid);
1493 break;
1494 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1495 if (rule->rr_subject.rs_loginclass == NULL)
1496 sbuf_printf(sb, ":");
1497 else
1498 sbuf_printf(sb, "%s:",
1499 rule->rr_subject.rs_loginclass->lc_name);
1500 break;
1501 case RCTL_SUBJECT_TYPE_JAIL:
1502 if (rule->rr_subject.rs_prison_racct == NULL)
1503 sbuf_printf(sb, ":");
1504 else
1505 sbuf_printf(sb, "%s:",
1506 rule->rr_subject.rs_prison_racct->prr_name);
1507 break;
1508 default:
1509 panic("rctl_rule_to_sbuf: unknown subject type %d",
1510 rule->rr_subject_type);
1511 }
1512
1513 amount = rule->rr_amount;
1514 if (amount != RCTL_AMOUNT_UNDEFINED &&
1515 RACCT_IS_IN_MILLIONS(rule->rr_resource))
1516 amount /= 1000000;
1517
1518 sbuf_printf(sb, "%s:%s=%jd",
1519 rctl_resource_name(rule->rr_resource),
1520 rctl_action_name(rule->rr_action),
1521 amount);
1522
1523 if (rule->rr_per != rule->rr_subject_type)
1524 sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per));
1525 }
1526
1527 /*
1528 * Routine used by RCTL syscalls to read in input string.
1529 */
1530 static int
1531 rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen)
1532 {
1533 char *str;
1534 int error;
1535
1536 ASSERT_RACCT_ENABLED();
1537
1538 if (inbuflen <= 0)
1539 return (EINVAL);
1540 if (inbuflen > RCTL_MAX_INBUFSIZE)
1541 return (E2BIG);
1542
1543 str = malloc(inbuflen + 1, M_RCTL, M_WAITOK);
1544 error = copyinstr(inbufp, str, inbuflen, NULL);
1545 if (error != 0) {
1546 free(str, M_RCTL);
1547 return (error);
1548 }
1549
1550 *inputstr = str;
1551
1552 return (0);
1553 }
1554
1555 /*
1556 * Routine used by RCTL syscalls to write out output string.
1557 */
1558 static int
1559 rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen)
1560 {
1561 int error;
1562
1563 ASSERT_RACCT_ENABLED();
1564
1565 if (outputsbuf == NULL)
1566 return (0);
1567
1568 sbuf_finish(outputsbuf);
1569 if (outbuflen < sbuf_len(outputsbuf) + 1) {
1570 sbuf_delete(outputsbuf);
1571 return (ERANGE);
1572 }
1573 error = copyout(sbuf_data(outputsbuf), outbufp,
1574 sbuf_len(outputsbuf) + 1);
1575 sbuf_delete(outputsbuf);
1576 return (error);
1577 }
1578
1579 static struct sbuf *
1580 rctl_racct_to_sbuf(struct racct *racct, int sloppy)
1581 {
1582 struct sbuf *sb;
1583 int64_t amount;
1584 int i;
1585
1586 ASSERT_RACCT_ENABLED();
1587
1588 sb = sbuf_new_auto();
1589 for (i = 0; i <= RACCT_MAX; i++) {
1590 if (sloppy == 0 && RACCT_IS_SLOPPY(i))
1591 continue;
1592 RACCT_LOCK();
1593 amount = racct->r_resources[i];
1594 RACCT_UNLOCK();
1595 if (RACCT_IS_IN_MILLIONS(i))
1596 amount /= 1000000;
1597 sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount);
1598 }
1599 sbuf_setpos(sb, sbuf_len(sb) - 1);
1600 return (sb);
1601 }
1602
1603 int
1604 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
1605 {
1606 struct rctl_rule *filter;
1607 struct sbuf *outputsbuf = NULL;
1608 struct proc *p;
1609 struct uidinfo *uip;
1610 struct loginclass *lc;
1611 struct prison_racct *prr;
1612 char *inputstr;
1613 int error;
1614
1615 if (!racct_enable)
1616 return (ENOSYS);
1617
1618 error = priv_check(td, PRIV_RCTL_GET_RACCT);
1619 if (error != 0)
1620 return (error);
1621
1622 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1623 if (error != 0)
1624 return (error);
1625
1626 sx_slock(&allproc_lock);
1627 error = rctl_string_to_rule(inputstr, &filter);
1628 free(inputstr, M_RCTL);
1629 if (error != 0) {
1630 sx_sunlock(&allproc_lock);
1631 return (error);
1632 }
1633
1634 switch (filter->rr_subject_type) {
1635 case RCTL_SUBJECT_TYPE_PROCESS:
1636 p = filter->rr_subject.rs_proc;
1637 if (p == NULL) {
1638 error = EINVAL;
1639 goto out;
1640 }
1641 outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0);
1642 break;
1643 case RCTL_SUBJECT_TYPE_USER:
1644 uip = filter->rr_subject.rs_uip;
1645 if (uip == NULL) {
1646 error = EINVAL;
1647 goto out;
1648 }
1649 outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1);
1650 break;
1651 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1652 lc = filter->rr_subject.rs_loginclass;
1653 if (lc == NULL) {
1654 error = EINVAL;
1655 goto out;
1656 }
1657 outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1);
1658 break;
1659 case RCTL_SUBJECT_TYPE_JAIL:
1660 prr = filter->rr_subject.rs_prison_racct;
1661 if (prr == NULL) {
1662 error = EINVAL;
1663 goto out;
1664 }
1665 outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1);
1666 break;
1667 default:
1668 error = EINVAL;
1669 }
1670 out:
1671 rctl_rule_release(filter);
1672 sx_sunlock(&allproc_lock);
1673 if (error != 0)
1674 return (error);
1675
1676 error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen);
1677
1678 return (error);
1679 }
1680
1681 static void
1682 rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3)
1683 {
1684 struct rctl_rule *filter = (struct rctl_rule *)arg2;
1685 struct rctl_rule_link *link;
1686 struct sbuf *sb = (struct sbuf *)arg3;
1687
1688 ASSERT_RACCT_ENABLED();
1689 RACCT_LOCK_ASSERT();
1690
1691 LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
1692 if (!rctl_rule_matches(link->rrl_rule, filter))
1693 continue;
1694 rctl_rule_to_sbuf(sb, link->rrl_rule);
1695 sbuf_printf(sb, ",");
1696 }
1697 }
1698
1699 int
1700 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
1701 {
1702 struct sbuf *sb;
1703 struct rctl_rule *filter;
1704 struct rctl_rule_link *link;
1705 struct proc *p;
1706 char *inputstr, *buf;
1707 size_t bufsize;
1708 int error;
1709
1710 if (!racct_enable)
1711 return (ENOSYS);
1712
1713 error = priv_check(td, PRIV_RCTL_GET_RULES);
1714 if (error != 0)
1715 return (error);
1716
1717 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1718 if (error != 0)
1719 return (error);
1720
1721 sx_slock(&allproc_lock);
1722 error = rctl_string_to_rule(inputstr, &filter);
1723 free(inputstr, M_RCTL);
1724 if (error != 0) {
1725 sx_sunlock(&allproc_lock);
1726 return (error);
1727 }
1728
1729 bufsize = uap->outbuflen;
1730 if (bufsize > rctl_maxbufsize) {
1731 sx_sunlock(&allproc_lock);
1732 return (E2BIG);
1733 }
1734
1735 buf = malloc(bufsize, M_RCTL, M_WAITOK);
1736 sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1737 KASSERT(sb != NULL, ("sbuf_new failed"));
1738
1739 FOREACH_PROC_IN_SYSTEM(p) {
1740 RACCT_LOCK();
1741 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1742 /*
1743 * Non-process rules will be added to the buffer later.
1744 * Adding them here would result in duplicated output.
1745 */
1746 if (link->rrl_rule->rr_subject_type !=
1747 RCTL_SUBJECT_TYPE_PROCESS)
1748 continue;
1749 if (!rctl_rule_matches(link->rrl_rule, filter))
1750 continue;
1751 rctl_rule_to_sbuf(sb, link->rrl_rule);
1752 sbuf_printf(sb, ",");
1753 }
1754 RACCT_UNLOCK();
1755 }
1756
1757 loginclass_racct_foreach(rctl_get_rules_callback,
1758 rctl_rule_pre_callback, rctl_rule_post_callback,
1759 filter, sb);
1760 ui_racct_foreach(rctl_get_rules_callback,
1761 rctl_rule_pre_callback, rctl_rule_post_callback,
1762 filter, sb);
1763 prison_racct_foreach(rctl_get_rules_callback,
1764 rctl_rule_pre_callback, rctl_rule_post_callback,
1765 filter, sb);
1766 if (sbuf_error(sb) == ENOMEM) {
1767 error = ERANGE;
1768 goto out;
1769 }
1770
1771 /*
1772 * Remove trailing ",".
1773 */
1774 if (sbuf_len(sb) > 0)
1775 sbuf_setpos(sb, sbuf_len(sb) - 1);
1776
1777 error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1778 out:
1779 rctl_rule_release(filter);
1780 sx_sunlock(&allproc_lock);
1781 free(buf, M_RCTL);
1782 return (error);
1783 }
1784
1785 int
1786 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
1787 {
1788 struct sbuf *sb;
1789 struct rctl_rule *filter;
1790 struct rctl_rule_link *link;
1791 char *inputstr, *buf;
1792 size_t bufsize;
1793 int error;
1794
1795 if (!racct_enable)
1796 return (ENOSYS);
1797
1798 error = priv_check(td, PRIV_RCTL_GET_LIMITS);
1799 if (error != 0)
1800 return (error);
1801
1802 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1803 if (error != 0)
1804 return (error);
1805
1806 sx_slock(&allproc_lock);
1807 error = rctl_string_to_rule(inputstr, &filter);
1808 free(inputstr, M_RCTL);
1809 if (error != 0) {
1810 sx_sunlock(&allproc_lock);
1811 return (error);
1812 }
1813
1814 if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) {
1815 rctl_rule_release(filter);
1816 sx_sunlock(&allproc_lock);
1817 return (EINVAL);
1818 }
1819 if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) {
1820 rctl_rule_release(filter);
1821 sx_sunlock(&allproc_lock);
1822 return (EOPNOTSUPP);
1823 }
1824 if (filter->rr_subject.rs_proc == NULL) {
1825 rctl_rule_release(filter);
1826 sx_sunlock(&allproc_lock);
1827 return (EINVAL);
1828 }
1829
1830 bufsize = uap->outbuflen;
1831 if (bufsize > rctl_maxbufsize) {
1832 rctl_rule_release(filter);
1833 sx_sunlock(&allproc_lock);
1834 return (E2BIG);
1835 }
1836
1837 buf = malloc(bufsize, M_RCTL, M_WAITOK);
1838 sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1839 KASSERT(sb != NULL, ("sbuf_new failed"));
1840
1841 RACCT_LOCK();
1842 LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links,
1843 rrl_next) {
1844 rctl_rule_to_sbuf(sb, link->rrl_rule);
1845 sbuf_printf(sb, ",");
1846 }
1847 RACCT_UNLOCK();
1848 if (sbuf_error(sb) == ENOMEM) {
1849 error = ERANGE;
1850 sbuf_delete(sb);
1851 goto out;
1852 }
1853
1854 /*
1855 * Remove trailing ",".
1856 */
1857 if (sbuf_len(sb) > 0)
1858 sbuf_setpos(sb, sbuf_len(sb) - 1);
1859
1860 error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1861 out:
1862 rctl_rule_release(filter);
1863 sx_sunlock(&allproc_lock);
1864 free(buf, M_RCTL);
1865 return (error);
1866 }
1867
1868 int
1869 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
1870 {
1871 struct rctl_rule *rule;
1872 char *inputstr;
1873 int error;
1874
1875 if (!racct_enable)
1876 return (ENOSYS);
1877
1878 error = priv_check(td, PRIV_RCTL_ADD_RULE);
1879 if (error != 0)
1880 return (error);
1881
1882 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1883 if (error != 0)
1884 return (error);
1885
1886 sx_slock(&allproc_lock);
1887 error = rctl_string_to_rule(inputstr, &rule);
1888 free(inputstr, M_RCTL);
1889 if (error != 0) {
1890 sx_sunlock(&allproc_lock);
1891 return (error);
1892 }
1893 /*
1894 * The 'per' part of a rule is optional.
1895 */
1896 if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED &&
1897 rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED)
1898 rule->rr_per = rule->rr_subject_type;
1899
1900 if (!rctl_rule_fully_specified(rule)) {
1901 error = EINVAL;
1902 goto out;
1903 }
1904
1905 error = rctl_rule_add(rule);
1906
1907 out:
1908 rctl_rule_release(rule);
1909 sx_sunlock(&allproc_lock);
1910 return (error);
1911 }
1912
1913 int
1914 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
1915 {
1916 struct rctl_rule *filter;
1917 char *inputstr;
1918 int error;
1919
1920 if (!racct_enable)
1921 return (ENOSYS);
1922
1923 error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
1924 if (error != 0)
1925 return (error);
1926
1927 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1928 if (error != 0)
1929 return (error);
1930
1931 sx_slock(&allproc_lock);
1932 error = rctl_string_to_rule(inputstr, &filter);
1933 free(inputstr, M_RCTL);
1934 if (error != 0) {
1935 sx_sunlock(&allproc_lock);
1936 return (error);
1937 }
1938
1939 error = rctl_rule_remove(filter);
1940 rctl_rule_release(filter);
1941 sx_sunlock(&allproc_lock);
1942
1943 return (error);
1944 }
1945
1946 /*
1947 * Update RCTL rule list after credential change.
1948 */
1949 void
1950 rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred)
1951 {
1952 LIST_HEAD(, rctl_rule_link) newrules;
1953 struct rctl_rule_link *link, *newlink;
1954 struct uidinfo *newuip;
1955 struct loginclass *newlc;
1956 struct prison_racct *newprr;
1957 int rulecnt, i;
1958
1959 if (!racct_enable)
1960 return;
1961
1962 PROC_LOCK_ASSERT(p, MA_NOTOWNED);
1963
1964 newuip = newcred->cr_ruidinfo;
1965 newlc = newcred->cr_loginclass;
1966 newprr = newcred->cr_prison->pr_prison_racct;
1967
1968 LIST_INIT(&newrules);
1969
1970 again:
1971 /*
1972 * First, count the rules that apply to the process with new
1973 * credentials.
1974 */
1975 rulecnt = 0;
1976 RACCT_LOCK();
1977 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1978 if (link->rrl_rule->rr_subject_type ==
1979 RCTL_SUBJECT_TYPE_PROCESS)
1980 rulecnt++;
1981 }
1982 LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next)
1983 rulecnt++;
1984 LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next)
1985 rulecnt++;
1986 LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next)
1987 rulecnt++;
1988 RACCT_UNLOCK();
1989
1990 /*
1991 * Create temporary list. We've dropped the rctl_lock in order
1992 * to use M_WAITOK.
1993 */
1994 for (i = 0; i < rulecnt; i++) {
1995 newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
1996 newlink->rrl_rule = NULL;
1997 newlink->rrl_exceeded = 0;
1998 LIST_INSERT_HEAD(&newrules, newlink, rrl_next);
1999 }
2000
2001 newlink = LIST_FIRST(&newrules);
2002
2003 /*
2004 * Assign rules to the newly allocated list entries.
2005 */
2006 RACCT_LOCK();
2007 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
2008 if (link->rrl_rule->rr_subject_type ==
2009 RCTL_SUBJECT_TYPE_PROCESS) {
2010 if (newlink == NULL)
2011 goto goaround;
2012 rctl_rule_acquire(link->rrl_rule);
2013 newlink->rrl_rule = link->rrl_rule;
2014 newlink->rrl_exceeded = link->rrl_exceeded;
2015 newlink = LIST_NEXT(newlink, rrl_next);
2016 rulecnt--;
2017 }
2018 }
2019
2020 LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) {
2021 if (newlink == NULL)
2022 goto goaround;
2023 rctl_rule_acquire(link->rrl_rule);
2024 newlink->rrl_rule = link->rrl_rule;
2025 newlink->rrl_exceeded = link->rrl_exceeded;
2026 newlink = LIST_NEXT(newlink, rrl_next);
2027 rulecnt--;
2028 }
2029
2030 LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) {
2031 if (newlink == NULL)
2032 goto goaround;
2033 rctl_rule_acquire(link->rrl_rule);
2034 newlink->rrl_rule = link->rrl_rule;
2035 newlink->rrl_exceeded = link->rrl_exceeded;
2036 newlink = LIST_NEXT(newlink, rrl_next);
2037 rulecnt--;
2038 }
2039
2040 LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) {
2041 if (newlink == NULL)
2042 goto goaround;
2043 rctl_rule_acquire(link->rrl_rule);
2044 newlink->rrl_rule = link->rrl_rule;
2045 newlink->rrl_exceeded = link->rrl_exceeded;
2046 newlink = LIST_NEXT(newlink, rrl_next);
2047 rulecnt--;
2048 }
2049
2050 if (rulecnt == 0) {
2051 /*
2052 * Free the old rule list.
2053 */
2054 while (!LIST_EMPTY(&p->p_racct->r_rule_links)) {
2055 link = LIST_FIRST(&p->p_racct->r_rule_links);
2056 LIST_REMOVE(link, rrl_next);
2057 rctl_rule_release(link->rrl_rule);
2058 uma_zfree(rctl_rule_link_zone, link);
2059 }
2060
2061 /*
2062 * Replace lists and we're done.
2063 *
2064 * XXX: Is there any way to switch list heads instead
2065 * of iterating here?
2066 */
2067 while (!LIST_EMPTY(&newrules)) {
2068 newlink = LIST_FIRST(&newrules);
2069 LIST_REMOVE(newlink, rrl_next);
2070 LIST_INSERT_HEAD(&p->p_racct->r_rule_links,
2071 newlink, rrl_next);
2072 }
2073
2074 RACCT_UNLOCK();
2075
2076 return;
2077 }
2078
2079 goaround:
2080 RACCT_UNLOCK();
2081
2082 /*
2083 * Rule list changed while we were not holding the rctl_lock.
2084 * Free the new list and try again.
2085 */
2086 while (!LIST_EMPTY(&newrules)) {
2087 newlink = LIST_FIRST(&newrules);
2088 LIST_REMOVE(newlink, rrl_next);
2089 if (newlink->rrl_rule != NULL)
2090 rctl_rule_release(newlink->rrl_rule);
2091 uma_zfree(rctl_rule_link_zone, newlink);
2092 }
2093
2094 goto again;
2095 }
2096
2097 /*
2098 * Assign RCTL rules to the newly created process.
2099 */
2100 int
2101 rctl_proc_fork(struct proc *parent, struct proc *child)
2102 {
2103 struct rctl_rule *rule;
2104 struct rctl_rule_link *link;
2105 int error;
2106
2107 ASSERT_RACCT_ENABLED();
2108 RACCT_LOCK_ASSERT();
2109 KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent));
2110
2111 LIST_INIT(&child->p_racct->r_rule_links);
2112
2113 /*
2114 * Go through limits applicable to the parent and assign them
2115 * to the child. Rules with 'process' subject have to be duplicated
2116 * in order to make their rr_subject point to the new process.
2117 */
2118 LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) {
2119 if (link->rrl_rule->rr_subject_type ==
2120 RCTL_SUBJECT_TYPE_PROCESS) {
2121 rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT);
2122 if (rule == NULL)
2123 goto fail;
2124 KASSERT(rule->rr_subject.rs_proc == parent,
2125 ("rule->rr_subject.rs_proc != parent"));
2126 rule->rr_subject.rs_proc = child;
2127 error = rctl_racct_add_rule_locked(child->p_racct,
2128 rule);
2129 rctl_rule_release(rule);
2130 if (error != 0)
2131 goto fail;
2132 } else {
2133 error = rctl_racct_add_rule_locked(child->p_racct,
2134 link->rrl_rule);
2135 if (error != 0)
2136 goto fail;
2137 }
2138 }
2139
2140 return (0);
2141
2142 fail:
2143 while (!LIST_EMPTY(&child->p_racct->r_rule_links)) {
2144 link = LIST_FIRST(&child->p_racct->r_rule_links);
2145 LIST_REMOVE(link, rrl_next);
2146 rctl_rule_release(link->rrl_rule);
2147 uma_zfree(rctl_rule_link_zone, link);
2148 }
2149
2150 return (EAGAIN);
2151 }
2152
2153 /*
2154 * Release rules attached to the racct.
2155 */
2156 void
2157 rctl_racct_release(struct racct *racct)
2158 {
2159 struct rctl_rule_link *link;
2160
2161 ASSERT_RACCT_ENABLED();
2162 RACCT_LOCK_ASSERT();
2163
2164 while (!LIST_EMPTY(&racct->r_rule_links)) {
2165 link = LIST_FIRST(&racct->r_rule_links);
2166 LIST_REMOVE(link, rrl_next);
2167 rctl_rule_release(link->rrl_rule);
2168 uma_zfree(rctl_rule_link_zone, link);
2169 }
2170 }
2171
2172 static void
2173 rctl_init(void)
2174 {
2175
2176 if (!racct_enable)
2177 return;
2178
2179 rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule),
2180 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
2181 rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
2182 sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
2183 UMA_ALIGN_PTR, 0);
2184
2185 /*
2186 * Set default values, making sure not to overwrite the ones
2187 * fetched from tunables. Most of those could be set at the
2188 * declaration, except for the rctl_throttle_max - we cannot
2189 * set it there due to hz not being compile time constant.
2190 */
2191 if (rctl_throttle_min < 1)
2192 rctl_throttle_min = 1;
2193 if (rctl_throttle_max < rctl_throttle_min)
2194 rctl_throttle_max = 2 * hz;
2195 if (rctl_throttle_pct < 0)
2196 rctl_throttle_pct = 100;
2197 if (rctl_throttle_pct2 < 0)
2198 rctl_throttle_pct2 = 100;
2199 }
2200
2201 #else /* !RCTL */
2202
2203 int
2204 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
2205 {
2206
2207 return (ENOSYS);
2208 }
2209
2210 int
2211 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
2212 {
2213
2214 return (ENOSYS);
2215 }
2216
2217 int
2218 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
2219 {
2220
2221 return (ENOSYS);
2222 }
2223
2224 int
2225 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
2226 {
2227
2228 return (ENOSYS);
2229 }
2230
2231 int
2232 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
2233 {
2234
2235 return (ENOSYS);
2236 }
2237
2238 #endif /* !RCTL */
Cache object: 77f35f979d2db22d328d309814a9a5c7
|