FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_rctl.c
1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2010 The FreeBSD Foundation
5 *
6 * This software was developed by Edward Tomasz Napierala under sponsorship
7 * from the FreeBSD Foundation.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 *
30 * $FreeBSD$
31 */
32
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35
36 #include <sys/param.h>
37 #include <sys/devctl.h>
38 #include <sys/malloc.h>
39 #include <sys/queue.h>
40 #include <sys/refcount.h>
41 #include <sys/jail.h>
42 #include <sys/kernel.h>
43 #include <sys/limits.h>
44 #include <sys/loginclass.h>
45 #include <sys/priv.h>
46 #include <sys/proc.h>
47 #include <sys/racct.h>
48 #include <sys/rctl.h>
49 #include <sys/resourcevar.h>
50 #include <sys/sx.h>
51 #include <sys/sysproto.h>
52 #include <sys/systm.h>
53 #include <sys/types.h>
54 #include <sys/eventhandler.h>
55 #include <sys/lock.h>
56 #include <sys/mutex.h>
57 #include <sys/rwlock.h>
58 #include <sys/sbuf.h>
59 #include <sys/taskqueue.h>
60 #include <sys/tree.h>
61 #include <vm/uma.h>
62
63 #ifdef RCTL
64 #ifndef RACCT
65 #error "The RCTL option requires the RACCT option"
66 #endif
67
68 FEATURE(rctl, "Resource Limits");
69
/*
 * HRF_* flag values.
 */
#define	HRF_DEFAULT		0
#define	HRF_DONT_INHERIT	1
#define	HRF_DONT_ACCUMULATE	2

/*
 * Buffer size limits.  The expansions are parenthesized so that each macro
 * stays a single operand when it is used inside a larger expression, e.g.
 * "len / RCTL_MAX_INBUFSIZE" or "-RCTL_MAX_OUTBUFSIZE".
 */
#define	RCTL_MAX_INBUFSIZE	(4 * 1024)
#define	RCTL_MAX_OUTBUFSIZE	(16 * 1024 * 1024)
#define	RCTL_LOG_BUFSIZE	128

/*
 * Amount subtracted from the available %cpu by rctl_pcpu_available().
 */
#define	RCTL_PCPU_SHIFT		(10 * 1000000)
79
80 static unsigned int rctl_maxbufsize = RCTL_MAX_OUTBUFSIZE;
81 static int rctl_log_rate_limit = 10;
82 static int rctl_devctl_rate_limit = 10;
83
84 /*
85 * Values below are initialized in rctl_init().
86 */
87 static int rctl_throttle_min = -1;
88 static int rctl_throttle_max = -1;
89 static int rctl_throttle_pct = -1;
90 static int rctl_throttle_pct2 = -1;
91
92 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS);
93 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS);
94 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS);
95 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS);
96
97 SYSCTL_NODE(_kern_racct, OID_AUTO, rctl, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
98 "Resource Limits");
99 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, maxbufsize, CTLFLAG_RWTUN,
100 &rctl_maxbufsize, 0, "Maximum output buffer size");
101 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, log_rate_limit, CTLFLAG_RW,
102 &rctl_log_rate_limit, 0, "Maximum number of log messages per second");
103 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, devctl_rate_limit, CTLFLAG_RWTUN,
104 &rctl_devctl_rate_limit, 0, "Maximum number of devctl messages per second");
105 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_min,
106 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0,
107 &rctl_throttle_min_sysctl, "IU",
108 "Shortest throttling duration, in hz");
109 TUNABLE_INT("kern.racct.rctl.throttle_min", &rctl_throttle_min);
110 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_max,
111 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0,
112 &rctl_throttle_max_sysctl, "IU",
113 "Longest throttling duration, in hz");
114 TUNABLE_INT("kern.racct.rctl.throttle_max", &rctl_throttle_max);
115 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct,
116 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0,
117 &rctl_throttle_pct_sysctl, "IU",
118 "Throttling penalty for process consumption, in percent");
119 TUNABLE_INT("kern.racct.rctl.throttle_pct", &rctl_throttle_pct);
120 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct2,
121 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0,
122 &rctl_throttle_pct2_sysctl, "IU",
123 "Throttling penalty for container consumption, in percent");
124 TUNABLE_INT("kern.racct.rctl.throttle_pct2", &rctl_throttle_pct2);
125
/*
 * A 'rctl_rule_link' ties one rule to one racct the rule applies to;
 * every racct keeps a list of such links.  For example, the rule
 * 'user:X:openfiles:deny=N/process' is linked with the uidinfo for
 * user X, and with each process of that user.
 */
struct rctl_rule_link {
	LIST_ENTRY(rctl_rule_link)	 rrl_next;	/* list linkage */
	struct rctl_rule		*rrl_rule;	/* the linked rule */
	int				 rrl_exceeded;	/* action already fired */
};
136
/*
 * One entry of a name <-> numeric value table.  Tables are arrays of
 * these, terminated by an entry whose d_name is NULL.
 */
struct dict {
	const char	*d_name;	/* textual name */
	int		 d_value;	/* matching numeric constant */
};
141
142 static struct dict subjectnames[] = {
143 { "process", RCTL_SUBJECT_TYPE_PROCESS },
144 { "user", RCTL_SUBJECT_TYPE_USER },
145 { "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS },
146 { "jail", RCTL_SUBJECT_TYPE_JAIL },
147 { NULL, -1 }};
148
149 static struct dict resourcenames[] = {
150 { "cputime", RACCT_CPU },
151 { "datasize", RACCT_DATA },
152 { "stacksize", RACCT_STACK },
153 { "coredumpsize", RACCT_CORE },
154 { "memoryuse", RACCT_RSS },
155 { "memorylocked", RACCT_MEMLOCK },
156 { "maxproc", RACCT_NPROC },
157 { "openfiles", RACCT_NOFILE },
158 { "vmemoryuse", RACCT_VMEM },
159 { "pseudoterminals", RACCT_NPTS },
160 { "swapuse", RACCT_SWAP },
161 { "nthr", RACCT_NTHR },
162 { "msgqqueued", RACCT_MSGQQUEUED },
163 { "msgqsize", RACCT_MSGQSIZE },
164 { "nmsgq", RACCT_NMSGQ },
165 { "nsem", RACCT_NSEM },
166 { "nsemop", RACCT_NSEMOP },
167 { "nshm", RACCT_NSHM },
168 { "shmsize", RACCT_SHMSIZE },
169 { "wallclock", RACCT_WALLCLOCK },
170 { "pcpu", RACCT_PCTCPU },
171 { "readbps", RACCT_READBPS },
172 { "writebps", RACCT_WRITEBPS },
173 { "readiops", RACCT_READIOPS },
174 { "writeiops", RACCT_WRITEIOPS },
175 { NULL, -1 }};
176
177 static struct dict actionnames[] = {
178 { "sighup", RCTL_ACTION_SIGHUP },
179 { "sigint", RCTL_ACTION_SIGINT },
180 { "sigquit", RCTL_ACTION_SIGQUIT },
181 { "sigill", RCTL_ACTION_SIGILL },
182 { "sigtrap", RCTL_ACTION_SIGTRAP },
183 { "sigabrt", RCTL_ACTION_SIGABRT },
184 { "sigemt", RCTL_ACTION_SIGEMT },
185 { "sigfpe", RCTL_ACTION_SIGFPE },
186 { "sigkill", RCTL_ACTION_SIGKILL },
187 { "sigbus", RCTL_ACTION_SIGBUS },
188 { "sigsegv", RCTL_ACTION_SIGSEGV },
189 { "sigsys", RCTL_ACTION_SIGSYS },
190 { "sigpipe", RCTL_ACTION_SIGPIPE },
191 { "sigalrm", RCTL_ACTION_SIGALRM },
192 { "sigterm", RCTL_ACTION_SIGTERM },
193 { "sigurg", RCTL_ACTION_SIGURG },
194 { "sigstop", RCTL_ACTION_SIGSTOP },
195 { "sigtstp", RCTL_ACTION_SIGTSTP },
196 { "sigchld", RCTL_ACTION_SIGCHLD },
197 { "sigttin", RCTL_ACTION_SIGTTIN },
198 { "sigttou", RCTL_ACTION_SIGTTOU },
199 { "sigio", RCTL_ACTION_SIGIO },
200 { "sigxcpu", RCTL_ACTION_SIGXCPU },
201 { "sigxfsz", RCTL_ACTION_SIGXFSZ },
202 { "sigvtalrm", RCTL_ACTION_SIGVTALRM },
203 { "sigprof", RCTL_ACTION_SIGPROF },
204 { "sigwinch", RCTL_ACTION_SIGWINCH },
205 { "siginfo", RCTL_ACTION_SIGINFO },
206 { "sigusr1", RCTL_ACTION_SIGUSR1 },
207 { "sigusr2", RCTL_ACTION_SIGUSR2 },
208 { "sigthr", RCTL_ACTION_SIGTHR },
209 { "deny", RCTL_ACTION_DENY },
210 { "log", RCTL_ACTION_LOG },
211 { "devctl", RCTL_ACTION_DEVCTL },
212 { "throttle", RCTL_ACTION_THROTTLE },
213 { NULL, -1 }};
214
215 static void rctl_init(void);
216 SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);
217
218 static uma_zone_t rctl_rule_zone;
219 static uma_zone_t rctl_rule_link_zone;
220
221 static int rctl_rule_fully_specified(const struct rctl_rule *rule);
222 static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule);
223
224 static MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits");
225
226 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS)
227 {
228 int error, val = rctl_throttle_min;
229
230 error = sysctl_handle_int(oidp, &val, 0, req);
231 if (error || !req->newptr)
232 return (error);
233 if (val < 1 || val > rctl_throttle_max)
234 return (EINVAL);
235
236 RACCT_LOCK();
237 rctl_throttle_min = val;
238 RACCT_UNLOCK();
239
240 return (0);
241 }
242
243 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS)
244 {
245 int error, val = rctl_throttle_max;
246
247 error = sysctl_handle_int(oidp, &val, 0, req);
248 if (error || !req->newptr)
249 return (error);
250 if (val < rctl_throttle_min)
251 return (EINVAL);
252
253 RACCT_LOCK();
254 rctl_throttle_max = val;
255 RACCT_UNLOCK();
256
257 return (0);
258 }
259
260 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS)
261 {
262 int error, val = rctl_throttle_pct;
263
264 error = sysctl_handle_int(oidp, &val, 0, req);
265 if (error || !req->newptr)
266 return (error);
267 if (val < 0)
268 return (EINVAL);
269
270 RACCT_LOCK();
271 rctl_throttle_pct = val;
272 RACCT_UNLOCK();
273
274 return (0);
275 }
276
277 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS)
278 {
279 int error, val = rctl_throttle_pct2;
280
281 error = sysctl_handle_int(oidp, &val, 0, req);
282 if (error || !req->newptr)
283 return (error);
284 if (val < 0)
285 return (EINVAL);
286
287 RACCT_LOCK();
288 rctl_throttle_pct2 = val;
289 RACCT_UNLOCK();
290
291 return (0);
292 }
293
294 static const char *
295 rctl_subject_type_name(int subject)
296 {
297 int i;
298
299 for (i = 0; subjectnames[i].d_name != NULL; i++) {
300 if (subjectnames[i].d_value == subject)
301 return (subjectnames[i].d_name);
302 }
303
304 panic("rctl_subject_type_name: unknown subject type %d", subject);
305 }
306
307 static const char *
308 rctl_action_name(int action)
309 {
310 int i;
311
312 for (i = 0; actionnames[i].d_name != NULL; i++) {
313 if (actionnames[i].d_value == action)
314 return (actionnames[i].d_name);
315 }
316
317 panic("rctl_action_name: unknown action %d", action);
318 }
319
320 const char *
321 rctl_resource_name(int resource)
322 {
323 int i;
324
325 for (i = 0; resourcenames[i].d_name != NULL; i++) {
326 if (resourcenames[i].d_value == resource)
327 return (resourcenames[i].d_name);
328 }
329
330 panic("rctl_resource_name: unknown resource %d", resource);
331 }
332
333 static struct racct *
334 rctl_proc_rule_to_racct(const struct proc *p, const struct rctl_rule *rule)
335 {
336 struct ucred *cred = p->p_ucred;
337
338 ASSERT_RACCT_ENABLED();
339 RACCT_LOCK_ASSERT();
340
341 switch (rule->rr_per) {
342 case RCTL_SUBJECT_TYPE_PROCESS:
343 return (p->p_racct);
344 case RCTL_SUBJECT_TYPE_USER:
345 return (cred->cr_ruidinfo->ui_racct);
346 case RCTL_SUBJECT_TYPE_LOGINCLASS:
347 return (cred->cr_loginclass->lc_racct);
348 case RCTL_SUBJECT_TYPE_JAIL:
349 return (cred->cr_prison->pr_prison_racct->prr_racct);
350 default:
351 panic("%s: unknown per %d", __func__, rule->rr_per);
352 }
353 }
354
355 /*
356 * Return the amount of resource that can be allocated by 'p' before
357 * hitting 'rule'.
358 */
359 static int64_t
360 rctl_available_resource(const struct proc *p, const struct rctl_rule *rule)
361 {
362 const struct racct *racct;
363 int64_t available;
364
365 ASSERT_RACCT_ENABLED();
366 RACCT_LOCK_ASSERT();
367
368 racct = rctl_proc_rule_to_racct(p, rule);
369 available = rule->rr_amount - racct->r_resources[rule->rr_resource];
370
371 return (available);
372 }
373
374 /*
375 * Called every second for proc, uidinfo, loginclass, and jail containers.
376 * If the limit isn't exceeded, it decreases the usage amount to zero.
377 * Otherwise, it decreases it by the value of the limit. This way
378 * resource consumption exceeding the limit "carries over" to the next
379 * period.
380 */
381 void
382 rctl_throttle_decay(struct racct *racct, int resource)
383 {
384 struct rctl_rule *rule;
385 struct rctl_rule_link *link;
386 int64_t minavailable;
387
388 ASSERT_RACCT_ENABLED();
389 RACCT_LOCK_ASSERT();
390
391 minavailable = INT64_MAX;
392
393 LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
394 rule = link->rrl_rule;
395
396 if (rule->rr_resource != resource)
397 continue;
398 if (rule->rr_action != RCTL_ACTION_THROTTLE)
399 continue;
400
401 if (rule->rr_amount < minavailable)
402 minavailable = rule->rr_amount;
403 }
404
405 if (racct->r_resources[resource] < minavailable) {
406 racct->r_resources[resource] = 0;
407 } else {
408 /*
409 * Cap utilization counter at ten times the limit. Otherwise,
410 * if we changed the rule lowering the allowed amount, it could
411 * take unreasonably long time for the accumulated resource
412 * usage to drop.
413 */
414 if (racct->r_resources[resource] > minavailable * 10)
415 racct->r_resources[resource] = minavailable * 10;
416
417 racct->r_resources[resource] -= minavailable;
418 }
419 }
420
421 /*
422 * Special version of rctl_get_available() for the %CPU resource.
423 * We slightly cheat here and return less than we normally would.
424 */
425 int64_t
426 rctl_pcpu_available(const struct proc *p) {
427 struct rctl_rule *rule;
428 struct rctl_rule_link *link;
429 int64_t available, minavailable, limit;
430
431 ASSERT_RACCT_ENABLED();
432 RACCT_LOCK_ASSERT();
433
434 minavailable = INT64_MAX;
435 limit = 0;
436
437 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
438 rule = link->rrl_rule;
439 if (rule->rr_resource != RACCT_PCTCPU)
440 continue;
441 if (rule->rr_action != RCTL_ACTION_DENY)
442 continue;
443 available = rctl_available_resource(p, rule);
444 if (available < minavailable) {
445 minavailable = available;
446 limit = rule->rr_amount;
447 }
448 }
449
450 /*
451 * Return slightly less than actual value of the available
452 * %cpu resource. This makes %cpu throttling more aggressive
453 * and lets us act sooner than the limits are already exceeded.
454 */
455 if (limit != 0) {
456 if (limit > 2 * RCTL_PCPU_SHIFT)
457 minavailable -= RCTL_PCPU_SHIFT;
458 else
459 minavailable -= (limit / 2);
460 }
461
462 return (minavailable);
463 }
464
/*
 * Saturating addition: returns a + b, or UINT64_MAX when the sum does not
 * fit in 64 bits.
 */
static uint64_t
xadd(uint64_t a, uint64_t b)
{

	/* The sum overflows exactly when b exceeds the room left above a. */
	if (b > UINT64_MAX - a)
		return (UINT64_MAX);

	return (a + b);
}
480
/*
 * Saturating multiplication: returns a * b, or UINT64_MAX when the product
 * does not fit in 64 bits.
 */
static uint64_t
xmul(uint64_t a, uint64_t b)
{

	/* Nothing can overflow, and the division below needs b != 0. */
	if (b == 0)
		return (0);

	if (a > UINT64_MAX / b)
		return (UINT64_MAX);

	return (a * b);
}
490
491 /*
492 * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition
493 * to what it keeps allocated now. Returns non-zero if the allocation should
494 * be denied, 0 otherwise.
495 */
496 int
497 rctl_enforce(struct proc *p, int resource, uint64_t amount)
498 {
499 static struct timeval log_lasttime, devctl_lasttime;
500 static int log_curtime = 0, devctl_curtime = 0;
501 struct rctl_rule *rule;
502 struct rctl_rule_link *link;
503 struct sbuf sb;
504 char *buf;
505 int64_t available;
506 uint64_t sleep_ms, sleep_ratio;
507 int should_deny = 0;
508
509 ASSERT_RACCT_ENABLED();
510 RACCT_LOCK_ASSERT();
511
512 /*
513 * There may be more than one matching rule; go through all of them.
514 * Denial should be done last, after logging and sending signals.
515 */
516 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
517 rule = link->rrl_rule;
518 if (rule->rr_resource != resource)
519 continue;
520
521 available = rctl_available_resource(p, rule);
522 if (available >= (int64_t)amount) {
523 link->rrl_exceeded = 0;
524 continue;
525 }
526
527 switch (rule->rr_action) {
528 case RCTL_ACTION_DENY:
529 should_deny = 1;
530 continue;
531 case RCTL_ACTION_LOG:
532 /*
533 * If rrl_exceeded != 0, it means we've already
534 * logged a warning for this process.
535 */
536 if (link->rrl_exceeded != 0)
537 continue;
538
539 /*
540 * If the process state is not fully initialized yet,
541 * we can't access most of the required fields, e.g.
542 * p->p_comm. This happens when called from fork1().
543 * Ignore this rule for now; it will be processed just
544 * after fork, when called from racct_proc_fork_done().
545 */
546 if (p->p_state != PRS_NORMAL)
547 continue;
548
549 if (!ppsratecheck(&log_lasttime, &log_curtime,
550 rctl_log_rate_limit))
551 continue;
552
553 buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
554 if (buf == NULL) {
555 printf("rctl_enforce: out of memory\n");
556 continue;
557 }
558 sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
559 rctl_rule_to_sbuf(&sb, rule);
560 sbuf_finish(&sb);
561 printf("rctl: rule \"%s\" matched by pid %d "
562 "(%s), uid %d, jail %s\n", sbuf_data(&sb),
563 p->p_pid, p->p_comm, p->p_ucred->cr_uid,
564 p->p_ucred->cr_prison->pr_prison_racct->prr_name);
565 sbuf_delete(&sb);
566 free(buf, M_RCTL);
567 link->rrl_exceeded = 1;
568 continue;
569 case RCTL_ACTION_DEVCTL:
570 if (link->rrl_exceeded != 0)
571 continue;
572
573 if (p->p_state != PRS_NORMAL)
574 continue;
575
576 if (!ppsratecheck(&devctl_lasttime, &devctl_curtime,
577 rctl_devctl_rate_limit))
578 continue;
579
580 buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
581 if (buf == NULL) {
582 printf("rctl_enforce: out of memory\n");
583 continue;
584 }
585 sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
586 sbuf_printf(&sb, "rule=");
587 rctl_rule_to_sbuf(&sb, rule);
588 sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
589 p->p_pid, p->p_ucred->cr_ruid,
590 p->p_ucred->cr_prison->pr_prison_racct->prr_name);
591 sbuf_finish(&sb);
592 devctl_notify("RCTL", "rule", "matched",
593 sbuf_data(&sb));
594 sbuf_delete(&sb);
595 free(buf, M_RCTL);
596 link->rrl_exceeded = 1;
597 continue;
598 case RCTL_ACTION_THROTTLE:
599 if (p->p_state != PRS_NORMAL)
600 continue;
601
602 if (rule->rr_amount == 0) {
603 racct_proc_throttle(p, rctl_throttle_max);
604 continue;
605 }
606
607 /*
608 * Make the process sleep for a fraction of second
609 * proportional to the ratio of process' resource
610 * utilization compared to the limit. The point is
611 * to penalize resource hogs: processes that consume
612 * more of the available resources sleep for longer.
613 *
614 * We're trying to defer division until the very end,
615 * to minimize the rounding effects. The following
616 * calculation could have been written in a clearer
617 * way like this:
618 *
619 * sleep_ms = hz * p->p_racct->r_resources[resource] /
620 * rule->rr_amount;
621 * sleep_ms *= rctl_throttle_pct / 100;
622 * if (sleep_ms < rctl_throttle_min)
623 * sleep_ms = rctl_throttle_min;
624 *
625 */
626 sleep_ms = xmul(hz, p->p_racct->r_resources[resource]);
627 sleep_ms = xmul(sleep_ms, rctl_throttle_pct) / 100;
628 if (sleep_ms < rctl_throttle_min * rule->rr_amount)
629 sleep_ms = rctl_throttle_min * rule->rr_amount;
630
631 /*
632 * Multiply that by the ratio of the resource
633 * consumption for the container compared to the limit,
634 * squared. In other words, a process in a container
635 * that is two times over the limit will be throttled
636 * four times as much for hitting the same rule. The
637 * point is to penalize processes more if the container
638 * itself (eg certain UID or jail) is above the limit.
639 */
640 if (available < 0)
641 sleep_ratio = -available / rule->rr_amount;
642 else
643 sleep_ratio = 0;
644 sleep_ratio = xmul(sleep_ratio, sleep_ratio);
645 sleep_ratio = xmul(sleep_ratio, rctl_throttle_pct2) / 100;
646 sleep_ms = xadd(sleep_ms, xmul(sleep_ms, sleep_ratio));
647
648 /*
649 * Finally the division.
650 */
651 sleep_ms /= rule->rr_amount;
652
653 if (sleep_ms > rctl_throttle_max)
654 sleep_ms = rctl_throttle_max;
655 #if 0
656 printf("%s: pid %d (%s), %jd of %jd, will sleep for %ju ms (ratio %ju, available %jd)\n",
657 __func__, p->p_pid, p->p_comm,
658 p->p_racct->r_resources[resource],
659 rule->rr_amount, (uintmax_t)sleep_ms,
660 (uintmax_t)sleep_ratio, (intmax_t)available);
661 #endif
662
663 KASSERT(sleep_ms >= rctl_throttle_min, ("%s: %ju < %d\n",
664 __func__, (uintmax_t)sleep_ms, rctl_throttle_min));
665 racct_proc_throttle(p, sleep_ms);
666 continue;
667 default:
668 if (link->rrl_exceeded != 0)
669 continue;
670
671 if (p->p_state != PRS_NORMAL)
672 continue;
673
674 KASSERT(rule->rr_action > 0 &&
675 rule->rr_action <= RCTL_ACTION_SIGNAL_MAX,
676 ("rctl_enforce: unknown action %d",
677 rule->rr_action));
678
679 /*
680 * We're using the fact that RCTL_ACTION_SIG* values
681 * are equal to their counterparts from sys/signal.h.
682 */
683 kern_psignal(p, rule->rr_action);
684 link->rrl_exceeded = 1;
685 continue;
686 }
687 }
688
689 if (should_deny) {
690 /*
691 * Return fake error code; the caller should change it
692 * into one proper for the situation - EFSIZ, ENOMEM etc.
693 */
694 return (EDOOFUS);
695 }
696
697 return (0);
698 }
699
700 uint64_t
701 rctl_get_limit(struct proc *p, int resource)
702 {
703 struct rctl_rule *rule;
704 struct rctl_rule_link *link;
705 uint64_t amount = UINT64_MAX;
706
707 ASSERT_RACCT_ENABLED();
708 RACCT_LOCK_ASSERT();
709
710 /*
711 * There may be more than one matching rule; go through all of them.
712 * Denial should be done last, after logging and sending signals.
713 */
714 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
715 rule = link->rrl_rule;
716 if (rule->rr_resource != resource)
717 continue;
718 if (rule->rr_action != RCTL_ACTION_DENY)
719 continue;
720 if (rule->rr_amount < amount)
721 amount = rule->rr_amount;
722 }
723
724 return (amount);
725 }
726
727 uint64_t
728 rctl_get_available(struct proc *p, int resource)
729 {
730 struct rctl_rule *rule;
731 struct rctl_rule_link *link;
732 int64_t available, minavailable, allocated;
733
734 minavailable = INT64_MAX;
735
736 ASSERT_RACCT_ENABLED();
737 RACCT_LOCK_ASSERT();
738
739 /*
740 * There may be more than one matching rule; go through all of them.
741 * Denial should be done last, after logging and sending signals.
742 */
743 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
744 rule = link->rrl_rule;
745 if (rule->rr_resource != resource)
746 continue;
747 if (rule->rr_action != RCTL_ACTION_DENY)
748 continue;
749 available = rctl_available_resource(p, rule);
750 if (available < minavailable)
751 minavailable = available;
752 }
753
754 /*
755 * XXX: Think about this _hard_.
756 */
757 allocated = p->p_racct->r_resources[resource];
758 if (minavailable < INT64_MAX - allocated)
759 minavailable += allocated;
760 if (minavailable < 0)
761 minavailable = 0;
762
763 return (minavailable);
764 }
765
766 static int
767 rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
768 {
769
770 ASSERT_RACCT_ENABLED();
771
772 if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
773 if (rule->rr_subject_type != filter->rr_subject_type)
774 return (0);
775
776 switch (filter->rr_subject_type) {
777 case RCTL_SUBJECT_TYPE_PROCESS:
778 if (filter->rr_subject.rs_proc != NULL &&
779 rule->rr_subject.rs_proc !=
780 filter->rr_subject.rs_proc)
781 return (0);
782 break;
783 case RCTL_SUBJECT_TYPE_USER:
784 if (filter->rr_subject.rs_uip != NULL &&
785 rule->rr_subject.rs_uip !=
786 filter->rr_subject.rs_uip)
787 return (0);
788 break;
789 case RCTL_SUBJECT_TYPE_LOGINCLASS:
790 if (filter->rr_subject.rs_loginclass != NULL &&
791 rule->rr_subject.rs_loginclass !=
792 filter->rr_subject.rs_loginclass)
793 return (0);
794 break;
795 case RCTL_SUBJECT_TYPE_JAIL:
796 if (filter->rr_subject.rs_prison_racct != NULL &&
797 rule->rr_subject.rs_prison_racct !=
798 filter->rr_subject.rs_prison_racct)
799 return (0);
800 break;
801 default:
802 panic("rctl_rule_matches: unknown subject type %d",
803 filter->rr_subject_type);
804 }
805 }
806
807 if (filter->rr_resource != RACCT_UNDEFINED) {
808 if (rule->rr_resource != filter->rr_resource)
809 return (0);
810 }
811
812 if (filter->rr_action != RCTL_ACTION_UNDEFINED) {
813 if (rule->rr_action != filter->rr_action)
814 return (0);
815 }
816
817 if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) {
818 if (rule->rr_amount != filter->rr_amount)
819 return (0);
820 }
821
822 if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) {
823 if (rule->rr_per != filter->rr_per)
824 return (0);
825 }
826
827 return (1);
828 }
829
830 static int
831 str2value(const char *str, int *value, struct dict *table)
832 {
833 int i;
834
835 if (value == NULL)
836 return (EINVAL);
837
838 for (i = 0; table[i].d_name != NULL; i++) {
839 if (strcasecmp(table[i].d_name, str) == 0) {
840 *value = table[i].d_value;
841 return (0);
842 }
843 }
844
845 return (EINVAL);
846 }
847
848 static int
849 str2id(const char *str, id_t *value)
850 {
851 char *end;
852
853 if (str == NULL)
854 return (EINVAL);
855
856 *value = strtoul(str, &end, 10);
857 if ((size_t)(end - str) != strlen(str))
858 return (EINVAL);
859
860 return (0);
861 }
862
/*
 * Parse 'str' as a non-negative decimal number and store it in '*value'.
 *
 * Returns 0 on success, EINVAL when 'str' is NULL or anything is left over
 * after the number, and ERANGE when the number does not fit in an int64_t.
 * As before, '*value' is written even when an error is returned.
 */
static int
str2int64(const char *str, int64_t *value)
{
	uint64_t parsed;
	char *rest;

	if (str == NULL)
		return (EINVAL);

	/*
	 * Use the 64-bit strtouq() rather than strtoul(): unsigned long is
	 * only 32 bits wide on ILP32 platforms, where strtoul() would
	 * silently clamp any amount above 4G.
	 */
	parsed = strtouq(str, &rest, 10);
	*value = (int64_t)parsed;

	/* The whole string must have been consumed. */
	if (*rest != '\0')
		return (EINVAL);

	/* This also rejects input with a leading minus sign. */
	if (parsed > INT64_MAX)
		return (ERANGE);

	return (0);
}
880
881 /*
882 * Connect the rule to the racct, increasing refcount for the rule.
883 */
884 static void
885 rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule)
886 {
887 struct rctl_rule_link *link;
888
889 ASSERT_RACCT_ENABLED();
890 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
891
892 rctl_rule_acquire(rule);
893 link = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
894 link->rrl_rule = rule;
895 link->rrl_exceeded = 0;
896
897 RACCT_LOCK();
898 LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
899 RACCT_UNLOCK();
900 }
901
902 static int
903 rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule)
904 {
905 struct rctl_rule_link *link;
906
907 ASSERT_RACCT_ENABLED();
908 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
909 RACCT_LOCK_ASSERT();
910
911 link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT);
912 if (link == NULL)
913 return (ENOMEM);
914 rctl_rule_acquire(rule);
915 link->rrl_rule = rule;
916 link->rrl_exceeded = 0;
917
918 LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
919
920 return (0);
921 }
922
923 /*
924 * Remove limits for a rules matching the filter and release
925 * the refcounts for the rules, possibly freeing them. Returns
926 * the number of limit structures removed.
927 */
928 static int
929 rctl_racct_remove_rules(struct racct *racct,
930 const struct rctl_rule *filter)
931 {
932 struct rctl_rule_link *link, *linktmp;
933 int removed = 0;
934
935 ASSERT_RACCT_ENABLED();
936 RACCT_LOCK_ASSERT();
937
938 LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
939 if (!rctl_rule_matches(link->rrl_rule, filter))
940 continue;
941
942 LIST_REMOVE(link, rrl_next);
943 rctl_rule_release(link->rrl_rule);
944 uma_zfree(rctl_rule_link_zone, link);
945 removed++;
946 }
947 return (removed);
948 }
949
950 static void
951 rctl_rule_acquire_subject(struct rctl_rule *rule)
952 {
953
954 ASSERT_RACCT_ENABLED();
955
956 switch (rule->rr_subject_type) {
957 case RCTL_SUBJECT_TYPE_UNDEFINED:
958 case RCTL_SUBJECT_TYPE_PROCESS:
959 break;
960 case RCTL_SUBJECT_TYPE_JAIL:
961 if (rule->rr_subject.rs_prison_racct != NULL)
962 prison_racct_hold(rule->rr_subject.rs_prison_racct);
963 break;
964 case RCTL_SUBJECT_TYPE_USER:
965 if (rule->rr_subject.rs_uip != NULL)
966 uihold(rule->rr_subject.rs_uip);
967 break;
968 case RCTL_SUBJECT_TYPE_LOGINCLASS:
969 if (rule->rr_subject.rs_loginclass != NULL)
970 loginclass_hold(rule->rr_subject.rs_loginclass);
971 break;
972 default:
973 panic("rctl_rule_acquire_subject: unknown subject type %d",
974 rule->rr_subject_type);
975 }
976 }
977
978 static void
979 rctl_rule_release_subject(struct rctl_rule *rule)
980 {
981
982 ASSERT_RACCT_ENABLED();
983
984 switch (rule->rr_subject_type) {
985 case RCTL_SUBJECT_TYPE_UNDEFINED:
986 case RCTL_SUBJECT_TYPE_PROCESS:
987 break;
988 case RCTL_SUBJECT_TYPE_JAIL:
989 if (rule->rr_subject.rs_prison_racct != NULL)
990 prison_racct_free(rule->rr_subject.rs_prison_racct);
991 break;
992 case RCTL_SUBJECT_TYPE_USER:
993 if (rule->rr_subject.rs_uip != NULL)
994 uifree(rule->rr_subject.rs_uip);
995 break;
996 case RCTL_SUBJECT_TYPE_LOGINCLASS:
997 if (rule->rr_subject.rs_loginclass != NULL)
998 loginclass_free(rule->rr_subject.rs_loginclass);
999 break;
1000 default:
1001 panic("rctl_rule_release_subject: unknown subject type %d",
1002 rule->rr_subject_type);
1003 }
1004 }
1005
1006 struct rctl_rule *
1007 rctl_rule_alloc(int flags)
1008 {
1009 struct rctl_rule *rule;
1010
1011 ASSERT_RACCT_ENABLED();
1012
1013 rule = uma_zalloc(rctl_rule_zone, flags);
1014 if (rule == NULL)
1015 return (NULL);
1016 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
1017 rule->rr_subject.rs_proc = NULL;
1018 rule->rr_subject.rs_uip = NULL;
1019 rule->rr_subject.rs_loginclass = NULL;
1020 rule->rr_subject.rs_prison_racct = NULL;
1021 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
1022 rule->rr_resource = RACCT_UNDEFINED;
1023 rule->rr_action = RCTL_ACTION_UNDEFINED;
1024 rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
1025 refcount_init(&rule->rr_refcount, 1);
1026
1027 return (rule);
1028 }
1029
1030 struct rctl_rule *
1031 rctl_rule_duplicate(const struct rctl_rule *rule, int flags)
1032 {
1033 struct rctl_rule *copy;
1034
1035 ASSERT_RACCT_ENABLED();
1036
1037 copy = uma_zalloc(rctl_rule_zone, flags);
1038 if (copy == NULL)
1039 return (NULL);
1040 copy->rr_subject_type = rule->rr_subject_type;
1041 copy->rr_subject.rs_proc = rule->rr_subject.rs_proc;
1042 copy->rr_subject.rs_uip = rule->rr_subject.rs_uip;
1043 copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass;
1044 copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct;
1045 copy->rr_per = rule->rr_per;
1046 copy->rr_resource = rule->rr_resource;
1047 copy->rr_action = rule->rr_action;
1048 copy->rr_amount = rule->rr_amount;
1049 refcount_init(©->rr_refcount, 1);
1050 rctl_rule_acquire_subject(copy);
1051
1052 return (copy);
1053 }
1054
1055 void
1056 rctl_rule_acquire(struct rctl_rule *rule)
1057 {
1058
1059 ASSERT_RACCT_ENABLED();
1060 KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
1061
1062 refcount_acquire(&rule->rr_refcount);
1063 }
1064
1065 static void
1066 rctl_rule_free(void *context, int pending)
1067 {
1068 struct rctl_rule *rule;
1069
1070 rule = (struct rctl_rule *)context;
1071
1072 ASSERT_RACCT_ENABLED();
1073 KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
1074
1075 /*
1076 * We don't need locking here; rule is guaranteed to be inaccessible.
1077 */
1078
1079 rctl_rule_release_subject(rule);
1080 uma_zfree(rctl_rule_zone, rule);
1081 }
1082
1083 void
1084 rctl_rule_release(struct rctl_rule *rule)
1085 {
1086
1087 ASSERT_RACCT_ENABLED();
1088 KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
1089
1090 if (refcount_release(&rule->rr_refcount)) {
1091 /*
1092 * rctl_rule_release() is often called when iterating
1093 * over all the uidinfo structures in the system,
1094 * holding uihashtbl_lock. Since rctl_rule_free()
1095 * might end up calling uifree(), this would lead
1096 * to lock recursion. Use taskqueue to avoid this.
1097 */
1098 TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
1099 taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
1100 }
1101 }
1102
1103 static int
1104 rctl_rule_fully_specified(const struct rctl_rule *rule)
1105 {
1106
1107 ASSERT_RACCT_ENABLED();
1108
1109 switch (rule->rr_subject_type) {
1110 case RCTL_SUBJECT_TYPE_UNDEFINED:
1111 return (0);
1112 case RCTL_SUBJECT_TYPE_PROCESS:
1113 if (rule->rr_subject.rs_proc == NULL)
1114 return (0);
1115 break;
1116 case RCTL_SUBJECT_TYPE_USER:
1117 if (rule->rr_subject.rs_uip == NULL)
1118 return (0);
1119 break;
1120 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1121 if (rule->rr_subject.rs_loginclass == NULL)
1122 return (0);
1123 break;
1124 case RCTL_SUBJECT_TYPE_JAIL:
1125 if (rule->rr_subject.rs_prison_racct == NULL)
1126 return (0);
1127 break;
1128 default:
1129 panic("rctl_rule_fully_specified: unknown subject type %d",
1130 rule->rr_subject_type);
1131 }
1132 if (rule->rr_resource == RACCT_UNDEFINED)
1133 return (0);
1134 if (rule->rr_action == RCTL_ACTION_UNDEFINED)
1135 return (0);
1136 if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
1137 return (0);
1138 if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
1139 return (0);
1140
1141 return (1);
1142 }
1143
1144 static int
1145 rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
1146 {
1147 struct rctl_rule *rule;
1148 char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
1149 *amountstr, *perstr;
1150 id_t id;
1151 int error = 0;
1152
1153 ASSERT_RACCT_ENABLED();
1154
1155 rule = rctl_rule_alloc(M_WAITOK);
1156
1157 subjectstr = strsep(&rulestr, ":");
1158 subject_idstr = strsep(&rulestr, ":");
1159 resourcestr = strsep(&rulestr, ":");
1160 actionstr = strsep(&rulestr, "=/");
1161 amountstr = strsep(&rulestr, "/");
1162 perstr = rulestr;
1163
1164 if (subjectstr == NULL || subjectstr[0] == '\0')
1165 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
1166 else {
1167 error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
1168 if (error != 0)
1169 goto out;
1170 }
1171
1172 if (subject_idstr == NULL || subject_idstr[0] == '\0') {
1173 rule->rr_subject.rs_proc = NULL;
1174 rule->rr_subject.rs_uip = NULL;
1175 rule->rr_subject.rs_loginclass = NULL;
1176 rule->rr_subject.rs_prison_racct = NULL;
1177 } else {
1178 switch (rule->rr_subject_type) {
1179 case RCTL_SUBJECT_TYPE_UNDEFINED:
1180 error = EINVAL;
1181 goto out;
1182 case RCTL_SUBJECT_TYPE_PROCESS:
1183 error = str2id(subject_idstr, &id);
1184 if (error != 0)
1185 goto out;
1186 sx_assert(&allproc_lock, SA_LOCKED);
1187 rule->rr_subject.rs_proc = pfind(id);
1188 if (rule->rr_subject.rs_proc == NULL) {
1189 error = ESRCH;
1190 goto out;
1191 }
1192 PROC_UNLOCK(rule->rr_subject.rs_proc);
1193 break;
1194 case RCTL_SUBJECT_TYPE_USER:
1195 error = str2id(subject_idstr, &id);
1196 if (error != 0)
1197 goto out;
1198 rule->rr_subject.rs_uip = uifind(id);
1199 break;
1200 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1201 rule->rr_subject.rs_loginclass =
1202 loginclass_find(subject_idstr);
1203 if (rule->rr_subject.rs_loginclass == NULL) {
1204 error = ENAMETOOLONG;
1205 goto out;
1206 }
1207 break;
1208 case RCTL_SUBJECT_TYPE_JAIL:
1209 rule->rr_subject.rs_prison_racct =
1210 prison_racct_find(subject_idstr);
1211 if (rule->rr_subject.rs_prison_racct == NULL) {
1212 error = ENAMETOOLONG;
1213 goto out;
1214 }
1215 break;
1216 default:
1217 panic("rctl_string_to_rule: unknown subject type %d",
1218 rule->rr_subject_type);
1219 }
1220 }
1221
1222 if (resourcestr == NULL || resourcestr[0] == '\0')
1223 rule->rr_resource = RACCT_UNDEFINED;
1224 else {
1225 error = str2value(resourcestr, &rule->rr_resource,
1226 resourcenames);
1227 if (error != 0)
1228 goto out;
1229 }
1230
1231 if (actionstr == NULL || actionstr[0] == '\0')
1232 rule->rr_action = RCTL_ACTION_UNDEFINED;
1233 else {
1234 error = str2value(actionstr, &rule->rr_action, actionnames);
1235 if (error != 0)
1236 goto out;
1237 }
1238
1239 if (amountstr == NULL || amountstr[0] == '\0')
1240 rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
1241 else {
1242 error = str2int64(amountstr, &rule->rr_amount);
1243 if (error != 0)
1244 goto out;
1245 if (RACCT_IS_IN_MILLIONS(rule->rr_resource)) {
1246 if (rule->rr_amount > INT64_MAX / 1000000) {
1247 error = ERANGE;
1248 goto out;
1249 }
1250 rule->rr_amount *= 1000000;
1251 }
1252 }
1253
1254 if (perstr == NULL || perstr[0] == '\0')
1255 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
1256 else {
1257 error = str2value(perstr, &rule->rr_per, subjectnames);
1258 if (error != 0)
1259 goto out;
1260 }
1261
1262 out:
1263 if (error == 0)
1264 *rulep = rule;
1265 else
1266 rctl_rule_release(rule);
1267
1268 return (error);
1269 }
1270
1271 /*
1272 * Link a rule with all the subjects it applies to.
1273 */
1274 int
1275 rctl_rule_add(struct rctl_rule *rule)
1276 {
1277 struct proc *p;
1278 struct ucred *cred;
1279 struct uidinfo *uip;
1280 struct prison *pr;
1281 struct prison_racct *prr;
1282 struct loginclass *lc;
1283 struct rctl_rule *rule2;
1284 int match;
1285
1286 ASSERT_RACCT_ENABLED();
1287 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
1288
1289 /*
1290 * Some rules just don't make sense, like "deny" rule for an undeniable
1291 * resource. The exception are the RSS and %CPU resources - they are
1292 * not deniable in the racct sense, but the limit is enforced in
1293 * a different way.
1294 */
1295 if (rule->rr_action == RCTL_ACTION_DENY &&
1296 !RACCT_IS_DENIABLE(rule->rr_resource) &&
1297 rule->rr_resource != RACCT_RSS &&
1298 rule->rr_resource != RACCT_PCTCPU) {
1299 return (EOPNOTSUPP);
1300 }
1301
1302 if (rule->rr_action == RCTL_ACTION_THROTTLE &&
1303 !RACCT_IS_DECAYING(rule->rr_resource)) {
1304 return (EOPNOTSUPP);
1305 }
1306
1307 if (rule->rr_action == RCTL_ACTION_THROTTLE &&
1308 rule->rr_resource == RACCT_PCTCPU) {
1309 return (EOPNOTSUPP);
1310 }
1311
1312 if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS &&
1313 RACCT_IS_SLOPPY(rule->rr_resource)) {
1314 return (EOPNOTSUPP);
1315 }
1316
1317 /*
1318 * Make sure there are no duplicated rules. Also, for the "deny"
1319 * rules, remove ones differing only by "amount".
1320 */
1321 if (rule->rr_action == RCTL_ACTION_DENY) {
1322 rule2 = rctl_rule_duplicate(rule, M_WAITOK);
1323 rule2->rr_amount = RCTL_AMOUNT_UNDEFINED;
1324 rctl_rule_remove(rule2);
1325 rctl_rule_release(rule2);
1326 } else
1327 rctl_rule_remove(rule);
1328
1329 switch (rule->rr_subject_type) {
1330 case RCTL_SUBJECT_TYPE_PROCESS:
1331 p = rule->rr_subject.rs_proc;
1332 KASSERT(p != NULL, ("rctl_rule_add: NULL proc"));
1333
1334 rctl_racct_add_rule(p->p_racct, rule);
1335 /*
1336 * In case of per-process rule, we don't have anything more
1337 * to do.
1338 */
1339 return (0);
1340
1341 case RCTL_SUBJECT_TYPE_USER:
1342 uip = rule->rr_subject.rs_uip;
1343 KASSERT(uip != NULL, ("rctl_rule_add: NULL uip"));
1344 rctl_racct_add_rule(uip->ui_racct, rule);
1345 break;
1346
1347 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1348 lc = rule->rr_subject.rs_loginclass;
1349 KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass"));
1350 rctl_racct_add_rule(lc->lc_racct, rule);
1351 break;
1352
1353 case RCTL_SUBJECT_TYPE_JAIL:
1354 prr = rule->rr_subject.rs_prison_racct;
1355 KASSERT(prr != NULL, ("rctl_rule_add: NULL pr"));
1356 rctl_racct_add_rule(prr->prr_racct, rule);
1357 break;
1358
1359 default:
1360 panic("rctl_rule_add: unknown subject type %d",
1361 rule->rr_subject_type);
1362 }
1363
1364 /*
1365 * Now go through all the processes and add the new rule to the ones
1366 * it applies to.
1367 */
1368 sx_assert(&allproc_lock, SA_LOCKED);
1369 FOREACH_PROC_IN_SYSTEM(p) {
1370 cred = p->p_ucred;
1371 switch (rule->rr_subject_type) {
1372 case RCTL_SUBJECT_TYPE_USER:
1373 if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
1374 cred->cr_ruidinfo == rule->rr_subject.rs_uip)
1375 break;
1376 continue;
1377 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1378 if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
1379 break;
1380 continue;
1381 case RCTL_SUBJECT_TYPE_JAIL:
1382 match = 0;
1383 for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
1384 if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) {
1385 match = 1;
1386 break;
1387 }
1388 }
1389 if (match)
1390 break;
1391 continue;
1392 default:
1393 panic("rctl_rule_add: unknown subject type %d",
1394 rule->rr_subject_type);
1395 }
1396
1397 rctl_racct_add_rule(p->p_racct, rule);
1398 }
1399
1400 return (0);
1401 }
1402
/*
 * "Pre" hook handed to the *_racct_foreach() iterators: takes the
 * racct lock.
 */
static void
rctl_rule_pre_callback(void)
{

	RACCT_LOCK();
}
1409
/*
 * "Post" hook handed to the *_racct_foreach() iterators: drops the
 * racct lock.
 */
static void
rctl_rule_post_callback(void)
{

	RACCT_UNLOCK();
}
1416
/*
 * Per-racct callback used by rctl_rule_remove(): removes the rules
 * matching the filter ('arg2') from 'racct' and adds the number of
 * removed rules to the int counter that 'arg3' points to.
 */
static void
rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3)
{
	struct rctl_rule *filter = (struct rctl_rule *)arg2;
	int *total = (int *)arg3;

	ASSERT_RACCT_ENABLED();
	RACCT_LOCK_ASSERT();

	*total += rctl_racct_remove_rules(racct, filter);
}
1430
1431 /*
1432 * Remove all rules that match the filter.
1433 */
1434 int
1435 rctl_rule_remove(struct rctl_rule *filter)
1436 {
1437 struct proc *p;
1438 int found = 0;
1439
1440 ASSERT_RACCT_ENABLED();
1441
1442 if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
1443 filter->rr_subject.rs_proc != NULL) {
1444 p = filter->rr_subject.rs_proc;
1445 RACCT_LOCK();
1446 found = rctl_racct_remove_rules(p->p_racct, filter);
1447 RACCT_UNLOCK();
1448 if (found)
1449 return (0);
1450 return (ESRCH);
1451 }
1452
1453 loginclass_racct_foreach(rctl_rule_remove_callback,
1454 rctl_rule_pre_callback, rctl_rule_post_callback,
1455 filter, (void *)&found);
1456 ui_racct_foreach(rctl_rule_remove_callback,
1457 rctl_rule_pre_callback, rctl_rule_post_callback,
1458 filter, (void *)&found);
1459 prison_racct_foreach(rctl_rule_remove_callback,
1460 rctl_rule_pre_callback, rctl_rule_post_callback,
1461 filter, (void *)&found);
1462
1463 sx_assert(&allproc_lock, SA_LOCKED);
1464 RACCT_LOCK();
1465 FOREACH_PROC_IN_SYSTEM(p) {
1466 found += rctl_racct_remove_rules(p->p_racct, filter);
1467 }
1468 RACCT_UNLOCK();
1469
1470 if (found)
1471 return (0);
1472 return (ESRCH);
1473 }
1474
1475 /*
1476 * Appends a rule to the sbuf.
1477 */
1478 static void
1479 rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule)
1480 {
1481 int64_t amount;
1482
1483 ASSERT_RACCT_ENABLED();
1484
1485 sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
1486
1487 switch (rule->rr_subject_type) {
1488 case RCTL_SUBJECT_TYPE_PROCESS:
1489 if (rule->rr_subject.rs_proc == NULL)
1490 sbuf_printf(sb, ":");
1491 else
1492 sbuf_printf(sb, "%d:",
1493 rule->rr_subject.rs_proc->p_pid);
1494 break;
1495 case RCTL_SUBJECT_TYPE_USER:
1496 if (rule->rr_subject.rs_uip == NULL)
1497 sbuf_printf(sb, ":");
1498 else
1499 sbuf_printf(sb, "%d:",
1500 rule->rr_subject.rs_uip->ui_uid);
1501 break;
1502 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1503 if (rule->rr_subject.rs_loginclass == NULL)
1504 sbuf_printf(sb, ":");
1505 else
1506 sbuf_printf(sb, "%s:",
1507 rule->rr_subject.rs_loginclass->lc_name);
1508 break;
1509 case RCTL_SUBJECT_TYPE_JAIL:
1510 if (rule->rr_subject.rs_prison_racct == NULL)
1511 sbuf_printf(sb, ":");
1512 else
1513 sbuf_printf(sb, "%s:",
1514 rule->rr_subject.rs_prison_racct->prr_name);
1515 break;
1516 default:
1517 panic("rctl_rule_to_sbuf: unknown subject type %d",
1518 rule->rr_subject_type);
1519 }
1520
1521 amount = rule->rr_amount;
1522 if (amount != RCTL_AMOUNT_UNDEFINED &&
1523 RACCT_IS_IN_MILLIONS(rule->rr_resource))
1524 amount /= 1000000;
1525
1526 sbuf_printf(sb, "%s:%s=%jd",
1527 rctl_resource_name(rule->rr_resource),
1528 rctl_action_name(rule->rr_action),
1529 amount);
1530
1531 if (rule->rr_per != rule->rr_subject_type)
1532 sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per));
1533 }
1534
1535 /*
1536 * Routine used by RCTL syscalls to read in input string.
1537 */
1538 static int
1539 rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen)
1540 {
1541 char *str;
1542 int error;
1543
1544 ASSERT_RACCT_ENABLED();
1545
1546 if (inbuflen <= 0)
1547 return (EINVAL);
1548 if (inbuflen > RCTL_MAX_INBUFSIZE)
1549 return (E2BIG);
1550
1551 str = malloc(inbuflen + 1, M_RCTL, M_WAITOK);
1552 error = copyinstr(inbufp, str, inbuflen, NULL);
1553 if (error != 0) {
1554 free(str, M_RCTL);
1555 return (error);
1556 }
1557
1558 *inputstr = str;
1559
1560 return (0);
1561 }
1562
1563 /*
1564 * Routine used by RCTL syscalls to write out output string.
1565 */
1566 static int
1567 rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen)
1568 {
1569 int error;
1570
1571 ASSERT_RACCT_ENABLED();
1572
1573 if (outputsbuf == NULL)
1574 return (0);
1575
1576 sbuf_finish(outputsbuf);
1577 if (outbuflen < sbuf_len(outputsbuf) + 1) {
1578 sbuf_delete(outputsbuf);
1579 return (ERANGE);
1580 }
1581 error = copyout(sbuf_data(outputsbuf), outbufp,
1582 sbuf_len(outputsbuf) + 1);
1583 sbuf_delete(outputsbuf);
1584 return (error);
1585 }
1586
1587 static struct sbuf *
1588 rctl_racct_to_sbuf(struct racct *racct, int sloppy)
1589 {
1590 struct sbuf *sb;
1591 int64_t amount;
1592 int i;
1593
1594 ASSERT_RACCT_ENABLED();
1595
1596 sb = sbuf_new_auto();
1597 for (i = 0; i <= RACCT_MAX; i++) {
1598 if (sloppy == 0 && RACCT_IS_SLOPPY(i))
1599 continue;
1600 RACCT_LOCK();
1601 amount = racct->r_resources[i];
1602 RACCT_UNLOCK();
1603 if (RACCT_IS_IN_MILLIONS(i))
1604 amount /= 1000000;
1605 sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount);
1606 }
1607 sbuf_setpos(sb, sbuf_len(sb) - 1);
1608 return (sb);
1609 }
1610
1611 int
1612 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
1613 {
1614 struct rctl_rule *filter;
1615 struct sbuf *outputsbuf = NULL;
1616 struct proc *p;
1617 struct uidinfo *uip;
1618 struct loginclass *lc;
1619 struct prison_racct *prr;
1620 char *inputstr;
1621 int error;
1622
1623 if (!racct_enable)
1624 return (ENOSYS);
1625
1626 error = priv_check(td, PRIV_RCTL_GET_RACCT);
1627 if (error != 0)
1628 return (error);
1629
1630 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1631 if (error != 0)
1632 return (error);
1633
1634 sx_slock(&allproc_lock);
1635 error = rctl_string_to_rule(inputstr, &filter);
1636 free(inputstr, M_RCTL);
1637 if (error != 0) {
1638 sx_sunlock(&allproc_lock);
1639 return (error);
1640 }
1641
1642 switch (filter->rr_subject_type) {
1643 case RCTL_SUBJECT_TYPE_PROCESS:
1644 p = filter->rr_subject.rs_proc;
1645 if (p == NULL) {
1646 error = EINVAL;
1647 goto out;
1648 }
1649 outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0);
1650 break;
1651 case RCTL_SUBJECT_TYPE_USER:
1652 uip = filter->rr_subject.rs_uip;
1653 if (uip == NULL) {
1654 error = EINVAL;
1655 goto out;
1656 }
1657 outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1);
1658 break;
1659 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1660 lc = filter->rr_subject.rs_loginclass;
1661 if (lc == NULL) {
1662 error = EINVAL;
1663 goto out;
1664 }
1665 outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1);
1666 break;
1667 case RCTL_SUBJECT_TYPE_JAIL:
1668 prr = filter->rr_subject.rs_prison_racct;
1669 if (prr == NULL) {
1670 error = EINVAL;
1671 goto out;
1672 }
1673 outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1);
1674 break;
1675 default:
1676 error = EINVAL;
1677 }
1678 out:
1679 rctl_rule_release(filter);
1680 sx_sunlock(&allproc_lock);
1681 if (error != 0)
1682 return (error);
1683
1684 error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen);
1685
1686 return (error);
1687 }
1688
1689 static void
1690 rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3)
1691 {
1692 struct rctl_rule *filter = (struct rctl_rule *)arg2;
1693 struct rctl_rule_link *link;
1694 struct sbuf *sb = (struct sbuf *)arg3;
1695
1696 ASSERT_RACCT_ENABLED();
1697 RACCT_LOCK_ASSERT();
1698
1699 LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
1700 if (!rctl_rule_matches(link->rrl_rule, filter))
1701 continue;
1702 rctl_rule_to_sbuf(sb, link->rrl_rule);
1703 sbuf_printf(sb, ",");
1704 }
1705 }
1706
1707 int
1708 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
1709 {
1710 struct sbuf *sb;
1711 struct rctl_rule *filter;
1712 struct rctl_rule_link *link;
1713 struct proc *p;
1714 char *inputstr, *buf;
1715 size_t bufsize;
1716 int error;
1717
1718 if (!racct_enable)
1719 return (ENOSYS);
1720
1721 error = priv_check(td, PRIV_RCTL_GET_RULES);
1722 if (error != 0)
1723 return (error);
1724
1725 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1726 if (error != 0)
1727 return (error);
1728
1729 sx_slock(&allproc_lock);
1730 error = rctl_string_to_rule(inputstr, &filter);
1731 free(inputstr, M_RCTL);
1732 if (error != 0) {
1733 sx_sunlock(&allproc_lock);
1734 return (error);
1735 }
1736
1737 bufsize = uap->outbuflen;
1738 if (bufsize > rctl_maxbufsize) {
1739 sx_sunlock(&allproc_lock);
1740 return (E2BIG);
1741 }
1742
1743 buf = malloc(bufsize, M_RCTL, M_WAITOK);
1744 sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1745 KASSERT(sb != NULL, ("sbuf_new failed"));
1746
1747 FOREACH_PROC_IN_SYSTEM(p) {
1748 RACCT_LOCK();
1749 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1750 /*
1751 * Non-process rules will be added to the buffer later.
1752 * Adding them here would result in duplicated output.
1753 */
1754 if (link->rrl_rule->rr_subject_type !=
1755 RCTL_SUBJECT_TYPE_PROCESS)
1756 continue;
1757 if (!rctl_rule_matches(link->rrl_rule, filter))
1758 continue;
1759 rctl_rule_to_sbuf(sb, link->rrl_rule);
1760 sbuf_printf(sb, ",");
1761 }
1762 RACCT_UNLOCK();
1763 }
1764
1765 loginclass_racct_foreach(rctl_get_rules_callback,
1766 rctl_rule_pre_callback, rctl_rule_post_callback,
1767 filter, sb);
1768 ui_racct_foreach(rctl_get_rules_callback,
1769 rctl_rule_pre_callback, rctl_rule_post_callback,
1770 filter, sb);
1771 prison_racct_foreach(rctl_get_rules_callback,
1772 rctl_rule_pre_callback, rctl_rule_post_callback,
1773 filter, sb);
1774 if (sbuf_error(sb) == ENOMEM) {
1775 error = ERANGE;
1776 goto out;
1777 }
1778
1779 /*
1780 * Remove trailing ",".
1781 */
1782 if (sbuf_len(sb) > 0)
1783 sbuf_setpos(sb, sbuf_len(sb) - 1);
1784
1785 error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1786 out:
1787 rctl_rule_release(filter);
1788 sx_sunlock(&allproc_lock);
1789 free(buf, M_RCTL);
1790 return (error);
1791 }
1792
1793 int
1794 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
1795 {
1796 struct sbuf *sb;
1797 struct rctl_rule *filter;
1798 struct rctl_rule_link *link;
1799 char *inputstr, *buf;
1800 size_t bufsize;
1801 int error;
1802
1803 if (!racct_enable)
1804 return (ENOSYS);
1805
1806 error = priv_check(td, PRIV_RCTL_GET_LIMITS);
1807 if (error != 0)
1808 return (error);
1809
1810 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1811 if (error != 0)
1812 return (error);
1813
1814 sx_slock(&allproc_lock);
1815 error = rctl_string_to_rule(inputstr, &filter);
1816 free(inputstr, M_RCTL);
1817 if (error != 0) {
1818 sx_sunlock(&allproc_lock);
1819 return (error);
1820 }
1821
1822 if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) {
1823 rctl_rule_release(filter);
1824 sx_sunlock(&allproc_lock);
1825 return (EINVAL);
1826 }
1827 if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) {
1828 rctl_rule_release(filter);
1829 sx_sunlock(&allproc_lock);
1830 return (EOPNOTSUPP);
1831 }
1832 if (filter->rr_subject.rs_proc == NULL) {
1833 rctl_rule_release(filter);
1834 sx_sunlock(&allproc_lock);
1835 return (EINVAL);
1836 }
1837
1838 bufsize = uap->outbuflen;
1839 if (bufsize > rctl_maxbufsize) {
1840 rctl_rule_release(filter);
1841 sx_sunlock(&allproc_lock);
1842 return (E2BIG);
1843 }
1844
1845 buf = malloc(bufsize, M_RCTL, M_WAITOK);
1846 sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1847 KASSERT(sb != NULL, ("sbuf_new failed"));
1848
1849 RACCT_LOCK();
1850 LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links,
1851 rrl_next) {
1852 rctl_rule_to_sbuf(sb, link->rrl_rule);
1853 sbuf_printf(sb, ",");
1854 }
1855 RACCT_UNLOCK();
1856 if (sbuf_error(sb) == ENOMEM) {
1857 error = ERANGE;
1858 sbuf_delete(sb);
1859 goto out;
1860 }
1861
1862 /*
1863 * Remove trailing ",".
1864 */
1865 if (sbuf_len(sb) > 0)
1866 sbuf_setpos(sb, sbuf_len(sb) - 1);
1867
1868 error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1869 out:
1870 rctl_rule_release(filter);
1871 sx_sunlock(&allproc_lock);
1872 free(buf, M_RCTL);
1873 return (error);
1874 }
1875
1876 int
1877 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
1878 {
1879 struct rctl_rule *rule;
1880 char *inputstr;
1881 int error;
1882
1883 if (!racct_enable)
1884 return (ENOSYS);
1885
1886 error = priv_check(td, PRIV_RCTL_ADD_RULE);
1887 if (error != 0)
1888 return (error);
1889
1890 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1891 if (error != 0)
1892 return (error);
1893
1894 sx_slock(&allproc_lock);
1895 error = rctl_string_to_rule(inputstr, &rule);
1896 free(inputstr, M_RCTL);
1897 if (error != 0) {
1898 sx_sunlock(&allproc_lock);
1899 return (error);
1900 }
1901 /*
1902 * The 'per' part of a rule is optional.
1903 */
1904 if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED &&
1905 rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED)
1906 rule->rr_per = rule->rr_subject_type;
1907
1908 if (!rctl_rule_fully_specified(rule)) {
1909 error = EINVAL;
1910 goto out;
1911 }
1912
1913 error = rctl_rule_add(rule);
1914
1915 out:
1916 rctl_rule_release(rule);
1917 sx_sunlock(&allproc_lock);
1918 return (error);
1919 }
1920
1921 int
1922 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
1923 {
1924 struct rctl_rule *filter;
1925 char *inputstr;
1926 int error;
1927
1928 if (!racct_enable)
1929 return (ENOSYS);
1930
1931 error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
1932 if (error != 0)
1933 return (error);
1934
1935 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1936 if (error != 0)
1937 return (error);
1938
1939 sx_slock(&allproc_lock);
1940 error = rctl_string_to_rule(inputstr, &filter);
1941 free(inputstr, M_RCTL);
1942 if (error != 0) {
1943 sx_sunlock(&allproc_lock);
1944 return (error);
1945 }
1946
1947 error = rctl_rule_remove(filter);
1948 rctl_rule_release(filter);
1949 sx_sunlock(&allproc_lock);
1950
1951 return (error);
1952 }
1953
1954 /*
1955 * Update RCTL rule list after credential change.
1956 */
1957 void
1958 rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred)
1959 {
1960 LIST_HEAD(, rctl_rule_link) newrules;
1961 struct rctl_rule_link *link, *newlink;
1962 struct uidinfo *newuip;
1963 struct loginclass *newlc;
1964 struct prison_racct *newprr;
1965 int rulecnt, i;
1966
1967 if (!racct_enable)
1968 return;
1969
1970 PROC_LOCK_ASSERT(p, MA_NOTOWNED);
1971
1972 newuip = newcred->cr_ruidinfo;
1973 newlc = newcred->cr_loginclass;
1974 newprr = newcred->cr_prison->pr_prison_racct;
1975
1976 LIST_INIT(&newrules);
1977
1978 again:
1979 /*
1980 * First, count the rules that apply to the process with new
1981 * credentials.
1982 */
1983 rulecnt = 0;
1984 RACCT_LOCK();
1985 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1986 if (link->rrl_rule->rr_subject_type ==
1987 RCTL_SUBJECT_TYPE_PROCESS)
1988 rulecnt++;
1989 }
1990 LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next)
1991 rulecnt++;
1992 LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next)
1993 rulecnt++;
1994 LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next)
1995 rulecnt++;
1996 RACCT_UNLOCK();
1997
1998 /*
1999 * Create temporary list. We've dropped the rctl_lock in order
2000 * to use M_WAITOK.
2001 */
2002 for (i = 0; i < rulecnt; i++) {
2003 newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
2004 newlink->rrl_rule = NULL;
2005 newlink->rrl_exceeded = 0;
2006 LIST_INSERT_HEAD(&newrules, newlink, rrl_next);
2007 }
2008
2009 newlink = LIST_FIRST(&newrules);
2010
2011 /*
2012 * Assign rules to the newly allocated list entries.
2013 */
2014 RACCT_LOCK();
2015 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
2016 if (link->rrl_rule->rr_subject_type ==
2017 RCTL_SUBJECT_TYPE_PROCESS) {
2018 if (newlink == NULL)
2019 goto goaround;
2020 rctl_rule_acquire(link->rrl_rule);
2021 newlink->rrl_rule = link->rrl_rule;
2022 newlink->rrl_exceeded = link->rrl_exceeded;
2023 newlink = LIST_NEXT(newlink, rrl_next);
2024 rulecnt--;
2025 }
2026 }
2027
2028 LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) {
2029 if (newlink == NULL)
2030 goto goaround;
2031 rctl_rule_acquire(link->rrl_rule);
2032 newlink->rrl_rule = link->rrl_rule;
2033 newlink->rrl_exceeded = link->rrl_exceeded;
2034 newlink = LIST_NEXT(newlink, rrl_next);
2035 rulecnt--;
2036 }
2037
2038 LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) {
2039 if (newlink == NULL)
2040 goto goaround;
2041 rctl_rule_acquire(link->rrl_rule);
2042 newlink->rrl_rule = link->rrl_rule;
2043 newlink->rrl_exceeded = link->rrl_exceeded;
2044 newlink = LIST_NEXT(newlink, rrl_next);
2045 rulecnt--;
2046 }
2047
2048 LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) {
2049 if (newlink == NULL)
2050 goto goaround;
2051 rctl_rule_acquire(link->rrl_rule);
2052 newlink->rrl_rule = link->rrl_rule;
2053 newlink->rrl_exceeded = link->rrl_exceeded;
2054 newlink = LIST_NEXT(newlink, rrl_next);
2055 rulecnt--;
2056 }
2057
2058 if (rulecnt == 0) {
2059 /*
2060 * Free the old rule list.
2061 */
2062 while (!LIST_EMPTY(&p->p_racct->r_rule_links)) {
2063 link = LIST_FIRST(&p->p_racct->r_rule_links);
2064 LIST_REMOVE(link, rrl_next);
2065 rctl_rule_release(link->rrl_rule);
2066 uma_zfree(rctl_rule_link_zone, link);
2067 }
2068
2069 /*
2070 * Replace lists and we're done.
2071 *
2072 * XXX: Is there any way to switch list heads instead
2073 * of iterating here?
2074 */
2075 while (!LIST_EMPTY(&newrules)) {
2076 newlink = LIST_FIRST(&newrules);
2077 LIST_REMOVE(newlink, rrl_next);
2078 LIST_INSERT_HEAD(&p->p_racct->r_rule_links,
2079 newlink, rrl_next);
2080 }
2081
2082 RACCT_UNLOCK();
2083
2084 return;
2085 }
2086
2087 goaround:
2088 RACCT_UNLOCK();
2089
2090 /*
2091 * Rule list changed while we were not holding the rctl_lock.
2092 * Free the new list and try again.
2093 */
2094 while (!LIST_EMPTY(&newrules)) {
2095 newlink = LIST_FIRST(&newrules);
2096 LIST_REMOVE(newlink, rrl_next);
2097 if (newlink->rrl_rule != NULL)
2098 rctl_rule_release(newlink->rrl_rule);
2099 uma_zfree(rctl_rule_link_zone, newlink);
2100 }
2101
2102 goto again;
2103 }
2104
2105 /*
2106 * Assign RCTL rules to the newly created process.
2107 */
2108 int
2109 rctl_proc_fork(struct proc *parent, struct proc *child)
2110 {
2111 struct rctl_rule *rule;
2112 struct rctl_rule_link *link;
2113 int error;
2114
2115 ASSERT_RACCT_ENABLED();
2116 RACCT_LOCK_ASSERT();
2117 KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent));
2118
2119 LIST_INIT(&child->p_racct->r_rule_links);
2120
2121 /*
2122 * Go through limits applicable to the parent and assign them
2123 * to the child. Rules with 'process' subject have to be duplicated
2124 * in order to make their rr_subject point to the new process.
2125 */
2126 LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) {
2127 if (link->rrl_rule->rr_subject_type ==
2128 RCTL_SUBJECT_TYPE_PROCESS) {
2129 rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT);
2130 if (rule == NULL)
2131 goto fail;
2132 KASSERT(rule->rr_subject.rs_proc == parent,
2133 ("rule->rr_subject.rs_proc != parent"));
2134 rule->rr_subject.rs_proc = child;
2135 error = rctl_racct_add_rule_locked(child->p_racct,
2136 rule);
2137 rctl_rule_release(rule);
2138 if (error != 0)
2139 goto fail;
2140 } else {
2141 error = rctl_racct_add_rule_locked(child->p_racct,
2142 link->rrl_rule);
2143 if (error != 0)
2144 goto fail;
2145 }
2146 }
2147
2148 return (0);
2149
2150 fail:
2151 while (!LIST_EMPTY(&child->p_racct->r_rule_links)) {
2152 link = LIST_FIRST(&child->p_racct->r_rule_links);
2153 LIST_REMOVE(link, rrl_next);
2154 rctl_rule_release(link->rrl_rule);
2155 uma_zfree(rctl_rule_link_zone, link);
2156 }
2157
2158 return (EAGAIN);
2159 }
2160
2161 /*
2162 * Release rules attached to the racct.
2163 */
2164 void
2165 rctl_racct_release(struct racct *racct)
2166 {
2167 struct rctl_rule_link *link;
2168
2169 ASSERT_RACCT_ENABLED();
2170 RACCT_LOCK_ASSERT();
2171
2172 while (!LIST_EMPTY(&racct->r_rule_links)) {
2173 link = LIST_FIRST(&racct->r_rule_links);
2174 LIST_REMOVE(link, rrl_next);
2175 rctl_rule_release(link->rrl_rule);
2176 uma_zfree(rctl_rule_link_zone, link);
2177 }
2178 }
2179
2180 static void
2181 rctl_init(void)
2182 {
2183
2184 if (!racct_enable)
2185 return;
2186
2187 rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule),
2188 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
2189 rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
2190 sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
2191 UMA_ALIGN_PTR, 0);
2192
2193 /*
2194 * Set default values, making sure not to overwrite the ones
2195 * fetched from tunables. Most of those could be set at the
2196 * declaration, except for the rctl_throttle_max - we cannot
2197 * set it there due to hz not being compile time constant.
2198 */
2199 if (rctl_throttle_min < 1)
2200 rctl_throttle_min = 1;
2201 if (rctl_throttle_max < rctl_throttle_min)
2202 rctl_throttle_max = 2 * hz;
2203 if (rctl_throttle_pct < 0)
2204 rctl_throttle_pct = 100;
2205 if (rctl_throttle_pct2 < 0)
2206 rctl_throttle_pct2 = 100;
2207 }
2208
2209 #else /* !RCTL */
2210
2211 int
2212 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
2213 {
2214
2215 return (ENOSYS);
2216 }
2217
2218 int
2219 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
2220 {
2221
2222 return (ENOSYS);
2223 }
2224
2225 int
2226 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
2227 {
2228
2229 return (ENOSYS);
2230 }
2231
2232 int
2233 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
2234 {
2235
2236 return (ENOSYS);
2237 }
2238
2239 int
2240 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
2241 {
2242
2243 return (ENOSYS);
2244 }
2245
2246 #endif /* !RCTL */
Cache object: 9276034ee1e8b684fbbc2dcf4e7375dc
|