/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: releng/8.4/sys/kern/kern_resource.c 230070 2012-01-13 18:58:31Z jhb $");

#include "opt_compat.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/file.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/refcount.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/sx.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/time.h>
#include <sys/umtx.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>

static MALLOC_DEFINE(M_PLIMIT, "plimit", "plimit structures");
static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures");
#define	UIHASH(uid)	(&uihashtbl[(uid) & uihash])
static struct rwlock uihashtbl_lock;
static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
static u_long uihash;		/* size of hash table - 1 */

static void	calcru1(struct proc *p, struct rusage_ext *ruxp,
		    struct timeval *up, struct timeval *sp);
static int	donice(struct thread *td, struct proc *chgp, int n);
static struct uidinfo *uilookup(uid_t uid);
static void	ruxagg_locked(struct rusage_ext *rux, struct thread *td);

/*
 * Resource controls and accounting.
 */
#ifndef _SYS_SYSPROTO_H_
struct getpriority_args {
	int	which;
	int	who;
};
#endif
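/*
 * Return the lowest nice value among the processes selected by
 * which/who; if no matching process is visible, return ESRCH.
 */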
int
getpriority(struct thread *td, struct getpriority_args *uap)
{
	struct proc *p;
	struct pgrp *pg;
	int error, low;

	error = 0;
	low = PRIO_MAX + 1;
	switch (uap->which) {

	case PRIO_PROCESS:
		if (uap->who == 0)
			low = td->td_proc->p_nice;
		else {
			p = pfind(uap->who);
			if (p == NULL)
				break;
			if (p_cansee(td, p) == 0)
				low = p->p_nice;
			PROC_UNLOCK(p);
		}
		break;

	case PRIO_PGRP:
		sx_slock(&proctree_lock);
		if (uap->who == 0) {
			pg = td->td_proc->p_pgrp;
			PGRP_LOCK(pg);
		} else {
			pg = pgfind(uap->who);
			if (pg == NULL) {
				sx_sunlock(&proctree_lock);
				break;
			}
		}
		sx_sunlock(&proctree_lock);
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			if (p->p_state == PRS_NORMAL &&
			    p_cansee(td, p) == 0) {
				if (p->p_nice < low)
					low = p->p_nice;
			}
			PROC_UNLOCK(p);
		}
		PGRP_UNLOCK(pg);
		break;

	case PRIO_USER:
		if (uap->who == 0)
			uap->who = td->td_ucred->cr_uid;
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_state == PRS_NORMAL &&
			    p_cansee(td, p) == 0 &&
			    p->p_ucred->cr_uid == uap->who) {
				if (p->p_nice < low)
					low = p->p_nice;
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		break;

	default:
		error = EINVAL;
		break;
	}
	if (low == PRIO_MAX + 1 && error == 0)
		error = ESRCH;
	td->td_retval[0] = low;
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct setpriority_args {
	int	which;
	int	who;
	int	prio;
};
#endif
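/*
 * Apply a nice value to the processes selected by which/who; if no
 * matching process is found, return ESRCH.
 */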
int
setpriority(struct thread *td, struct setpriority_args *uap)
{
	struct proc *curp, *p;
	struct pgrp *pg;
	int found = 0, error = 0;

	curp = td->td_proc;
	switch (uap->which) {
	case PRIO_PROCESS:
		if (uap->who == 0) {
			PROC_LOCK(curp);
			error = donice(td, curp, uap->prio);
			PROC_UNLOCK(curp);
		} else {
			p = pfind(uap->who);
			if (p == NULL)
				break;
			error = p_cansee(td, p);
			if (error == 0)
				error = donice(td, p, uap->prio);
			PROC_UNLOCK(p);
		}
		found++;
		break;

	case PRIO_PGRP:
		sx_slock(&proctree_lock);
		if (uap->who == 0) {
			pg = curp->p_pgrp;
			PGRP_LOCK(pg);
		} else {
			pg = pgfind(uap->who);
			if (pg == NULL) {
				sx_sunlock(&proctree_lock);
				break;
			}
		}
		sx_sunlock(&proctree_lock);
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			if (p->p_state == PRS_NORMAL &&
			    p_cansee(td, p) == 0) {
				error = donice(td, p, uap->prio);
				found++;
			}
			PROC_UNLOCK(p);
		}
		PGRP_UNLOCK(pg);
		break;

	case PRIO_USER:
		if (uap->who == 0)
			uap->who = td->td_ucred->cr_uid;
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_state == PRS_NORMAL &&
			    p->p_ucred->cr_uid == uap->who &&
			    p_cansee(td, p) == 0) {
				error = donice(td, p, uap->prio);
				found++;
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		break;

	default:
		error = EINVAL;
		break;
	}
	if (found == 0 && error == 0)
		error = ESRCH;
	return (error);
}

/*
 * Set "nice" for a (whole) process.
 */
static int
donice(struct thread *td, struct proc *p, int n)
{
	int error;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	if ((error = p_cansched(td, p)))
		return (error);
	if (n > PRIO_MAX)
		n = PRIO_MAX;
	if (n < PRIO_MIN)
		n = PRIO_MIN;
	if (n < p->p_nice && priv_check(td, PRIV_SCHED_SETPRIORITY) != 0)
		return (EACCES);
	sched_nice(p, n);
	return (0);
}

static int unprivileged_idprio;
SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_idprio, CTLFLAG_RW,
    &unprivileged_idprio, 0, "Allow non-root users to set an idle priority");

/*
 * Set realtime priority for LWP.
 */
#ifndef _SYS_SYSPROTO_H_
struct rtprio_thread_args {
	int		function;
	lwpid_t		lwpid;
	struct rtprio	*rtp;
};
#endif
int
rtprio_thread(struct thread *td, struct rtprio_thread_args *uap)
{
	struct proc *p;
	struct rtprio rtp;
	struct thread *td1;
	int cierror, error;

	/* Perform copyin before acquiring locks if needed. */
	if (uap->function == RTP_SET)
		cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
	else
		cierror = 0;

	/*
	 * Though lwpid is unique, only the current process is supported
	 * since there is no efficient way to look up an LWP yet.
	 */
	p = td->td_proc;
	PROC_LOCK(p);

	switch (uap->function) {
	case RTP_LOOKUP:
		if ((error = p_cansee(td, p)))
			break;
		if (uap->lwpid == 0 || uap->lwpid == td->td_tid)
			td1 = td;
		else
			td1 = thread_find(p, uap->lwpid);
		if (td1 != NULL)
			pri_to_rtp(td1, &rtp);
		else
			error = ESRCH;
		PROC_UNLOCK(p);
		/* Don't copy out the uninitialized rtp if the lookup failed. */
		if (error != 0)
			return (error);
		return (copyout(&rtp, uap->rtp, sizeof(struct rtprio)));
	case RTP_SET:
		if ((error = p_cansched(td, p)) || (error = cierror))
			break;

		/* Disallow setting rtprio in most cases if not superuser. */

		/*
		 * Realtime priority has to be restricted for reasons which
		 * should be obvious.  However, for idleprio processes, there
		 * is a potential for system deadlock if an idleprio process
		 * gains a lock on a resource that other processes need (and
		 * the idleprio process can't run due to a CPU-bound normal
		 * process).  Fix me!  XXX
		 *
		 * This problem is not only related to idleprio processes.
		 * A user level program can obtain a file lock and hold it
		 * indefinitely.  Additionally, without idleprio processes it
		 * is still conceivable that a program with low priority will
		 * never get to run.  In short, allowing this feature might
		 * make it easier to lock a resource indefinitely, but it is
		 * not the only thing that makes it possible.
		 */
		if (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_REALTIME ||
		    (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_IDLE &&
		    unprivileged_idprio == 0)) {
			error = priv_check(td, PRIV_SCHED_RTPRIO);
			if (error)
				break;
		}

		if (uap->lwpid == 0 || uap->lwpid == td->td_tid)
			td1 = td;
		else
			td1 = thread_find(p, uap->lwpid);
		if (td1 != NULL)
			error = rtp_to_pri(&rtp, td1);
		else
			error = ESRCH;
		break;
	default:
		error = EINVAL;
		break;
	}
	PROC_UNLOCK(p);
	return (error);
}

/*
 * Set realtime priority.
 */
#ifndef _SYS_SYSPROTO_H_
struct rtprio_args {
	int		function;
	pid_t		pid;
	struct rtprio	*rtp;
};
#endif
int
rtprio(struct thread *td, struct rtprio_args *uap)
{
	struct proc *p;
	struct thread *tdp;
	struct rtprio rtp;
	int cierror, error;

	/* Perform copyin before acquiring locks if needed. */
	if (uap->function == RTP_SET)
		cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
	else
		cierror = 0;

	if (uap->pid == 0) {
		p = td->td_proc;
		PROC_LOCK(p);
	} else {
		p = pfind(uap->pid);
		if (p == NULL)
			return (ESRCH);
	}

	switch (uap->function) {
	case RTP_LOOKUP:
		if ((error = p_cansee(td, p)))
			break;
		/*
		 * Return OUR priority if no pid specified,
		 * or if one is, report the highest priority
		 * in the process.  There isn't much more you can do as
		 * there is only room to return a single priority.
		 * Note: specifying our own pid is not the same
		 * as leaving it zero.
		 */
		if (uap->pid == 0) {
			pri_to_rtp(td, &rtp);
		} else {
			struct rtprio rtp2;

			rtp.type = RTP_PRIO_IDLE;
			rtp.prio = RTP_PRIO_MAX;
			FOREACH_THREAD_IN_PROC(p, tdp) {
				pri_to_rtp(tdp, &rtp2);
				if (rtp2.type < rtp.type ||
				    (rtp2.type == rtp.type &&
				    rtp2.prio < rtp.prio)) {
					rtp.type = rtp2.type;
					rtp.prio = rtp2.prio;
				}
			}
		}
		PROC_UNLOCK(p);
		return (copyout(&rtp, uap->rtp, sizeof(struct rtprio)));
	case RTP_SET:
		if ((error = p_cansched(td, p)) || (error = cierror))
			break;

		/*
		 * Disallow setting rtprio in most cases if not superuser.
		 * See the comment in rtprio_thread() about idprio
		 * threads holding a lock.
		 */
		if (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_REALTIME ||
		    (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_IDLE &&
		    !unprivileged_idprio)) {
			error = priv_check(td, PRIV_SCHED_RTPRIO);
			if (error)
				break;
		}

		/*
		 * If we are setting our own priority, set just our
		 * thread; if we are setting it for another process,
		 * set all the threads in that process.  Note that
		 * specifying our own pid (rather than zero) selects
		 * the latter behavior.
		 */
		if (uap->pid == 0) {
			error = rtp_to_pri(&rtp, td);
		} else {
			FOREACH_THREAD_IN_PROC(p, td) {
				if ((error = rtp_to_pri(&rtp, td)) != 0)
					break;
			}
		}
		break;
	default:
		error = EINVAL;
		break;
	}
	PROC_UNLOCK(p);
	return (error);
}

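/*
 * Validate an rtprio request and map it onto a scheduler class and
 * user priority for the given thread.
 */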
int
rtp_to_pri(struct rtprio *rtp, struct thread *td)
{
	u_char	newpri;
	u_char	oldpri;

	switch (RTP_PRIO_BASE(rtp->type)) {
	case RTP_PRIO_REALTIME:
		if (rtp->prio > RTP_PRIO_MAX)
			return (EINVAL);
		newpri = PRI_MIN_REALTIME + rtp->prio;
		break;
	case RTP_PRIO_NORMAL:
		if (rtp->prio > (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE))
			return (EINVAL);
		newpri = PRI_MIN_TIMESHARE + rtp->prio;
		break;
	case RTP_PRIO_IDLE:
		if (rtp->prio > RTP_PRIO_MAX)
			return (EINVAL);
		newpri = PRI_MIN_IDLE + rtp->prio;
		break;
	default:
		return (EINVAL);
	}

	thread_lock(td);
	sched_class(td, rtp->type);	/* XXX fix */
	oldpri = td->td_user_pri;
	sched_user_prio(td, newpri);
	if (td->td_user_pri != oldpri && (td == curthread ||
	    td->td_priority == oldpri || td->td_user_pri <= PRI_MAX_REALTIME))
		sched_prio(td, td->td_user_pri);
	if (TD_ON_UPILOCK(td) && oldpri != newpri) {
		thread_unlock(td);
		umtx_pi_adjust(td, oldpri);
	} else
		thread_unlock(td);
	return (0);
}

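/*
 * Convert the thread's scheduler class and base user priority back
 * into rtprio form.
 */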
void
pri_to_rtp(struct thread *td, struct rtprio *rtp)
{

	thread_lock(td);
	switch (PRI_BASE(td->td_pri_class)) {
	case PRI_REALTIME:
		rtp->prio = td->td_base_user_pri - PRI_MIN_REALTIME;
		break;
	case PRI_TIMESHARE:
		rtp->prio = td->td_base_user_pri - PRI_MIN_TIMESHARE;
		break;
	case PRI_IDLE:
		rtp->prio = td->td_base_user_pri - PRI_MIN_IDLE;
		break;
	default:
		break;
	}
	rtp->type = td->td_pri_class;
	thread_unlock(td);
}

#if defined(COMPAT_43)
#ifndef _SYS_SYSPROTO_H_
struct osetrlimit_args {
	u_int	which;
	struct	orlimit *rlp;
};
#endif
int
osetrlimit(struct thread *td, struct osetrlimit_args *uap)
{
	struct orlimit olim;
	struct rlimit lim;
	int error;

	if ((error = copyin(uap->rlp, &olim, sizeof(struct orlimit))))
		return (error);
	lim.rlim_cur = olim.rlim_cur;
	lim.rlim_max = olim.rlim_max;
	error = kern_setrlimit(td, uap->which, &lim);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct ogetrlimit_args {
	u_int	which;
	struct	orlimit *rlp;
};
#endif
int
ogetrlimit(struct thread *td, struct ogetrlimit_args *uap)
{
	struct orlimit olim;
	struct rlimit rl;
	struct proc *p;
	int error;

	if (uap->which >= RLIM_NLIMITS)
		return (EINVAL);
	p = td->td_proc;
	PROC_LOCK(p);
	lim_rlimit(p, uap->which, &rl);
	PROC_UNLOCK(p);

	/*
	 * XXX it would be more correct to convert only RLIM_INFINITY to the
	 * old RLIM_INFINITY and fail with EOVERFLOW for other larger
	 * values.  Most 64->32 and 32->16 conversions, including some
	 * important ones such as uids, are even more broken than what we
	 * do here (they blindly truncate).  We don't do this correctly
	 * here since we have little experience with EOVERFLOW yet.
	 * Elsewhere, getuid() can't fail...
	 */
	olim.rlim_cur = rl.rlim_cur > 0x7fffffff ? 0x7fffffff : rl.rlim_cur;
	olim.rlim_max = rl.rlim_max > 0x7fffffff ? 0x7fffffff : rl.rlim_max;
	error = copyout(&olim, uap->rlp, sizeof(olim));
	return (error);
}
#endif /* COMPAT_43 */

#ifndef _SYS_SYSPROTO_H_
struct __setrlimit_args {
	u_int	which;
	struct	rlimit *rlp;
};
#endif
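/*
 * Copy in the new resource limit and hand it to kern_setrlimit().
 */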
int
setrlimit(struct thread *td, struct __setrlimit_args *uap)
{
	struct rlimit alim;
	int error;

	if ((error = copyin(uap->rlp, &alim, sizeof(struct rlimit))))
		return (error);
	error = kern_setrlimit(td, uap->which, &alim);
	return (error);
}

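/*
 * Callout handler that periodically aggregates per-thread CPU usage
 * and enforces RLIMIT_CPU for the process.
 */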
static void
lim_cb(void *arg)
{
	struct rlimit rlim;
	struct thread *td;
	struct proc *p;

	p = arg;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	/*
	 * Check if the process exceeds its cpu resource allocation.  If
	 * it reaches the max, arrange to kill the process in ast().
	 */
	if (p->p_cpulimit == RLIM_INFINITY)
		return;
	PROC_SLOCK(p);
	FOREACH_THREAD_IN_PROC(p, td) {
		ruxagg(p, td);
	}
	PROC_SUNLOCK(p);
	if (p->p_rux.rux_runtime > p->p_cpulimit * cpu_tickrate()) {
		lim_rlimit(p, RLIMIT_CPU, &rlim);
		if (p->p_rux.rux_runtime >= rlim.rlim_max * cpu_tickrate()) {
			killproc(p, "exceeded maximum CPU limit");
		} else {
			if (p->p_cpulimit < rlim.rlim_max)
				p->p_cpulimit += 5;
			psignal(p, SIGXCPU);
		}
	}
	if ((p->p_flag & P_WEXIT) == 0)
		callout_reset(&p->p_limco, hz, lim_cb, p);
}

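/*
 * Set the resource limit named by "which" for the current process,
 * clamping the requested values to the system maxima where applicable.
 */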
int
kern_setrlimit(struct thread *td, u_int which, struct rlimit *limp)
{
	struct plimit *newlim, *oldlim;
	struct proc *p;
	struct rlimit *alimp;
	struct rlimit oldssiz;
	int error;

	if (which >= RLIM_NLIMITS)
		return (EINVAL);

	/*
	 * Preserve historical bugs by treating negative limits as unsigned.
	 */
	if (limp->rlim_cur < 0)
		limp->rlim_cur = RLIM_INFINITY;
	if (limp->rlim_max < 0)
		limp->rlim_max = RLIM_INFINITY;

	oldssiz.rlim_cur = 0;
	p = td->td_proc;
	newlim = lim_alloc();
	PROC_LOCK(p);
	oldlim = p->p_limit;
	alimp = &oldlim->pl_rlimit[which];
	if (limp->rlim_cur > alimp->rlim_max ||
	    limp->rlim_max > alimp->rlim_max)
		if ((error = priv_check(td, PRIV_PROC_SETRLIMIT))) {
			PROC_UNLOCK(p);
			lim_free(newlim);
			return (error);
		}
	if (limp->rlim_cur > limp->rlim_max)
		limp->rlim_cur = limp->rlim_max;
	lim_copy(newlim, oldlim);
	alimp = &newlim->pl_rlimit[which];

	switch (which) {

	case RLIMIT_CPU:
		if (limp->rlim_cur != RLIM_INFINITY &&
		    p->p_cpulimit == RLIM_INFINITY)
			callout_reset(&p->p_limco, hz, lim_cb, p);
		p->p_cpulimit = limp->rlim_cur;
		break;
	case RLIMIT_DATA:
		if (limp->rlim_cur > maxdsiz)
			limp->rlim_cur = maxdsiz;
		if (limp->rlim_max > maxdsiz)
			limp->rlim_max = maxdsiz;
		break;

	case RLIMIT_STACK:
		if (limp->rlim_cur > maxssiz)
			limp->rlim_cur = maxssiz;
		if (limp->rlim_max > maxssiz)
			limp->rlim_max = maxssiz;
		oldssiz = *alimp;
		if (td->td_proc->p_sysent->sv_fixlimit != NULL)
			td->td_proc->p_sysent->sv_fixlimit(&oldssiz,
			    RLIMIT_STACK);
		break;

	case RLIMIT_NOFILE:
		if (limp->rlim_cur > maxfilesperproc)
			limp->rlim_cur = maxfilesperproc;
		if (limp->rlim_max > maxfilesperproc)
			limp->rlim_max = maxfilesperproc;
		break;

	case RLIMIT_NPROC:
		if (limp->rlim_cur > maxprocperuid)
			limp->rlim_cur = maxprocperuid;
		if (limp->rlim_max > maxprocperuid)
			limp->rlim_max = maxprocperuid;
		if (limp->rlim_cur < 1)
			limp->rlim_cur = 1;
		if (limp->rlim_max < 1)
			limp->rlim_max = 1;
		break;
	}
	if (td->td_proc->p_sysent->sv_fixlimit != NULL)
		td->td_proc->p_sysent->sv_fixlimit(limp, which);
	*alimp = *limp;
	p->p_limit = newlim;
	PROC_UNLOCK(p);
	lim_free(oldlim);

	if (which == RLIMIT_STACK) {
		/*
		 * Stack is allocated to the max at exec time with only
		 * "rlim_cur" bytes accessible.  If stack limit is going
		 * up make more accessible, if going down make inaccessible.
		 */
		if (limp->rlim_cur != oldssiz.rlim_cur) {
			vm_offset_t addr;
			vm_size_t size;
			vm_prot_t prot;

			if (limp->rlim_cur > oldssiz.rlim_cur) {
				prot = p->p_sysent->sv_stackprot;
				size = limp->rlim_cur - oldssiz.rlim_cur;
				addr = p->p_sysent->sv_usrstack -
				    limp->rlim_cur;
			} else {
				prot = VM_PROT_NONE;
				size = oldssiz.rlim_cur - limp->rlim_cur;
				addr = p->p_sysent->sv_usrstack -
				    oldssiz.rlim_cur;
			}
			addr = trunc_page(addr);
			size = round_page(size);
			(void)vm_map_protect(&p->p_vmspace->vm_map,
			    addr, addr + size, prot, FALSE);
		}
	}

	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct __getrlimit_args {
	u_int	which;
	struct	rlimit *rlp;
};
#endif
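/*
 * Fetch the current limit for "which" and copy it out to userland.
 */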
/* ARGSUSED */
int
getrlimit(struct thread *td, struct __getrlimit_args *uap)
{
	struct rlimit rlim;
	struct proc *p;
	int error;

	if (uap->which >= RLIM_NLIMITS)
		return (EINVAL);
	p = td->td_proc;
	PROC_LOCK(p);
	lim_rlimit(p, uap->which, &rlim);
	PROC_UNLOCK(p);
	error = copyout(&rlim, uap->rlp, sizeof(struct rlimit));
	return (error);
}

/*
 * Transform the running time and tick information for children of proc p
 * into user and system time usage.
 */
void
calccru(struct proc *p, struct timeval *up, struct timeval *sp)
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	calcru1(p, &p->p_crux, up, sp);
}

/*
 * Transform the running time and tick information in proc p into user
 * and system time usage.  If appropriate, include the current time slice
 * on this CPU.
 */
void
calcru(struct proc *p, struct timeval *up, struct timeval *sp)
{
	struct thread *td;
	uint64_t u;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	PROC_SLOCK_ASSERT(p, MA_OWNED);
	/*
	 * If we are getting stats for the current process, then add in the
	 * stats that this thread has accumulated in its current time slice.
	 * We reset the thread and CPU state as if we had performed a context
	 * switch right here.
	 */
	td = curthread;
	if (td->td_proc == p) {
		u = cpu_ticks();
		p->p_rux.rux_runtime += u - PCPU_GET(switchtime);
		PCPU_SET(switchtime, u);
	}
	/* Make sure the per-thread stats are current. */
	FOREACH_THREAD_IN_PROC(p, td) {
		if (td->td_incruntime == 0)
			continue;
		ruxagg(p, td);
	}
	calcru1(p, &p->p_rux, up, sp);
}

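/*
 * Split the total run time recorded in ruxp into user and system time
 * in proportion to the recorded statclock tick counts.
 */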
static void
calcru1(struct proc *p, struct rusage_ext *ruxp, struct timeval *up,
    struct timeval *sp)
{
	/* {user, system, interrupt, total} {ticks, usec}: */
	u_int64_t ut, uu, st, su, it, tt, tu;

	ut = ruxp->rux_uticks;
	st = ruxp->rux_sticks;
	it = ruxp->rux_iticks;
	tt = ut + st + it;
	if (tt == 0) {
		/* Avoid divide by zero */
		st = 1;
		tt = 1;
	}
	tu = cputick2usec(ruxp->rux_runtime);
	if ((int64_t)tu < 0) {
		/* XXX: this should be an assert /phk */
		printf("calcru: negative runtime of %jd usec for pid %d (%s)\n",
		    (intmax_t)tu, p->p_pid, p->p_comm);
		tu = ruxp->rux_tu;
	}

	if (tu >= ruxp->rux_tu) {
		/*
		 * The normal case, time increased.
		 * Enforce monotonicity of bucketed numbers.
		 */
		uu = (tu * ut) / tt;
		if (uu < ruxp->rux_uu)
			uu = ruxp->rux_uu;
		su = (tu * st) / tt;
		if (su < ruxp->rux_su)
			su = ruxp->rux_su;
	} else if (tu + 3 > ruxp->rux_tu || 101 * tu > 100 * ruxp->rux_tu) {
		/*
		 * When we calibrate the cputicker, it is not uncommon to
		 * see the presumably fixed frequency increase slightly over
		 * time as a result of thermal stabilization and NTP
		 * discipline (of the reference clock).  We therefore ignore
		 * a bit of backwards slop because we expect to catch up
		 * shortly.  We use a 3 microsecond limit to catch low
		 * counts and a 1% limit for high counts.
		 */
		uu = ruxp->rux_uu;
		su = ruxp->rux_su;
		tu = ruxp->rux_tu;
	} else { /* tu < ruxp->rux_tu */
		/*
		 * What happened here was likely that a laptop, which ran at
		 * a reduced clock frequency at boot, kicked into high gear.
		 * The wisdom of spamming this message in that case is
		 * dubious, but it might also be indicative of something
		 * serious, so let's keep it and hope laptops can be made
		 * more truthful about their CPU speed via ACPI.
		 */
		printf("calcru: runtime went backwards from %ju usec "
		    "to %ju usec for pid %d (%s)\n",
		    (uintmax_t)ruxp->rux_tu, (uintmax_t)tu,
		    p->p_pid, p->p_comm);
		uu = (tu * ut) / tt;
		su = (tu * st) / tt;
	}

	ruxp->rux_uu = uu;
	ruxp->rux_su = su;
	ruxp->rux_tu = tu;

	up->tv_sec = uu / 1000000;
	up->tv_usec = uu % 1000000;
	sp->tv_sec = su / 1000000;
	sp->tv_usec = su % 1000000;
}

#ifndef _SYS_SYSPROTO_H_
struct getrusage_args {
	int	who;
	struct	rusage *rusage;
};
#endif
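/*
 * Fetch resource usage via kern_getrusage() and copy it out to userland.
 */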
int
getrusage(struct thread *td, struct getrusage_args *uap)
{
	struct rusage ru;
	int error;

	error = kern_getrusage(td, uap->who, &ru);
	if (error == 0)
		error = copyout(&ru, uap->rusage, sizeof(struct rusage));
	return (error);
}

int
kern_getrusage(struct thread *td, int who, struct rusage *rup)
{
	struct proc *p;
	int error;

	error = 0;
	p = td->td_proc;
	PROC_LOCK(p);
	switch (who) {
	case RUSAGE_SELF:
		rufetchcalc(p, rup, &rup->ru_utime, &rup->ru_stime);
		break;

	case RUSAGE_CHILDREN:
		*rup = p->p_stats->p_cru;
		calccru(p, &rup->ru_utime, &rup->ru_stime);
		break;

	case RUSAGE_THREAD:
		PROC_SLOCK(p);
		ruxagg(p, td);
		PROC_SUNLOCK(p);
		thread_lock(td);
		*rup = td->td_ru;
		calcru1(p, &td->td_rux, &rup->ru_utime, &rup->ru_stime);
		thread_unlock(td);
		break;

	default:
		error = EINVAL;
	}
	PROC_UNLOCK(p);
	return (error);
}

void
rucollect(struct rusage *ru, struct rusage *ru2)
{
	long *ip, *ip2;
	int i;

	if (ru->ru_maxrss < ru2->ru_maxrss)
		ru->ru_maxrss = ru2->ru_maxrss;
	ip = &ru->ru_first;
	ip2 = &ru2->ru_first;
	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
		*ip++ += *ip2++;
}

void
ruadd(struct rusage *ru, struct rusage_ext *rux, struct rusage *ru2,
    struct rusage_ext *rux2)
{

	rux->rux_runtime += rux2->rux_runtime;
	rux->rux_uticks += rux2->rux_uticks;
	rux->rux_sticks += rux2->rux_sticks;
	rux->rux_iticks += rux2->rux_iticks;
	rux->rux_uu += rux2->rux_uu;
	rux->rux_su += rux2->rux_su;
	rux->rux_tu += rux2->rux_tu;
	rucollect(ru, ru2);
}

/*
 * Aggregate tick counts into the proc's rusage_ext.
 */
static void
ruxagg_locked(struct rusage_ext *rux, struct thread *td)
{

	THREAD_LOCK_ASSERT(td, MA_OWNED);
	PROC_SLOCK_ASSERT(td->td_proc, MA_OWNED);
	rux->rux_runtime += td->td_incruntime;
	rux->rux_uticks += td->td_uticks;
	rux->rux_sticks += td->td_sticks;
	rux->rux_iticks += td->td_iticks;
}

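/*
 * Fold the thread's accumulated run time and tick counts into both the
 * process-wide and per-thread rusage_ext, then reset the thread counters.
 */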
void
ruxagg(struct proc *p, struct thread *td)
{

	thread_lock(td);
	ruxagg_locked(&p->p_rux, td);
	ruxagg_locked(&td->td_rux, td);
	td->td_incruntime = 0;
	td->td_uticks = 0;
	td->td_iticks = 0;
	td->td_sticks = 0;
	thread_unlock(td);
}

/*
 * Update the rusage_ext structure and fetch a valid aggregate rusage
 * for proc p if storage for one is supplied.
 */
void
rufetch(struct proc *p, struct rusage *ru)
{
	struct thread *td;

	PROC_SLOCK_ASSERT(p, MA_OWNED);

	*ru = p->p_ru;
	if (p->p_numthreads > 0) {
		FOREACH_THREAD_IN_PROC(p, td) {
			ruxagg(p, td);
			rucollect(ru, &td->td_ru);
		}
	}
}

/*
 * Atomically perform a rufetch and a calcru together.
 * Consumers can safely assume that calcru is executed only
 * after rufetch has completed.
 */
void
rufetchcalc(struct proc *p, struct rusage *ru, struct timeval *up,
    struct timeval *sp)
{

	PROC_SLOCK(p);
	rufetch(p, ru);
	calcru(p, up, sp);
	PROC_SUNLOCK(p);
}

/*
 * Allocate a new resource limits structure and initialize its
 * reference count.
 */
struct plimit *
lim_alloc(void)
{
	struct plimit *limp;

	limp = malloc(sizeof(struct plimit), M_PLIMIT, M_WAITOK);
	refcount_init(&limp->pl_refcnt, 1);
	return (limp);
}

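/*
 * Acquire an additional reference on a plimit structure and return it.
 */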
struct plimit *
lim_hold(struct plimit *limp)
{

	refcount_acquire(&limp->pl_refcnt);
	return (limp);
}

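/*
 * Share the parent's limits with the child at fork time and arm the
 * RLIMIT_CPU callout if a CPU limit is in effect.
 */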
void
lim_fork(struct proc *p1, struct proc *p2)
{

	p2->p_limit = lim_hold(p1->p_limit);
	callout_init_mtx(&p2->p_limco, &p2->p_mtx, 0);
	if (p1->p_cpulimit != RLIM_INFINITY)
		callout_reset(&p2->p_limco, hz, lim_cb, p2);
}

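/*
 * Drop a reference on a plimit structure, freeing it when the last
 * reference is released.
 */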
void
lim_free(struct plimit *limp)
{

	KASSERT(limp->pl_refcnt > 0, ("plimit refcnt underflow"));
	if (refcount_release(&limp->pl_refcnt))
		free((void *)limp, M_PLIMIT);
}

/*
 * Make a copy of the plimit structure.
 * We share these structures copy-on-write after fork.
 */
void
lim_copy(struct plimit *dst, struct plimit *src)
{

	KASSERT(dst->pl_refcnt == 1, ("lim_copy to shared limit"));
	bcopy(src->pl_rlimit, dst->pl_rlimit, sizeof(src->pl_rlimit));
}

/*
 * Return the hard limit for a particular system resource.  The
 * which parameter specifies the index into the rlimit array.
 */
rlim_t
lim_max(struct proc *p, int which)
{
	struct rlimit rl;

	lim_rlimit(p, which, &rl);
	return (rl.rlim_max);
}

/*
 * Return the current (soft) limit for a particular system resource.
 * The which parameter specifies the index into the rlimit array.
 */
rlim_t
lim_cur(struct proc *p, int which)
{
	struct rlimit rl;

	lim_rlimit(p, which, &rl);
	return (rl.rlim_cur);
}

/*
 * Return a copy of the entire rlimit structure for the system limit
 * specified by 'which' in the rlimit structure pointed to by 'rlp'.
 */
void
lim_rlimit(struct proc *p, int which, struct rlimit *rlp)
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT(which >= 0 && which < RLIM_NLIMITS,
	    ("request for invalid resource limit"));
	*rlp = p->p_limit->pl_rlimit[which];
	if (p->p_sysent->sv_fixlimit != NULL)
		p->p_sysent->sv_fixlimit(rlp, which);
}

/*
 * Find the uidinfo structure for a uid.  This structure is used to
 * track the total resource consumption (process count, socket buffer
 * size, etc.) for the uid and impose limits.
 */
void
uihashinit(void)
{

	uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
	rw_init(&uihashtbl_lock, "uidinfo hash");
}

/*
 * Look up a uidinfo struct for the parameter uid.
 * uihashtbl_lock must be locked.
 */
static struct uidinfo *
uilookup(uid_t uid)
{
	struct uihashhead *uipp;
	struct uidinfo *uip;

	rw_assert(&uihashtbl_lock, RA_LOCKED);
	uipp = UIHASH(uid);
	LIST_FOREACH(uip, uipp, ui_hash)
		if (uip->ui_uid == uid)
			break;

	return (uip);
}

/*
 * Find or allocate a struct uidinfo for a particular uid.
 * Increase refcount on uidinfo struct returned.
 * uifree() should be called on a struct uidinfo when released.
 */
struct uidinfo *
uifind(uid_t uid)
{
	struct uidinfo *old_uip, *uip;

	rw_rlock(&uihashtbl_lock);
	uip = uilookup(uid);
	if (uip == NULL) {
		rw_runlock(&uihashtbl_lock);
		uip = malloc(sizeof(*uip), M_UIDINFO, M_WAITOK | M_ZERO);
		rw_wlock(&uihashtbl_lock);
		/*
		 * There's a chance someone created our uidinfo while we
		 * were in malloc and not holding the lock, so we have to
		 * make sure we don't insert a duplicate uidinfo.
		 */
		if ((old_uip = uilookup(uid)) != NULL) {
			/* Someone else beat us to it. */
			free(uip, M_UIDINFO);
			uip = old_uip;
		} else {
			refcount_init(&uip->ui_ref, 0);
			uip->ui_uid = uid;
			mtx_init(&uip->ui_vmsize_mtx, "ui_vmsize", NULL,
			    MTX_DEF);
			LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash);
		}
	}
	uihold(uip);
	rw_unlock(&uihashtbl_lock);
	return (uip);
}

/*
 * Place another refcount on a uidinfo struct.
 */
void
uihold(struct uidinfo *uip)
{

	refcount_acquire(&uip->ui_ref);
}

/*-
 * Since uidinfo structs have a long lifetime, we use an
 * opportunistic refcounting scheme to avoid locking the lookup hash
 * for each release.
 *
 * If the refcount hits 0, we need to free the structure,
 * which means we need to lock the hash.
 * Optimal case:
 *   After locking the struct and lowering the refcount, if we find
 *   that we don't need to free, simply unlock and return.
 * Suboptimal case:
 *   If refcount lowering results in need to free, bump the count
 *   back up, lose the lock and acquire the locks in the proper
 *   order to try again.
 */
void
uifree(struct uidinfo *uip)
{
	int old;

	/* Prepare for optimal case. */
	old = uip->ui_ref;
	if (old > 1 && atomic_cmpset_int(&uip->ui_ref, old, old - 1))
		return;

	/* Prepare for suboptimal case. */
	rw_wlock(&uihashtbl_lock);
	if (refcount_release(&uip->ui_ref)) {
		LIST_REMOVE(uip, ui_hash);
		rw_wunlock(&uihashtbl_lock);
		if (uip->ui_sbsize != 0)
			printf("freeing uidinfo: uid = %d, sbsize = %ld\n",
			    uip->ui_uid, uip->ui_sbsize);
		if (uip->ui_proccnt != 0)
			printf("freeing uidinfo: uid = %d, proccnt = %ld\n",
			    uip->ui_uid, uip->ui_proccnt);
		if (uip->ui_vmsize != 0)
			printf("freeing uidinfo: uid = %d, swapuse = %lld\n",
			    uip->ui_uid, (unsigned long long)uip->ui_vmsize);
		mtx_destroy(&uip->ui_vmsize_mtx);
		free(uip, M_UIDINFO);
		return;
	}
	/*
	 * Someone added a reference between atomic_cmpset_int() and
	 * rw_wlock(&uihashtbl_lock).
	 */
	rw_wunlock(&uihashtbl_lock);
}

/*
 * Change the count associated with the number of processes
 * a given user is using.  When 'max' is 0, don't enforce a limit.
 */
int
chgproccnt(struct uidinfo *uip, int diff, rlim_t max)
{

	/* Don't allow them to exceed max, but allow subtraction. */
	if (diff > 0 && max != 0) {
		if (atomic_fetchadd_long(&uip->ui_proccnt, (long)diff) + diff > max) {
			atomic_subtract_long(&uip->ui_proccnt, (long)diff);
			return (0);
		}
	} else {
		atomic_add_long(&uip->ui_proccnt, (long)diff);
		if (uip->ui_proccnt < 0)
			printf("negative proccnt for uid = %d\n", uip->ui_uid);
	}
	return (1);
}

/*
 * Change the total socket buffer size a user has used.
 */
int
chgsbsize(struct uidinfo *uip, u_int *hiwat, u_int to, rlim_t max)
{
	int diff;

	diff = to - *hiwat;
	if (diff > 0) {
		if (atomic_fetchadd_long(&uip->ui_sbsize, (long)diff) + diff > max) {
			atomic_subtract_long(&uip->ui_sbsize, (long)diff);
			return (0);
		}
	} else {
		atomic_add_long(&uip->ui_sbsize, (long)diff);
		if (uip->ui_sbsize < 0)
			printf("negative sbsize for uid = %d\n", uip->ui_uid);
	}
	*hiwat = to;
	return (1);
}

/*
 * Change the count associated with the number of pseudo-terminals
 * a given user is using.  When 'max' is 0, don't enforce a limit.
 */
int
chgptscnt(struct uidinfo *uip, int diff, rlim_t max)
{

	/* Don't allow them to exceed max, but allow subtraction. */
	if (diff > 0 && max != 0) {
		if (atomic_fetchadd_long(&uip->ui_ptscnt, (long)diff) + diff > max) {
			atomic_subtract_long(&uip->ui_ptscnt, (long)diff);
			return (0);
		}
	} else {
		atomic_add_long(&uip->ui_ptscnt, (long)diff);
		if (uip->ui_ptscnt < 0)
			printf("negative ptscnt for uid = %d\n", uip->ui_uid);
	}
	return (1);
}