FreeBSD/Linux Kernel Cross Reference
sys/kern/sys_sched.c
1 /* $NetBSD: sys_sched.c,v 1.49 2020/05/23 23:42:43 ad Exp $ */
2
3 /*
4 * Copyright (c) 2008, 2011 Mindaugas Rasiukevicius <rmind at NetBSD org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 /*
30 * System calls relating to the scheduler.
31 *
32 * Lock order:
33 *
34 * cpu_lock ->
35 * proc_lock ->
36 * proc_t::p_lock ->
37 * lwp_t::lwp_lock
38 *
39 * TODO:
40 * - Handle pthread_setschedprio() as defined by POSIX;
41 */
42
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.49 2020/05/23 23:42:43 ad Exp $");
45
46 #include <sys/param.h>
47
48 #include <sys/cpu.h>
49 #include <sys/kauth.h>
50 #include <sys/kmem.h>
51 #include <sys/lwp.h>
52 #include <sys/mutex.h>
53 #include <sys/proc.h>
54 #include <sys/pset.h>
55 #include <sys/sched.h>
56 #include <sys/syscallargs.h>
57 #include <sys/sysctl.h>
58 #include <sys/systm.h>
59 #include <sys/types.h>
60 #include <sys/unistd.h>
61
62 static struct sysctllog *sched_sysctl_log;
63 static kauth_listener_t sched_listener;
64
65 /*
66 * Convert user priority or the in-kernel priority or convert the current
67 * priority to the appropriate range according to the policy change.
68 */
69 static pri_t
70 convert_pri(lwp_t *l, int policy, pri_t pri)
71 {
72
73 /* Convert user priority to the in-kernel */
74 if (pri != PRI_NONE) {
75 /* Only for real-time threads */
76 KASSERT(pri >= SCHED_PRI_MIN && pri <= SCHED_PRI_MAX);
77 KASSERT(policy != SCHED_OTHER);
78 return PRI_USER_RT + pri;
79 }
80
81 /* Neither policy, nor priority change */
82 if (l->l_class == policy)
83 return l->l_priority;
84
85 /* Time-sharing -> real-time */
86 if (l->l_class == SCHED_OTHER) {
87 KASSERT(policy == SCHED_FIFO || policy == SCHED_RR);
88 return PRI_USER_RT;
89 }
90
91 /* Real-time -> time-sharing */
92 if (policy == SCHED_OTHER) {
93 KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR);
94 /*
95 * this is a bit arbitrary because the priority is dynamic
96 * for SCHED_OTHER threads and will likely be changed by
97 * the scheduler soon anyway.
98 */
99 return l->l_priority - PRI_USER_RT;
100 }
101
102 /* Real-time -> real-time */
103 return l->l_priority;
104 }
105
/*
 * do_sched_setparam:
 *
 *	Set the scheduling class and/or priority for LWPs of a process.
 *	pid == 0 means the calling process; lid == 0 means every LWP in
 *	the process.  Returns 0 on success, or: EINVAL for an invalid
 *	policy/priority combination, ESRCH if no process or LWP matched,
 *	EPERM for system processes or on kauth(9) denial.
 */
int
do_sched_setparam(pid_t pid, lwpid_t lid, int policy,
    const struct sched_param *params)
{
	struct proc *p;
	struct lwp *t;
	pri_t pri;
	u_int lcnt;
	int error;

	error = 0;

	pri = params->sched_priority;

	/* If no parameters specified, just return (this should not happen) */
	if (pri == PRI_NONE && policy == SCHED_NONE)
		return 0;

	/* Validate scheduling class */
	if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR))
		return EINVAL;

	/* Validate priority */
	if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX))
		return EINVAL;

	if (pid != 0) {
		/* Find the process */
		mutex_enter(&proc_lock);
		p = proc_find(pid);
		if (p == NULL) {
			mutex_exit(&proc_lock);
			return ESRCH;
		}
		/* Take p_lock before releasing proc_lock (lock order). */
		mutex_enter(p->p_lock);
		mutex_exit(&proc_lock);
		/* Disallow modification of system processes */
		if ((p->p_flag & PK_SYSTEM) != 0) {
			mutex_exit(p->p_lock);
			return EPERM;
		}
	} else {
		/* Use the calling process */
		p = curlwp->l_proc;
		mutex_enter(p->p_lock);
	}

	/* Find the LWP(s) */
	lcnt = 0;
	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
		pri_t kpri;
		int lpolicy;

		if (lid && lid != t->l_lid)
			continue;

		lcnt++;
		lwp_lock(t);
		/* SCHED_NONE means: keep the LWP's current class. */
		lpolicy = (policy == SCHED_NONE) ? t->l_class : policy;

		/* Disallow setting of priority for SCHED_OTHER threads */
		if (lpolicy == SCHED_OTHER && pri != PRI_NONE) {
			lwp_unlock(t);
			error = EINVAL;
			break;
		}

		/* Convert priority, if needed */
		kpri = convert_pri(t, lpolicy, pri);

		/* Check the permission */
		error = kauth_authorize_process(kauth_cred_get(),
		    KAUTH_PROCESS_SCHEDULER_SETPARAM, p, t, KAUTH_ARG(lpolicy),
		    KAUTH_ARG(kpri));
		if (error) {
			lwp_unlock(t);
			break;
		}

		/* Set the scheduling class, change the priority */
		t->l_class = lpolicy;
		lwp_changepri(t, kpri);
		lwp_unlock(t);
	}
	mutex_exit(p->p_lock);
	/* ESRCH if no LWP matched; otherwise the last error (0 on success). */
	return (lcnt == 0) ? ESRCH : error;
}
193
194 /*
195 * Set scheduling parameters.
196 */
197 int
198 sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap,
199 register_t *retval)
200 {
201 /* {
202 syscallarg(pid_t) pid;
203 syscallarg(lwpid_t) lid;
204 syscallarg(int) policy;
205 syscallarg(const struct sched_param *) params;
206 } */
207 struct sched_param params;
208 int error;
209
210 /* Get the parameters from the user-space */
211 error = copyin(SCARG(uap, params), ¶ms, sizeof(params));
212 if (error)
213 goto out;
214
215 error = do_sched_setparam(SCARG(uap, pid), SCARG(uap, lid),
216 SCARG(uap, policy), ¶ms);
217 out:
218 return error;
219 }
220
/*
 * do_sched_getparam:
 *
 *	Fetch the scheduling policy and user-visible priority of an LWP.
 *	If lid == 0, returns the parameters of the first LWP in the
 *	process.  Either output pointer may be NULL.
 */
int
do_sched_getparam(pid_t pid, lwpid_t lid, int *policy,
    struct sched_param *params)
{
	struct sched_param lparams;
	struct lwp *t;
	int error, lpolicy;

	if (pid < 0 || lid < 0)
		return EINVAL;

	t = lwp_find2(pid, lid); /* acquire p_lock */
	if (t == NULL)
		return ESRCH;

	/* Check the permission */
	error = kauth_authorize_process(kauth_cred_get(),
	    KAUTH_PROCESS_SCHEDULER_GETPARAM, t->l_proc, NULL, NULL, NULL);
	if (error != 0) {
		mutex_exit(t->l_proc->p_lock);
		return error;
	}

	/* Snapshot priority and class under the LWP lock. */
	lwp_lock(t);
	lparams.sched_priority = t->l_priority;
	lpolicy = t->l_class;
	lwp_unlock(t);
	mutex_exit(t->l_proc->p_lock);

	/*
	 * convert to the user-visible priority value.
	 * it's an inversion of convert_pri().
	 *
	 * the SCHED_OTHER case is a bit arbitrary given that
	 * - we don't allow setting the priority.
	 * - the priority is dynamic.
	 */
	switch (lpolicy) {
	case SCHED_OTHER:
		lparams.sched_priority -= PRI_USER;
		break;
	case SCHED_RR:
	case SCHED_FIFO:
		lparams.sched_priority -= PRI_USER_RT;
		break;
	}

	/* Both output pointers are optional. */
	if (policy != NULL)
		*policy = lpolicy;

	if (params != NULL)
		*params = lparams;

	return error;
}
281
282 /*
283 * Get scheduling parameters.
284 */
285 int
286 sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap,
287 register_t *retval)
288 {
289 /* {
290 syscallarg(pid_t) pid;
291 syscallarg(lwpid_t) lid;
292 syscallarg(int *) policy;
293 syscallarg(struct sched_param *) params;
294 } */
295 struct sched_param params;
296 int error, policy;
297
298 error = do_sched_getparam(SCARG(uap, pid), SCARG(uap, lid), &policy,
299 ¶ms);
300 if (error)
301 goto out;
302
303 error = copyout(¶ms, SCARG(uap, params), sizeof(params));
304 if (error == 0 && SCARG(uap, policy) != NULL)
305 error = copyout(&policy, SCARG(uap, policy), sizeof(int));
306 out:
307 return error;
308 }
309
310 /*
311 * Allocate the CPU set, and get it from userspace.
312 */
313 static int
314 genkcpuset(kcpuset_t **dset, const cpuset_t *sset, size_t size)
315 {
316 kcpuset_t *kset;
317 int error;
318
319 kcpuset_create(&kset, true);
320 error = kcpuset_copyin(sset, kset, size);
321 if (error) {
322 kcpuset_unuse(kset, NULL);
323 } else {
324 *dset = kset;
325 }
326 return error;
327 }
328
/*
 * sys__sched_setaffinity:
 *
 *	Set the CPU affinity mask for the LWP(s) of a process.  An empty
 *	mask clears any existing affinity.  Fails with EPERM if a CPU in
 *	the mask is assigned to a processor-set, or if every CPU in the
 *	mask is offline.
 */
int
sys__sched_setaffinity(struct lwp *l,
    const struct sys__sched_setaffinity_args *uap, register_t *retval)
{
	/* {
		syscallarg(pid_t) pid;
		syscallarg(lwpid_t) lid;
		syscallarg(size_t) size;
		syscallarg(const cpuset_t *) cpuset;
	} */
	kcpuset_t *kcset, *kcpulst = NULL;
	struct cpu_info *ici, *ci;
	struct proc *p;
	struct lwp *t;
	CPU_INFO_ITERATOR cii;
	bool alloff;
	lwpid_t lid;
	u_int lcnt;
	int error;

	/* Copy the mask in from userspace; we hold the initial reference. */
	error = genkcpuset(&kcset, SCARG(uap, cpuset), SCARG(uap, size));
	if (error)
		return error;

	/*
	 * Traverse _each_ CPU to:
	 *  - Check that CPUs in the mask have no assigned processor set.
	 *  - Check that at least one CPU from the mask is online.
	 *  - Find the first target CPU to migrate.
	 *
	 * To avoid the race with CPU online/offline calls and processor sets,
	 * cpu_lock will be locked for the entire operation.
	 */
	ci = NULL;
	alloff = false;
	mutex_enter(&cpu_lock);
	for (CPU_INFO_FOREACH(cii, ici)) {
		struct schedstate_percpu *ispc;

		if (!kcpuset_isset(kcset, cpu_index(ici))) {
			continue;
		}

		ispc = &ici->ci_schedstate;
		/* Check that CPU is not in the processor-set */
		if (ispc->spc_psid != PS_NONE) {
			error = EPERM;
			goto out;
		}
		/* Skip offline CPUs */
		if (ispc->spc_flags & SPCF_OFFLINE) {
			alloff = true;
			continue;
		}
		/* Target CPU to migrate */
		if (ci == NULL) {
			ci = ici;
		}
	}
	if (ci == NULL) {
		if (alloff) {
			/* All CPUs in the set are offline */
			error = EPERM;
			goto out;
		}
		/*
		 * Empty set: release our reference now; kcset == NULL
		 * below means "clear affinity" for the matched LWPs.
		 */
		kcpuset_unuse(kcset, &kcpulst);
		kcset = NULL;
	}

	if (SCARG(uap, pid) != 0) {
		/* Find the process */
		mutex_enter(&proc_lock);
		p = proc_find(SCARG(uap, pid));
		if (p == NULL) {
			mutex_exit(&proc_lock);
			error = ESRCH;
			goto out;
		}
		/* Take p_lock before releasing proc_lock (lock order). */
		mutex_enter(p->p_lock);
		mutex_exit(&proc_lock);
		/* Disallow modification of system processes. */
		if ((p->p_flag & PK_SYSTEM) != 0) {
			mutex_exit(p->p_lock);
			error = EPERM;
			goto out;
		}
	} else {
		/* Use the calling process */
		p = l->l_proc;
		mutex_enter(p->p_lock);
	}

	/*
	 * Check the permission.
	 */
	error = kauth_authorize_process(l->l_cred,
	    KAUTH_PROCESS_SCHEDULER_SETAFFINITY, p, NULL, NULL, NULL);
	if (error != 0) {
		mutex_exit(p->p_lock);
		goto out;
	}

	/* Iterate through LWP(s). */
	lcnt = 0;
	lid = SCARG(uap, lid);
	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
		if (lid && lid != t->l_lid) {
			continue;
		}
		lwp_lock(t);
		/* No affinity for zombie LWPs. */
		if (t->l_stat == LSZOMB) {
			lwp_unlock(t);
			continue;
		}
		/* First, release existing affinity, if any. */
		if (t->l_affinity) {
			kcpuset_unuse(t->l_affinity, &kcpulst);
		}
		if (kcset) {
			/*
			 * Hold a reference on affinity mask, assign mask to
			 * LWP and migrate it to another CPU (unlocks LWP).
			 */
			kcpuset_use(kcset);
			t->l_affinity = kcset;
			lwp_migrate(t, ci);
		} else {
			/* Old affinity mask is released, just clear. */
			t->l_affinity = NULL;
			lwp_unlock(t);
		}
		lcnt++;
	}
	mutex_exit(p->p_lock);
	if (lcnt == 0) {
		/* No LWP matched the given lid. */
		error = ESRCH;
	}
out:
	mutex_exit(&cpu_lock);

	/*
	 * Drop the initial reference (LWPs, if any, have the ownership now),
	 * and destroy whatever is in the G/C list, if filled.
	 */
	if (kcset) {
		kcpuset_unuse(kcset, &kcpulst);
	}
	if (kcpulst) {
		kcpuset_destroy(kcpulst);
	}
	return error;
}
486
/*
 * sys__sched_getaffinity:
 *
 *	Get the CPU affinity mask of an LWP and copy it out to userspace.
 *	An LWP with no affinity set yields an all-zero mask.
 */
int
sys__sched_getaffinity(struct lwp *l,
    const struct sys__sched_getaffinity_args *uap, register_t *retval)
{
	/* {
		syscallarg(pid_t) pid;
		syscallarg(lwpid_t) lid;
		syscallarg(size_t) size;
		syscallarg(cpuset_t *) cpuset;
	} */
	struct lwp *t;
	kcpuset_t *kcset;
	int error;

	if (SCARG(uap, pid) < 0 || SCARG(uap, lid) < 0)
		return EINVAL;

	/* Allocate a scratch mask; also validates the user pointer/size. */
	error = genkcpuset(&kcset, SCARG(uap, cpuset), SCARG(uap, size));
	if (error)
		return error;

	/* Locks the LWP */
	t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid));
	if (t == NULL) {
		error = ESRCH;
		goto out;
	}
	/* Check the permission */
	if (kauth_authorize_process(l->l_cred,
	    KAUTH_PROCESS_SCHEDULER_GETAFFINITY, t->l_proc, NULL, NULL, NULL)) {
		mutex_exit(t->l_proc->p_lock);
		error = EPERM;
		goto out;
	}
	/* Snapshot the affinity mask under the LWP lock. */
	lwp_lock(t);
	if (t->l_affinity) {
		kcpuset_copy(kcset, t->l_affinity);
	} else {
		kcpuset_zero(kcset);
	}
	lwp_unlock(t);
	mutex_exit(t->l_proc->p_lock);

	error = kcpuset_copyout(kcset, SCARG(uap, cpuset), SCARG(uap, size));
out:
	/* Drop the scratch mask reference. */
	kcpuset_unuse(kcset, NULL);
	return error;
}
538
/*
 * Priority protection for PTHREAD_PRIO_PROTECT.  This is a weak
 * analogue of priority inheritance: temp raise the priority
 * of the caller when accessing a protected resource.
 *
 *	priority == -1 backs out one nesting level of protection;
 *	priority < -1 retrieves the current protect priority (debugging);
 *	otherwise a new protection level is pushed.
 */
int
sys__sched_protect(struct lwp *l,
    const struct sys__sched_protect_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) priority;
		syscallarg(int *) opriority;
	} */
	int error;
	pri_t pri;

	KASSERT(l->l_inheritedprio == -1);
	KASSERT(l->l_auxprio == -1 || l->l_auxprio == l->l_protectprio);

	pri = SCARG(uap, priority);
	error = 0;
	lwp_lock(l);
	if (pri == -1) {
		/* back out priority changes */
		switch(l->l_protectdepth) {
		case 0:
			/* Nothing to back out. */
			error = EINVAL;
			break;
		case 1:
			/* Last level: drop the protection entirely. */
			l->l_protectdepth = 0;
			l->l_protectprio = -1;
			l->l_auxprio = -1;
			break;
		default:
			/* Nested: only decrement the depth. */
			l->l_protectdepth--;
			break;
		}
	} else if (pri < 0) {
		/* Just retrieve the current value, for debugging */
		if (l->l_protectprio == -1)
			error = ENOENT;
		else
			*retval = l->l_protectprio - PRI_USER_RT;
	} else if (__predict_false(pri < SCHED_PRI_MIN ||
	    pri > SCHED_PRI_MAX || l->l_priority > pri + PRI_USER_RT)) {
		/* must fail if existing priority is higher */
		error = EPERM;
	} else {
		/* play along but make no changes if not a realtime LWP. */
		l->l_protectdepth++;
		pri += PRI_USER_RT;
		if (__predict_true(l->l_class != SCHED_OTHER &&
		    pri > l->l_protectprio)) {
			l->l_protectprio = pri;
			l->l_auxprio = pri;
		}
	}
	lwp_unlock(l);

	return error;
}
600
/*
 * sys_sched_yield:
 *
 *	Voluntarily relinquish the CPU to other runnable LWPs.
 *	Always succeeds.
 */
int
sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
{

	yield();
	return 0;
}
611
/*
 * sysctl_sched_setup:
 *
 *	Create the scheduler sysctl nodes: the POSIX Process Scheduling
 *	conformance level (kern.posix_sched) and the kern.sched subtree
 *	advertising the real-time priority range.
 */
static void
sysctl_sched_setup(struct sysctllog **clog)
{
	const struct sysctlnode *node = NULL;

	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
		CTLTYPE_INT, "posix_sched",
		SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
			     "Process Scheduling option to which the "
			     "system attempts to conform"),
		NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0,
		CTL_KERN, CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, NULL, &node,
		CTLFLAG_PERMANENT,
		CTLTYPE_NODE, "sched",
		SYSCTL_DESCR("Scheduler options"),
		NULL, 0, NULL, 0,
		CTL_KERN, CTL_CREATE, CTL_EOL);

	/* Cannot attach the children without the parent node. */
	if (node == NULL)
		return;

	sysctl_createv(clog, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
		CTLTYPE_INT, "pri_min",
		SYSCTL_DESCR("Minimal POSIX real-time priority"),
		NULL, SCHED_PRI_MIN, NULL, 0,
		CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
		CTLTYPE_INT, "pri_max",
		SYSCTL_DESCR("Maximal POSIX real-time priority"),
		NULL, SCHED_PRI_MAX, NULL, 0,
		CTL_CREATE, CTL_EOL);
}
651
652 static int
653 sched_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
654 void *arg0, void *arg1, void *arg2, void *arg3)
655 {
656 struct proc *p;
657 int result;
658
659 result = KAUTH_RESULT_DEFER;
660 p = arg0;
661
662 switch (action) {
663 case KAUTH_PROCESS_SCHEDULER_GETPARAM:
664 if (kauth_cred_uidmatch(cred, p->p_cred))
665 result = KAUTH_RESULT_ALLOW;
666 break;
667
668 case KAUTH_PROCESS_SCHEDULER_SETPARAM:
669 if (kauth_cred_uidmatch(cred, p->p_cred)) {
670 struct lwp *l;
671 int policy;
672 pri_t priority;
673
674 l = arg1;
675 policy = (int)(unsigned long)arg2;
676 priority = (pri_t)(unsigned long)arg3;
677
678 if ((policy == l->l_class ||
679 (policy != SCHED_FIFO && policy != SCHED_RR)) &&
680 priority <= l->l_priority)
681 result = KAUTH_RESULT_ALLOW;
682 }
683
684 break;
685
686 case KAUTH_PROCESS_SCHEDULER_GETAFFINITY:
687 result = KAUTH_RESULT_ALLOW;
688 break;
689
690 case KAUTH_PROCESS_SCHEDULER_SETAFFINITY:
691 /* Privileged; we let the secmodel handle this. */
692 break;
693
694 default:
695 break;
696 }
697
698 return result;
699 }
700
/*
 * sched_init:
 *
 *	Initialize the scheduler syscall sub-system: attach the sysctl
 *	nodes and register the kauth(9) scheduler authorization listener.
 */
void
sched_init(void)
{

	sysctl_sched_setup(&sched_sysctl_log);

	sched_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS,
	    sched_listener_cb, NULL);
}
Cache object: d40de701f572164f4ca6353fede109bd
|