FreeBSD/Linux Kernel Cross Reference
sys/kern/sys_sched.c
1 /* $NetBSD: sys_sched.c,v 1.30.4.3 2009/03/08 03:15:36 snj Exp $ */
2
3 /*
4 * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 /*
30 * System calls relating to the scheduler.
31 *
32 * Lock order:
33 *
34 * cpu_lock ->
35 * proc_lock ->
36 * proc_t::p_lock ->
37 * lwp_t::lwp_lock
38 *
39 * TODO:
40 * - Handle pthread_setschedprio() as defined by POSIX;
41 * - Handle sched_yield() case for SCHED_FIFO as defined by POSIX;
42 */
43
44 #include <sys/cdefs.h>
45 __KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.30.4.3 2009/03/08 03:15:36 snj Exp $");
46
47 #include <sys/param.h>
48
49 #include <sys/cpu.h>
50 #include <sys/kauth.h>
51 #include <sys/kmem.h>
52 #include <sys/lwp.h>
53 #include <sys/mutex.h>
54 #include <sys/proc.h>
55 #include <sys/pset.h>
56 #include <sys/sa.h>
57 #include <sys/savar.h>
58 #include <sys/sched.h>
59 #include <sys/syscallargs.h>
60 #include <sys/sysctl.h>
61 #include <sys/systm.h>
62 #include <sys/types.h>
63 #include <sys/unistd.h>
64
65 #include "opt_sa.h"
66
67 /*
68 * Convert user priority or the in-kernel priority or convert the current
69 * priority to the appropriate range according to the policy change.
70 */
71 static pri_t
72 convert_pri(lwp_t *l, int policy, pri_t pri)
73 {
74
75 /* Convert user priority to the in-kernel */
76 if (pri != PRI_NONE) {
77 /* Only for real-time threads */
78 KASSERT(pri >= SCHED_PRI_MIN && pri <= SCHED_PRI_MAX);
79 KASSERT(policy != SCHED_OTHER);
80 return PRI_USER_RT + pri;
81 }
82
83 /* Neither policy, nor priority change */
84 if (l->l_class == policy)
85 return l->l_priority;
86
87 /* Time-sharing -> real-time */
88 if (l->l_class == SCHED_OTHER) {
89 KASSERT(policy == SCHED_FIFO || policy == SCHED_RR);
90 return PRI_USER_RT;
91 }
92
93 /* Real-time -> time-sharing */
94 if (policy == SCHED_OTHER) {
95 KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR);
96 return l->l_priority - PRI_USER_RT;
97 }
98
99 /* Real-time -> real-time */
100 return l->l_priority;
101 }
102
/*
 * Set the scheduling class and/or priority of the LWP(s) selected by
 * pid/lid.  pid == 0 means the calling process; lid == 0 means every
 * LWP of the target process.  policy may be SCHED_NONE (keep each
 * LWP's class) and params->sched_priority may be PRI_NONE (derive a
 * priority via convert_pri()).
 *
 * Returns 0 on success, EINVAL/EPERM on validation or permission
 * failure, and ESRCH when the process or a matching LWP is not found.
 */
int
do_sched_setparam(pid_t pid, lwpid_t lid, int policy,
    const struct sched_param *params)
{
	struct proc *p;
	struct lwp *t;
	pri_t pri;
	u_int lcnt;
	int error;

	error = 0;

	pri = params->sched_priority;

	/* If no parameters specified, just return (this should not happen) */
	if (pri == PRI_NONE && policy == SCHED_NONE)
		return 0;

	/* Validate scheduling class */
	if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR))
		return EINVAL;

	/* Validate priority */
	if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX))
		return EINVAL;

	if (pid != 0) {
		/* Find the process */
		mutex_enter(proc_lock);
		p = p_find(pid, PFIND_LOCKED);
		if (p == NULL) {
			mutex_exit(proc_lock);
			return ESRCH;
		}
		/* Take p_lock before dropping proc_lock (see lock order). */
		mutex_enter(p->p_lock);
		mutex_exit(proc_lock);
		/* Disallow modification of system processes */
		if ((p->p_flag & PK_SYSTEM) != 0) {
			mutex_exit(p->p_lock);
			return EPERM;
		}
	} else {
		/* Use the calling process */
		p = curlwp->l_proc;
		mutex_enter(p->p_lock);
	}

	/* Find the LWP(s) */
	lcnt = 0;
	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
		pri_t kpri;
		int lpolicy;

		if (lid && lid != t->l_lid)
			continue;

		lcnt++;
		lwp_lock(t);
		/* SCHED_NONE means "keep this LWP's current class". */
		lpolicy = (policy == SCHED_NONE) ? t->l_class : policy;

		/* Disallow setting of priority for SCHED_OTHER threads */
		if (lpolicy == SCHED_OTHER && pri != PRI_NONE) {
			lwp_unlock(t);
			error = EINVAL;
			break;
		}

		/* Convert priority, if needed */
		kpri = convert_pri(t, lpolicy, pri);

		/* Check the permission */
		error = kauth_authorize_process(kauth_cred_get(),
		    KAUTH_PROCESS_SCHEDULER_SETPARAM, p, t, KAUTH_ARG(lpolicy),
		    KAUTH_ARG(kpri));
		if (error) {
			lwp_unlock(t);
			break;
		}

		/* Set the scheduling class, change the priority */
		t->l_class = lpolicy;
		lwp_changepri(t, kpri);
		lwp_unlock(t);
	}
	mutex_exit(p->p_lock);
	/* No LWP matched the given lid: report ESRCH. */
	return (lcnt == 0) ? ESRCH : error;
}
190
191 /*
192 * Set scheduling parameters.
193 */
194 int
195 sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap,
196 register_t *retval)
197 {
198 /* {
199 syscallarg(pid_t) pid;
200 syscallarg(lwpid_t) lid;
201 syscallarg(int) policy;
202 syscallarg(const struct sched_param *) params;
203 } */
204 struct sched_param params;
205 int error;
206
207 /* Get the parameters from the user-space */
208 error = copyin(SCARG(uap, params), ¶ms, sizeof(params));
209 if (error)
210 goto out;
211
212 error = do_sched_setparam(SCARG(uap, pid), SCARG(uap, lid),
213 SCARG(uap, policy), ¶ms);
214 out:
215 return error;
216 }
217
/*
 * Fetch the scheduling class and user-visible priority of the LWP
 * selected by pid/lid.  Either output pointer (policy, params) may be
 * NULL if the caller does not need that value.  The in-kernel
 * priority is converted back to the per-class user range before it is
 * returned.
 *
 * Returns 0 on success, ESRCH if the LWP is not found, or the kauth
 * error on a permission failure.
 */
int
do_sched_getparam(pid_t pid, lwpid_t lid, int *policy,
    struct sched_param *params)
{
	struct sched_param lparams;
	struct lwp *t;
	int error, lpolicy;

	/*
	 * Locks the LWP: on success lwp_find2() returns with the
	 * owning process's p_lock held (released on all paths below).
	 */
	t = lwp_find2(pid, lid);
	if (t == NULL)
		return ESRCH;

	/* Check the permission */
	error = kauth_authorize_process(kauth_cred_get(),
	    KAUTH_PROCESS_SCHEDULER_GETPARAM, t->l_proc, NULL, NULL, NULL);
	if (error != 0) {
		mutex_exit(t->l_proc->p_lock);
		return error;
	}

	/* Snapshot class and priority under the LWP lock. */
	lwp_lock(t);
	lparams.sched_priority = t->l_priority;
	lpolicy = t->l_class;

	/* Convert the in-kernel priority to the class's user range. */
	switch (lpolicy) {
	case SCHED_OTHER:
		lparams.sched_priority -= PRI_USER;
		break;
	case SCHED_RR:
	case SCHED_FIFO:
		lparams.sched_priority -= PRI_USER_RT;
		break;
	}

	if (policy != NULL)
		*policy = lpolicy;

	if (params != NULL)
		*params = lparams;

	lwp_unlock(t);
	mutex_exit(t->l_proc->p_lock);
	return error;
}
263
264 /*
265 * Get scheduling parameters.
266 */
267 int
268 sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap,
269 register_t *retval)
270 {
271 /* {
272 syscallarg(pid_t) pid;
273 syscallarg(lwpid_t) lid;
274 syscallarg(int *) policy;
275 syscallarg(struct sched_param *) params;
276 } */
277 struct sched_param params;
278 int error, policy;
279
280 error = do_sched_getparam(SCARG(uap, pid), SCARG(uap, lid), &policy,
281 ¶ms);
282 if (error)
283 goto out;
284
285 error = copyout(¶ms, SCARG(uap, params), sizeof(params));
286 if (error == 0 && SCARG(uap, policy) != NULL)
287 error = copyout(&policy, SCARG(uap, policy), sizeof(int));
288 out:
289 return error;
290 }
291
292 /*
293 * Allocate the CPU set, and get it from userspace.
294 */
295 static int
296 genkcpuset(kcpuset_t **dset, const cpuset_t *sset, size_t size)
297 {
298 int error;
299
300 *dset = kcpuset_create();
301 error = kcpuset_copyin(sset, *dset, size);
302 if (error != 0)
303 kcpuset_unuse(*dset, NULL);
304 return error;
305 }
306
/*
 * Set affinity.
 *
 * Sets (or, for an empty CPU set, clears) the affinity mask of the
 * LWP(s) selected by pid/lid.  cpu_lock is held across the entire
 * operation so that CPU online/offline state and processor-set
 * assignments stay stable while the mask is validated and applied.
 */
int
sys__sched_setaffinity(struct lwp *l,
    const struct sys__sched_setaffinity_args *uap, register_t *retval)
{
	/* {
		syscallarg(pid_t) pid;
		syscallarg(lwpid_t) lid;
		syscallarg(size_t) size;
		syscallarg(const cpuset_t *) cpuset;
	} */
	kcpuset_t *cpuset, *cpulst = NULL;
	struct cpu_info *ici, *ci;
	struct proc *p;
	struct lwp *t;
	CPU_INFO_ITERATOR cii;
	bool alloff;
	lwpid_t lid;
	u_int lcnt;
	int error;

	error = genkcpuset(&cpuset, SCARG(uap, cpuset), SCARG(uap, size));
	if (error)
		return error;

	/*
	 * Traverse _each_ CPU to:
	 *  - Check that CPUs in the mask have no assigned processor set.
	 *  - Check that at least one CPU from the mask is online.
	 *  - Find the first target CPU to migrate.
	 *
	 * To avoid the race with CPU online/offline calls and processor sets,
	 * cpu_lock will be locked for the entire operation.
	 */
	ci = NULL;
	alloff = false;
	mutex_enter(&cpu_lock);
	for (CPU_INFO_FOREACH(cii, ici)) {
		struct schedstate_percpu *ispc;

		/* Ignore CPUs that are not in the requested mask. */
		if (kcpuset_isset(cpu_index(ici), cpuset) == 0)
			continue;

		ispc = &ici->ci_schedstate;
		/* Check that CPU is not in the processor-set */
		if (ispc->spc_psid != PS_NONE) {
			error = EPERM;
			goto out;
		}
		/* Skip offline CPUs */
		if (ispc->spc_flags & SPCF_OFFLINE) {
			alloff = true;
			continue;
		}
		/* Target CPU to migrate */
		if (ci == NULL) {
			ci = ici;
		}
	}
	if (ci == NULL) {
		if (alloff) {
			/* All CPUs in the set are offline */
			error = EPERM;
			goto out;
		}
		/* Empty set: treated as a request to clear the affinity. */
		kcpuset_unuse(cpuset, &cpulst);
		cpuset = NULL;
	}

	if (SCARG(uap, pid) != 0) {
		/* Find the process */
		mutex_enter(proc_lock);
		p = p_find(SCARG(uap, pid), PFIND_LOCKED);
		if (p == NULL) {
			mutex_exit(proc_lock);
			error = ESRCH;
			goto out;
		}
		/* Take p_lock before dropping proc_lock (see lock order). */
		mutex_enter(p->p_lock);
		mutex_exit(proc_lock);
		/* Disallow modification of system processes. */
		if ((p->p_flag & PK_SYSTEM) != 0) {
			mutex_exit(p->p_lock);
			error = EPERM;
			goto out;
		}
	} else {
		/* Use the calling process */
		p = l->l_proc;
		mutex_enter(p->p_lock);
	}

	/*
	 * Check the permission.
	 */
	error = kauth_authorize_process(l->l_cred,
	    KAUTH_PROCESS_SCHEDULER_SETAFFINITY, p, NULL, NULL, NULL);
	if (error != 0) {
		mutex_exit(p->p_lock);
		goto out;
	}

#ifdef KERN_SA
	/* Changing the affinity of a SA process is not supported */
	if ((p->p_sflag & (PS_SA | PS_WEXIT)) != 0 || p->p_sa != NULL) {
		mutex_exit(p->p_lock);
		error = EINVAL;
		goto out;
	}
#endif

	/* Find the LWP(s) */
	lcnt = 0;
	lid = SCARG(uap, lid);
	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
		if (lid && lid != t->l_lid)
			continue;
		lwp_lock(t);
		/* It is not allowed to set the affinity for zombie LWPs */
		if (t->l_stat == LSZOMB) {
			lwp_unlock(t);
			continue;
		}
		if (cpuset) {
			/* Set the affinity flag and new CPU set */
			t->l_flag |= LW_AFFINITY;
			kcpuset_use(cpuset);
			/* Drop the old set's reference, if any. */
			if (t->l_affinity != NULL)
				kcpuset_unuse(t->l_affinity, &cpulst);
			t->l_affinity = cpuset;
			/* Migrate to another CPU, unlocks LWP */
			lwp_migrate(t, ci);
		} else {
			/* Unset the affinity flag */
			t->l_flag &= ~LW_AFFINITY;
			if (t->l_affinity != NULL)
				kcpuset_unuse(t->l_affinity, &cpulst);
			t->l_affinity = NULL;
			lwp_unlock(t);
		}
		lcnt++;
	}
	mutex_exit(p->p_lock);
	if (lcnt == 0)
		error = ESRCH;
 out:
	mutex_exit(&cpu_lock);
	/* Drop our reference; destroy sets collected on the cpulst chain. */
	if (cpuset != NULL)
		kcpuset_unuse(cpuset, &cpulst);
	kcpuset_destroy(cpulst);
	return error;
}
462
/*
 * Get affinity.
 *
 * Copies the affinity mask of the LWP selected by pid/lid out to
 * user-space.  If the LWP has no affinity set (LW_AFFINITY clear),
 * an all-zero set is returned.
 */
int
sys__sched_getaffinity(struct lwp *l,
    const struct sys__sched_getaffinity_args *uap, register_t *retval)
{
	/* {
		syscallarg(pid_t) pid;
		syscallarg(lwpid_t) lid;
		syscallarg(size_t) size;
		syscallarg(cpuset_t *) cpuset;
	} */
	struct lwp *t;
	kcpuset_t *cpuset;
	int error;

	/*
	 * NOTE(review): genkcpuset() copies the user set in, but only
	 * its allocation is needed here; the contents are overwritten
	 * below — presumably reused for size validation.  Confirm.
	 */
	error = genkcpuset(&cpuset, SCARG(uap, cpuset), SCARG(uap, size));
	if (error)
		return error;

	/* Locks the LWP: returns with the owning proc's p_lock held. */
	t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid));
	if (t == NULL) {
		error = ESRCH;
		goto out;
	}
	/* Check the permission */
	if (kauth_authorize_process(l->l_cred,
	    KAUTH_PROCESS_SCHEDULER_GETAFFINITY, t->l_proc, NULL, NULL, NULL)) {
		mutex_exit(t->l_proc->p_lock);
		error = EPERM;
		goto out;
	}
	/* Snapshot the affinity set under the LWP lock. */
	lwp_lock(t);
	if (t->l_flag & LW_AFFINITY) {
		KASSERT(t->l_affinity != NULL);
		kcpuset_copy(cpuset, t->l_affinity);
	} else
		kcpuset_zero(cpuset);
	lwp_unlock(t);
	mutex_exit(t->l_proc->p_lock);

	error = kcpuset_copyout(cpuset, SCARG(uap, cpuset), SCARG(uap, size));
 out:
	kcpuset_unuse(cpuset, NULL);
	return error;
}
511
512 /*
513 * Yield.
514 */
515 int
516 sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
517 {
518
519 yield();
520 #ifdef KERN_SA
521 if (l->l_flag & LW_SA) {
522 sa_preempt(l);
523 }
524 #endif
525 return 0;
526 }
527
/*
 * Sysctl nodes and initialization.
 *
 * Creates kern.posix_sched (the supported POSIX Process Scheduling
 * option version) and the kern.sched subtree with the real-time
 * priority range (pri_min / pri_max).  The parent nodes must exist
 * before the children, so creation order below matters.
 */
SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup")
{
	const struct sysctlnode *node = NULL;

	/* Ensure the top-level "kern" node exists. */
	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT,
		CTLTYPE_NODE, "kern", NULL,
		NULL, 0, NULL, 0,
		CTL_KERN, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
		CTLTYPE_INT, "posix_sched",
		SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
			     "Process Scheduling option to which the "
			     "system attempts to conform"),
		NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0,
		CTL_KERN, CTL_CREATE, CTL_EOL);
	/* Create kern.sched and remember the node for the children. */
	sysctl_createv(clog, 0, NULL, &node,
		CTLFLAG_PERMANENT,
		CTLTYPE_NODE, "sched",
		SYSCTL_DESCR("Scheduler options"),
		NULL, 0, NULL, 0,
		CTL_KERN, CTL_CREATE, CTL_EOL);

	/* Without the parent node, the children cannot be attached. */
	if (node == NULL)
		return;

	sysctl_createv(clog, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
		CTLTYPE_INT, "pri_min",
		SYSCTL_DESCR("Minimal POSIX real-time priority"),
		NULL, SCHED_PRI_MIN, NULL, 0,
		CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
		CTLTYPE_INT, "pri_max",
		SYSCTL_DESCR("Maximal POSIX real-time priority"),
		NULL, SCHED_PRI_MAX, NULL, 0,
		CTL_CREATE, CTL_EOL);
}
Cache object: 31d4f7dbe484422088394e79eea321c4
|