1 /* $NetBSD: linux_sched.c,v 1.58.4.1 2009/06/19 21:42:28 snj Exp $ */
2
3 /*-
4 * Copyright (c) 1999 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center; by Matthias Scheler.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /*
34 * Linux compatibility module. Try to deal with scheduler related syscalls.
35 */
36
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.58.4.1 2009/06/19 21:42:28 snj Exp $");
39
40 #include <sys/param.h>
41 #include <sys/mount.h>
42 #include <sys/proc.h>
43 #include <sys/systm.h>
44 #include <sys/sysctl.h>
45 #include <sys/malloc.h>
46 #include <sys/syscallargs.h>
47 #include <sys/wait.h>
48 #include <sys/kauth.h>
49 #include <sys/ptrace.h>
50
51 #include <sys/cpu.h>
52
53 #include <compat/linux/common/linux_types.h>
54 #include <compat/linux/common/linux_signal.h>
55 #include <compat/linux/common/linux_machdep.h> /* For LINUX_NPTL */
56 #include <compat/linux/common/linux_emuldata.h>
57 #include <compat/linux/common/linux_ipc.h>
58 #include <compat/linux/common/linux_sem.h>
59 #include <compat/linux/common/linux_exec.h>
60
61 #include <compat/linux/linux_syscallargs.h>
62
63 #include <compat/linux/common/linux_sched.h>
64
65 int
66 linux_sys_clone(struct lwp *l, const struct linux_sys_clone_args *uap, register_t *retval)
67 {
68 /* {
69 syscallarg(int) flags;
70 syscallarg(void *) stack;
71 #ifdef LINUX_NPTL
72 syscallarg(void *) parent_tidptr;
73 syscallarg(void *) child_tidptr;
74 #endif
75 } */
76 int flags, sig;
77 int error;
78 struct proc *p;
79 #ifdef LINUX_NPTL
80 struct linux_emuldata *led;
81 #endif
82
83 /*
84 * We don't support the Linux CLONE_PID or CLONE_PTRACE flags.
85 */
86 if (SCARG(uap, flags) & (LINUX_CLONE_PID|LINUX_CLONE_PTRACE))
87 return (EINVAL);
88
89 /*
90 * Thread group implies shared signals. Shared signals
91 * imply shared VM. This matches what Linux kernel does.
92 */
93 if (SCARG(uap, flags) & LINUX_CLONE_THREAD
94 && (SCARG(uap, flags) & LINUX_CLONE_SIGHAND) == 0)
95 return (EINVAL);
96 if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND
97 && (SCARG(uap, flags) & LINUX_CLONE_VM) == 0)
98 return (EINVAL);
99
100 flags = 0;
101
102 if (SCARG(uap, flags) & LINUX_CLONE_VM)
103 flags |= FORK_SHAREVM;
104 if (SCARG(uap, flags) & LINUX_CLONE_FS)
105 flags |= FORK_SHARECWD;
106 if (SCARG(uap, flags) & LINUX_CLONE_FILES)
107 flags |= FORK_SHAREFILES;
108 if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND)
109 flags |= FORK_SHARESIGS;
110 if (SCARG(uap, flags) & LINUX_CLONE_VFORK)
111 flags |= FORK_PPWAIT;
112
113 sig = SCARG(uap, flags) & LINUX_CLONE_CSIGNAL;
114 if (sig < 0 || sig >= LINUX__NSIG)
115 return (EINVAL);
116 sig = linux_to_native_signo[sig];
117
118 #ifdef LINUX_NPTL
119 led = (struct linux_emuldata *)l->l_proc->p_emuldata;
120
121 led->parent_tidptr = SCARG(uap, parent_tidptr);
122 led->child_tidptr = SCARG(uap, child_tidptr);
123 led->clone_flags = SCARG(uap, flags);
124 #endif /* LINUX_NPTL */
125
126 /*
127 * Note that Linux does not provide a portable way of specifying
128 * the stack area; the caller must know if the stack grows up
129 * or down. So, we pass a stack size of 0, so that the code
130 * that makes this adjustment is a noop.
131 */
132 if ((error = fork1(l, flags, sig, SCARG(uap, stack), 0,
133 NULL, NULL, retval, &p)) != 0)
134 return error;
135
136 #ifdef LINUX_NPTL
137 if ((SCARG(uap, flags) & LINUX_CLONE_SETTLS) != 0)
138 return linux_init_thread_area(l, LIST_FIRST(&p->p_lwps));
139 #endif /* LINUX_NPTL */
140
141 return 0;
142 }
143
/*
 * Linux real-time priorities.
 *
 * - SCHED_FIFO and SCHED_RR tasks use priorities in the range [1, 99].
 *
 * - SCHED_OTHER tasks have no real-time priority; for them
 *   sched_param::sched_priority is always 0.
 */

#define	LINUX_SCHED_RTPRIO_MIN	1	/* lowest SCHED_FIFO/SCHED_RR priority */
#define	LINUX_SCHED_RTPRIO_MAX	99	/* highest SCHED_FIFO/SCHED_RR priority */
155
156 static int
157 sched_linux2native(int linux_policy, struct linux_sched_param *linux_params,
158 int *native_policy, struct sched_param *native_params)
159 {
160
161 switch (linux_policy) {
162 case LINUX_SCHED_OTHER:
163 if (native_policy != NULL) {
164 *native_policy = SCHED_OTHER;
165 }
166 break;
167
168 case LINUX_SCHED_FIFO:
169 if (native_policy != NULL) {
170 *native_policy = SCHED_FIFO;
171 }
172 break;
173
174 case LINUX_SCHED_RR:
175 if (native_policy != NULL) {
176 *native_policy = SCHED_RR;
177 }
178 break;
179
180 default:
181 return EINVAL;
182 }
183
184 if (linux_params != NULL) {
185 int prio = linux_params->sched_priority;
186
187 KASSERT(native_params != NULL);
188
189 if (linux_policy == LINUX_SCHED_OTHER) {
190 if (prio != 0) {
191 return EINVAL;
192 }
193 native_params->sched_priority = PRI_NONE; /* XXX */
194 } else {
195 if (prio < LINUX_SCHED_RTPRIO_MIN ||
196 prio > LINUX_SCHED_RTPRIO_MAX) {
197 return EINVAL;
198 }
199 native_params->sched_priority =
200 (prio - LINUX_SCHED_RTPRIO_MIN)
201 * (SCHED_PRI_MAX - SCHED_PRI_MIN)
202 / (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
203 + SCHED_PRI_MIN;
204 }
205 }
206
207 return 0;
208 }
209
210 static int
211 sched_native2linux(int native_policy, struct sched_param *native_params,
212 int *linux_policy, struct linux_sched_param *linux_params)
213 {
214
215 switch (native_policy) {
216 case SCHED_OTHER:
217 if (linux_policy != NULL) {
218 *linux_policy = LINUX_SCHED_OTHER;
219 }
220 break;
221
222 case SCHED_FIFO:
223 if (linux_policy != NULL) {
224 *linux_policy = LINUX_SCHED_FIFO;
225 }
226 break;
227
228 case SCHED_RR:
229 if (linux_policy != NULL) {
230 *linux_policy = LINUX_SCHED_RR;
231 }
232 break;
233
234 default:
235 panic("%s: unknown policy %d\n", __func__, native_policy);
236 }
237
238 if (native_params != NULL) {
239 int prio = native_params->sched_priority;
240
241 KASSERT(prio >= SCHED_PRI_MIN);
242 KASSERT(prio <= SCHED_PRI_MAX);
243 KASSERT(linux_params != NULL);
244
245 #ifdef DEBUG_LINUX
246 printf("native2linux: native: policy %d, priority %d\n",
247 native_policy, prio);
248 #endif
249
250 if (native_policy == SCHED_OTHER) {
251 linux_params->sched_priority = 0;
252 } else {
253 linux_params->sched_priority =
254 (prio - SCHED_PRI_MIN)
255 * (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
256 / (SCHED_PRI_MAX - SCHED_PRI_MIN)
257 + LINUX_SCHED_RTPRIO_MIN;
258 }
259 #ifdef DEBUG_LINUX
260 printf("native2linux: linux: policy %d, priority %d\n",
261 -1, linux_params->sched_priority);
262 #endif
263 }
264
265 return 0;
266 }
267
268 int
269 linux_sys_sched_setparam(struct lwp *l, const struct linux_sys_sched_setparam_args *uap, register_t *retval)
270 {
271 /* {
272 syscallarg(linux_pid_t) pid;
273 syscallarg(const struct linux_sched_param *) sp;
274 } */
275 int error, policy;
276 struct linux_sched_param lp;
277 struct sched_param sp;
278
279 if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
280 error = EINVAL;
281 goto out;
282 }
283
284 error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
285 if (error)
286 goto out;
287
288 /* We need the current policy in Linux terms. */
289 error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
290 if (error)
291 goto out;
292 error = sched_native2linux(policy, NULL, &policy, NULL);
293 if (error)
294 goto out;
295
296 error = sched_linux2native(policy, &lp, &policy, &sp);
297 if (error)
298 goto out;
299
300 error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);
301 if (error)
302 goto out;
303
304 out:
305 return error;
306 }
307
308 int
309 linux_sys_sched_getparam(struct lwp *l, const struct linux_sys_sched_getparam_args *uap, register_t *retval)
310 {
311 /* {
312 syscallarg(linux_pid_t) pid;
313 syscallarg(struct linux_sched_param *) sp;
314 } */
315 struct linux_sched_param lp;
316 struct sched_param sp;
317 int error, policy;
318
319 if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
320 error = EINVAL;
321 goto out;
322 }
323
324 error = do_sched_getparam(SCARG(uap, pid), 0, &policy, &sp);
325 if (error)
326 goto out;
327 #ifdef DEBUG_LINUX
328 printf("getparam: native: policy %d, priority %d\n",
329 policy, sp.sched_priority);
330 #endif
331
332 error = sched_native2linux(policy, &sp, NULL, &lp);
333 if (error)
334 goto out;
335 #ifdef DEBUG_LINUX
336 printf("getparam: linux: policy %d, priority %d\n",
337 policy, lp.sched_priority);
338 #endif
339
340 error = copyout(&lp, SCARG(uap, sp), sizeof(lp));
341 if (error)
342 goto out;
343
344 out:
345 return error;
346 }
347
348 int
349 linux_sys_sched_setscheduler(struct lwp *l, const struct linux_sys_sched_setscheduler_args *uap, register_t *retval)
350 {
351 /* {
352 syscallarg(linux_pid_t) pid;
353 syscallarg(int) policy;
354 syscallarg(cont struct linux_sched_scheduler *) sp;
355 } */
356 int error, policy;
357 struct linux_sched_param lp;
358 struct sched_param sp;
359
360 if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
361 error = EINVAL;
362 goto out;
363 }
364
365 error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
366 if (error)
367 goto out;
368 #ifdef DEBUG_LINUX
369 printf("setscheduler: linux: policy %d, priority %d\n",
370 SCARG(uap, policy), lp.sched_priority);
371 #endif
372
373 error = sched_linux2native(SCARG(uap, policy), &lp, &policy, &sp);
374 if (error)
375 goto out;
376 #ifdef DEBUG_LINUX
377 printf("setscheduler: native: policy %d, priority %d\n",
378 policy, sp.sched_priority);
379 #endif
380
381 error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);
382 if (error)
383 goto out;
384
385 out:
386 return error;
387 }
388
389 int
390 linux_sys_sched_getscheduler(struct lwp *l, const struct linux_sys_sched_getscheduler_args *uap, register_t *retval)
391 {
392 /* {
393 syscallarg(linux_pid_t) pid;
394 } */
395 int error, policy;
396
397 *retval = -1;
398
399 error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
400 if (error)
401 goto out;
402
403 error = sched_native2linux(policy, NULL, &policy, NULL);
404 if (error)
405 goto out;
406
407 *retval = policy;
408
409 out:
410 return error;
411 }
412
413 int
414 linux_sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
415 {
416
417 yield();
418 return 0;
419 }
420
421 int
422 linux_sys_sched_get_priority_max(struct lwp *l, const struct linux_sys_sched_get_priority_max_args *uap, register_t *retval)
423 {
424 /* {
425 syscallarg(int) policy;
426 } */
427
428 switch (SCARG(uap, policy)) {
429 case LINUX_SCHED_OTHER:
430 *retval = 0;
431 break;
432 case LINUX_SCHED_FIFO:
433 case LINUX_SCHED_RR:
434 *retval = LINUX_SCHED_RTPRIO_MAX;
435 break;
436 default:
437 return EINVAL;
438 }
439
440 return 0;
441 }
442
443 int
444 linux_sys_sched_get_priority_min(struct lwp *l, const struct linux_sys_sched_get_priority_min_args *uap, register_t *retval)
445 {
446 /* {
447 syscallarg(int) policy;
448 } */
449
450 switch (SCARG(uap, policy)) {
451 case LINUX_SCHED_OTHER:
452 *retval = 0;
453 break;
454 case LINUX_SCHED_FIFO:
455 case LINUX_SCHED_RR:
456 *retval = LINUX_SCHED_RTPRIO_MIN;
457 break;
458 default:
459 return EINVAL;
460 }
461
462 return 0;
463 }
464
465 #ifndef __m68k__
466 /* Present on everything but m68k */
467 int
468 linux_sys_exit_group(struct lwp *l, const struct linux_sys_exit_group_args *uap, register_t *retval)
469 {
470 #ifdef LINUX_NPTL
471 /* {
472 syscallarg(int) error_code;
473 } */
474 struct proc *p = l->l_proc;
475 struct linux_emuldata *led = p->p_emuldata;
476 struct linux_emuldata *e;
477
478 if (led->s->flags & LINUX_LES_USE_NPTL) {
479
480 #ifdef DEBUG_LINUX
481 printf("%s:%d, led->s->refs = %d\n", __func__, __LINE__,
482 led->s->refs);
483 #endif
484
485 /*
486 * The calling thread is supposed to kill all threads
487 * in the same thread group (i.e. all threads created
488 * via clone(2) with CLONE_THREAD flag set).
489 *
490 * If there is only one thread, things are quite simple
491 */
492 if (led->s->refs == 1)
493 return sys_exit(l, (const void *)uap, retval);
494
495 #ifdef DEBUG_LINUX
496 printf("%s:%d\n", __func__, __LINE__);
497 #endif
498
499 mutex_enter(proc_lock);
500 led->s->flags |= LINUX_LES_INEXITGROUP;
501 led->s->xstat = W_EXITCODE(SCARG(uap, error_code), 0);
502
503 /*
504 * Kill all threads in the group. The emulation exit hook takes
505 * care of hiding the zombies and reporting the exit code
506 * properly.
507 */
508 LIST_FOREACH(e, &led->s->threads, threads) {
509 if (e->proc == p)
510 continue;
511
512 #ifdef DEBUG_LINUX
513 printf("%s: kill PID %d\n", __func__, e->proc->p_pid);
514 #endif
515 psignal(e->proc, SIGKILL);
516 }
517
518 /* Now, kill ourselves */
519 psignal(p, SIGKILL);
520 mutex_exit(proc_lock);
521
522 return 0;
523
524 }
525 #endif /* LINUX_NPTL */
526
527 return sys_exit(l, (const void *)uap, retval);
528 }
529 #endif /* !__m68k__ */
530
531 #ifdef LINUX_NPTL
532 int
533 linux_sys_set_tid_address(struct lwp *l, const struct linux_sys_set_tid_address_args *uap, register_t *retval)
534 {
535 /* {
536 syscallarg(int *) tidptr;
537 } */
538 struct linux_emuldata *led;
539
540 led = (struct linux_emuldata *)l->l_proc->p_emuldata;
541 led->clear_tid = SCARG(uap, tid);
542
543 led->s->flags |= LINUX_LES_USE_NPTL;
544
545 *retval = l->l_proc->p_pid;
546
547 return 0;
548 }
549
550 /* ARGUSED1 */
551 int
552 linux_sys_gettid(struct lwp *l, const void *v, register_t *retval)
553 {
554 /* The Linux kernel does it exactly that way */
555 *retval = l->l_proc->p_pid;
556 return 0;
557 }
558
559 #ifdef LINUX_NPTL
560 /* ARGUSED1 */
561 int
562 linux_sys_getpid(struct lwp *l, const void *v, register_t *retval)
563 {
564 struct linux_emuldata *led = l->l_proc->p_emuldata;
565
566 if (led->s->flags & LINUX_LES_USE_NPTL) {
567 /* The Linux kernel does it exactly that way */
568 *retval = led->s->group_pid;
569 } else {
570 *retval = l->l_proc->p_pid;
571 }
572
573 return 0;
574 }
575
576 /* ARGUSED1 */
577 int
578 linux_sys_getppid(struct lwp *l, const void *v, register_t *retval)
579 {
580 struct proc *p = l->l_proc;
581 struct linux_emuldata *led = p->p_emuldata;
582 struct proc *glp;
583 struct proc *pp;
584
585 mutex_enter(proc_lock);
586 if (led->s->flags & LINUX_LES_USE_NPTL) {
587
588 /* Find the thread group leader's parent */
589 if ((glp = p_find(led->s->group_pid, PFIND_LOCKED)) == NULL) {
590 /* Maybe panic... */
591 printf("linux_sys_getppid: missing group leader PID"
592 " %d\n", led->s->group_pid);
593 mutex_exit(proc_lock);
594 return -1;
595 }
596 pp = glp->p_pptr;
597
598 /* If this is a Linux process too, return thread group PID */
599 if (pp->p_emul == p->p_emul) {
600 struct linux_emuldata *pled;
601
602 pled = pp->p_emuldata;
603 *retval = pled->s->group_pid;
604 } else {
605 *retval = pp->p_pid;
606 }
607
608 } else {
609 *retval = p->p_pptr->p_pid;
610 }
611 mutex_exit(proc_lock);
612
613 return 0;
614 }
615 #endif /* LINUX_NPTL */
616
617 int
618 linux_sys_sched_getaffinity(struct lwp *l, const struct linux_sys_sched_getaffinity_args *uap, register_t *retval)
619 {
620 /* {
621 syscallarg(pid_t) pid;
622 syscallarg(unsigned int) len;
623 syscallarg(unsigned long *) mask;
624 } */
625 int error;
626 int ret;
627 char *data;
628 int *retp;
629
630 if (SCARG(uap, mask) == NULL)
631 return EINVAL;
632
633 if (SCARG(uap, len) < sizeof(int))
634 return EINVAL;
635
636 if (pfind(SCARG(uap, pid)) == NULL)
637 return ESRCH;
638
639 /*
640 * return the actual number of CPU, tag all of them as available
641 * The result is a mask, the first CPU being in the least significant
642 * bit.
643 */
644 ret = (1 << ncpu) - 1;
645 data = malloc(SCARG(uap, len), M_TEMP, M_WAITOK|M_ZERO);
646 retp = (int *)&data[SCARG(uap, len) - sizeof(ret)];
647 *retp = ret;
648
649 error = copyout(data, SCARG(uap, mask), SCARG(uap, len));
650
651 free(data, M_TEMP);
652
653 return error;
654
655 }
656
657 int
658 linux_sys_sched_setaffinity(struct lwp *l, const struct linux_sys_sched_setaffinity_args *uap, register_t *retval)
659 {
660 /* {
661 syscallarg(pid_t) pid;
662 syscallarg(unsigned int) len;
663 syscallarg(unsigned long *) mask;
664 } */
665
666 if (pfind(SCARG(uap, pid)) == NULL)
667 return ESRCH;
668
669 /* Let's ignore it */
670 #ifdef DEBUG_LINUX
671 printf("linux_sys_sched_setaffinity\n");
672 #endif
673 return 0;
674 };
675 #endif /* LINUX_NPTL */
Cache object: 6a485d710de3e548d37a5b777f4e4ea6
|