1 /*
2 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 #include <kern/task.h>
29 #include <kern/thread.h>
30 #include <kern/assert.h>
31 #include <kern/clock.h>
32 #include <kern/locks.h>
33 #include <kern/sched_prim.h>
34 #include <kern/debug.h>
35 #include <mach/machine/thread_status.h>
36 #include <mach/thread_act.h>
37 #include <mach/branch_predicates.h>
38
39 #include <sys/kernel.h>
40 #include <sys/vm.h>
41 #include <sys/proc_internal.h>
42 #include <sys/syscall.h>
43 #include <sys/systm.h>
44 #include <sys/user.h>
45 #include <sys/errno.h>
46 #include <sys/kdebug.h>
47 #include <sys/sysent.h>
48 #include <sys/sysproto.h>
49 #include <sys/kauth.h>
50 #include <sys/systm.h>
51
52 #include <security/audit/audit.h>
53
54 #include <i386/seg.h>
55 #include <i386/machine_routines.h>
56 #include <mach/i386/syscall_sw.h>
57
58 #include <machine/pal_routines.h>
59
#if CONFIG_DTRACE
/*
 * DTrace systrace hooks.  unix_syscall_return() compares a sysent entry's
 * sy_call against dtrace_systrace_syscall and, on a match, hands the
 * syscall number, error and return values to
 * dtrace_systrace_syscall_return().
 */
extern int32_t dtrace_systrace_syscall(struct proc *, void *, int *);
extern void dtrace_systrace_syscall_return(unsigned short, int, int *);
#endif

/* System call dispatchers defined below for 32-bit and 64-bit saved state. */
extern void unix_syscall(x86_saved_state_t *);
extern void unix_syscall64(x86_saved_state_t *);
/* Used by unix_syscall_return() to get at the thread's saved user registers. */
extern void *find_user_regs(thread_t);

extern void x86_toggle_sysenter_arg_store(thread_t thread, boolean_t valid);
/* Consulted by unix_syscall() when deciding whether arguments need a copyin. */
extern boolean_t x86_sysenter_arg_store_isvalid(thread_t thread);

/* dynamically generated at build time based on syscalls.master */
extern const char *syscallnames[];

/*
 * This needs to be a single switch so that it's "all on" or "all off",
 * rather than being turned on for some code paths and not others, as this
 * has a tendency to introduce "blame the next guy" bugs.
 */
#if DEBUG
#define FUNNEL_DEBUG 1 /* Check for funnel held on exit */
#endif
83
84 /*
85 * Function: unix_syscall
86 *
87 * Inputs: regs - pointer to i386 save area
88 *
89 * Outputs: none
90 */
91 void
92 unix_syscall(x86_saved_state_t *state)
93 {
94 thread_t thread;
95 void *vt;
96 unsigned int code;
97 struct sysent *callp;
98
99 int error;
100 vm_offset_t params;
101 struct proc *p;
102 struct uthread *uthread;
103 x86_saved_state32_t *regs;
104 boolean_t args_in_uthread;
105 boolean_t is_vfork;
106
107 assert(is_saved_state32(state));
108 regs = saved_state32(state);
109 #if DEBUG
110 if (regs->eax == 0x800)
111 thread_exception_return();
112 #endif
113 thread = current_thread();
114 uthread = get_bsdthread_info(thread);
115
116 /* Get the approriate proc; may be different from task's for vfork() */
117 is_vfork = uthread->uu_flag & UT_VFORK;
118 if (__improbable(is_vfork != 0))
119 p = current_proc();
120 else
121 p = (struct proc *)get_bsdtask_info(current_task());
122
123 /* Verify that we are not being called from a task without a proc */
124 if (__improbable(p == NULL)) {
125 regs->eax = EPERM;
126 regs->efl |= EFL_CF;
127 task_terminate_internal(current_task());
128 thread_exception_return();
129 /* NOTREACHED */
130 }
131
132 code = regs->eax & I386_SYSCALL_NUMBER_MASK;
133 DEBUG_KPRINT_SYSCALL_UNIX("unix_syscall: code=%d(%s) eip=%u\n",
134 code, syscallnames[code >= NUM_SYSENT ? 63 : code], (uint32_t)regs->eip);
135 args_in_uthread = ((regs->eax & I386_SYSCALL_ARG_BYTES_MASK) != 0) && x86_sysenter_arg_store_isvalid(thread);
136 params = (vm_offset_t) (regs->uesp + sizeof (int));
137
138 regs->efl &= ~(EFL_CF);
139
140 callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
141
142 if (__improbable(callp == sysent)) {
143 code = fuword(params);
144 params += sizeof(int);
145 callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
146 }
147
148 vt = (void *)uthread->uu_arg;
149
150 if (callp->sy_arg_bytes != 0) {
151 sy_munge_t *mungerp;
152
153 assert((unsigned) callp->sy_arg_bytes <= sizeof (uthread->uu_arg));
154 if (!args_in_uthread)
155 {
156 uint32_t nargs;
157 nargs = callp->sy_arg_bytes;
158 error = copyin((user_addr_t) params, (char *) vt, nargs);
159 if (error) {
160 regs->eax = error;
161 regs->efl |= EFL_CF;
162 thread_exception_return();
163 /* NOTREACHED */
164 }
165 }
166
167 if (__probable(code != 180)) {
168 int *ip = (int *)vt;
169
170 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
171 *ip, *(ip+1), *(ip+2), *(ip+3), 0);
172 }
173 mungerp = callp->sy_arg_munge32;
174
175 /*
176 * If non-NULL, then call the syscall argument munger to
177 * copy in arguments (see xnu/bsd/dev/{i386|x86_64}/munge.s); the
178 * first argument is NULL because we are munging in place
179 * after a copyin because the ABI currently doesn't use
180 * registers to pass system call arguments.
181 */
182 if (mungerp != NULL)
183 (*mungerp)(NULL, vt);
184 } else
185 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
186 0, 0, 0, 0, 0);
187
188 /*
189 * Delayed binding of thread credential to process credential, if we
190 * are not running with an explicitly set thread credential.
191 */
192 kauth_cred_uthread_update(uthread, p);
193
194 uthread->uu_rval[0] = 0;
195 uthread->uu_rval[1] = regs->edx;
196 uthread->uu_flag |= UT_NOTCANCELPT;
197
198
199 #ifdef JOE_DEBUG
200 uthread->uu_iocount = 0;
201 uthread->uu_vpindex = 0;
202 #endif
203
204 AUDIT_SYSCALL_ENTER(code, p, uthread);
205 error = (*(callp->sy_call))((void *) p, (void *) vt, &(uthread->uu_rval[0]));
206 AUDIT_SYSCALL_EXIT(code, p, uthread, error);
207
208 #ifdef JOE_DEBUG
209 if (uthread->uu_iocount)
210 printf("system call returned with uu_iocount != 0\n");
211 #endif
212 #if CONFIG_DTRACE
213 uthread->t_dtrace_errno = error;
214 #endif /* CONFIG_DTRACE */
215
216 if (__improbable(error == ERESTART)) {
217 /*
218 * Move the user's pc back to repeat the syscall:
219 * 5 bytes for a sysenter, or 2 for an int 8x.
220 * The SYSENTER_TF_CS covers single-stepping over a sysenter
221 * - see debug trap handler in idt.s/idt64.s
222 */
223
224 pal_syscall_restart(thread, state);
225 }
226 else if (error != EJUSTRETURN) {
227 if (__improbable(error)) {
228 regs->eax = error;
229 regs->efl |= EFL_CF; /* carry bit */
230 } else { /* (not error) */
231 regs->eax = uthread->uu_rval[0];
232 regs->edx = uthread->uu_rval[1];
233 }
234 }
235
236 DEBUG_KPRINT_SYSCALL_UNIX(
237 "unix_syscall: error=%d retval=(%u,%u)\n",
238 error, regs->eax, regs->edx);
239
240 uthread->uu_flag &= ~UT_NOTCANCELPT;
241 #if FUNNEL_DEBUG
242 /*
243 * if we're holding the funnel panic
244 */
245 syscall_exit_funnelcheck();
246 #endif /* FUNNEL_DEBUG */
247
248 if (__improbable(uthread->uu_lowpri_window)) {
249 /*
250 * task is marked as a low priority I/O type
251 * and the I/O we issued while in this system call
252 * collided with normal I/O operations... we'll
253 * delay in order to mitigate the impact of this
254 * task on the normal operation of the system
255 */
256 throttle_lowpri_io(TRUE);
257 }
258 if (__probable(code != 180))
259 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
260 error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);
261
262 if (__improbable(!is_vfork && callp->sy_call == (sy_call_t *)execve && !error)) {
263 pal_execve_return(thread);
264 }
265
266 thread_exception_return();
267 /* NOTREACHED */
268 }
269
270
271 void
272 unix_syscall64(x86_saved_state_t *state)
273 {
274 thread_t thread;
275 unsigned int code;
276 struct sysent *callp;
277 void *uargp;
278 int args_in_regs;
279 int error;
280 struct proc *p;
281 struct uthread *uthread;
282 x86_saved_state64_t *regs;
283
284 assert(is_saved_state64(state));
285 regs = saved_state64(state);
286 #if DEBUG
287 if (regs->rax == 0x2000800)
288 thread_exception_return();
289 #endif
290 thread = current_thread();
291 uthread = get_bsdthread_info(thread);
292
293 /* Get the approriate proc; may be different from task's for vfork() */
294 if (__probable(!(uthread->uu_flag & UT_VFORK)))
295 p = (struct proc *)get_bsdtask_info(current_task());
296 else
297 p = current_proc();
298
299 /* Verify that we are not being called from a task without a proc */
300 if (__improbable(p == NULL)) {
301 regs->rax = EPERM;
302 regs->isf.rflags |= EFL_CF;
303 task_terminate_internal(current_task());
304 thread_exception_return();
305 /* NOTREACHED */
306 }
307 args_in_regs = 6;
308
309 code = regs->rax & SYSCALL_NUMBER_MASK;
310 DEBUG_KPRINT_SYSCALL_UNIX(
311 "unix_syscall64: code=%d(%s) rip=%llx\n",
312 code, syscallnames[code >= NUM_SYSENT ? 63 : code], regs->isf.rip);
313 callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
314 uargp = (void *)(®s->rdi);
315
316 if (__improbable(callp == sysent)) {
317 /*
318 * indirect system call... system call number
319 * passed as 'arg0'
320 */
321 code = regs->rdi;
322 callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
323 uargp = (void *)(®s->rsi);
324 args_in_regs = 5;
325 }
326
327 if (callp->sy_narg != 0) {
328 if (code != 180) {
329 uint64_t *ip = (uint64_t *)uargp;
330
331 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
332 (int)(*ip), (int)(*(ip+1)), (int)(*(ip+2)), (int)(*(ip+3)), 0);
333 }
334 assert(callp->sy_narg <= 8);
335
336 if (__improbable(callp->sy_narg > args_in_regs)) {
337 int copyin_count;
338
339 copyin_count = (callp->sy_narg - args_in_regs) * sizeof(uint64_t);
340
341 error = copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)®s->v_arg6, copyin_count);
342 if (error) {
343 regs->rax = error;
344 regs->isf.rflags |= EFL_CF;
345 thread_exception_return();
346 /* NOTREACHED */
347 }
348 }
349 /*
350 * XXX Turn 64 bit unsafe calls into nosys()
351 */
352 if (__improbable(callp->sy_flags & UNSAFE_64BIT)) {
353 callp = &sysent[63];
354 goto unsafe;
355 }
356 } else
357 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
358 0, 0, 0, 0, 0);
359 unsafe:
360
361 /*
362 * Delayed binding of thread credential to process credential, if we
363 * are not running with an explicitly set thread credential.
364 */
365 kauth_cred_uthread_update(uthread, p);
366
367 uthread->uu_rval[0] = 0;
368 uthread->uu_rval[1] = 0;
369
370
371 uthread->uu_flag |= UT_NOTCANCELPT;
372
373 #ifdef JOE_DEBUG
374 uthread->uu_iocount = 0;
375 uthread->uu_vpindex = 0;
376 #endif
377
378 AUDIT_SYSCALL_ENTER(code, p, uthread);
379 error = (*(callp->sy_call))((void *) p, uargp, &(uthread->uu_rval[0]));
380 AUDIT_SYSCALL_EXIT(code, p, uthread, error);
381
382 #ifdef JOE_DEBUG
383 if (uthread->uu_iocount)
384 printf("system call returned with uu_iocount != 0\n");
385 #endif
386
387 #if CONFIG_DTRACE
388 uthread->t_dtrace_errno = error;
389 #endif /* CONFIG_DTRACE */
390
391 if (__improbable(error == ERESTART)) {
392 /*
393 * all system calls come through via the syscall instruction
394 * in 64 bit mode... its 2 bytes in length
395 * move the user's pc back to repeat the syscall:
396 */
397 pal_syscall_restart( thread, state );
398 }
399 else if (error != EJUSTRETURN) {
400 if (__improbable(error)) {
401 regs->rax = error;
402 regs->isf.rflags |= EFL_CF; /* carry bit */
403 } else { /* (not error) */
404
405 switch (callp->sy_return_type) {
406 case _SYSCALL_RET_INT_T:
407 regs->rax = uthread->uu_rval[0];
408 regs->rdx = uthread->uu_rval[1];
409 break;
410 case _SYSCALL_RET_UINT_T:
411 regs->rax = ((u_int)uthread->uu_rval[0]);
412 regs->rdx = ((u_int)uthread->uu_rval[1]);
413 break;
414 case _SYSCALL_RET_OFF_T:
415 case _SYSCALL_RET_ADDR_T:
416 case _SYSCALL_RET_SIZE_T:
417 case _SYSCALL_RET_SSIZE_T:
418 case _SYSCALL_RET_UINT64_T:
419 regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
420 regs->rdx = 0;
421 break;
422 case _SYSCALL_RET_NONE:
423 break;
424 default:
425 panic("unix_syscall: unknown return type");
426 break;
427 }
428 regs->isf.rflags &= ~EFL_CF;
429 }
430 }
431
432 DEBUG_KPRINT_SYSCALL_UNIX(
433 "unix_syscall64: error=%d retval=(%llu,%llu)\n",
434 error, regs->rax, regs->rdx);
435
436 uthread->uu_flag &= ~UT_NOTCANCELPT;
437
438 #if FUNNEL_DEBUG
439 /*
440 * if we're holding the funnel panic
441 */
442 syscall_exit_funnelcheck();
443 #endif /* FUNNEL_DEBUG */
444
445 if (__improbable(uthread->uu_lowpri_window)) {
446 /*
447 * task is marked as a low priority I/O type
448 * and the I/O we issued while in this system call
449 * collided with normal I/O operations... we'll
450 * delay in order to mitigate the impact of this
451 * task on the normal operation of the system
452 */
453 throttle_lowpri_io(TRUE);
454 }
455 if (__probable(code != 180))
456 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
457 error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);
458
459 thread_exception_return();
460 /* NOTREACHED */
461 }
462
463
464 void
465 unix_syscall_return(int error)
466 {
467 thread_t thread;
468 struct uthread *uthread;
469 struct proc *p;
470 unsigned int code;
471 vm_offset_t params;
472 struct sysent *callp;
473
474 thread = current_thread();
475 uthread = get_bsdthread_info(thread);
476
477 pal_register_cache_state(thread, DIRTY);
478
479 p = current_proc();
480
481 if (proc_is64bit(p)) {
482 x86_saved_state64_t *regs;
483
484 regs = saved_state64(find_user_regs(thread));
485
486 /* reconstruct code for tracing before blasting rax */
487 code = regs->rax & SYSCALL_NUMBER_MASK;
488 callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
489
490 if (callp == sysent)
491 /*
492 * indirect system call... system call number
493 * passed as 'arg0'
494 */
495 code = regs->rdi;
496
497 #if CONFIG_DTRACE
498 if (callp->sy_call == dtrace_systrace_syscall)
499 dtrace_systrace_syscall_return( code, error, uthread->uu_rval );
500 #endif /* CONFIG_DTRACE */
501 AUDIT_SYSCALL_EXIT(code, p, uthread, error);
502
503 if (error == ERESTART) {
504 /*
505 * repeat the syscall
506 */
507 pal_syscall_restart( thread, find_user_regs(thread) );
508 }
509 else if (error != EJUSTRETURN) {
510 if (error) {
511 regs->rax = error;
512 regs->isf.rflags |= EFL_CF; /* carry bit */
513 } else { /* (not error) */
514
515 switch (callp->sy_return_type) {
516 case _SYSCALL_RET_INT_T:
517 regs->rax = uthread->uu_rval[0];
518 regs->rdx = uthread->uu_rval[1];
519 break;
520 case _SYSCALL_RET_UINT_T:
521 regs->rax = ((u_int)uthread->uu_rval[0]);
522 regs->rdx = ((u_int)uthread->uu_rval[1]);
523 break;
524 case _SYSCALL_RET_OFF_T:
525 case _SYSCALL_RET_ADDR_T:
526 case _SYSCALL_RET_SIZE_T:
527 case _SYSCALL_RET_SSIZE_T:
528 case _SYSCALL_RET_UINT64_T:
529 regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
530 regs->rdx = 0;
531 break;
532 case _SYSCALL_RET_NONE:
533 break;
534 default:
535 panic("unix_syscall: unknown return type");
536 break;
537 }
538 regs->isf.rflags &= ~EFL_CF;
539 }
540 }
541 DEBUG_KPRINT_SYSCALL_UNIX(
542 "unix_syscall_return: error=%d retval=(%llu,%llu)\n",
543 error, regs->rax, regs->rdx);
544 } else {
545 x86_saved_state32_t *regs;
546
547 regs = saved_state32(find_user_regs(thread));
548
549 regs->efl &= ~(EFL_CF);
550 /* reconstruct code for tracing before blasting eax */
551 code = regs->eax & I386_SYSCALL_NUMBER_MASK;
552 callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
553
554 #if CONFIG_DTRACE
555 if (callp->sy_call == dtrace_systrace_syscall)
556 dtrace_systrace_syscall_return( code, error, uthread->uu_rval );
557 #endif /* CONFIG_DTRACE */
558 AUDIT_SYSCALL_EXIT(code, p, uthread, error);
559
560 if (callp == sysent) {
561 params = (vm_offset_t) (regs->uesp + sizeof (int));
562 code = fuword(params);
563 }
564 if (error == ERESTART) {
565 pal_syscall_restart( thread, find_user_regs(thread) );
566 }
567 else if (error != EJUSTRETURN) {
568 if (error) {
569 regs->eax = error;
570 regs->efl |= EFL_CF; /* carry bit */
571 } else { /* (not error) */
572 regs->eax = uthread->uu_rval[0];
573 regs->edx = uthread->uu_rval[1];
574 }
575 }
576 DEBUG_KPRINT_SYSCALL_UNIX(
577 "unix_syscall_return: error=%d retval=(%u,%u)\n",
578 error, regs->eax, regs->edx);
579 }
580
581
582 uthread->uu_flag &= ~UT_NOTCANCELPT;
583
584 #if FUNNEL_DEBUG
585 /*
586 * if we're holding the funnel panic
587 */
588 syscall_exit_funnelcheck();
589 #endif /* FUNNEL_DEBUG */
590
591 if (uthread->uu_lowpri_window) {
592 /*
593 * task is marked as a low priority I/O type
594 * and the I/O we issued while in this system call
595 * collided with normal I/O operations... we'll
596 * delay in order to mitigate the impact of this
597 * task on the normal operation of the system
598 */
599 throttle_lowpri_io(TRUE);
600 }
601 if (code != 180)
602 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
603 error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);
604
605 thread_exception_return();
606 /* NOTREACHED */
607 }
608
/*
 * munge_wwwlww
 *
 * Widens a 32-bit argument list of the shape (word, word, word, long,
 * word, word) into six 64-bit slots, in place in out64.  Each word is
 * zero-extended; the long's two 32-bit halves are moved unchanged into
 * the fourth 64-bit slot.
 *
 * in32  - unused; the conversion is done in place
 * out64 - buffer holding seven 32-bit words on entry and six 64-bit
 *         values on return
 */
void
munge_wwwlww(
	__unused const void *in32,
	void *out64)
{
	uint32_t *narrow = (uint32_t *) out64;
	uint64_t *wide = (uint64_t *) out64;

	/*
	 * The widened slots overlap the packed input, so capture every
	 * input word before any slot is written.
	 */
	const uint32_t word0 = narrow[0];
	const uint32_t word1 = narrow[1];
	const uint32_t word2 = narrow[2];
	const uint32_t long_lo = narrow[3];
	const uint32_t long_hi = narrow[4];
	const uint32_t word4 = narrow[5];
	const uint32_t word5 = narrow[6];

	wide[0] = word0;
	wide[1] = word1;
	wide[2] = word2;
	narrow[6] = long_lo;	/* fourth 64-bit slot, first half */
	narrow[7] = long_hi;	/* fourth 64-bit slot, second half */
	wide[4] = word4;
	wide[5] = word5;
}
629
630
/*
 * munge_wwlwww
 *
 * Widens a 32-bit argument list of the shape (word, word, long, word,
 * word, word) into six 64-bit slots, in place in out64.  Each word is
 * zero-extended; the long's two 32-bit halves are moved unchanged into
 * the third 64-bit slot.
 *
 * in32  - unused; the conversion is done in place
 * out64 - buffer holding seven 32-bit words on entry and six 64-bit
 *         values on return
 */
void
munge_wwlwww(
	__unused const void *in32,
	void *out64)
{
	uint32_t *narrow = (uint32_t *) out64;
	uint64_t *wide = (uint64_t *) out64;

	/*
	 * The widened slots overlap the packed input, so capture every
	 * input word before any slot is written.
	 */
	const uint32_t word0 = narrow[0];
	const uint32_t word1 = narrow[1];
	const uint32_t long_lo = narrow[2];
	const uint32_t long_hi = narrow[3];
	const uint32_t word3 = narrow[4];
	const uint32_t word4 = narrow[5];
	const uint32_t word5 = narrow[6];

	wide[0] = word0;
	wide[1] = word1;
	narrow[4] = long_lo;	/* third 64-bit slot, first half */
	narrow[5] = long_hi;	/* third 64-bit slot, second half */
	wide[3] = word3;
	wide[4] = word4;
	wide[5] = word5;
}
651
Cache object: 9f6d13d5fb4b69127e05c04dc2f87fea
|