1 /*
2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1982, 1986, 1989, 1991, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95
67 */
68 /*
69 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
70 * support for mandatory and extensible security protections. This notice
71 * is included in support of clause 2.2 (b) of the Apple Public License,
72 * Version 2.0.
73 */
74
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/vnode_internal.h>
80 #include <sys/proc_internal.h>
81 #include <sys/kauth.h>
82 #include <sys/file_internal.h>
83 #include <sys/guarded.h>
84 #include <sys/priv.h>
85 #include <sys/socket.h>
86 #include <sys/socketvar.h>
87 #include <sys/stat.h>
88 #include <sys/ioctl.h>
89 #include <sys/fcntl.h>
90 #include <sys/fsctl.h>
91 #include <sys/malloc.h>
92 #include <sys/mman.h>
93 #include <sys/mount.h>
94 #include <sys/syslog.h>
95 #include <sys/unistd.h>
96 #include <sys/resourcevar.h>
97 #include <sys/aio_kern.h>
98 #include <sys/ev.h>
99 #include <kern/locks.h>
100 #include <sys/uio_internal.h>
101 #include <sys/codesign.h>
102 #include <sys/codedir_internal.h>
103 #include <sys/mount_internal.h>
104 #include <sys/kdebug.h>
105 #include <sys/sysproto.h>
106 #include <sys/pipe.h>
107 #include <sys/spawn.h>
108 #include <sys/cprotect.h>
109 #include <sys/ubc_internal.h>
110
111 #include <kern/kern_types.h>
112 #include <kern/kalloc.h>
113 #include <kern/waitq.h>
114 #include <kern/ipc_misc.h>
115 #include <kern/ast.h>
116
117 #include <vm/vm_protos.h>
118 #include <mach/mach_port.h>
119
120 #include <security/audit/audit.h>
121 #if CONFIG_MACF
122 #include <security/mac_framework.h>
123 #endif
124
125 #include <stdbool.h>
126 #include <os/atomic_private.h>
127 #include <os/overflow.h>
128 #include <IOKit/IOBSD.h>
129
130 #define IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND 0x1
131 kern_return_t ipc_object_copyin(ipc_space_t, mach_port_name_t,
132 mach_msg_type_name_t, ipc_port_t *, mach_port_context_t, mach_msg_guard_flags_t *, uint32_t);
133 void ipc_port_release_send(ipc_port_t);
134
135 void fileport_releasefg(struct fileglob *fg);
136
137 /* flags for fp_close_and_unlock */
138 #define FD_DUP2RESV 1
139
140 /* We don't want these exported */
141
142 __private_extern__
143 int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
144
145 /* Conflict wait queue for when selects collide (opaque type) */
146 extern struct waitq select_conflict_queue;
147
148 #define f_flag fp_glob->fg_flag
149 #define f_type fp_glob->fg_ops->fo_type
150 #define f_cred fp_glob->fg_cred
151 #define f_ops fp_glob->fg_ops
152 #define f_offset fp_glob->fg_offset
153
154 ZONE_DEFINE_TYPE(fg_zone, "fileglob", struct fileglob, ZC_ZFREE_CLEARMEM);
155 ZONE_DEFINE_ID(ZONE_ID_FILEPROC, "fileproc", struct fileproc, ZC_ZFREE_CLEARMEM);
156
157 /*
158 * If you need accounting for KM_OFILETABL consider using
159 * KALLOC_HEAP_DEFINE to define a view.
160 */
161 #define KM_OFILETABL KHEAP_DEFAULT
162
163 /*
164 * rdar://88960128
165 */
/*
 * fd_ofiles and fd_ofileflags are carved out of one allocation of
 * n_files * OFILESIZE bytes: the flags array immediately follows the
 * fileproc pointer array (see fdalloc() / fdt_fork()).
 */
#define fd_alloc_files(n_files, flags) \
	__typed_allocators_ignore_push \
	kheap_alloc(KM_OFILETABL, (n_files) * OFILESIZE, flags) \
	__typed_allocators_ignore_pop

/*
 * NOTE: this must free the `files` argument, not a caller-scope
 * variable; callers pass differently named pointers (e.g. `newofiles`
 * in fdalloc()), so referencing `ofiles` here freed the wrong -
 * possibly uninitialized - pointer.
 */
#define fd_free_files(files, n_files) \
	__typed_allocators_ignore_push \
	kheap_free(KM_OFILETABL, files, (n_files) * OFILESIZE) \
	__typed_allocators_ignore_pop
175
176 /*
177 * Descriptor management.
178 */
179 int nfiles; /* actual number of open files */
180 /*
181 * "uninitialized" ops -- ensure FILEGLOB_DTYPE(fg) always exists
182 */
183 static const struct fileops uninitops;
184
185 os_refgrp_decl(, f_refgrp, "files refcounts", NULL);
186 static LCK_GRP_DECLARE(file_lck_grp, "file");
187
188
189 #pragma mark fileglobs
190
/*!
 * @function fg_free
 *
 * @brief
 * Free a file structure.
 *
 * @discussion
 * Decrements the global open-file count (nfiles), releases the
 * per-vnode data hung off fg_vn_data, drops the stashed credential
 * if one is held, destroys the embedded fg_lock, and returns the
 * fileglob to fg_zone.  Invoked from fg_drop() once the last
 * reference has been released.
 */
static void
fg_free(struct fileglob *fg)
{
	os_atomic_dec(&nfiles, relaxed);

	/* release directory-read caching data, if any */
	if (fg->fg_vn_data) {
		fg_vn_data_free(fg->fg_vn_data);
		fg->fg_vn_data = NULL;
	}

	/* the open credential was stashed at open time; drop our ref */
	kauth_cred_t cred = fg->fg_cred;
	if (IS_VALID_CRED(cred)) {
		kauth_cred_unref(&cred);
		fg->fg_cred = NOCRED;
	}
	lck_mtx_destroy(&fg->fg_lock, &file_lck_grp);

#if CONFIG_MACF && CONFIG_VNGUARD
	/* tear down any vnode-guard label state attached to this fg */
	vng_file_label_destroy(fg);
#endif
	zfree(fg_zone, fg);
}
219
/*
 * fg_ref
 *
 * Take an additional reference on a fileglob.  The caller must hold
 * proc_fdlock for `p`; this is asserted on DEBUG/DEVELOPMENT kernels
 * and unchecked on RELEASE.
 */
OS_ALWAYS_INLINE
void
fg_ref(proc_t p, struct fileglob *fg)
{
#if DEBUG || DEVELOPMENT
	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#else
	(void)p; /* only used for the lock assertion above */
#endif
	os_ref_retain_raw(&fg->fg_count, &f_refgrp);
}
231
/*
 * fg_drop_live
 *
 * Drop a fileglob reference that the caller guarantees is not the
 * last one (os_ref_release_live_raw enforces this); no close
 * processing is performed.
 */
void
fg_drop_live(struct fileglob *fg)
{
	os_ref_release_live_raw(&fg->fg_count, &f_refgrp);
}
237
/*
 * fg_drop
 *
 * Drop a reference on a fileglob; when the last reference goes away,
 * invoke fo_close() on it and free the structure.
 *
 * Per POSIX close semantics, if `p` has taken POSIX advisory locks
 * (P_LADVLOCK) and the fg is a vnode, ALL of the process's POSIX
 * locks on that vnode are released here regardless of the remaining
 * reference count.
 *
 * Returns: 0 on success (or fg == NULL); otherwise the error from
 * vnode_getwithref() on the unlock path or from fo_close().
 */
int
fg_drop(proc_t p, struct fileglob *fg)
{
	struct vnode *vp;
	struct vfs_context context;
	int error = 0;

	if (fg == NULL) {
		return 0;
	}

	/* Set up context with cred stashed in fg */
	if (p == current_proc()) {
		context.vc_thread = current_thread();
	} else {
		context.vc_thread = NULL;
	}
	context.vc_ucred = fg->fg_cred;

	/*
	 * POSIX record locking dictates that any close releases ALL
	 * locks owned by this process.  This is handled by setting
	 * a flag in the unlock to free ONLY locks obeying POSIX
	 * semantics, and not to free BSD-style file locks.
	 * If the descriptor was in a message, POSIX-style locks
	 * aren't passed with the descriptor.
	 */
	if (p && DTYPE_VNODE == FILEGLOB_DTYPE(fg) &&
	    (p->p_ladvflag & P_LADVLOCK)) {
		struct flock lf = {
			.l_whence = SEEK_SET,
			.l_type = F_UNLCK,
		};

		vp = (struct vnode *)fg_get_data(fg);
		if ((error = vnode_getwithref(vp)) == 0) {
			(void)VNOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX, &context, NULL);
			(void)vnode_put(vp);
		}
	}

	if (os_ref_release_raw(&fg->fg_count, &f_refgrp) == 0) {
		/*
		 * Since we ensure that fg->fg_ops is always initialized,
		 * it is safe to invoke fo_close on the fg
		 */
		error = fo_close(fg, &context);

		fg_free(fg);
	}

	return error;
}
291
/*
 * fg_set_data
 *
 * Store the type-specific payload pointer (vnode, socket, pipe, ...)
 * in fg->fg_data.
 *
 * On ptrauth-capable hardware the pointer is signed with a
 * discriminator blended from the storage address and the fileglob's
 * type, so a signed fg_data value cannot be transplanted into a
 * different fileglob or reinterpreted as a different type.
 * A NULL payload is stored unsigned.
 */
inline
void
fg_set_data(
	struct fileglob *fg,
	void *fg_data)
{
	uintptr_t *store = &fg->fg_data;

#if __has_feature(ptrauth_calls)
	int type = FILEGLOB_DTYPE(fg);

	if (fg_data) {
		type ^= OS_PTRAUTH_DISCRIMINATOR("fileglob.fg_data");
		fg_data = ptrauth_sign_unauthenticated(fg_data,
		    ptrauth_key_process_independent_data,
		    ptrauth_blend_discriminator(store, type));
	}
#endif // __has_feature(ptrauth_calls)

	*store = (uintptr_t)fg_data;
}
313
/*
 * fg_get_data_volatile
 *
 * Read fg->fg_data and, when pointer authentication is in use,
 * authenticate it with the same blended discriminator used by
 * fg_set_data().  NULL is stored and returned unsigned.
 */
inline
void *
fg_get_data_volatile(struct fileglob *fg)
{
	uintptr_t *store = &fg->fg_data;
	void *fg_data = (void *)*store;

#if __has_feature(ptrauth_calls)
	int type = FILEGLOB_DTYPE(fg);

	if (fg_data) {
		type ^= OS_PTRAUTH_DISCRIMINATOR("fileglob.fg_data");
		fg_data = ptrauth_auth_data(fg_data,
		    ptrauth_key_process_independent_data,
		    ptrauth_blend_discriminator(store, type));
	}
#endif // __has_feature(ptrauth_calls)

	return fg_data;
}
334
335 static void
336 fg_transfer_filelocks(proc_t p, struct fileglob *fg, thread_t thread)
337 {
338 struct vnode *vp;
339 struct vfs_context context;
340 struct proc *old_proc = current_proc();
341
342 assert(fg != NULL);
343
344 assert(p != old_proc);
345 context.vc_thread = thread;
346 context.vc_ucred = fg->fg_cred;
347
348 /* Transfer all POSIX Style locks to new proc */
349 if (p && DTYPE_VNODE == FILEGLOB_DTYPE(fg) &&
350 (p->p_ladvflag & P_LADVLOCK)) {
351 struct flock lf = {
352 .l_whence = SEEK_SET,
353 .l_start = 0,
354 .l_len = 0,
355 .l_type = F_TRANSFER,
356 };
357
358 vp = (struct vnode *)fg_get_data(fg);
359 if (vnode_getwithref(vp) == 0) {
360 (void)VNOP_ADVLOCK(vp, (caddr_t)old_proc, F_TRANSFER, &lf, F_POSIX, &context, NULL);
361 (void)vnode_put(vp);
362 }
363 }
364
365 /* Transfer all OFD Style locks to new proc */
366 if (p && DTYPE_VNODE == FILEGLOB_DTYPE(fg) &&
367 (fg->fg_lflags & FG_HAS_OFDLOCK)) {
368 struct flock lf = {
369 .l_whence = SEEK_SET,
370 .l_start = 0,
371 .l_len = 0,
372 .l_type = F_TRANSFER,
373 };
374
375 vp = (struct vnode *)fg_get_data(fg);
376 if (vnode_getwithref(vp) == 0) {
377 (void)VNOP_ADVLOCK(vp, (caddr_t)fg, F_TRANSFER, &lf, F_OFD_LOCK, &context, NULL);
378 (void)vnode_put(vp);
379 }
380 }
381 return;
382 }
383
384 bool
385 fg_sendable(struct fileglob *fg)
386 {
387 switch (FILEGLOB_DTYPE(fg)) {
388 case DTYPE_VNODE:
389 case DTYPE_SOCKET:
390 case DTYPE_PIPE:
391 case DTYPE_PSXSHM:
392 case DTYPE_NETPOLICY:
393 return (fg->fg_lflags & FG_CONFINED) == 0;
394
395 default:
396 return false;
397 }
398 }
399
400 #pragma mark file descriptor table (static helpers)
401
/*
 * procfdtbl_reservefd
 *
 * Mark slot `fd` as reserved (UF_RESERVED, no fileproc installed) so
 * no other thread can claim it while it is being set up.  Caller is
 * expected to hold proc_fdlock (see fdalloc()).
 */
static void
procfdtbl_reservefd(struct proc * p, int fd)
{
	p->p_fd.fd_ofiles[fd] = NULL;
	p->p_fd.fd_ofileflags[fd] |= UF_RESERVED;
}
408
/*
 * procfdtbl_releasefd
 *
 * Install `fp` (if non-NULL) in slot `fd` and clear the reservation;
 * wake any threads sleeping on the reservation (UF_RESVWAIT set by
 * procfdtbl_waitfd()).
 */
void
procfdtbl_releasefd(struct proc * p, int fd, struct fileproc * fp)
{
	if (fp != NULL) {
		p->p_fd.fd_ofiles[fd] = fp;
	}
	p->p_fd.fd_ofileflags[fd] &= ~UF_RESERVED;
	if ((p->p_fd.fd_ofileflags[fd] & UF_RESVWAIT) == UF_RESVWAIT) {
		p->p_fd.fd_ofileflags[fd] &= ~UF_RESVWAIT;
		wakeup(&p->p_fd);
	}
}
421
/*
 * procfdtbl_waitfd
 *
 * Flag slot `fd` with UF_RESVWAIT and sleep until the reservation is
 * released; msleep() drops and re-takes fd_lock internally.
 */
static void
procfdtbl_waitfd(struct proc * p, int fd)
{
	p->p_fd.fd_ofileflags[fd] |= UF_RESVWAIT;
	msleep(&p->p_fd, &p->p_fd.fd_lock, PRIBIO, "ftbl_waitfd", NULL);
}
428
429 static void
430 procfdtbl_clearfd(struct proc * p, int fd)
431 {
432 int waiting;
433
434 waiting = (p->p_fd.fd_ofileflags[fd] & UF_RESVWAIT);
435 p->p_fd.fd_ofiles[fd] = NULL;
436 p->p_fd.fd_ofileflags[fd] = 0;
437 if (waiting == UF_RESVWAIT) {
438 wakeup(&p->p_fd);
439 }
440 }
441
/*
 * fdrelse
 *
 * Description: Inline utility function to free an fd in a filedesc
 *
 * Parameters:	p		Process whose fd table contains fd
 *		fd		fd to free
 *
 * Returns:	void
 *
 * Locks:	Assumes proc_fdlock for process p is held by
 *		the caller
 */
void
fdrelse(struct proc * p, int fd)
{
	struct filedesc *fdp = &p->p_fd;
	int nfd = 0;

	/* keep fd_freefile pointing at the lowest free slot */
	if (fd < fdp->fd_freefile) {
		fdp->fd_freefile = fd;
	}
#if DIAGNOSTIC
	if (fd >= fdp->fd_afterlast) {
		panic("fdrelse: fd_afterlast inconsistent");
	}
#endif
	procfdtbl_clearfd(p, fd);

	/* shrink fd_afterlast past any trailing free, unreserved slots */
	nfd = fdp->fd_afterlast;
	while (nfd > 0 && fdp->fd_ofiles[nfd - 1] == NULL &&
	    !(fdp->fd_ofileflags[nfd - 1] & UF_RESERVED)) {
		nfd--;
	}
	fdp->fd_afterlast = nfd;

#if CONFIG_PROC_RESOURCE_LIMITS
	fdp->fd_nfiles_open--;
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
}
483
484
/*
 * finishdup
 *
 * Description: Common code for dup, dup2, and fcntl(F_DUPFD).
 *
 * Parameters:	p		Process performing the dup
 *		fdp		p's file table (&p->p_fd)
 *		old		The fd to dup
 *		new		The fd to dup it to
 *		fp_flags	Flags to augment the new fp
 *		retval		Pointer to the call return area
 *
 * Returns:	0		Success
 *		EBADF
 *		ENOMEM
 *
 * Implicit returns:
 *		*retval (modified)	The new descriptor
 *
 * Locks:	Assumes proc_fdlock for process pointing to fdp is held by
 *		the caller
 *
 * Notes:	This function may drop and reacquire this lock; it is unsafe
 *		for a caller to assume that other state protected by the lock
 *		has not been subsequently changed out from under it.
 */
static int
finishdup(proc_t p, struct filedesc *fdp, int old, int new,
    fileproc_flags_t fp_flags, int32_t *retval)
{
	struct fileproc *nfp;
	struct fileproc *ofp;
#if CONFIG_MACF
	int error;
	kauth_cred_t cred;
#endif

#if DIAGNOSTIC
	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif
	/* the old fd must be open and settled; undo the reservation of new */
	if ((ofp = fdp->fd_ofiles[old]) == NULL ||
	    (fdp->fd_ofileflags[old] & UF_RESERVED)) {
		fdrelse(p, new);
		return EBADF;
	}

#if CONFIG_MACF
	cred = kauth_cred_proc_ref(p);
	error = mac_file_check_dup(cred, ofp->fp_glob, new);
	kauth_cred_unref(&cred);

	if (error) {
		fdrelse(p, new);
		return error;
	}
#endif

	/* the fileglob is shared between the old and the new descriptor */
	fg_ref(p, ofp->fp_glob);

	/* drop the lock for the allocation; slot `new` stays UF_RESERVED */
	proc_fdunlock(p);

	nfp = fileproc_alloc_init();

	if (fp_flags) {
		nfp->fp_flags |= fp_flags;
	}
	nfp->fp_glob = ofp->fp_glob;

	proc_fdlock(p);

#if DIAGNOSTIC
	if (fdp->fd_ofiles[new] != 0) {
		panic("finishdup: overwriting fd_ofiles with new %d", new);
	}
	if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0) {
		panic("finishdup: unreserved fileflags with new %d", new);
	}
#endif

	if (new >= fdp->fd_afterlast) {
		fdp->fd_afterlast = new + 1;
	}
	procfdtbl_releasefd(p, new, nfp);
	*retval = new;
	return 0;
}
570
571
572 #pragma mark file descriptor table (exported functions)
573
/*
 * proc_dirs_lock_shared / proc_dirs_unlock_shared
 * proc_dirs_lock_exclusive / proc_dirs_unlock_exclusive
 *
 * Reader/writer lock protecting the per-process current and root
 * directory vnodes (fd_cdir / fd_rdir).
 */
void
proc_dirs_lock_shared(proc_t p)
{
	lck_rw_lock_shared(&p->p_fd.fd_dirs_lock);
}

void
proc_dirs_unlock_shared(proc_t p)
{
	lck_rw_unlock_shared(&p->p_fd.fd_dirs_lock);
}

void
proc_dirs_lock_exclusive(proc_t p)
{
	lck_rw_lock_exclusive(&p->p_fd.fd_dirs_lock);
}

void
proc_dirs_unlock_exclusive(proc_t p)
{
	lck_rw_unlock_exclusive(&p->p_fd.fd_dirs_lock);
}
597
598 /*
599 * proc_fdlock, proc_fdlock_spin
600 *
601 * Description: Lock to control access to the per process struct fileproc
602 * and struct filedesc
603 *
604 * Parameters: p Process to take the lock on
605 *
606 * Returns: void
607 *
608 * Notes: The lock is initialized in forkproc() and destroyed in
609 * reap_child_process().
610 */
void
proc_fdlock(proc_t p)
{
	lck_mtx_lock(&p->p_fd.fd_lock);
}

/* Spin-mode variant of proc_fdlock(); see lck_mtx_lock_spin(). */
void
proc_fdlock_spin(proc_t p)
{
	lck_mtx_lock_spin(&p->p_fd.fd_lock);
}

/* Assert the ownership state of fd_lock as described by assertflags. */
void
proc_fdlock_assert(proc_t p, int assertflags)
{
	lck_mtx_assert(&p->p_fd.fd_lock, assertflags);
}
628
629
/*
 * proc_fdunlock
 *
 * Description: Unlock the lock previously locked by a call to
 *		proc_fdlock() or proc_fdlock_spin()
 *
 * Parameters:	p	Process to drop the lock on
 *
 * Returns:	void
 */
void
proc_fdunlock(proc_t p)
{
	lck_mtx_unlock(&p->p_fd.fd_lock);
}
644
/*
 * fdt_available_locked
 *
 * Return true if `n` more descriptors can be opened without hitting
 * the process's file limit: first credit the headroom between the
 * limit and the currently allocated table size, then count free
 * (NULL, unreserved) slots in the existing table until `n` is
 * satisfied.  `n` and `i` are consumed in place (both pass-by-value).
 *
 * Locks: assumes the caller holds proc_fdlock for `p`.
 */
bool
fdt_available_locked(proc_t p, int n)
{
	struct filedesc *fdp = &p->p_fd;
	struct fileproc **fpp;
	char *flags;
	int i;
	int lim = proc_limitgetcur_nofile(p);

	/* headroom the table could still grow by counts as available */
	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0) {
		return true;
	}
	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
	flags = &fdp->fd_ofileflags[fdp->fd_freefile];
	for (i = fdp->fd_nfiles - fdp->fd_freefile; --i >= 0; fpp++, flags++) {
		if (*fpp == NULL && !(*flags & UF_RESERVED) && --n <= 0) {
			return true;
		}
	}
	return false;
}
666
667
668 struct fdt_iterator
669 fdt_next(proc_t p, int fd, bool only_settled)
670 {
671 struct fdt_iterator it;
672 struct filedesc *fdp = &p->p_fd;
673 struct fileproc *fp;
674 int nfds = fdp->fd_afterlast;
675
676 while (++fd < nfds) {
677 fp = fdp->fd_ofiles[fd];
678 if (fp == NULL || fp->fp_glob == NULL) {
679 continue;
680 }
681 if (only_settled && (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
682 continue;
683 }
684 it.fdti_fd = fd;
685 it.fdti_fp = fp;
686 return it;
687 }
688
689 it.fdti_fd = nfds;
690 it.fdti_fp = NULL;
691 return it;
692 }
693
694 struct fdt_iterator
695 fdt_prev(proc_t p, int fd, bool only_settled)
696 {
697 struct fdt_iterator it;
698 struct filedesc *fdp = &p->p_fd;
699 struct fileproc *fp;
700
701 while (--fd >= 0) {
702 fp = fdp->fd_ofiles[fd];
703 if (fp == NULL || fp->fp_glob == NULL) {
704 continue;
705 }
706 if (only_settled && (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
707 continue;
708 }
709 it.fdti_fd = fd;
710 it.fdti_fp = fp;
711 return it;
712 }
713
714 it.fdti_fd = -1;
715 it.fdti_fp = NULL;
716 return it;
717 }
718
/*
 * fdt_init
 *
 * Initialize the locks embedded in a process's filedesc; the exact
 * counterpart of fdt_destroy().
 */
void
fdt_init(proc_t p)
{
	struct filedesc *fdp = &p->p_fd;

	lck_mtx_init(&fdp->fd_kqhashlock, &proc_kqhashlock_grp, &proc_lck_attr);
	lck_mtx_init(&fdp->fd_knhashlock, &proc_knhashlock_grp, &proc_lck_attr);
	lck_mtx_init(&fdp->fd_lock, &proc_fdmlock_grp, &proc_lck_attr);
	lck_rw_init(&fdp->fd_dirs_lock, &proc_dirslock_grp, &proc_lck_attr);
}
729
/*
 * fdt_destroy
 *
 * Destroy the filedesc locks initialized by fdt_init().
 */
void
fdt_destroy(proc_t p)
{
	struct filedesc *fdp = &p->p_fd;

	lck_mtx_destroy(&fdp->fd_kqhashlock, &proc_kqhashlock_grp);
	lck_mtx_destroy(&fdp->fd_knhashlock, &proc_knhashlock_grp);
	lck_mtx_destroy(&fdp->fd_lock, &proc_fdmlock_grp);
	lck_rw_destroy(&fdp->fd_dirs_lock, &proc_dirslock_grp);
}
740
/*
 * fdt_exec
 *
 * Description: Adjust the descriptor table across exec/posix_spawn:
 *		unbind a bound workq/workloop servicing thread, drop all
 *		knotes, close every descriptor that must not survive
 *		(FP_CLOEXEC, MAC-denied, or - under
 *		POSIX_SPAWN_CLOEXEC_DEFAULT - everything not explicitly
 *		marked UF_INHERIT), transfer file locks to the new
 *		process when this is a true exec, and release the
 *		per-process workq kqueue.
 *
 * Parameters:	p			the (new) process
 *		posix_spawn_flags	POSIX_SPAWN_* flags from spawn
 *		thread			new process's thread, for lock transfer
 *		in_exec			true for exec (vs. spawn of a new proc)
 */
void
fdt_exec(proc_t p, short posix_spawn_flags, thread_t thread, bool in_exec)
{
	struct filedesc *fdp = &p->p_fd;
	thread_t self = current_thread();
	struct uthread *ut = get_bsdthread_info(self);
	struct kqworkq *dealloc_kqwq = NULL;

	/*
	 * If the current thread is bound as a workq/workloop
	 * servicing thread, we need to unbind it first.
	 */
	if (ut->uu_kqr_bound && get_bsdthreadtask_info(self) == p) {
		kqueue_threadreq_unbind(p, ut->uu_kqr_bound);
	}

	/*
	 * Deallocate the knotes for this process
	 * and mark the tables non-existent so
	 * subsequent kqueue closes go faster.
	 */
	knotes_dealloc(p);
	assert(fdp->fd_knlistsize == 0);
	assert(fdp->fd_knhashmask == 0);

	proc_fdlock(p);

	/* Set the P_LADVLOCK flag if the flag set on old proc */
	if (in_exec && (current_proc()->p_ladvflag & P_LADVLOCK)) {
		os_atomic_or(&p->p_ladvflag, P_LADVLOCK, relaxed);
	}

	for (int i = fdp->fd_afterlast; i-- > 0;) {
		struct fileproc *fp = fdp->fd_ofiles[i];
		char *flagp = &fdp->fd_ofileflags[i];
		bool inherit_file = true;

		if (fp == FILEPROC_NULL) {
			continue;
		}

		/*
		 * no file descriptor should be in flux when in exec,
		 * because we stopped all other threads
		 */
		if (*flagp & ~UF_INHERIT) {
			panic("file %d/%p in flux during exec of %p", i, fp, p);
		}

		if (fp->fp_flags & FP_CLOEXEC) {
			inherit_file = false;
		} else if ((posix_spawn_flags & POSIX_SPAWN_CLOEXEC_DEFAULT) &&
		    !(*flagp & UF_INHERIT)) {
			/*
			 * Reverse the usual semantics of file descriptor
			 * inheritance - all of them should be closed
			 * except files marked explicitly as "inherit" and
			 * not marked close-on-exec.
			 */
			inherit_file = false;
#if CONFIG_MACF
		} else if (mac_file_check_inherit(proc_ucred(p), fp->fp_glob)) {
			inherit_file = false;
#endif
		}

		*flagp = 0; /* clear UF_INHERIT */

		if (!inherit_file) {
			/* fp_close_and_unlock() drops the fdlock; re-take it */
			fp_close_and_unlock(p, i, fp, 0);
			proc_fdlock(p);
		} else if (in_exec) {
			/* Transfer F_POSIX style lock to new proc */
			proc_fdunlock(p);
			fg_transfer_filelocks(p, fp->fp_glob, thread);
			proc_fdlock(p);
		}
	}

	/* release the per-process workq kq */
	if (fdp->fd_wqkqueue) {
		dealloc_kqwq = fdp->fd_wqkqueue;
		fdp->fd_wqkqueue = NULL;
	}

	proc_fdunlock(p);

	/* Anything to free? */
	if (dealloc_kqwq) {
		kqworkq_dealloc(dealloc_kqwq);
	}
}
833
834
/*
 * fdt_fork
 *
 * Description: Populate `newfdp` from p's descriptor table for fork
 *		(or the in_exec spawn path).  Takes references on the
 *		parent's root and current directories, then duplicates
 *		every inheritable fileproc - skipping confined
 *		fileglobs, close-on-fork descriptors on fork,
 *		close-on-exec descriptors on exec, and reserved slots.
 *
 * Returns:	0	Success
 *		EPERM	could not re-reference the inherited chroot dir
 *		ENOMEM	descriptor table allocation failed
 */
int
fdt_fork(struct filedesc *newfdp, proc_t p, vnode_t uth_cdir, bool in_exec)
{
	struct filedesc *fdp = &p->p_fd;
	struct fileproc **ofiles;
	char *ofileflags;
	int n_files, afterlast, freefile;
	vnode_t v_dir;
#if CONFIG_PROC_RESOURCE_LIMITS
	int fd_nfiles_open = 0;
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
	proc_fdlock(p);

	newfdp->fd_flags = (fdp->fd_flags & FILEDESC_FORK_INHERITED_MASK);
	newfdp->fd_cmask = fdp->fd_cmask;
#if CONFIG_PROC_RESOURCE_LIMITS
	newfdp->fd_nfiles_soft_limit = fdp->fd_nfiles_soft_limit;
	newfdp->fd_nfiles_hard_limit = fdp->fd_nfiles_hard_limit;
#endif /* CONFIG_PROC_RESOURCE_LIMITS */

	/*
	 * For both fd_cdir and fd_rdir make sure we get
	 * a valid reference... if we can't, than set
	 * set the pointer(s) to NULL in the child... this
	 * will keep us from using a non-referenced vp
	 * and allows us to do the vnode_rele only on
	 * a properly referenced vp
	 */
	if ((v_dir = fdp->fd_rdir)) {
		if (vnode_getwithref(v_dir) == 0) {
			if (vnode_ref(v_dir) == 0) {
				newfdp->fd_rdir = v_dir;
			}
			vnode_put(v_dir);
		}
		if (newfdp->fd_rdir == NULL) {
			/*
			 * We couldn't get a new reference on
			 * the chroot directory being
			 * inherited... this is fatal, since
			 * otherwise it would constitute an
			 * escape from a chroot environment by
			 * the new process.
			 */
			proc_fdunlock(p);
			return EPERM;
		}
	}

	/*
	 * If we are running with per-thread current working directories,
	 * inherit the new current working directory from the current thread.
	 */
	if ((v_dir = uth_cdir ? uth_cdir : fdp->fd_cdir)) {
		if (vnode_getwithref(v_dir) == 0) {
			if (vnode_ref(v_dir) == 0) {
				newfdp->fd_cdir = v_dir;
			}
			vnode_put(v_dir);
		}
		if (newfdp->fd_cdir == NULL && v_dir == fdp->fd_cdir) {
			/*
			 * we couldn't get a new reference on
			 * the current working directory being
			 * inherited... we might as well drop
			 * our reference from the parent also
			 * since the vnode has gone DEAD making
			 * it useless... by dropping it we'll
			 * be that much closer to recycling it
			 */
			vnode_rele(fdp->fd_cdir);
			fdp->fd_cdir = NULL;
		}
	}

	/*
	 * If the number of open files fits in the internal arrays
	 * of the open file structure, use them, otherwise allocate
	 * additional memory for the number of descriptors currently
	 * in use.
	 */
	afterlast = fdp->fd_afterlast;
	freefile = fdp->fd_freefile;
	if (afterlast <= NDFILE) {
		n_files = NDFILE;
	} else {
		n_files = roundup(afterlast, NDEXTENT);
	}

	/* allocation may block; can't hold the fdlock across it */
	proc_fdunlock(p);

	ofiles = fd_alloc_files(n_files, Z_WAITOK | Z_ZERO);
	if (ofiles == NULL) {
		/* undo the directory references taken above */
		if (newfdp->fd_cdir) {
			vnode_rele(newfdp->fd_cdir);
			newfdp->fd_cdir = NULL;
		}
		if (newfdp->fd_rdir) {
			vnode_rele(newfdp->fd_rdir);
			newfdp->fd_rdir = NULL;
		}
		return ENOMEM;
	}
	/* flags array lives immediately after the pointer array */
	ofileflags = (char *)&ofiles[n_files];

	proc_fdlock(p);

	for (int i = afterlast; i-- > 0;) {
		struct fileproc *ofp, *nfp;
		char flags;

		ofp = fdp->fd_ofiles[i];
		flags = fdp->fd_ofileflags[i];

		if (ofp == NULL ||
		    (ofp->fp_glob->fg_lflags & FG_CONFINED) ||
		    ((ofp->fp_flags & FP_CLOFORK) && !in_exec) ||
		    ((ofp->fp_flags & FP_CLOEXEC) && in_exec) ||
		    (flags & UF_RESERVED)) {
			/* not inherited: keep afterlast/freefile accurate */
			if (i + 1 == afterlast) {
				afterlast = i;
			}
			if (i < freefile) {
				freefile = i;
			}

			continue;
		}

		nfp = fileproc_alloc_init();
		nfp->fp_glob = ofp->fp_glob;
		if (in_exec) {
			nfp->fp_flags = (ofp->fp_flags & (FP_CLOEXEC | FP_CLOFORK));
			if (ofp->fp_guard_attrs) {
				guarded_fileproc_copy_guard(ofp, nfp);
			}
		} else {
			assert(ofp->fp_guard_attrs == 0);
			nfp->fp_flags = (ofp->fp_flags & FP_CLOEXEC);
		}
		fg_ref(p, nfp->fp_glob);

		ofiles[i] = nfp;
#if CONFIG_PROC_RESOURCE_LIMITS
		fd_nfiles_open++;
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
	}

	proc_fdunlock(p);

	newfdp->fd_ofiles = ofiles;
	newfdp->fd_ofileflags = ofileflags;
	newfdp->fd_nfiles = n_files;
	newfdp->fd_afterlast = afterlast;
	newfdp->fd_freefile = freefile;

#if CONFIG_PROC_RESOURCE_LIMITS
	newfdp->fd_nfiles_open = fd_nfiles_open;
#endif /* CONFIG_PROC_RESOURCE_LIMITS */

	return 0;
}
997
/*
 * fdt_invalidate
 *
 * Description: Tear down a process's descriptor table: drop all
 *		knotes and workloops, close every open descriptor, then
 *		detach the table and free the deferred pieces (table
 *		memory, workq kqueue, cwd/root vnode references, kqueue
 *		hash) after the fdlock has been dropped.
 */
void
fdt_invalidate(proc_t p)
{
	struct filedesc *fdp = &p->p_fd;
	struct fileproc *fp, **ofiles;
	struct kqworkq *kqwq = NULL;
	vnode_t vn1 = NULL, vn2 = NULL;
	struct kqwllist *kqhash = NULL;
	u_long kqhashmask = 0;
	int n_files = 0;

	/*
	 * deallocate all the knotes up front and claim empty
	 * tables to make any subsequent kqueue closes faster.
	 */
	knotes_dealloc(p);
	assert(fdp->fd_knlistsize == 0);
	assert(fdp->fd_knhashmask == 0);

	/*
	 * dealloc all workloops that have outstanding retains
	 * when created with scheduling parameters.
	 */
	kqworkloops_dealloc(p);

	proc_fdlock(p);

	/* close file descriptors */
	if (fdp->fd_nfiles > 0 && fdp->fd_ofiles) {
		for (int i = fdp->fd_afterlast; i-- > 0;) {
			if ((fp = fdp->fd_ofiles[i]) != NULL) {
				if (fdp->fd_ofileflags[i] & UF_RESERVED) {
					panic("fdfree: found fp with UF_RESERVED");
				}
				/* fp_close_and_unlock() drops the fdlock */
				fp_close_and_unlock(p, i, fp, 0);
				proc_fdlock(p);
			}
		}
	}

	/* snapshot everything that must be freed outside the fdlock */
	n_files = fdp->fd_nfiles;
	ofiles = fdp->fd_ofiles;
	kqwq = fdp->fd_wqkqueue;
	vn1 = fdp->fd_cdir;
	vn2 = fdp->fd_rdir;

	fdp->fd_ofileflags = NULL;
	fdp->fd_ofiles = NULL;
	fdp->fd_nfiles = 0;
	fdp->fd_wqkqueue = NULL;
	fdp->fd_cdir = NULL;
	fdp->fd_rdir = NULL;

	proc_fdunlock(p);

	lck_mtx_lock(&fdp->fd_knhashlock);

	kqhash = fdp->fd_kqhash;
	kqhashmask = fdp->fd_kqhashmask;

	fdp->fd_kqhash = 0;
	fdp->fd_kqhashmask = 0;

	lck_mtx_unlock(&fdp->fd_knhashlock);

	/* now free everything with no locks held */
	fd_free_files(ofiles, n_files);

	if (kqwq) {
		kqworkq_dealloc(kqwq);
	}
	if (vn1) {
		vnode_rele(vn1);
	}
	if (vn2) {
		vnode_rele(vn2);
	}
	if (kqhash) {
		for (uint32_t i = 0; i <= kqhashmask; i++) {
			assert(LIST_EMPTY(&kqhash[i]));
		}
		hashdestroy(kqhash, M_KQUEUE, kqhashmask);
	}
}
1081
1082
/*
 * fileproc_alloc_init
 *
 * Allocate a zeroed fileproc and initialize its I/O reference count
 * to 1.  Z_WAITOK | Z_NOFAIL: may block, never returns NULL.
 */
struct fileproc *
fileproc_alloc_init(void)
{
	struct fileproc *fp;

	fp = zalloc_id(ZONE_ID_FILEPROC, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	os_ref_init(&fp->fp_iocount, &f_refgrp);
	return fp;
}
1092
1093
/*
 * fileproc_free
 *
 * Release the final I/O reference on a fileproc and return it to its
 * zone, dropping any guard state first.  On DEVELOPMENT/DEBUG
 * kernels, panics if outstanding iocounts remain.
 */
void
fileproc_free(struct fileproc *fp)
{
	os_ref_count_t __unused refc = os_ref_release(&fp->fp_iocount);
#if DEVELOPMENT || DEBUG
	if (0 != refc) {
		panic("%s: pid %d refc: %u != 0",
		    __func__, proc_pid(current_proc()), refc);
	}
#endif
	if (fp->fp_guard_attrs) {
		guarded_fileproc_unguard(fp);
	}
	assert(fp->fp_wset == NULL);
	zfree_id(ZONE_ID_FILEPROC, fp);
}
1110
1111
1112 /*
1113 * Statistics counter for the number of times a process calling fdalloc()
1114 * has resulted in an expansion of the per process open file table.
1115 *
1116 * XXX This would likely be of more use if it were per process
1117 */
1118 int fdexpand;
1119
1120 #if CONFIG_PROC_RESOURCE_LIMITS
1121 /*
1122 * Should be called only with the proc_fdlock held.
1123 */
1124 void
1125 fd_check_limit_exceeded(struct filedesc *fdp)
1126 {
1127 #if DIAGNOSTIC
1128 proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
1129 #endif
1130 if (!fd_above_soft_limit_notify(fdp) && fdp->fd_nfiles_soft_limit &&
1131 (fdp->fd_nfiles_open > fdp->fd_nfiles_soft_limit)) {
1132 fd_above_soft_limit_send_notification(fdp);
1133 act_set_astproc_resource(current_thread());
1134 } else if (!fd_above_hard_limit_notify(fdp) && fdp->fd_nfiles_hard_limit &&
1135 (fdp->fd_nfiles_open > fdp->fd_nfiles_hard_limit)) {
1136 fd_above_hard_limit_send_notification(fdp);
1137 act_set_astproc_resource(current_thread());
1138 }
1139 }
1140 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
1141
1142 /*
1143 * fdalloc
1144 *
1145 * Description: Allocate a file descriptor for the process.
1146 *
1147 * Parameters: p Process to allocate the fd in
1148 * want The fd we would prefer to get
1149 * result Pointer to fd we got
1150 *
1151 * Returns: 0 Success
1152 * EMFILE
1153 * ENOMEM
1154 *
1155 * Implicit returns:
1156 * *result (modified) The fd which was allocated
1157 */
int
fdalloc(proc_t p, int want, int *result)
{
	struct filedesc *fdp = &p->p_fd;
	int i;
	int last, numfiles, oldnfiles;
	struct fileproc **newofiles, **ofiles;
	char *newofileflags;
	int lim = proc_limitgetcur_nofile(p);

	/*
	 * Search for a free descriptor starting at the higher
	 * of want or fd_freefile.  If that fails, consider
	 * expanding the ofile array.
	 */
#if DIAGNOSTIC
	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif

	for (;;) {
		/* scan no further than the table end or the rlimit */
		last = (int)MIN((unsigned int)fdp->fd_nfiles, (unsigned int)lim);
		if ((i = want) < fdp->fd_freefile) {
			i = fdp->fd_freefile;
		}
		for (; i < last; i++) {
			if (fdp->fd_ofiles[i] == NULL && !(fdp->fd_ofileflags[i] & UF_RESERVED)) {
				/* found a free slot: reserve it for the caller */
				procfdtbl_reservefd(p, i);
				if (i >= fdp->fd_afterlast) {
					fdp->fd_afterlast = i + 1;
				}
				if (want <= fdp->fd_freefile) {
					fdp->fd_freefile = i;
				}
				*result = i;
#if CONFIG_PROC_RESOURCE_LIMITS
				fdp->fd_nfiles_open++;
				fd_check_limit_exceeded(fdp);
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
				return 0;
			}
		}

		/*
		 * No space in current array.  Expand?
		 */
		if ((rlim_t)fdp->fd_nfiles >= lim) {
			return EMFILE;
		}
		if (fdp->fd_nfiles < NDEXTENT) {
			numfiles = NDEXTENT;
		} else {
			numfiles = 2 * fdp->fd_nfiles;
		}
		/* Enforce lim */
		if ((rlim_t)numfiles > lim) {
			numfiles = (int)lim;
		}
		/* allocation may block; drop the fdlock around it */
		proc_fdunlock(p);
		newofiles = fd_alloc_files(numfiles, Z_WAITOK);
		proc_fdlock(p);
		if (newofiles == NULL) {
			return ENOMEM;
		}
		if (fdp->fd_nfiles >= numfiles) {
			/*
			 * lost the race: another thread grew the table
			 * while the lock was dropped; retry the scan
			 */
			fd_free_files(newofiles, numfiles);
			continue;
		}
		newofileflags = (char *) &newofiles[numfiles];
		/*
		 * Copy the existing ofile and ofileflags arrays
		 * and zero the new portion of each array.
		 */
		oldnfiles = fdp->fd_nfiles;
		(void) memcpy(newofiles, fdp->fd_ofiles,
		    oldnfiles * sizeof(*fdp->fd_ofiles));
		(void) memset(&newofiles[oldnfiles], 0,
		    (numfiles - oldnfiles) * sizeof(*fdp->fd_ofiles));

		(void) memcpy(newofileflags, fdp->fd_ofileflags,
		    oldnfiles * sizeof(*fdp->fd_ofileflags));
		(void) memset(&newofileflags[oldnfiles], 0,
		    (numfiles - oldnfiles) *
		    sizeof(*fdp->fd_ofileflags));
		ofiles = fdp->fd_ofiles;
		fdp->fd_ofiles = newofiles;
		fdp->fd_ofileflags = newofileflags;
		fdp->fd_nfiles = numfiles;
		fd_free_files(ofiles, oldnfiles);
		fdexpand++;
	}
}
1249
1250
1251 #pragma mark fileprocs
1252
1253 void
1254 fileproc_modify_vflags(struct fileproc *fp, fileproc_vflags_t vflags, boolean_t clearflags)
1255 {
1256 if (clearflags) {
1257 os_atomic_andnot(&fp->fp_vflags, vflags, relaxed);
1258 } else {
1259 os_atomic_or(&fp->fp_vflags, vflags, relaxed);
1260 }
1261 }
1262
1263 fileproc_vflags_t
1264 fileproc_get_vflags(struct fileproc *fp)
1265 {
1266 return os_atomic_load(&fp->fp_vflags, relaxed);
1267 }
1268
1269 /*
1270 * falloc_withinit
1271 *
1272 * Create a new open file structure and allocate
1273 * a file descriptor for the process that refers to it.
1274 *
1275 * Returns: 0 Success
1276 *
1277 * Description: Allocate an entry in the per process open file table and
1278 * return the corresponding fileproc and fd.
1279 *
1280 * Parameters: p The process in whose open file
1281 * table the fd is to be allocated
1282 * resultfp Pointer to fileproc pointer
1283 * return area
1284 * resultfd Pointer to fd return area
1285 * ctx VFS context
1286 * fp_zalloc fileproc allocator to use
1287 * crarg allocator args
1288 *
1289 * Returns: 0 Success
1290 * ENFILE Too many open files in system
1291 * fdalloc:EMFILE Too many open files in process
1292 * fdalloc:ENOMEM M_OFILETABL zone exhausted
1293 * ENOMEM fp_zone or fg_zone zone
1294 * exhausted
1295 *
1296 * Implicit returns:
 *		*resultfp (modified)		Returned fileproc pointer
1298 * *resultfd (modified) Returned fd
1299 *
1300 * Notes: This function takes separate process and context arguments
1301 * solely to support kern_exec.c; otherwise, it would take
1302 * neither, and use the vfs_context_current() routine internally.
1303 */
int
falloc_withinit(proc_t p, struct fileproc **resultfp, int *resultfd,
    vfs_context_t ctx, fp_initfn_t fp_init, void *initarg)
{
	struct fileproc *fp;
	struct fileglob *fg;
	int error, nfd;
#if CONFIG_MACF
	kauth_cred_t cred;
#endif

	/*
	 * Make sure we don't go beyond the system-wide limit.
	 * NOTE(review): this check runs unlocked and nfiles is only
	 * atomically incremented further down, so it is best-effort.
	 */
	if (nfiles >= maxfiles) {
		tablefull("file");
		return ENFILE;
	}

	proc_fdlock(p);

	/* fdalloc will make sure the process stays below per-process limit */
	if ((error = fdalloc(p, 0, &nfd))) {
		proc_fdunlock(p);
		return error;
	}

#if CONFIG_MACF
	/* Give the MAC policy a chance to veto file creation. */
	cred = kauth_cred_proc_ref(p);
	error = mac_file_check_create(cred);
	kauth_cred_unref(&cred);
	if (error) {
		proc_fdunlock(p);
		return error;
	}
#endif

	/*
	 * Allocate a new file descriptor.
	 * If the process has file descriptor zero open, add to the list
	 * of open files at that point, otherwise put it at the front of
	 * the list of open files.
	 *
	 * NOTE(review): dropping the lock here relies on the UF_RESERVED
	 * reservation made by fdalloc() to keep slot nfd ours.
	 */
	proc_fdunlock(p);

	fp = fileproc_alloc_init();
	/* Let the caller initialize the fresh fileproc (e.g. guards). */
	if (fp_init) {
		fp_init(fp, initarg);
	}

	fg = zalloc_flags(fg_zone, Z_WAITOK | Z_ZERO);
	lck_mtx_init(&fg->fg_lock, &file_lck_grp, LCK_ATTR_NULL);

	/*
	 * Take an extra I/O reference on fp for the caller, initialize the
	 * fileglob's refcount, and install a stub fileops vector until the
	 * caller fills in the real one.
	 */
	os_ref_retain_locked(&fp->fp_iocount);
	os_ref_init_raw(&fg->fg_count, &f_refgrp);
	fg->fg_ops = &uninitops;
	fp->fp_glob = fg;

	kauth_cred_ref(ctx->vc_ucred);

	fp->f_cred = ctx->vc_ucred;

	os_atomic_inc(&nfiles, relaxed);

	proc_fdlock(p);

	/* Publish the fileproc in the slot reserved by fdalloc(). */
	p->p_fd.fd_ofiles[nfd] = fp;

	proc_fdunlock(p);

	if (resultfp) {
		*resultfp = fp;
	}
	if (resultfd) {
		*resultfd = nfd;
	}

	return 0;
}
1381
/*
 * falloc
 *
 * Description:	Convenience wrapper around falloc_withinit() with no
 *		fileproc init callback; see falloc_withinit() above.
 */
int
falloc(proc_t p, struct fileproc **resultfp, int *resultfd, vfs_context_t ctx)
{
	return falloc_withinit(p, resultfp, resultfd, ctx, NULL, NULL);
}
1387
1388
1389 /*
1390 * fp_free
1391 *
1392 * Description: Release the fd and free the fileproc associated with the fd
1393 * in the per process open file table of the specified process;
1394 * these values must correspond.
1395 *
1396 * Parameters: p Process containing fd
1397 * fd fd to be released
1398 * fp fileproc to be freed
1399 */
void
fp_free(proc_t p, int fd, struct fileproc * fp)
{
	/* First give the descriptor slot back, under the fd lock. */
	proc_fdlock_spin(p);
	fdrelse(p, fd);
	proc_fdunlock(p);

	/*
	 * Then tear the fileproc down: release the fileglob, drop the
	 * I/O reference, and free the fileproc itself.
	 */
	fg_free(fp->fp_glob);
	os_ref_release_live(&fp->fp_iocount);
	fileproc_free(fp);
}
1411
1412
1413 struct fileproc *
1414 fp_get_noref_locked(proc_t p, int fd)
1415 {
1416 struct filedesc *fdp = &p->p_fd;
1417 struct fileproc *fp;
1418
1419 if (fd < 0 || fd >= fdp->fd_nfiles ||
1420 (fp = fdp->fd_ofiles[fd]) == NULL ||
1421 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1422 return NULL;
1423 }
1424
1425 zone_id_require(ZONE_ID_FILEPROC, sizeof(*fp), fp);
1426 return fp;
1427 }
1428
1429 struct fileproc *
1430 fp_get_noref_locked_with_iocount(proc_t p, int fd)
1431 {
1432 struct filedesc *fdp = &p->p_fd;
1433 struct fileproc *fp = NULL;
1434
1435 if (fd < 0 || fd >= fdp->fd_nfiles ||
1436 (fp = fdp->fd_ofiles[fd]) == NULL ||
1437 os_ref_get_count(&fp->fp_iocount) <= 1 ||
1438 ((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
1439 !(fdp->fd_ofileflags[fd] & UF_CLOSING))) {
1440 panic("%s: caller without an ioccount on fileproc (%d/:%p)",
1441 __func__, fd, fp);
1442 }
1443
1444 zone_id_require(ZONE_ID_FILEPROC, sizeof(*fp), fp);
1445 return fp;
1446 }
1447
1448
1449 /*
1450 * fp_lookup
1451 *
1452 * Description: Get fileproc pointer for a given fd from the per process
1453 * open file table of the specified process and if successful,
1454 * increment the fp_iocount
1455 *
1456 * Parameters: p Process in which fd lives
1457 * fd fd to get information for
1458 * resultfp Pointer to result fileproc
1459 * pointer area, or 0 if none
1460 * locked !0 if the caller holds the
1461 * proc_fdlock, 0 otherwise
1462 *
1463 * Returns: 0 Success
1464 * EBADF Bad file descriptor
1465 *
1466 * Implicit returns:
1467 * *resultfp (modified) Fileproc pointer
1468 *
1469 * Locks: If the argument 'locked' is non-zero, then the caller is
1470 * expected to have taken and held the proc_fdlock; if it is
1471 * zero, than this routine internally takes and drops this lock.
1472 */
1473 int
1474 fp_lookup(proc_t p, int fd, struct fileproc **resultfp, int locked)
1475 {
1476 struct filedesc *fdp = &p->p_fd;
1477 struct fileproc *fp;
1478
1479 if (!locked) {
1480 proc_fdlock_spin(p);
1481 }
1482 if (fd < 0 || fdp == NULL || fd >= fdp->fd_nfiles ||
1483 (fp = fdp->fd_ofiles[fd]) == NULL ||
1484 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1485 if (!locked) {
1486 proc_fdunlock(p);
1487 }
1488 return EBADF;
1489 }
1490
1491 zone_id_require(ZONE_ID_FILEPROC, sizeof(*fp), fp);
1492 os_ref_retain_locked(&fp->fp_iocount);
1493
1494 if (resultfp) {
1495 *resultfp = fp;
1496 }
1497 if (!locked) {
1498 proc_fdunlock(p);
1499 }
1500
1501 return 0;
1502 }
1503
1504
1505 int
1506 fp_get_ftype(proc_t p, int fd, file_type_t ftype, int err, struct fileproc **fpp)
1507 {
1508 struct filedesc *fdp = &p->p_fd;
1509 struct fileproc *fp;
1510
1511 proc_fdlock_spin(p);
1512 if (fd < 0 || fd >= fdp->fd_nfiles ||
1513 (fp = fdp->fd_ofiles[fd]) == NULL ||
1514 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1515 proc_fdunlock(p);
1516 return EBADF;
1517 }
1518
1519 if (fp->f_type != ftype) {
1520 proc_fdunlock(p);
1521 return err;
1522 }
1523
1524 zone_id_require(ZONE_ID_FILEPROC, sizeof(*fp), fp);
1525 os_ref_retain_locked(&fp->fp_iocount);
1526 proc_fdunlock(p);
1527
1528 *fpp = fp;
1529 return 0;
1530 }
1531
1532
1533 /*
1534 * fp_drop
1535 *
1536 * Description: Drop the I/O reference previously taken by calling fp_lookup
1537 * et. al.
1538 *
1539 * Parameters: p Process in which the fd lives
1540 * fd fd associated with the fileproc
1541 * fp fileproc on which to set the
1542 * flag and drop the reference
1543 * locked flag to internally take and
1544 * drop proc_fdlock if it is not
1545 * already held by the caller
1546 *
1547 * Returns: 0 Success
1548 * EBADF Bad file descriptor
1549 *
 * Locks:	This function internally takes and drops the proc_fdlock for
 *		the supplied process if 'locked' is zero, and assumes that
 *		the caller already holds this lock if 'locked' is non-zero.
1553 *
1554 * Notes: The fileproc must correspond to the fd in the supplied proc
1555 */
int
fp_drop(proc_t p, int fd, struct fileproc *fp, int locked)
{
	struct filedesc *fdp = &p->p_fd;
	int needwakeup = 0;

	if (!locked) {
		proc_fdlock_spin(p);
	}
	/*
	 * When no fileproc was supplied, look it up from the fd.  A slot
	 * that is UF_RESERVED without UF_CLOSING is in flux (e.g. being
	 * set up) and is rejected; a UF_CLOSING slot is still accepted so
	 * in-flight references can be dropped during close.
	 */
	if ((fp == FILEPROC_NULL) && (fd < 0 || fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL ||
	    ((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
	    !(fdp->fd_ofileflags[fd] & UF_CLOSING)))) {
		if (!locked) {
			proc_fdunlock(p);
		}
		return EBADF;
	}

	/*
	 * A release that leaves the count at 1 means ours was the last
	 * outstanding I/O reference: clear the select-conflict hint and
	 * wake any thread sleeping in fileproc_drain().
	 */
	if (1 == os_ref_release_locked(&fp->fp_iocount)) {
		if (fp->fp_flags & FP_SELCONFLICT) {
			fp->fp_flags &= ~FP_SELCONFLICT;
		}

		if (fdp->fd_fpdrainwait) {
			fdp->fd_fpdrainwait = 0;
			needwakeup = 1;
		}
	}
	if (!locked) {
		proc_fdunlock(p);
	}
	/* The wakeup is issued after the fd lock has been dropped. */
	if (needwakeup) {
		wakeup(&fdp->fd_fpdrainwait);
	}

	return 0;
}
1594
1595
1596 /*
1597 * fileproc_drain
1598 *
1599 * Description: Drain out pending I/O operations
1600 *
1601 * Parameters: p Process closing this file
1602 * fp fileproc struct for the open
1603 * instance on the file
1604 *
1605 * Returns: void
1606 *
1607 * Locks: Assumes the caller holds the proc_fdlock
1608 *
1609 * Notes: For character devices, this occurs on the last close of the
1610 * device; for all other file descriptors, this occurs on each
1611 * close to prevent fd's from being closed out from under
1612 * operations currently in progress and blocked
1613 *
1614 * See Also: file_vnode(), file_socket(), file_drop(), and the cautions
1615 * regarding their use and interaction with this function.
1616 */
static void
fileproc_drain(proc_t p, struct fileproc * fp)
{
	struct filedesc *fdp = &p->p_fd;
	struct vfs_context context;
	thread_t thread;
	bool is_current_proc;

	is_current_proc = (p == current_proc());

	/*
	 * When draining on behalf of another process, borrow one of its
	 * threads for the vfs context and reference it so it stays valid.
	 */
	if (!is_current_proc) {
		proc_lock(p);
		thread = proc_thread(p); /* XXX */
		thread_reference(thread);
		proc_unlock(p);
	} else {
		thread = current_thread();
	}

	context.vc_thread = thread;
	context.vc_ucred = fp->fp_glob->fg_cred;

	/* Set the vflag for drain */
	fileproc_modify_vflags(fp, FPV_DRAIN, FALSE);

	/*
	 * Loop until ours is the only remaining reference: each pass asks
	 * the fileops to abort blocked I/O, interrupts any selecting
	 * threads, then sleeps until stragglers drop their references
	 * (see the wakeups in fp_drop()/file_drop()).
	 */
	while (os_ref_get_count(&fp->fp_iocount) > 1) {
		lck_mtx_convert_spin(&fdp->fd_lock);

		fo_drain(fp, &context);
		if ((fp->fp_flags & FP_INSELECT) == FP_INSELECT) {
			struct select_set *selset;

			/* Guarded fileprocs keep their wait set in the guard. */
			if (fp->fp_guard_attrs) {
				selset = fp->fp_guard->fpg_wset;
			} else {
				selset = fp->fp_wset;
			}
			if (waitq_wakeup64_all(selset, NO_EVENT64,
			    THREAD_INTERRUPTED, WAITQ_WAKEUP_DEFAULT) == KERN_INVALID_ARGUMENT) {
				panic("bad wait queue for waitq_wakeup64_all %p (%sfp:%p)",
				    selset, fp->fp_guard_attrs ? "guarded " : "", fp);
			}
		}
		if ((fp->fp_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
			if (waitq_wakeup64_all(&select_conflict_queue, NO_EVENT64,
			    THREAD_INTERRUPTED, WAITQ_WAKEUP_DEFAULT) == KERN_INVALID_ARGUMENT) {
				panic("bad select_conflict_queue");
			}
		}
		fdp->fd_fpdrainwait = 1;
		msleep(&fdp->fd_fpdrainwait, &fdp->fd_lock, PRIBIO, "fpdrain", NULL);
	}
#if DIAGNOSTIC
	if ((fp->fp_flags & FP_INSELECT) != 0) {
		panic("FP_INSELECT set on drained fp");
	}
#endif
	/* No one can be selecting on this fp any more; clear the hint. */
	if ((fp->fp_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
		fp->fp_flags &= ~FP_SELCONFLICT;
	}

	if (!is_current_proc) {
		thread_deallocate(thread);
	}
}
1682
1683
int
fp_close_and_unlock(proc_t p, int fd, struct fileproc *fp, int flags)
{
	struct filedesc *fdp = &p->p_fd;
	struct fileglob *fg = fp->fp_glob;
#if CONFIG_MACF
	kauth_cred_t cred;
#endif

#if DIAGNOSTIC
	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif

	/*
	 * Keep most people from finding the filedesc while we are closing it.
	 *
	 * Callers are:
	 *
	 * - dup2() which always waits for UF_RESERVED to clear
	 *
	 * - close/guarded_close/... who will fail the fileproc lookup if
	 *   UF_RESERVED is set,
	 *
	 * - fdexec()/fdfree() who only run once all threads in the proc
	 *   are properly canceled, hence no fileproc in this proc should
	 *   be in flux.
	 *
	 * Which means that neither UF_RESERVED nor UF_CLOSING should be set.
	 *
	 * Callers of fp_get_noref_locked_with_iocount() can still find
	 * this entry so that they can drop their I/O reference despite
	 * not having remembered the fileproc pointer (namely select() and
	 * file_drop()).
	 */
	if (p->p_fd.fd_ofileflags[fd] & (UF_RESERVED | UF_CLOSING)) {
		panic("%s: called with fileproc in flux (%d/:%p)",
		    __func__, fd, fp);
	}
	p->p_fd.fd_ofileflags[fd] |= (UF_RESERVED | UF_CLOSING);

	/*
	 * Pre-close callouts (MAC close notification, kauth fileop
	 * listeners, AIO cancellation) may block, so the fd lock is
	 * dropped around them.
	 */
	if ((fp->fp_flags & FP_AIOISSUED) ||
#if CONFIG_MACF
	    (FILEGLOB_DTYPE(fg) == DTYPE_VNODE)
#else
	    kauth_authorize_fileop_has_listeners()
#endif
	    ) {
		proc_fdunlock(p);

		if (FILEGLOB_DTYPE(fg) == DTYPE_VNODE) {
			/*
			 * call out to allow 3rd party notification of close.
			 * Ignore result of kauth_authorize_fileop call.
			 */
#if CONFIG_MACF
			cred = kauth_cred_proc_ref(p);
			mac_file_notify_close(cred, fp->fp_glob);
			kauth_cred_unref(&cred);
#endif

			if (kauth_authorize_fileop_has_listeners() &&
			    vnode_getwithref((vnode_t)fg_get_data(fg)) == 0) {
				u_int fileop_flags = 0;
				if (fg->fg_flag & FWASWRITTEN) {
					fileop_flags |= KAUTH_FILEOP_CLOSE_MODIFIED;
				}
				kauth_authorize_fileop(fg->fg_cred, KAUTH_FILEOP_CLOSE,
				    (uintptr_t)fg_get_data(fg), (uintptr_t)fileop_flags);

				vnode_put((vnode_t)fg_get_data(fg));
			}
		}

		if (fp->fp_flags & FP_AIOISSUED) {
			/*
			 * cancel all async IO requests that can be cancelled.
			 */
			_aio_close( p, fd );
		}

		proc_fdlock(p);
	}

	/* Detach any knotes still attached to this fd. */
	if (fd < fdp->fd_knlistsize) {
		knote_fdclose(p, fd);
	}

	/* Wait for all outstanding I/O references on fp to be dropped. */
	fileproc_drain(p, fp);

	/*
	 * FD_DUP2RESV keeps the slot reserved (UF_RESERVED stays set) for
	 * a pending dup2(); otherwise the fd goes back to the free pool.
	 */
	if (flags & FD_DUP2RESV) {
		fdp->fd_ofiles[fd] = NULL;
		fdp->fd_ofileflags[fd] &= ~UF_CLOSING;
	} else {
		fdrelse(p, fd);
	}

	proc_fdunlock(p);

	if (ENTR_SHOULDTRACE && FILEGLOB_DTYPE(fg) == DTYPE_SOCKET) {
		KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_END,
		    fd, 0, (int64_t)VM_KERNEL_ADDRPERM(fg_get_data(fg)));
	}

	fileproc_free(fp);

	/*
	 * Drop the table's fileglob reference; fg_drop() presumably
	 * performs the underlying close on the last reference — its
	 * status is what this function returns.
	 */
	return fg_drop(p, fg);
}
1791
1792 /*
1793 * dupfdopen
1794 *
1795 * Description: Duplicate the specified descriptor to a free descriptor;
1796 * this is the second half of fdopen(), above.
1797 *
1798 * Parameters: p current process pointer
1799 * indx fd to dup to
1800 * dfd fd to dup from
1801 * mode mode to set on new fd
1802 * error command code
1803 *
1804 * Returns: 0 Success
1805 * EBADF Source fd is bad
1806 * EACCES Requested mode not allowed
1807 * !0 'error', if not ENODEV or
1808 * ENXIO
1809 *
1810 * Notes: XXX This is not thread safe; see fdopen() above
1811 */
int
dupfdopen(proc_t p, int indx, int dfd, int flags, int error)
{
	struct filedesc *fdp = &p->p_fd;
	struct fileproc *wfp;
	struct fileproc *fp;
#if CONFIG_MACF
	int myerror;
#endif

	/*
	 * If the to-be-dup'd fd number is greater than the allowed number
	 * of file descriptors, or the fd to be dup'd has already been
	 * closed, reject.  Note, check for new == old is necessary as
	 * falloc could allocate an already closed to-be-dup'd descriptor
	 * as the new descriptor.
	 */
	proc_fdlock(p);

	fp = fdp->fd_ofiles[indx];
	if (dfd < 0 || dfd >= fdp->fd_nfiles ||
	    (wfp = fdp->fd_ofiles[dfd]) == NULL || wfp == fp ||
	    (fdp->fd_ofileflags[dfd] & UF_RESERVED)) {
		proc_fdunlock(p);
		return EBADF;
	}
#if CONFIG_MACF
	myerror = mac_file_check_dup(kauth_cred_get(), wfp->fp_glob, dfd);
	if (myerror) {
		proc_fdunlock(p);
		return myerror;
	}
#endif
	/*
	 * 'error' is the status code produced by the device's fdopen()
	 * handler (see the first half of fdopen, above).
	 *
	 * For ENODEV, simply dup (dfd) to file descriptor (indx).
	 *
	 * Any other error code is returned to the caller unchanged.
	 *
	 * NOTE(review): a historical ENXIO case ("steal away the file
	 * structure from (dfd) and store it in (indx)") is no longer
	 * handled by this switch.
	 */
	switch (error) {
	case ENODEV:
		/* Guarded descriptors may not be dup'd. */
		if (fp_isguarded(wfp, GUARD_DUP)) {
			proc_fdunlock(p);
			return EPERM;
		}

		/*
		 * Check that the mode the file is being opened for is a
		 * subset of the mode of the existing descriptor.
		 */
		if (((flags & (FREAD | FWRITE)) | wfp->f_flag) != wfp->f_flag) {
			proc_fdunlock(p);
			return EACCES;
		}
		if (indx >= fdp->fd_afterlast) {
			fdp->fd_afterlast = indx + 1;
		}

		/* Switch indx over to dfd's fileglob, dropping any glob it had. */
		if (fp->fp_glob) {
			fg_free(fp->fp_glob);
		}
		fg_ref(p, wfp->fp_glob);
		fp->fp_glob = wfp->fp_glob;
		/*
		 * Historically, open(/dev/fd/<n>) preserves close on fork/exec,
		 * unlike dup(), dup2() or fcntl(F_DUPFD).
		 *
		 * open1() already handled O_CLO{EXEC,FORK}
		 */
		fp->fp_flags |= (wfp->fp_flags & (FP_CLOFORK | FP_CLOEXEC));

		procfdtbl_releasefd(p, indx, NULL);
		fp_drop(p, indx, fp, 1);
		proc_fdunlock(p);
		return 0;

	default:
		proc_fdunlock(p);
		return error;
	}
	/* NOTREACHED */
}
1900
1901
1902 #pragma mark KPIS (sys/file.h)
1903
1904 /*
1905 * fg_get_vnode
1906 *
1907 * Description: Return vnode associated with the file structure, if
1908 * any. The lifetime of the returned vnode is bound to
1909 * the lifetime of the file structure.
1910 *
1911 * Parameters: fg Pointer to fileglob to
1912 * inspect
1913 *
1914 * Returns: vnode_t
1915 */
1916 vnode_t
1917 fg_get_vnode(struct fileglob *fg)
1918 {
1919 if (FILEGLOB_DTYPE(fg) == DTYPE_VNODE) {
1920 return (vnode_t)fg_get_data(fg);
1921 } else {
1922 return NULL;
1923 }
1924 }
1925
1926
1927 /*
1928 * fp_getfvp
1929 *
1930 * Description: Get fileproc and vnode pointer for a given fd from the per
1931 * process open file table of the specified process, and if
1932 * successful, increment the fp_iocount
1933 *
1934 * Parameters: p Process in which fd lives
1935 * fd fd to get information for
1936 * resultfp Pointer to result fileproc
1937 * pointer area, or 0 if none
1938 * resultvp Pointer to result vnode pointer
1939 * area, or 0 if none
1940 *
1941 * Returns: 0 Success
1942 * EBADF Bad file descriptor
1943 * ENOTSUP fd does not refer to a vnode
1944 *
1945 * Implicit returns:
1946 * *resultfp (modified) Fileproc pointer
1947 * *resultvp (modified) vnode pointer
1948 *
1949 * Notes: The resultfp and resultvp fields are optional, and may be
1950 * independently specified as NULL to skip returning information
1951 *
1952 * Locks: Internally takes and releases proc_fdlock
1953 */
1954 int
1955 fp_getfvp(proc_t p, int fd, struct fileproc **resultfp, struct vnode **resultvp)
1956 {
1957 struct fileproc *fp;
1958 int error;
1959
1960 error = fp_get_ftype(p, fd, DTYPE_VNODE, ENOTSUP, &fp);
1961 if (error == 0) {
1962 if (resultfp) {
1963 *resultfp = fp;
1964 }
1965 if (resultvp) {
1966 *resultvp = (struct vnode *)fp_get_data(fp);
1967 }
1968 }
1969
1970 return error;
1971 }
1972
1973
1974 /*
1975 * fp_get_pipe_id
1976 *
1977 * Description: Get pipe id for a given fd from the per process open file table
1978 * of the specified process.
1979 *
1980 * Parameters: p Process in which fd lives
1981 * fd fd to get information for
1982 * result_pipe_id Pointer to result pipe id
1983 *
1984 * Returns: 0 Success
 *		EINVAL			NULL pointer arguments passed
1986 * fp_lookup:EBADF Bad file descriptor
1987 * ENOTSUP fd does not refer to a pipe
1988 *
1989 * Implicit returns:
1990 * *result_pipe_id (modified) pipe id
1991 *
1992 * Locks: Internally takes and releases proc_fdlock
1993 */
1994 int
1995 fp_get_pipe_id(proc_t p, int fd, uint64_t *result_pipe_id)
1996 {
1997 struct fileproc *fp = FILEPROC_NULL;
1998 struct fileglob *fg = NULL;
1999 int error = 0;
2000
2001 if (p == NULL || result_pipe_id == NULL) {
2002 return EINVAL;
2003 }
2004
2005 proc_fdlock(p);
2006 if ((error = fp_lookup(p, fd, &fp, 1))) {
2007 proc_fdunlock(p);
2008 return error;
2009 }
2010 fg = fp->fp_glob;
2011
2012 if (FILEGLOB_DTYPE(fg) == DTYPE_PIPE) {
2013 *result_pipe_id = pipe_id((struct pipe*)fg_get_data(fg));
2014 } else {
2015 error = ENOTSUP;
2016 }
2017
2018 fp_drop(p, fd, fp, 1);
2019 proc_fdunlock(p);
2020 return error;
2021 }
2022
2023
2024 /*
2025 * file_vnode
2026 *
2027 * Description: Given an fd, look it up in the current process's per process
2028 * open file table, and return its internal vnode pointer.
2029 *
2030 * Parameters: fd fd to obtain vnode from
2031 * vpp pointer to vnode return area
2032 *
2033 * Returns: 0 Success
2034 * EINVAL The fd does not refer to a
2035 * vnode fileproc entry
2036 * fp_lookup:EBADF Bad file descriptor
2037 *
2038 * Implicit returns:
2039 * *vpp (modified) Returned vnode pointer
2040 *
2041 * Locks: This function internally takes and drops the proc_fdlock for
2042 * the current process
2043 *
2044 * Notes: If successful, this function increments the fp_iocount on the
2045 * fd's corresponding fileproc.
2046 *
2047 * The fileproc referenced is not returned; because of this, care
2048 * must be taken to not drop the last reference (e.g. by closing
2049 * the file). This is inherently unsafe, since the reference may
2050 * not be recoverable from the vnode, if there is a subsequent
2051 * close that destroys the associate fileproc. The caller should
2052 * therefore retain their own reference on the fileproc so that
2053 * the fp_iocount can be dropped subsequently. Failure to do this
2054 * can result in the returned pointer immediately becoming invalid
2055 * following the call.
2056 *
2057 * Use of this function is discouraged.
2058 */
int
file_vnode(int fd, struct vnode **vpp)
{
	/* Convenience wrapper: file_vnode_withvid() without the vid. */
	return file_vnode_withvid(fd, vpp, NULL);
}
2064
2065
2066 /*
2067 * file_vnode_withvid
2068 *
2069 * Description: Given an fd, look it up in the current process's per process
2070 * open file table, and return its internal vnode pointer.
2071 *
2072 * Parameters: fd fd to obtain vnode from
2073 * vpp pointer to vnode return area
2074 * vidp pointer to vid of the returned vnode
2075 *
2076 * Returns: 0 Success
2077 * EINVAL The fd does not refer to a
2078 * vnode fileproc entry
2079 * fp_lookup:EBADF Bad file descriptor
2080 *
2081 * Implicit returns:
2082 * *vpp (modified) Returned vnode pointer
2083 *
2084 * Locks: This function internally takes and drops the proc_fdlock for
2085 * the current process
2086 *
2087 * Notes: If successful, this function increments the fp_iocount on the
2088 * fd's corresponding fileproc.
2089 *
2090 * The fileproc referenced is not returned; because of this, care
2091 * must be taken to not drop the last reference (e.g. by closing
2092 * the file). This is inherently unsafe, since the reference may
2093 * not be recoverable from the vnode, if there is a subsequent
2094 * close that destroys the associate fileproc. The caller should
2095 * therefore retain their own reference on the fileproc so that
2096 * the fp_iocount can be dropped subsequently. Failure to do this
2097 * can result in the returned pointer immediately becoming invalid
2098 * following the call.
2099 *
2100 * Use of this function is discouraged.
2101 */
2102 int
2103 file_vnode_withvid(int fd, struct vnode **vpp, uint32_t *vidp)
2104 {
2105 struct fileproc *fp;
2106 int error;
2107
2108 error = fp_get_ftype(current_proc(), fd, DTYPE_VNODE, EINVAL, &fp);
2109 if (error == 0) {
2110 if (vpp) {
2111 *vpp = (struct vnode *)fp_get_data(fp);
2112 }
2113 if (vidp) {
2114 *vidp = vnode_vid((struct vnode *)fp_get_data(fp));
2115 }
2116 }
2117 return error;
2118 }
2119
2120 /*
2121 * file_socket
2122 *
2123 * Description: Given an fd, look it up in the current process's per process
2124 * open file table, and return its internal socket pointer.
2125 *
 * Parameters:	fd				fd to obtain socket from
2127 * sp pointer to socket return area
2128 *
2129 * Returns: 0 Success
2130 * ENOTSOCK Not a socket
2131 * fp_lookup:EBADF Bad file descriptor
2132 *
2133 * Implicit returns:
2134 * *sp (modified) Returned socket pointer
2135 *
2136 * Locks: This function internally takes and drops the proc_fdlock for
2137 * the current process
2138 *
2139 * Notes: If successful, this function increments the fp_iocount on the
2140 * fd's corresponding fileproc.
2141 *
2142 * The fileproc referenced is not returned; because of this, care
2143 * must be taken to not drop the last reference (e.g. by closing
2144 * the file). This is inherently unsafe, since the reference may
2145 * not be recoverable from the socket, if there is a subsequent
2146 * close that destroys the associate fileproc. The caller should
2147 * therefore retain their own reference on the fileproc so that
2148 * the fp_iocount can be dropped subsequently. Failure to do this
2149 * can result in the returned pointer immediately becoming invalid
2150 * following the call.
2151 *
2152 * Use of this function is discouraged.
2153 */
2154 int
2155 file_socket(int fd, struct socket **sp)
2156 {
2157 struct fileproc *fp;
2158 int error;
2159
2160 error = fp_get_ftype(current_proc(), fd, DTYPE_SOCKET, ENOTSOCK, &fp);
2161 if (error == 0) {
2162 if (sp) {
2163 *sp = (struct socket *)fp_get_data(fp);
2164 }
2165 }
2166 return error;
2167 }
2168
2169
2170 /*
2171 * file_flags
2172 *
2173 * Description: Given an fd, look it up in the current process's per process
2174 * open file table, and return its fileproc's flags field.
2175 *
2176 * Parameters: fd fd whose flags are to be
2177 * retrieved
2178 * flags pointer to flags data area
2179 *
2180 * Returns: 0 Success
 *		EBADF			Bad file descriptor
2183 *
2184 * Implicit returns:
2185 * *flags (modified) Returned flags field
2186 *
2187 * Locks: This function internally takes and drops the proc_fdlock for
2188 * the current process
2189 */
2190 int
2191 file_flags(int fd, int *flags)
2192 {
2193 proc_t p = current_proc();
2194 struct fileproc *fp;
2195 int error = EBADF;
2196
2197 proc_fdlock_spin(p);
2198 fp = fp_get_noref_locked(p, fd);
2199 if (fp) {
2200 *flags = (int)fp->f_flag;
2201 error = 0;
2202 }
2203 proc_fdunlock(p);
2204
2205 return error;
2206 }
2207
2208
2209 /*
2210 * file_drop
2211 *
2212 * Description: Drop an iocount reference on an fd, and wake up any waiters
2213 * for draining (i.e. blocked in fileproc_drain() called during
2214 * the last attempt to close a file).
2215 *
2216 * Parameters: fd fd on which an ioreference is
2217 * to be dropped
2218 *
2219 * Returns: 0 Success
2220 *
2221 * Description: Given an fd, look it up in the current process's per process
2222 * open file table, and drop it's fileproc's fp_iocount by one
2223 *
2224 * Notes: This is intended as a corresponding operation to the functions
2225 * file_vnode() and file_socket() operations.
2226 *
2227 * If the caller can't possibly hold an I/O reference,
2228 * this function will panic the kernel rather than allowing
2229 * for memory corruption. Callers should always call this
2230 * because they acquired an I/O reference on this file before.
2231 *
2232 * Use of this function is discouraged.
2233 */
int
file_drop(int fd)
{
	struct fileproc *fp;
	proc_t p = current_proc();
	struct filedesc *fdp = &p->p_fd;
	int needwakeup = 0;

	proc_fdlock_spin(p);
	/* Panics (rather than returning) if the caller holds no I/O ref. */
	fp = fp_get_noref_locked_with_iocount(p, fd);

	/*
	 * A release that leaves the count at 1 means ours was the last
	 * outstanding I/O reference (same pattern as fp_drop()): clear
	 * the select-conflict hint and wake any thread sleeping in
	 * fileproc_drain().
	 */
	if (1 == os_ref_release_locked(&fp->fp_iocount)) {
		if (fp->fp_flags & FP_SELCONFLICT) {
			fp->fp_flags &= ~FP_SELCONFLICT;
		}

		if (fdp->fd_fpdrainwait) {
			fdp->fd_fpdrainwait = 0;
			needwakeup = 1;
		}
	}
	proc_fdunlock(p);

	/* The wakeup is issued after the fd lock has been dropped. */
	if (needwakeup) {
		wakeup(&fdp->fd_fpdrainwait);
	}
	return 0;
}
2262
2263
2264 #pragma mark syscalls
2265
/* HFS boot-info fsctl selectors, defined locally if not already visible. */
#ifndef HFS_GET_BOOT_INFO
#define HFS_GET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00004)
#endif

#ifndef HFS_SET_BOOT_INFO
#define HFS_SET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00005)
#endif

/* APFS revert-to-snapshot ioctl, defined locally if not already visible. */
#ifndef APFSIOC_REVERT_TO_SNAPSHOT
#define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t)
#endif

/*
 * Evaluates to 1 when (x) + (y) would overflow a signed 64-bit value in
 * either direction, without performing the (undefined) addition itself.
 * Both arguments are evaluated more than once; pass side-effect-free
 * expressions only.
 */
#define CHECK_ADD_OVERFLOW_INT64L(x, y) \
	(((((x) > 0) && ((y) > 0) && ((x) > LLONG_MAX - (y))) || \
	(((x) < 0) && ((y) < 0) && ((x) < LLONG_MIN - (y)))) \
	? 1 : 0)
2282
2283 /*
2284 * sys_getdtablesize
2285 *
2286 * Description: Returns the per process maximum size of the descriptor table
2287 *
2288 * Parameters: p Process being queried
2289 * retval Pointer to the call return area
2290 *
2291 * Returns: 0 Success
2292 *
2293 * Implicit returns:
2294 * *retval (modified) Size of dtable
2295 */
2296 int
2297 sys_getdtablesize(proc_t p, __unused struct getdtablesize_args *uap, int32_t *retval)
2298 {
2299 *retval = proc_limitgetcur_nofile(p);
2300 return 0;
2301 }
2302
2303
/*
 * check_file_seek_range
 *
 * Description: Validate that the byte range described by a struct flock,
 *              resolved against the current file offset, lies within
 *              [0, LLONG_MAX].  Only SEEK_CUR and SEEK_SET are validated;
 *              any other l_whence passes through unchecked.
 *
 * Parameters:  fl               Flock structure describing the range.
 *              cur_file_offset  Current offset in the file (base for
 *                               SEEK_CUR relative ranges).
 *
 * Returns:     0          Range is valid.
 *              EOVERFLOW  Start or end marker exceeds LLONG_MAX.
 *              EINVAL     Start or end marker is negative.
 */

static int
check_file_seek_range(struct flock *fl, off_t cur_file_offset)
{
	off_t start, end;

	switch (fl->l_whence) {
	case SEEK_CUR:
		/* Resolve the start marker, detecting 64-bit wraparound. */
		if (__builtin_add_overflow(fl->l_start, cur_file_offset, &start)) {
			/* Underflow (negative start) vs. overflow past LLONG_MAX. */
			return (fl->l_start < 0) ? EINVAL : EOVERFLOW;
		}
		if (start < 0) {
			return EINVAL;
		}
		if (fl->l_len > 0) {
			/* End marker is start + len - 1; reject past LLONG_MAX. */
			if (__builtin_add_overflow(start, fl->l_len - 1, &end)) {
				return EOVERFLOW;
			}
		} else if (start + fl->l_len < 0) {
			/* Zero/negative lengths reach backwards from start. */
			return EINVAL;
		}
		break;

	case SEEK_SET:
		if (fl->l_start < 0) {
			return EINVAL;
		}
		if (fl->l_len > 0) {
			if (__builtin_add_overflow(fl->l_start, fl->l_len - 1, &end)) {
				return EOVERFLOW;
			}
		} else if (fl->l_len < 0 && fl->l_start + fl->l_len < 0) {
			return EINVAL;
		}
		break;

	default:
		break;
	}
	return 0;
}
2360
2361
2362 /*
2363 * sys_dup
2364 *
2365 * Description: Duplicate a file descriptor.
2366 *
2367 * Parameters: p Process performing the dup
2368 * uap->fd The fd to dup
2369 * retval Pointer to the call return area
2370 *
2371 * Returns: 0 Success
2372 * !0 Errno
2373 *
2374 * Implicit returns:
2375 * *retval (modified) The new descriptor
2376 */
2377 int
2378 sys_dup(proc_t p, struct dup_args *uap, int32_t *retval)
2379 {
2380 struct filedesc *fdp = &p->p_fd;
2381 int old = uap->fd;
2382 int new, error;
2383 struct fileproc *fp;
2384
2385 proc_fdlock(p);
2386 if ((error = fp_lookup(p, old, &fp, 1))) {
2387 proc_fdunlock(p);
2388 return error;
2389 }
2390 if (fp_isguarded(fp, GUARD_DUP)) {
2391 error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP);
2392 (void) fp_drop(p, old, fp, 1);
2393 proc_fdunlock(p);
2394 return error;
2395 }
2396 if ((error = fdalloc(p, 0, &new))) {
2397 fp_drop(p, old, fp, 1);
2398 proc_fdunlock(p);
2399 return error;
2400 }
2401 error = finishdup(p, fdp, old, new, 0, retval);
2402
2403 if (ENTR_SHOULDTRACE && FILEGLOB_DTYPE(fp->fp_glob) == DTYPE_SOCKET) {
2404 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
2405 new, 0, (int64_t)VM_KERNEL_ADDRPERM(fp_get_data(fp)));
2406 }
2407
2408 fp_drop(p, old, fp, 1);
2409 proc_fdunlock(p);
2410
2411 return error;
2412 }
2413
2414 /*
2415 * sys_dup2
2416 *
2417 * Description: Duplicate a file descriptor to a particular value.
2418 *
2419 * Parameters: p Process performing the dup
2420 * uap->from The fd to dup
2421 * uap->to The fd to dup it to
2422 * retval Pointer to the call return area
2423 *
2424 * Returns: 0 Success
2425 * !0 Errno
2426 *
2427 * Implicit returns:
2428 * *retval (modified) The new descriptor
2429 */
2430 int
2431 sys_dup2(proc_t p, struct dup2_args *uap, int32_t *retval)
2432 {
2433 return dup2(p, uap->from, uap->to, retval);
2434 }
2435
/*
 * dup2
 *
 * Description: In-kernel implementation of dup2(2): make descriptor 'new'
 *		refer to the same open file as descriptor 'old', closing
 *		whatever was previously open on 'new'.  Shared by sys_dup2()
 *		and other in-kernel callers.
 *
 * Parameters:	p	Process performing the dup
 *		old	The fd to dup
 *		new	The fd to dup it to
 *		retval	Pointer to the call return area
 *
 * Returns:	0	Success
 *		!0	Errno
 *
 * Implicit returns:
 *		*retval (modified)	The new descriptor
 */
int
dup2(proc_t p, int old, int new, int *retval)
{
	struct filedesc *fdp = &p->p_fd;
	struct fileproc *fp, *nfp;
	int i, error;

	proc_fdlock(p);

startover:
	if ((error = fp_lookup(p, old, &fp, 1))) {
		proc_fdunlock(p);
		return error;
	}
	/* Guarded descriptors may not be dup'ed; raise the guard exception. */
	if (fp_isguarded(fp, GUARD_DUP)) {
		error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP);
		(void) fp_drop(p, old, fp, 1);
		proc_fdunlock(p);
		return error;
	}
	/* 'new' must lie within the process's current descriptor limit. */
	if (new < 0 || new >= proc_limitgetcur_nofile(p)) {
		fp_drop(p, old, fp, 1);
		proc_fdunlock(p);
		return EBADF;
	}
	/* dup2(fd, fd) is a no-op that simply returns fd. */
	if (old == new) {
		fp_drop(p, old, fp, 1);
		*retval = new;
		proc_fdunlock(p);
		return 0;
	}
	if (new < 0 || new >= fdp->fd_nfiles) {
		/* 'new' lies beyond the current table: grow it via fdalloc(). */
		if ((error = fdalloc(p, new, &i))) {
			fp_drop(p, old, fp, 1);
			proc_fdunlock(p);
			return error;
		}
		if (new != i) {
			/*
			 * fdalloc() handed back a different slot; release it
			 * and take the close-and-reserve path for 'new'.
			 */
			fdrelse(p, i);
			goto closeit;
		}
		/* new == i: fdalloc() left the slot reserved for us. */
	} else {
closeit:
		/*
		 * Another thread holds 'new' reserved: wait for it and retry
		 * from scratch, since the fdlock was dropped while sleeping.
		 */
		if ((fdp->fd_ofileflags[new] & UF_RESERVED) == UF_RESERVED) {
			fp_drop(p, old, fp, 1);
			procfdtbl_waitfd(p, new);
#if DIAGNOSTIC
			proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif
			goto startover;
		}

		if ((nfp = fdp->fd_ofiles[new]) != NULL) {
			/* Close-guarded files may not be implicitly closed. */
			if (fp_isguarded(nfp, GUARD_CLOSE)) {
				fp_drop(p, old, fp, 1);
				error = fp_guard_exception(p,
				    new, nfp, kGUARD_EXC_CLOSE);
				proc_fdunlock(p);
				return error;
			}
			/*
			 * Close the file currently on 'new'; FD_DUP2RESV keeps
			 * the slot reserved for us across the unlock/relock.
			 */
			(void)fp_close_and_unlock(p, new, nfp, FD_DUP2RESV);
			proc_fdlock(p);
			assert(fdp->fd_ofileflags[new] & UF_RESERVED);
		} else {
#if DIAGNOSTIC
			if (fdp->fd_ofiles[new] != NULL) {
				panic("dup2: no ref on fileproc %d", new);
			}
#endif
			/* Slot is empty: reserve it for the dup. */
			procfdtbl_reservefd(p, new);
		}
	}
#if DIAGNOSTIC
	if (fdp->fd_ofiles[new] != 0) {
		panic("dup2: overwriting fd_ofiles with new %d", new);
	}
	if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0) {
		panic("dup2: unreserved fileflags with new %d", new);
	}
#endif
	error = finishdup(p, fdp, old, new, 0, retval);
	fp_drop(p, old, fp, 1);
	proc_fdunlock(p);

	return error;
}
2522
2523
2524 /*
2525 * fcntl
2526 *
2527 * Description: The file control system call.
2528 *
2529 * Parameters: p Process performing the fcntl
2530 * uap->fd The fd to operate against
2531 * uap->cmd The command to perform
2532 * uap->arg Pointer to the command argument
2533 * retval Pointer to the call return area
2534 *
2535 * Returns: 0 Success
2536 * !0 Errno (see fcntl_nocancel)
2537 *
2538 * Implicit returns:
2539 * *retval (modified) fcntl return value (if any)
2540 *
2541 * Notes: This system call differs from fcntl_nocancel() in that it
2542 * tests for cancellation prior to performing a potentially
2543 * blocking operation.
2544 */
2545 int
2546 sys_fcntl(proc_t p, struct fcntl_args *uap, int32_t *retval)
2547 {
2548 __pthread_testcancel(1);
2549 return sys_fcntl_nocancel(p, (struct fcntl_nocancel_args *)uap, retval);
2550 }
2551
/*
 * Entitlement allowing a caller of F_OPENFROM to use the credentials of the
 * thread that opened the directory descriptor (see sys_fcntl__OPENFROM).
 */
#define ACCOUNT_OPENFROM_ENTITLEMENT \
	"com.apple.private.vfs.role-account-openfrom"
2554
2555 /*
2556 * sys_fcntl_nocancel
2557 *
2558 * Description: A non-cancel-testing file control system call.
2559 *
2560 * Parameters: p Process performing the fcntl
2561 * uap->fd The fd to operate against
2562 * uap->cmd The command to perform
2563 * uap->arg Pointer to the command argument
2564 * retval Pointer to the call return area
2565 *
2566 * Returns: 0 Success
2567 * EINVAL
2568 * fp_lookup:EBADF Bad file descriptor
2569 * [F_DUPFD]
2570 * fdalloc:EMFILE
2571 * fdalloc:ENOMEM
2572 * finishdup:EBADF
2573 * finishdup:ENOMEM
2574 * [F_SETOWN]
2575 * ESRCH
2576 * [F_SETLK]
2577 * EBADF
2578 * EOVERFLOW
2579 * copyin:EFAULT
2580 * vnode_getwithref:???
2581 * VNOP_ADVLOCK:???
2582 * msleep:ETIMEDOUT
2583 * [F_GETLK]
2584 * EBADF
2585 * EOVERFLOW
2586 * copyin:EFAULT
2587 * copyout:EFAULT
2588 * vnode_getwithref:???
2589 * VNOP_ADVLOCK:???
2590 * [F_PREALLOCATE]
2591 * EBADF
2592 * EFBIG
2593 * EINVAL
2594 * ENOSPC
2595 * copyin:EFAULT
2596 * copyout:EFAULT
2597 * vnode_getwithref:???
2598 * VNOP_ALLOCATE:???
2599 * [F_SETSIZE,F_RDADVISE]
2600 * EBADF
2601 * EINVAL
2602 * copyin:EFAULT
2603 * vnode_getwithref:???
2604 * [F_RDAHEAD,F_NOCACHE]
2605 * EBADF
2606 * vnode_getwithref:???
2607 * [???]
2608 *
2609 * Implicit returns:
2610 * *retval (modified) fcntl return value (if any)
2611 */
/*
 * Declare a vfs_context for the fcntl in progress: the current thread plus
 * the credentials captured when the file was opened.  NOTE: expands to a
 * declaration that reads a local 'fp' (struct fileproc *) from the
 * surrounding scope.
 */
#define SYS_FCNTL_DECLARE_VFS_CONTEXT(context) \
	struct vfs_context context = { \
		.vc_thread = current_thread(), \
		.vc_ucred = fp->f_cred, \
	}
2617
2618 static user_addr_t
2619 sys_fnctl_parse_arg(proc_t p, user_long_t arg)
2620 {
2621 /*
2622 * Since the arg parameter is defined as a long but may be
2623 * either a long or a pointer we must take care to handle
2624 * sign extension issues. Our sys call munger will sign
2625 * extend a long when we are called from a 32-bit process.
2626 * Since we can never have an address greater than 32-bits
2627 * from a 32-bit process we lop off the top 32-bits to avoid
2628 * getting the wrong address
2629 */
2630 return proc_is64bit(p) ? arg : CAST_USER_ADDR_T((uint32_t)arg);
2631 }
2632
/*
 * Common exit path for fcntl handlers that still hold the proc fdlock:
 * drop the reference taken by fp_lookup() (locked == 1), release the
 * fdlock, and pass the error through to the caller.
 */
static int
sys_fcntl_out(proc_t p, int fd, struct fileproc *fp, int error)
{
	fp_drop(p, fd, fp, 1);
	proc_fdunlock(p);
	return error;
}
2641
/*
 * Common exit path for fcntl handlers acting on vnodes after they have
 * already dropped the proc fdlock: audit the vnode path, then drop the
 * fp_lookup() reference (locked == 0 since the fdlock is no longer held).
 */
static int
sys_fcntl_outdrop(proc_t p, int fd, struct fileproc *fp, struct vnode *vp, int error)
{
#pragma unused(vp)

	AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
	fp_drop(p, fd, fp, 0);
	return error;
}
2652
/* Handler signature for fcntl commands that operate on any file type. */
typedef int (*sys_fnctl_handler_t)(proc_t p, int fd, int cmd, user_long_t arg,
    struct fileproc *fp, int32_t *retval);

/* Handler signature for fcntl commands that require a vnode-backed file. */
typedef int (*sys_fnctl_vnode_handler_t)(proc_t p, int fd, int cmd,
    user_long_t arg, struct fileproc *fp, struct vnode *vp, int32_t *retval);
2658
/*
 * SPI (private) for opening a file starting from a dir fd
 *
 * Called with the proc fdlock held and an fp_lookup() reference on 'fp';
 * drops the fdlock before doing I/O and releases the reference on return.
 *
 * Note: do not inline to keep stack usage under control.
 */
__attribute__((noinline))
static int
sys_fcntl__OPENFROM(proc_t p, int fd, int cmd, user_long_t arg,
    struct fileproc *fp, struct vnode *vp, int32_t *retval)
{
#pragma unused(cmd)

	user_addr_t argp = sys_fnctl_parse_arg(p, arg);
	struct user_fopenfrom fopen;
	struct vnode_attr *va;
	struct nameidata *nd;
	int error, cmode;
	bool has_entitlement;

	/* Check if this isn't a valid file descriptor */
	if ((fp->f_flag & FREAD) == 0) {
		return sys_fcntl_out(p, fd, fp, EBADF);
	}
	proc_fdunlock(p);

	if (vnode_getwithref(vp)) {
		error = ENOENT;
		goto outdrop;
	}

	/* Only valid for directories */
	if (vp->v_type != VDIR) {
		vnode_put(vp);
		error = ENOTDIR;
		goto outdrop;
	}

	/*
	 * Only entitled apps may use the credentials of the thread
	 * that opened the file descriptor.
	 * Non-entitled threads will use their own context.
	 */
	has_entitlement = IOCurrentTaskHasEntitlement(ACCOUNT_OPENFROM_ENTITLEMENT);

	/* Get flags, mode and pathname arguments. */
	if (IS_64BIT_PROCESS(p)) {
		error = copyin(argp, &fopen, sizeof(fopen));
	} else {
		/* 32-bit caller: copy in the narrow layout and widen it. */
		struct user32_fopenfrom fopen32;

		error = copyin(argp, &fopen32, sizeof(fopen32));
		fopen.o_flags = fopen32.o_flags;
		fopen.o_mode = fopen32.o_mode;
		fopen.o_pathname = CAST_USER_ADDR_T(fopen32.o_pathname);
	}
	if (error) {
		vnode_put(vp);
		goto outdrop;
	}

	/* open1() can have really deep stacks, so allocate those */
	va = kalloc_type(struct vnode_attr, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	nd = kalloc_type(struct nameidata, Z_WAITOK | Z_ZERO | Z_NOFAIL);

	AUDIT_ARG(fflags, fopen.o_flags);
	AUDIT_ARG(mode, fopen.o_mode);
	VATTR_INIT(va);
	/* Mask off all but regular access permissions */
	cmode = ((fopen.o_mode & ~p->p_fd.fd_cmask) & ALLPERMS) & ~S_ISTXT;
	VATTR_SET(va, va_mode, cmode & ACCESSPERMS);

	SYS_FCNTL_DECLARE_VFS_CONTEXT(context);

	/* Start the lookup relative to the file descriptor's vnode. */
	NDINIT(nd, LOOKUP, OP_OPEN, USEDVP | FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
	    fopen.o_pathname, has_entitlement ? &context : vfs_context_current());
	nd->ni_dvp = vp;

	error = open1(has_entitlement ? &context : vfs_context_current(),
	    nd, fopen.o_flags, va, NULL, NULL, retval, AUTH_OPEN_NOAUTHFD);

	kfree_type(struct vnode_attr, va);
	kfree_type(struct nameidata, nd);

	vnode_put(vp);

outdrop:
	return sys_fcntl_outdrop(p, fd, fp, vp, error);
}
2748
2749 int
2750 sys_fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
2751 {
2752 int fd = uap->fd;
2753 int cmd = uap->cmd;
2754 struct filedesc *fdp = &p->p_fd;
2755 struct fileproc *fp;
2756 struct vnode *vp = NULLVP; /* for AUDIT_ARG() at end */
2757 unsigned int oflags, nflags;
2758 int i, tmp, error, error2, flg = 0;
2759 struct flock fl = {};
2760 struct flocktimeout fltimeout;
2761 struct timespec *timeout = NULL;
2762 off_t offset;
2763 int newmin;
2764 daddr64_t lbn, bn;
2765 unsigned int fflag;
2766 user_addr_t argp;
2767 boolean_t is64bit;
2768 int has_entitlement = 0;
2769
2770 AUDIT_ARG(fd, uap->fd);
2771 AUDIT_ARG(cmd, uap->cmd);
2772
2773 proc_fdlock(p);
2774 if ((error = fp_lookup(p, fd, &fp, 1))) {
2775 proc_fdunlock(p);
2776 return error;
2777 }
2778
2779 SYS_FCNTL_DECLARE_VFS_CONTEXT(context);
2780
2781 is64bit = proc_is64bit(p);
2782 if (is64bit) {
2783 argp = uap->arg;
2784 } else {
2785 /*
2786 * Since the arg parameter is defined as a long but may be
2787 * either a long or a pointer we must take care to handle
2788 * sign extension issues. Our sys call munger will sign
2789 * extend a long when we are called from a 32-bit process.
2790 * Since we can never have an address greater than 32-bits
2791 * from a 32-bit process we lop off the top 32-bits to avoid
2792 * getting the wrong address
2793 */
2794 argp = CAST_USER_ADDR_T((uint32_t)uap->arg);
2795 }
2796
2797 #if CONFIG_MACF
2798 error = mac_file_check_fcntl(kauth_cred_get(), fp->fp_glob, cmd, uap->arg);
2799 if (error) {
2800 goto out;
2801 }
2802 #endif
2803
2804 switch (cmd) {
2805 case F_DUPFD:
2806 case F_DUPFD_CLOEXEC:
2807 if (fp_isguarded(fp, GUARD_DUP)) {
2808 error = fp_guard_exception(p, fd, fp, kGUARD_EXC_DUP);
2809 goto out;
2810 }
2811 newmin = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
2812 AUDIT_ARG(value32, newmin);
2813 if (newmin < 0 || newmin >= proc_limitgetcur_nofile(p)) {
2814 error = EINVAL;
2815 goto out;
2816 }
2817 if ((error = fdalloc(p, newmin, &i))) {
2818 goto out;
2819 }
2820 error = finishdup(p, fdp, fd, i,
2821 cmd == F_DUPFD_CLOEXEC ? FP_CLOEXEC : 0, retval);
2822 goto out;
2823
2824 case F_GETFD:
2825 *retval = (fp->fp_flags & FP_CLOEXEC) ? FD_CLOEXEC : 0;
2826 error = 0;
2827 goto out;
2828
2829 case F_SETFD:
2830 AUDIT_ARG(value32, (uint32_t)uap->arg);
2831 if (uap->arg & FD_CLOEXEC) {
2832 fp->fp_flags |= FP_CLOEXEC;
2833 error = 0;
2834 } else if (!fp->fp_guard_attrs) {
2835 fp->fp_flags &= ~FP_CLOEXEC;
2836 error = 0;
2837 } else {
2838 error = fp_guard_exception(p,
2839 fd, fp, kGUARD_EXC_NOCLOEXEC);
2840 }
2841 goto out;
2842
2843 case F_GETFL:
2844 fflag = fp->f_flag;
2845 if ((fflag & O_EVTONLY) && proc_disallow_rw_for_o_evtonly(p)) {
2846 /*
2847 * We insert back F_READ so that conversion back to open flags with
2848 * OFLAGS() will come out right. We only need to set 'FREAD' as the
2849 * 'O_RDONLY' is always implied.
2850 */
2851 fflag |= FREAD;
2852 }
2853 *retval = OFLAGS(fflag);
2854 error = 0;
2855 goto out;
2856
2857 case F_SETFL:
2858 // FIXME (rdar://54898652)
2859 //
2860 // this code is broken if fnctl(F_SETFL), ioctl() are
2861 // called concurrently for the same fileglob.
2862
2863 tmp = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
2864 AUDIT_ARG(value32, tmp);
2865
2866 os_atomic_rmw_loop(&fp->f_flag, oflags, nflags, relaxed, {
2867 nflags = oflags & ~FCNTLFLAGS;
2868 nflags |= FFLAGS(tmp) & FCNTLFLAGS;
2869 });
2870 tmp = nflags & FNONBLOCK;
2871 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
2872 if (error) {
2873 goto out;
2874 }
2875 tmp = nflags & FASYNC;
2876 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, &context);
2877 if (!error) {
2878 goto out;
2879 }
2880 os_atomic_andnot(&fp->f_flag, FNONBLOCK, relaxed);
2881 tmp = 0;
2882 (void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
2883 goto out;
2884
2885 case F_GETOWN:
2886 if (fp->f_type == DTYPE_SOCKET) {
2887 *retval = ((struct socket *)fp_get_data(fp))->so_pgid;
2888 error = 0;
2889 goto out;
2890 }
2891 error = fo_ioctl(fp, TIOCGPGRP, (caddr_t)retval, &context);
2892 *retval = -*retval;
2893 goto out;
2894
2895 case F_SETOWN:
2896 tmp = CAST_DOWN_EXPLICIT(pid_t, uap->arg); /* arg is an int, so we won't lose bits */
2897 AUDIT_ARG(value32, tmp);
2898 if (fp->f_type == DTYPE_SOCKET) {
2899 ((struct socket *)fp_get_data(fp))->so_pgid = tmp;
2900 error = 0;
2901 goto out;
2902 }
2903 if (fp->f_type == DTYPE_PIPE) {
2904 error = fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
2905 goto out;
2906 }
2907
2908 if (tmp <= 0) {
2909 tmp = -tmp;
2910 } else {
2911 proc_t p1 = proc_find(tmp);
2912 if (p1 == 0) {
2913 error = ESRCH;
2914 goto out;
2915 }
2916 tmp = (int)p1->p_pgrpid;
2917 proc_rele(p1);
2918 }
2919 error = fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
2920 goto out;
2921
2922 case F_SETNOSIGPIPE:
2923 tmp = CAST_DOWN_EXPLICIT(int, uap->arg);
2924 if (fp->f_type == DTYPE_SOCKET) {
2925 #if SOCKETS
2926 error = sock_setsockopt((struct socket *)fp_get_data(fp),
2927 SOL_SOCKET, SO_NOSIGPIPE, &tmp, sizeof(tmp));
2928 #else
2929 error = EINVAL;
2930 #endif
2931 } else {
2932 struct fileglob *fg = fp->fp_glob;
2933
2934 lck_mtx_lock_spin(&fg->fg_lock);
2935 if (tmp) {
2936 fg->fg_lflags |= FG_NOSIGPIPE;
2937 } else {
2938 fg->fg_lflags &= ~FG_NOSIGPIPE;
2939 }
2940 lck_mtx_unlock(&fg->fg_lock);
2941 error = 0;
2942 }
2943 goto out;
2944
2945 case F_GETNOSIGPIPE:
2946 if (fp->f_type == DTYPE_SOCKET) {
2947 #if SOCKETS
2948 int retsize = sizeof(*retval);
2949 error = sock_getsockopt((struct socket *)fp_get_data(fp),
2950 SOL_SOCKET, SO_NOSIGPIPE, retval, &retsize);
2951 #else
2952 error = EINVAL;
2953 #endif
2954 } else {
2955 *retval = (fp->fp_glob->fg_lflags & FG_NOSIGPIPE) ?
2956 1 : 0;
2957 error = 0;
2958 }
2959 goto out;
2960
2961 case F_SETCONFINED:
2962 /*
2963 * If this is the only reference to this fglob in the process
2964 * and it's already marked as close-on-fork then mark it as
2965 * (immutably) "confined" i.e. any fd that points to it will
2966 * forever be close-on-fork, and attempts to use an IPC
2967 * mechanism to move the descriptor elsewhere will fail.
2968 */
2969 if (CAST_DOWN_EXPLICIT(int, uap->arg)) {
2970 struct fileglob *fg = fp->fp_glob;
2971
2972 lck_mtx_lock_spin(&fg->fg_lock);
2973 if (fg->fg_lflags & FG_CONFINED) {
2974 error = 0;
2975 } else if (1 != os_ref_get_count_raw(&fg->fg_count)) {
2976 error = EAGAIN; /* go close the dup .. */
2977 } else if (fp->fp_flags & FP_CLOFORK) {
2978 fg->fg_lflags |= FG_CONFINED;
2979 error = 0;
2980 } else {
2981 error = EBADF; /* open without O_CLOFORK? */
2982 }
2983 lck_mtx_unlock(&fg->fg_lock);
2984 } else {
2985 /*
2986 * Other subsystems may have built on the immutability
2987 * of FG_CONFINED; clearing it may be tricky.
2988 */
2989 error = EPERM; /* immutable */
2990 }
2991 goto out;
2992
2993 case F_GETCONFINED:
2994 *retval = (fp->fp_glob->fg_lflags & FG_CONFINED) ? 1 : 0;
2995 error = 0;
2996 goto out;
2997
2998 case F_SETLKWTIMEOUT:
2999 case F_SETLKW:
3000 case F_OFD_SETLKWTIMEOUT:
3001 case F_OFD_SETLKW:
3002 flg |= F_WAIT;
3003 OS_FALLTHROUGH;
3004
3005 case F_SETLK:
3006 case F_OFD_SETLK:
3007 if (fp->f_type != DTYPE_VNODE) {
3008 error = EBADF;
3009 goto out;
3010 }
3011 vp = (struct vnode *)fp_get_data(fp);
3012
3013 fflag = fp->f_flag;
3014 offset = fp->f_offset;
3015 proc_fdunlock(p);
3016
3017 /* Copy in the lock structure */
3018 if (F_SETLKWTIMEOUT == cmd || F_OFD_SETLKWTIMEOUT == cmd) {
3019 error = copyin(argp, (caddr_t) &fltimeout, sizeof(fltimeout));
3020 if (error) {
3021 goto outdrop;
3022 }
3023 fl = fltimeout.fl;
3024 timeout = &fltimeout.timeout;
3025 } else {
3026 error = copyin(argp, (caddr_t)&fl, sizeof(fl));
3027 if (error) {
3028 goto outdrop;
3029 }
3030 }
3031
3032 /* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
3033 /* and ending byte for EOVERFLOW in SEEK_SET */
3034 error = check_file_seek_range(&fl, offset);
3035 if (error) {
3036 goto outdrop;
3037 }
3038
3039 if ((error = vnode_getwithref(vp))) {
3040 goto outdrop;
3041 }
3042 if (fl.l_whence == SEEK_CUR) {
3043 fl.l_start += offset;
3044 }
3045
3046 #if CONFIG_MACF
3047 error = mac_file_check_lock(kauth_cred_get(), fp->fp_glob,
3048 F_SETLK, &fl);
3049 if (error) {
3050 (void)vnode_put(vp);
3051 goto outdrop;
3052 }
3053 #endif
3054
3055 #if CONFIG_FILE_LEASES
3056 (void)vnode_breaklease(vp, O_WRONLY, vfs_context_current());
3057 #endif
3058
3059 switch (cmd) {
3060 case F_OFD_SETLK:
3061 case F_OFD_SETLKW:
3062 case F_OFD_SETLKWTIMEOUT:
3063 flg |= F_OFD_LOCK;
3064 switch (fl.l_type) {
3065 case F_RDLCK:
3066 if ((fflag & FREAD) == 0) {
3067 error = EBADF;
3068 break;
3069 }
3070 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
3071 F_SETLK, &fl, flg, &context, timeout);
3072 break;
3073 case F_WRLCK:
3074 if ((fflag & FWRITE) == 0) {
3075 error = EBADF;
3076 break;
3077 }
3078 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
3079 F_SETLK, &fl, flg, &context, timeout);
3080 break;
3081 case F_UNLCK:
3082 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
3083 F_UNLCK, &fl, F_OFD_LOCK, &context,
3084 timeout);
3085 break;
3086 default:
3087 error = EINVAL;
3088 break;
3089 }
3090 if (0 == error &&
3091 (F_RDLCK == fl.l_type || F_WRLCK == fl.l_type)) {
3092 struct fileglob *fg = fp->fp_glob;
3093
3094 /*
3095 * arrange F_UNLCK on last close (once
3096 * set, FG_HAS_OFDLOCK is immutable)
3097 */
3098 if ((fg->fg_lflags & FG_HAS_OFDLOCK) == 0) {
3099 lck_mtx_lock_spin(&fg->fg_lock);
3100 fg->fg_lflags |= FG_HAS_OFDLOCK;
3101 lck_mtx_unlock(&fg->fg_lock);
3102 }
3103 }
3104 break;
3105 default:
3106 flg |= F_POSIX;
3107 switch (fl.l_type) {
3108 case F_RDLCK:
3109 if ((fflag & FREAD) == 0) {
3110 error = EBADF;
3111 break;
3112 }
3113 // XXX UInt32 unsafe for LP64 kernel
3114 os_atomic_or(&p->p_ladvflag, P_LADVLOCK, relaxed);
3115 error = VNOP_ADVLOCK(vp, (caddr_t)p,
3116 F_SETLK, &fl, flg, &context, timeout);
3117 break;
3118 case F_WRLCK:
3119 if ((fflag & FWRITE) == 0) {
3120 error = EBADF;
3121 break;
3122 }
3123 // XXX UInt32 unsafe for LP64 kernel
3124 os_atomic_or(&p->p_ladvflag, P_LADVLOCK, relaxed);
3125 error = VNOP_ADVLOCK(vp, (caddr_t)p,
3126 F_SETLK, &fl, flg, &context, timeout);
3127 break;
3128 case F_UNLCK:
3129 error = VNOP_ADVLOCK(vp, (caddr_t)p,
3130 F_UNLCK, &fl, F_POSIX, &context, timeout);
3131 break;
3132 default:
3133 error = EINVAL;
3134 break;
3135 }
3136 break;
3137 }
3138 (void) vnode_put(vp);
3139 goto outdrop;
3140
3141 case F_GETLK:
3142 case F_OFD_GETLK:
3143 case F_GETLKPID:
3144 case F_OFD_GETLKPID:
3145 if (fp->f_type != DTYPE_VNODE) {
3146 error = EBADF;
3147 goto out;
3148 }
3149 vp = (struct vnode *)fp_get_data(fp);
3150
3151 offset = fp->f_offset;
3152 proc_fdunlock(p);
3153
3154 /* Copy in the lock structure */
3155 error = copyin(argp, (caddr_t)&fl, sizeof(fl));
3156 if (error) {
3157 goto outdrop;
3158 }
3159
3160 /* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
3161 /* and ending byte for EOVERFLOW in SEEK_SET */
3162 error = check_file_seek_range(&fl, offset);
3163 if (error) {
3164 goto outdrop;
3165 }
3166
3167 if ((fl.l_whence == SEEK_SET) && (fl.l_start < 0)) {
3168 error = EINVAL;
3169 goto outdrop;
3170 }
3171
3172 switch (fl.l_type) {
3173 case F_RDLCK:
3174 case F_UNLCK:
3175 case F_WRLCK:
3176 break;
3177 default:
3178 error = EINVAL;
3179 goto outdrop;
3180 }
3181
3182 switch (fl.l_whence) {
3183 case SEEK_CUR:
3184 case SEEK_SET:
3185 case SEEK_END:
3186 break;
3187 default:
3188 error = EINVAL;
3189 goto outdrop;
3190 }
3191
3192 if ((error = vnode_getwithref(vp)) == 0) {
3193 if (fl.l_whence == SEEK_CUR) {
3194 fl.l_start += offset;
3195 }
3196
3197 #if CONFIG_MACF
3198 error = mac_file_check_lock(kauth_cred_get(), fp->fp_glob,
3199 cmd, &fl);
3200 if (error == 0)
3201 #endif
3202 switch (cmd) {
3203 case F_OFD_GETLK:
3204 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
3205 F_GETLK, &fl, F_OFD_LOCK, &context, NULL);
3206 break;
3207 case F_OFD_GETLKPID:
3208 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
3209 F_GETLKPID, &fl, F_OFD_LOCK, &context, NULL);
3210 break;
3211 default:
3212 error = VNOP_ADVLOCK(vp, (caddr_t)p,
3213 cmd, &fl, F_POSIX, &context, NULL);
3214 break;
3215 }
3216
3217 (void)vnode_put(vp);
3218
3219 if (error == 0) {
3220 error = copyout((caddr_t)&fl, argp, sizeof(fl));
3221 }
3222 }
3223 goto outdrop;
3224
3225 case F_PREALLOCATE: {
3226 fstore_t alloc_struct; /* structure for allocate command */
3227 u_int32_t alloc_flags = 0;
3228
3229 if (fp->f_type != DTYPE_VNODE) {
3230 error = EBADF;
3231 goto out;
3232 }
3233
3234 vp = (struct vnode *)fp_get_data(fp);
3235 proc_fdunlock(p);
3236
3237 /* make sure that we have write permission */
3238 if ((fp->f_flag & FWRITE) == 0) {
3239 error = EBADF;
3240 goto outdrop;
3241 }
3242
3243 error = copyin(argp, (caddr_t)&alloc_struct, sizeof(alloc_struct));
3244 if (error) {
3245 goto outdrop;
3246 }
3247
3248 /* now set the space allocated to 0 */
3249 alloc_struct.fst_bytesalloc = 0;
3250
3251 /*
3252 * Do some simple parameter checking
3253 */
3254
3255 /* set up the flags */
3256
3257 alloc_flags |= PREALLOCATE;
3258
3259 if (alloc_struct.fst_flags & F_ALLOCATECONTIG) {
3260 alloc_flags |= ALLOCATECONTIG;
3261 }
3262
3263 if (alloc_struct.fst_flags & F_ALLOCATEALL) {
3264 alloc_flags |= ALLOCATEALL;
3265 }
3266
3267 if (alloc_struct.fst_flags & F_ALLOCATEPERSIST) {
3268 alloc_flags |= ALLOCATEPERSIST;
3269 }
3270
3271 /*
3272 * Do any position mode specific stuff. The only
3273 * position mode supported now is PEOFPOSMODE
3274 */
3275
3276 switch (alloc_struct.fst_posmode) {
3277 case F_PEOFPOSMODE:
3278 if (alloc_struct.fst_offset != 0) {
3279 error = EINVAL;
3280 goto outdrop;
3281 }
3282
3283 alloc_flags |= ALLOCATEFROMPEOF;
3284 break;
3285
3286 case F_VOLPOSMODE:
3287 if (alloc_struct.fst_offset <= 0) {
3288 error = EINVAL;
3289 goto outdrop;
3290 }
3291
3292 alloc_flags |= ALLOCATEFROMVOL;
3293 break;
3294
3295 default: {
3296 error = EINVAL;
3297 goto outdrop;
3298 }
3299 }
3300 if ((error = vnode_getwithref(vp)) == 0) {
3301 /*
3302 * call allocate to get the space
3303 */
3304 error = VNOP_ALLOCATE(vp, alloc_struct.fst_length, alloc_flags,
3305 &alloc_struct.fst_bytesalloc, alloc_struct.fst_offset,
3306 &context);
3307 (void)vnode_put(vp);
3308
3309 error2 = copyout((caddr_t)&alloc_struct, argp, sizeof(alloc_struct));
3310
3311 if (error == 0) {
3312 error = error2;
3313 }
3314 }
3315 goto outdrop;
3316 }
3317 case F_PUNCHHOLE: {
3318 fpunchhole_t args;
3319
3320 if (fp->f_type != DTYPE_VNODE) {
3321 error = EBADF;
3322 goto out;
3323 }
3324
3325 vp = (struct vnode *)fp_get_data(fp);
3326 proc_fdunlock(p);
3327
3328 /* need write permissions */
3329 if ((fp->f_flag & FWRITE) == 0) {
3330 error = EPERM;
3331 goto outdrop;
3332 }
3333
3334 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
3335 goto outdrop;
3336 }
3337
3338 if ((error = vnode_getwithref(vp))) {
3339 goto outdrop;
3340 }
3341
3342 #if CONFIG_MACF
3343 if ((error = mac_vnode_check_write(&context, fp->fp_glob->fg_cred, vp))) {
3344 (void)vnode_put(vp);
3345 goto outdrop;
3346 }
3347 #endif
3348
3349 error = VNOP_IOCTL(vp, F_PUNCHHOLE, (caddr_t)&args, 0, &context);
3350 (void)vnode_put(vp);
3351
3352 goto outdrop;
3353 }
3354 case F_TRIM_ACTIVE_FILE: {
3355 ftrimactivefile_t args;
3356
3357 if (priv_check_cred(kauth_cred_get(), PRIV_TRIM_ACTIVE_FILE, 0)) {
3358 error = EACCES;
3359 goto out;
3360 }
3361
3362 if (fp->f_type != DTYPE_VNODE) {
3363 error = EBADF;
3364 goto out;
3365 }
3366
3367 vp = (struct vnode *)fp_get_data(fp);
3368 proc_fdunlock(p);
3369
3370 /* need write permissions */
3371 if ((fp->f_flag & FWRITE) == 0) {
3372 error = EPERM;
3373 goto outdrop;
3374 }
3375
3376 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
3377 goto outdrop;
3378 }
3379
3380 if ((error = vnode_getwithref(vp))) {
3381 goto outdrop;
3382 }
3383
3384 error = VNOP_IOCTL(vp, F_TRIM_ACTIVE_FILE, (caddr_t)&args, 0, &context);
3385 (void)vnode_put(vp);
3386
3387 goto outdrop;
3388 }
3389 case F_SPECULATIVE_READ: {
3390 fspecread_t args;
3391 off_t temp_length = 0;
3392
3393 if (fp->f_type != DTYPE_VNODE) {
3394 error = EBADF;
3395 goto out;
3396 }
3397
3398 vp = (struct vnode *)fp_get_data(fp);
3399 proc_fdunlock(p);
3400
3401 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
3402 goto outdrop;
3403 }
3404
3405 /* Discard invalid offsets or lengths */
3406 if ((args.fsr_offset < 0) || (args.fsr_length < 0)) {
3407 error = EINVAL;
3408 goto outdrop;
3409 }
3410
3411 /*
3412 * Round the file offset down to a page-size boundary (or to 0).
3413 * The filesystem will need to round the length up to the end of the page boundary
3414 * or to the EOF of the file.
3415 */
3416 uint64_t foff = (((uint64_t)args.fsr_offset) & ~((uint64_t)PAGE_MASK));
3417 uint64_t foff_delta = args.fsr_offset - foff;
3418 args.fsr_offset = (off_t) foff;
3419
3420 /*
3421 * Now add in the delta to the supplied length. Since we may have adjusted the
3422 * offset, increase it by the amount that we adjusted.
3423 */
3424 if (os_add_overflow(args.fsr_length, foff_delta, &args.fsr_length)) {
3425 error = EOVERFLOW;
3426 goto outdrop;
3427 }
3428
3429 /*
3430 * Make sure (fsr_offset + fsr_length) does not overflow.
3431 */
3432 if (os_add_overflow(args.fsr_offset, args.fsr_length, &temp_length)) {
3433 error = EOVERFLOW;
3434 goto outdrop;
3435 }
3436
3437 if ((error = vnode_getwithref(vp))) {
3438 goto outdrop;
3439 }
3440 error = VNOP_IOCTL(vp, F_SPECULATIVE_READ, (caddr_t)&args, 0, &context);
3441 (void)vnode_put(vp);
3442
3443 goto outdrop;
3444 }
3445 case F_SETSIZE:
3446 if (fp->f_type != DTYPE_VNODE) {
3447 error = EBADF;
3448 goto out;
3449 }
3450 vp = (struct vnode *)fp_get_data(fp);
3451 proc_fdunlock(p);
3452
3453 error = copyin(argp, (caddr_t)&offset, sizeof(off_t));
3454 if (error) {
3455 goto outdrop;
3456 }
3457 AUDIT_ARG(value64, offset);
3458
3459 error = vnode_getwithref(vp);
3460 if (error) {
3461 goto outdrop;
3462 }
3463
3464 #if CONFIG_MACF
3465 error = mac_vnode_check_truncate(&context,
3466 fp->fp_glob->fg_cred, vp);
3467 if (error) {
3468 (void)vnode_put(vp);
3469 goto outdrop;
3470 }
3471 #endif
3472 /*
3473 * Make sure that we are root. Growing a file
3474 * without zero filling the data is a security hole.
3475 */
3476 if (!kauth_cred_issuser(kauth_cred_get())) {
3477 error = EACCES;
3478 } else {
3479 /*
3480 * Require privilege to change file size without zerofill,
3481 * else will change the file size and zerofill it.
3482 */
3483 error = priv_check_cred(kauth_cred_get(), PRIV_VFS_SETSIZE, 0);
3484 if (error == 0) {
3485 error = vnode_setsize(vp, offset, IO_NOZEROFILL, &context);
3486 } else {
3487 error = vnode_setsize(vp, offset, 0, &context);
3488 }
3489
3490 #if CONFIG_MACF
3491 if (error == 0) {
3492 mac_vnode_notify_truncate(&context, fp->fp_glob->fg_cred, vp);
3493 }
3494 #endif
3495 }
3496
3497 (void)vnode_put(vp);
3498 goto outdrop;
3499
3500 case F_RDAHEAD:
3501 if (fp->f_type != DTYPE_VNODE) {
3502 error = EBADF;
3503 goto out;
3504 }
3505 if (uap->arg) {
3506 os_atomic_andnot(&fp->fp_glob->fg_flag, FNORDAHEAD, relaxed);
3507 } else {
3508 os_atomic_or(&fp->fp_glob->fg_flag, FNORDAHEAD, relaxed);
3509 }
3510 goto out;
3511
3512 case F_NOCACHE:
3513 if (fp->f_type != DTYPE_VNODE) {
3514 error = EBADF;
3515 goto out;
3516 }
3517 if (uap->arg) {
3518 os_atomic_or(&fp->fp_glob->fg_flag, FNOCACHE, relaxed);
3519 } else {
3520 os_atomic_andnot(&fp->fp_glob->fg_flag, FNOCACHE, relaxed);
3521 }
3522 goto out;
3523
3524 case F_NODIRECT:
3525 if (fp->f_type != DTYPE_VNODE) {
3526 error = EBADF;
3527 goto out;
3528 }
3529 if (uap->arg) {
3530 os_atomic_or(&fp->fp_glob->fg_flag, FNODIRECT, relaxed);
3531 } else {
3532 os_atomic_andnot(&fp->fp_glob->fg_flag, FNODIRECT, relaxed);
3533 }
3534 goto out;
3535
3536 case F_SINGLE_WRITER:
3537 if (fp->f_type != DTYPE_VNODE) {
3538 error = EBADF;
3539 goto out;
3540 }
3541 if (uap->arg) {
3542 os_atomic_or(&fp->fp_glob->fg_flag, FSINGLE_WRITER, relaxed);
3543 } else {
3544 os_atomic_andnot(&fp->fp_glob->fg_flag, FSINGLE_WRITER, relaxed);
3545 }
3546 goto out;
3547
	case F_GLOBAL_NOCACHE:
		/*
		 * Set or clear the no-cache state on the vnode itself, which
		 * affects every open of the file, not just this fd.  The
		 * previous state is returned to the caller in *retval.  The
		 * fd table lock is dropped before taking a vnode iocount;
		 * "outdrop" (defined outside this excerpt) releases only the
		 * fileproc reference.
		 */
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		if ((error = vnode_getwithref(vp)) == 0) {
			/* report the prior setting back to userspace */
			*retval = vnode_isnocache(vp);

			if (uap->arg) {
				vnode_setnocache(vp);
			} else {
				vnode_clearnocache(vp);
			}

			(void)vnode_put(vp);
		}
		goto outdrop;

	case F_CHECK_OPENEVT:
		/*
		 * Same shape as F_GLOBAL_NOCACHE, but for the vnode's
		 * "open event" state: return the prior state in *retval,
		 * then set (arg != 0) or clear (arg == 0) it.
		 */
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		if ((error = vnode_getwithref(vp)) == 0) {
			*retval = vnode_is_openevt(vp);

			if (uap->arg) {
				vnode_set_openevt(vp);
			} else {
				vnode_clear_openevt(vp);
			}

			(void)vnode_put(vp);
		}
		goto outdrop;
3589
	case F_RDADVISE: {
		/*
		 * Issue a read advisory (offset/length hint) to the
		 * filesystem via VNOP_IOCTL.  The struct radvisory is copied
		 * in from userspace and validated for non-negative offset
		 * and count before being passed down.
		 */
		struct radvisory ra_struct;

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		if ((error = copyin(argp, (caddr_t)&ra_struct, sizeof(ra_struct)))) {
			goto outdrop;
		}
		if (ra_struct.ra_offset < 0 || ra_struct.ra_count < 0) {
			error = EINVAL;
			goto outdrop;
		}
		if ((error = vnode_getwithref(vp)) == 0) {
			error = VNOP_IOCTL(vp, F_RDADVISE, (caddr_t)&ra_struct, 0, &context);

			(void)vnode_put(vp);
		}
		goto outdrop;
	}

	case F_FLUSH_DATA:
		/*
		 * Push this file's dirty data to storage without waiting
		 * (asynchronous fsync via MNT_NOWAIT).
		 */
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		if ((error = vnode_getwithref(vp)) == 0) {
			error = VNOP_FSYNC(vp, MNT_NOWAIT, &context);

			(void)vnode_put(vp);
		}
		goto outdrop;
3630
	case F_LOG2PHYS:
	case F_LOG2PHYS_EXT: {
		/*
		 * Translate a logical file offset to a physical device
		 * offset.  F_LOG2PHYS uses the fd's current file offset and
		 * a single device block; F_LOG2PHYS_EXT takes the offset and
		 * contiguous-byte request from a user-supplied struct
		 * log2phys and reports how many contiguous bytes follow.
		 * (lbn, bn and offset are declared earlier in the enclosing
		 * function, outside this excerpt.)
		 */
		struct log2phys l2p_struct = {}; /* structure for allocate command */
		int devBlockSize;

		off_t file_offset = 0;
		size_t a_size = 0;
		size_t run = 0;

		if (cmd == F_LOG2PHYS_EXT) {
			error = copyin(argp, (caddr_t)&l2p_struct, sizeof(l2p_struct));
			if (error) {
				goto out;
			}
			file_offset = l2p_struct.l2p_devoffset;
		} else {
			file_offset = fp->f_offset;
		}
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);
		if ((error = vnode_getwithref(vp))) {
			goto outdrop;
		}
		/* round the file offset down to a logical block ... */
		error = VNOP_OFFTOBLK(vp, file_offset, &lbn);
		if (error) {
			(void)vnode_put(vp);
			goto outdrop;
		}
		/* ... and back to the byte offset of that block's start */
		error = VNOP_BLKTOOFF(vp, lbn, &offset);
		if (error) {
			(void)vnode_put(vp);
			goto outdrop;
		}
		devBlockSize = vfs_devblocksize(vnode_mount(vp));
		if (cmd == F_LOG2PHYS_EXT) {
			if (l2p_struct.l2p_contigbytes < 0) {
				vnode_put(vp);
				error = EINVAL;
				goto outdrop;
			}

			/* clamp the (non-negative) request so it fits in size_t */
			a_size = (size_t)MIN((uint64_t)l2p_struct.l2p_contigbytes, SIZE_MAX);
		} else {
			a_size = devBlockSize;
		}

		error = VNOP_BLOCKMAP(vp, offset, a_size, &bn, &run, NULL, 0, &context);

		(void)vnode_put(vp);

		if (!error) {
			l2p_struct.l2p_flags = 0; /* for now */
			if (cmd == F_LOG2PHYS_EXT) {
				/* subtract the bytes between the block start and the requested offset */
				l2p_struct.l2p_contigbytes = run - (file_offset - offset);
			} else {
				l2p_struct.l2p_contigbytes = 0; /* for now */
			}

			/*
			 * The block number being -1 suggests that the file offset is not backed
			 * by any real blocks on-disk. As a result, just let it be passed back up wholesale.
			 */
			if (bn == -1) {
				/* Don't multiply it by the block size */
				l2p_struct.l2p_devoffset = bn;
			} else {
				l2p_struct.l2p_devoffset = bn * devBlockSize;
				l2p_struct.l2p_devoffset += file_offset - offset;
			}
			error = copyout((caddr_t)&l2p_struct, argp, sizeof(l2p_struct));
		}
		goto outdrop;
	}
	case F_GETPATH:
	case F_GETPATH_NOFIRMLINK: {
		/*
		 * Copy the file's path into a MAXPATHLEN buffer from the
		 * ZV_NAMEI zone and copy it out to userspace.
		 * F_GETPATH_NOFIRMLINK resolves the path without following
		 * firmlinks (VN_GETPATH_NO_FIRMLINK).
		 */
		char *pathbufp;
		size_t pathlen;

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		pathlen = MAXPATHLEN;
		pathbufp = zalloc(ZV_NAMEI);

		if ((error = vnode_getwithref(vp)) == 0) {
			error = vn_getpath_ext(vp, NULL, pathbufp,
			    &pathlen, cmd == F_GETPATH_NOFIRMLINK ?
			    VN_GETPATH_NO_FIRMLINK : 0);
			(void)vnode_put(vp);

			if (error == 0) {
				/* pathlen was updated to the actual length incl. NUL */
				error = copyout((caddr_t)pathbufp, argp, pathlen);
			}
		}
		zfree(ZV_NAMEI, pathbufp);
		goto outdrop;
	}

	case F_PATHPKG_CHECK: {
		/*
		 * Copy a path in from userspace and ask the VFS layer
		 * whether this vnode lies inside the package rooted at that
		 * path; the answer lands in *retval.
		 */
		char *pathbufp;
		size_t pathlen;

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		pathlen = MAXPATHLEN;
		pathbufp = zalloc(ZV_NAMEI);

		if ((error = copyinstr(argp, pathbufp, MAXPATHLEN, &pathlen)) == 0) {
			if ((error = vnode_getwithref(vp)) == 0) {
				AUDIT_ARG(text, pathbufp);
				error = vn_path_package_check(vp, pathbufp, (int)pathlen, retval);

				(void)vnode_put(vp);
			}
		}
		zfree(ZV_NAMEI, pathbufp);
		goto outdrop;
	}
3762
	case F_CHKCLEAN:        // used by regression tests to see if all dirty pages got cleaned by fsync()
	case F_FULLFSYNC:       // fsync + flush the journal + DKIOCSYNCHRONIZE
	case F_BARRIERFSYNC:    // fsync + barrier
	case F_FREEZE_FS:       // freeze all other fs operations for the fs of this fd
	case F_THAW_FS: {       // thaw all frozen fs operations for the fs of this fd
		/*
		 * All five commands are simply forwarded to the filesystem
		 * via VNOP_IOCTL with no argument; note this group exits with
		 * "break" (common cleanup after the switch) rather than
		 * "goto outdrop".
		 */
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		if ((error = vnode_getwithref(vp)) == 0) {
			error = VNOP_IOCTL(vp, cmd, (caddr_t)NULL, 0, &context);

			(void)vnode_put(vp);
		}
		break;
	}
3782
	/*
	 * SPI (private) for opening a file starting from a dir fd
	 */
	case F_OPENFROM: {
		/* Check if this isn't a valid file descriptor */
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);

		/*
		 * Delegates entirely to sys_fcntl__OPENFROM and returns its
		 * result directly -- note the fd table lock is still held
		 * here, so the callee is responsible for unlocking and for
		 * dropping the fp reference.
		 */
		return sys_fcntl__OPENFROM(p, fd, cmd, uap->arg, fp, vp, retval);
	}

	/*
	 * SPI (private) for unlinking a file starting from a dir fd
	 */
	case F_UNLINKFROM: {
		user_addr_t pathname;

		/*
		 * Check if this isn't a valid file descriptor; the directory
		 * fd must also have been opened with read access.
		 */
		if ((fp->f_type != DTYPE_VNODE) ||
		    (fp->f_flag & FREAD) == 0) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		if (vnode_getwithref(vp)) {
			error = ENOENT;
			goto outdrop;
		}

		/* Only valid for directories */
		if (vp->v_type != VDIR) {
			vnode_put(vp);
			error = ENOTDIR;
			goto outdrop;
		}

		/*
		 * Only entitled apps may use the credentials of the thread
		 * that opened the file descriptor.
		 * Non-entitled threads will use their own context.
		 */
		if (IOCurrentTaskHasEntitlement(ACCOUNT_OPENFROM_ENTITLEMENT)) {
			has_entitlement = 1;
		}

		/* Get flags, mode and pathname arguments. */
		if (IS_64BIT_PROCESS(p)) {
			pathname = (user_addr_t)argp;
		} else {
			pathname = CAST_USER_ADDR_T(argp);
		}

		/* Start the lookup relative to the file descriptor's vnode. */
		error = unlink1(has_entitlement ? &context : vfs_context_current(),
		    vp, pathname, UIO_USERSPACE, 0);

		vnode_put(vp);
		break;
	}
3847
	case F_ADDSIGS:
	case F_ADDFILESIGS:
	case F_ADDFILESIGS_FOR_DYLD_SIM:
	case F_ADDFILESIGS_RETURN:
	case F_ADDFILESIGS_INFO:
	{
		/*
		 * Attach a code-signature blob to this vnode.  F_ADDSIGS
		 * copies the blob from user memory; the F_ADDFILESIGS
		 * variants read it from the file itself.  _RETURN and _INFO
		 * additionally copy results back out to the caller's
		 * struct user_fsignatures.
		 */
		struct cs_blob *blob = NULL;
		struct user_fsignatures fs;
		kern_return_t kr;
		vm_offset_t kernel_blob_addr;
		vm_size_t kernel_blob_size;
		int blob_add_flags = 0;
		/* only copy in up to the first output field of the struct */
		const size_t sizeof_fs = (cmd == F_ADDFILESIGS_INFO ?
		    offsetof(struct user_fsignatures, fs_cdhash /* first output element */) :
		    offsetof(struct user_fsignatures, fs_fsignatures_size /* compat */));

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		if (cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
			blob_add_flags |= MAC_VNODE_CHECK_DYLD_SIM;
			/* dyld_sim processes must die on signature invalidation */
			if ((proc_getcsflags(p) & CS_KILL) == 0) {
				proc_lock(p);
				proc_csflags_set(p, CS_KILL);
				proc_unlock(p);
			}
		}

		error = vnode_getwithref(vp);
		if (error) {
			goto outdrop;
		}

		if (IS_64BIT_PROCESS(p)) {
			error = copyin(argp, &fs, sizeof_fs);
		} else {
			/* F_ADDFILESIGS_INFO is 64-bit only */
			if (cmd == F_ADDFILESIGS_INFO) {
				error = EINVAL;
				vnode_put(vp);
				goto outdrop;
			}

			struct user32_fsignatures fs32;

			error = copyin(argp, &fs32, sizeof(fs32));
			fs.fs_file_start = fs32.fs_file_start;
			fs.fs_blob_start = CAST_USER_ADDR_T(fs32.fs_blob_start);
			fs.fs_blob_size = fs32.fs_blob_size;
		}

		if (error) {
			vnode_put(vp);
			goto outdrop;
		}

		/*
		 * First check if we have something loaded at this offset
		 */
		blob = ubc_cs_blob_get(vp, CPU_TYPE_ANY, CPU_SUBTYPE_ANY, fs.fs_file_start);
		if (blob != NULL) {
			/* If this is for dyld_sim revalidate the blob */
			if (cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
				error = ubc_cs_blob_revalidate(vp, blob, NULL, blob_add_flags, proc_platform(p));
				if (error) {
					blob = NULL;
					/* EAGAIN means: re-add the blob below */
					if (error != EAGAIN) {
						vnode_put(vp);
						goto outdrop;
					}
				}
			}
		}

		if (blob == NULL) {
			/*
			 * An arbitrary limit, to prevent someone from mapping in a 20GB blob. This should cover
			 * our use cases for the immediate future, but note that at the time of this commit, some
			 * platforms are nearing 2MB blob sizes (with a prior soft limit of 2.5MB).
			 *
			 * We should consider how we can manage this more effectively; the above means that some
			 * platforms are using megabytes of memory for signing data; it merely hasn't crossed the
			 * threshold considered ridiculous at the time of this change.
			 */
#define CS_MAX_BLOB_SIZE (40ULL * 1024ULL * 1024ULL)
			if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
				error = E2BIG;
				vnode_put(vp);
				goto outdrop;
			}

			kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
			kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
			if (kr != KERN_SUCCESS || kernel_blob_size < fs.fs_blob_size) {
				error = ENOMEM;
				vnode_put(vp);
				goto outdrop;
			}

			if (cmd == F_ADDSIGS) {
				/* blob is supplied directly in user memory */
				error = copyin(fs.fs_blob_start,
				    (void *) kernel_blob_addr,
				    fs.fs_blob_size);
			} else { /* F_ADDFILESIGS || F_ADDFILESIGS_RETURN || F_ADDFILESIGS_FOR_DYLD_SIM || F_ADDFILESIGS_INFO */
				/* blob is read out of the file itself */
				int resid;

				error = vn_rdwr(UIO_READ,
				    vp,
				    (caddr_t) kernel_blob_addr,
				    (int)kernel_blob_size,
				    fs.fs_file_start + fs.fs_blob_start,
				    UIO_SYSSPACE,
				    0,
				    kauth_cred_get(),
				    &resid,
				    p);
				if ((error == 0) && resid) {
					/* kernel_blob_size rounded to a page size, but signature may be at end of file */
					memset((void *)(kernel_blob_addr + (kernel_blob_size - resid)), 0x0, resid);
				}
			}

			if (error) {
				ubc_cs_blob_deallocate(kernel_blob_addr,
				    kernel_blob_size);
				vnode_put(vp);
				goto outdrop;
			}

			blob = NULL;
			error = ubc_cs_blob_add(vp,
			    proc_platform(p),
			    CPU_TYPE_ANY,                           /* not for a specific architecture */
			    CPU_SUBTYPE_ANY,
			    fs.fs_file_start,
			    &kernel_blob_addr,
			    kernel_blob_size,
			    NULL,
			    blob_add_flags,
			    &blob);

			/* ubc_blob_add() has consumed "kernel_blob_addr" if it is zeroed */
			if (error) {
				if (kernel_blob_addr) {
					ubc_cs_blob_deallocate(kernel_blob_addr,
					    kernel_blob_size);
				}
				vnode_put(vp);
				goto outdrop;
			} else {
#if CHECK_CS_VALIDATION_BITMAP
				ubc_cs_validation_bitmap_allocate( vp );
#endif
			}
		}

		if (cmd == F_ADDFILESIGS_RETURN || cmd == F_ADDFILESIGS_FOR_DYLD_SIM ||
		    cmd == F_ADDFILESIGS_INFO) {
			/*
			 * The first element of the structure is a
			 * off_t that happen to have the same size for
			 * all archs. Lets overwrite that.
			 */
			off_t end_offset = 0;
			if (blob) {
				end_offset = blob->csb_end_offset;
			}
			error = copyout(&end_offset, argp, sizeof(end_offset));

			if (error) {
				vnode_put(vp);
				goto outdrop;
			}
		}

		if (cmd == F_ADDFILESIGS_INFO) {
			/* Return information. What we copy out depends on the size of the
			 * passed in structure, to keep binary compatibility. */

			if (fs.fs_fsignatures_size >= sizeof(struct user_fsignatures)) {
				// enough room for fs_cdhash[20]+fs_hash_type

				if (blob != NULL) {
					error = copyout(blob->csb_cdhash,
					    (vm_address_t)argp + offsetof(struct user_fsignatures, fs_cdhash),
					    USER_FSIGNATURES_CDHASH_LEN);
					if (error) {
						vnode_put(vp);
						goto outdrop;
					}
					int hashtype = cs_hash_type(blob->csb_hashtype);
					error = copyout(&hashtype,
					    (vm_address_t)argp + offsetof(struct user_fsignatures, fs_hash_type),
					    sizeof(int));
					if (error) {
						vnode_put(vp);
						goto outdrop;
					}
				}
			}
		}

		(void) vnode_put(vp);
		break;
	}
#if CONFIG_SUPPLEMENTAL_SIGNATURES
	case F_ADDFILESUPPL:
	{
		/*
		 * Attach a supplemental code signature read from this fd's
		 * file (vp) to the "original" file identified by
		 * fs.fs_orig_fd (ivp).  64-bit callers only.  Holds
		 * iocounts on both vnodes plus a reference on the original
		 * fileproc; the dropboth label unwinds all three before
		 * falling through to outdrop.
		 */
		struct vnode *ivp;
		struct cs_blob *blob = NULL;
		struct user_fsupplement fs;
		int orig_fd;
		struct fileproc* orig_fp = NULL;
		kern_return_t kr;
		vm_offset_t kernel_blob_addr;
		vm_size_t kernel_blob_size;

		if (!IS_64BIT_PROCESS(p)) {
			error = EINVAL;
			goto out; // drop fp and unlock fds
		}

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}

		error = copyin(argp, &fs, sizeof(fs));
		if (error) {
			goto out;
		}

		orig_fd = fs.fs_orig_fd;
		if ((error = fp_lookup(p, orig_fd, &orig_fp, 1))) {
			printf("CODE SIGNING: Failed to find original file for supplemental signature attachment\n");
			goto out;
		}

		if (orig_fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			fp_drop(p, orig_fd, orig_fp, 1);
			goto out;
		}

		ivp = (struct vnode *)fp_get_data(orig_fp);

		vp = (struct vnode *)fp_get_data(fp);

		proc_fdunlock(p);

		error = vnode_getwithref(ivp);
		if (error) {
			fp_drop(p, orig_fd, orig_fp, 0);
			goto outdrop; //drop fp
		}

		error = vnode_getwithref(vp);
		if (error) {
			vnode_put(ivp);
			fp_drop(p, orig_fd, orig_fp, 0);
			goto outdrop;
		}

		/* same blob-size cap as the F_ADDSIGS family above */
		if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
			error = E2BIG;
			goto dropboth; // drop iocounts on vp and ivp, drop orig_fp then drop fp via outdrop
		}

		kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
		kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
		if (kr != KERN_SUCCESS) {
			error = ENOMEM;
			goto dropboth;
		}

		/* read the supplemental blob out of this fd's file */
		int resid;
		error = vn_rdwr(UIO_READ, vp,
		    (caddr_t)kernel_blob_addr, (int)kernel_blob_size,
		    fs.fs_file_start + fs.fs_blob_start,
		    UIO_SYSSPACE, 0,
		    kauth_cred_get(), &resid, p);
		if ((error == 0) && resid) {
			/* kernel_blob_size rounded to a page size, but signature may be at end of file */
			memset((void *)(kernel_blob_addr + (kernel_blob_size - resid)), 0x0, resid);
		}

		if (error) {
			ubc_cs_blob_deallocate(kernel_blob_addr,
			    kernel_blob_size);
			goto dropboth;
		}

		error = ubc_cs_blob_add_supplement(vp, ivp, fs.fs_file_start,
		    &kernel_blob_addr, kernel_blob_size, &blob);

		/* ubc_blob_add_supplement() has consumed kernel_blob_addr if it is zeroed */
		if (error) {
			if (kernel_blob_addr) {
				ubc_cs_blob_deallocate(kernel_blob_addr,
				    kernel_blob_size);
			}
			goto dropboth;
		}
		vnode_put(ivp);
		vnode_put(vp);
		fp_drop(p, orig_fd, orig_fp, 0);
		break;

dropboth:
		vnode_put(ivp);
		vnode_put(vp);
		fp_drop(p, orig_fd, orig_fp, 0);
		goto outdrop;
	}
#endif
	case F_GETCODEDIR:
	case F_FINDSIGS: {
		/* Not implemented in this kernel */
		error = ENOTSUP;
		goto out;
	}
	case F_CHECK_LV: {
		/*
		 * Check library validation for this fileglob via MACF.
		 * The input struct differs for 32/64-bit callers; with
		 * CONFIG_MACF disabled the check is a no-op and error keeps
		 * the copyin result (0).
		 */
		struct fileglob *fg;
		fchecklv_t lv = {};

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		fg = fp->fp_glob;
		proc_fdunlock(p);

		if (IS_64BIT_PROCESS(p)) {
			error = copyin(argp, &lv, sizeof(lv));
		} else {
			struct user32_fchecklv lv32 = {};

			error = copyin(argp, &lv32, sizeof(lv32));
			lv.lv_file_start = lv32.lv_file_start;
			lv.lv_error_message = (void *)(uintptr_t)lv32.lv_error_message;
			lv.lv_error_message_size = lv32.lv_error_message_size;
		}
		if (error) {
			goto outdrop;
		}

#if CONFIG_MACF
		error = mac_file_check_library_validation(p, fg, lv.lv_file_start,
		    (user_long_t)lv.lv_error_message, lv.lv_error_message_size);
#endif

		break;
	}
	case F_GETSIGSINFO: {
		/*
		 * Query properties of the code-signature blob at the given
		 * file offset.  Currently only GETSIGSINFO_PLATFORM_BINARY
		 * is supported; the answer is written back into the
		 * caller's struct fgetsigsinfo.
		 */
		struct cs_blob *blob = NULL;
		fgetsigsinfo_t sigsinfo = {};

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		error = vnode_getwithref(vp);
		if (error) {
			goto outdrop;
		}

		error = copyin(argp, &sigsinfo, sizeof(sigsinfo));
		if (error) {
			vnode_put(vp);
			goto outdrop;
		}

		blob = ubc_cs_blob_get(vp, CPU_TYPE_ANY, CPU_SUBTYPE_ANY, sigsinfo.fg_file_start);
		if (blob == NULL) {
			error = ENOENT;
			vnode_put(vp);
			goto outdrop;
		}
		switch (sigsinfo.fg_info_request) {
		case GETSIGSINFO_PLATFORM_BINARY:
			sigsinfo.fg_sig_is_platform = blob->csb_platform_binary;
			/* copy out just the answer field, in place in the user struct */
			error = copyout(&sigsinfo.fg_sig_is_platform,
			    (vm_address_t)argp + offsetof(struct fgetsigsinfo, fg_sig_is_platform),
			    sizeof(sigsinfo.fg_sig_is_platform));
			if (error) {
				vnode_put(vp);
				goto outdrop;
			}
			break;
		default:
			error = EINVAL;
			vnode_put(vp);
			goto outdrop;
		}
		vnode_put(vp);
		break;
	}
#if CONFIG_PROTECT
	case F_GETPROTECTIONCLASS: {
		/*
		 * Return the file's data-protection class in *retval, read
		 * through VNOP_GETATTR (va_dataprotect_class).  ENOTSUP if
		 * the filesystem doesn't report that attribute.
		 */
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);

		proc_fdunlock(p);

		if (vnode_getwithref(vp)) {
			error = ENOENT;
			goto outdrop;
		}

		struct vnode_attr va;

		VATTR_INIT(&va);
		VATTR_WANTED(&va, va_dataprotect_class);
		error = VNOP_GETATTR(vp, &va, &context);
		if (!error) {
			if (VATTR_IS_SUPPORTED(&va, va_dataprotect_class)) {
				*retval = va.va_dataprotect_class;
			} else {
				error = ENOTSUP;
			}
		}

		vnode_put(vp);
		break;
	}

	case F_SETPROTECTIONCLASS: {
		/*
		 * Set the file's data-protection class from uap->arg.
		 * Requires write access to the file; the class value itself
		 * is validated by the filesystem in VNOP_SETATTR.
		 */
		/* tmp must be a valid PROTECTION_CLASS_* */
		tmp = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);

		proc_fdunlock(p);

		if (vnode_getwithref(vp)) {
			error = ENOENT;
			goto outdrop;
		}

		/* Only go forward if you have write access */
		vfs_context_t ctx = vfs_context_current();
		if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
			vnode_put(vp);
			error = EBADF;
			goto outdrop;
		}

		struct vnode_attr va;

		VATTR_INIT(&va);
		VATTR_SET(&va, va_dataprotect_class, tmp);

		error = VNOP_SETATTR(vp, &va, ctx);

		vnode_put(vp);
		break;
	}

	case F_TRANSCODEKEY: {
		/*
		 * Ask the filesystem to transcode the file's wrapped
		 * content-protection key into a caller-provided buffer.
		 * The key bytes are kept in a zeroed kernel buffer of
		 * CP_MAX_WRAPPEDKEYSIZE and copied out on success, with the
		 * length returned in *retval.
		 */
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}

		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		if (vnode_getwithref(vp)) {
			error = ENOENT;
			goto outdrop;
		}

		cp_key_t k = {
			.len = CP_MAX_WRAPPEDKEYSIZE,
		};

		k.key = kalloc_data(CP_MAX_WRAPPEDKEYSIZE, Z_WAITOK | Z_ZERO);
		if (k.key == NULL) {
			error = ENOMEM;
		} else {
			error = VNOP_IOCTL(vp, F_TRANSCODEKEY, (caddr_t)&k, 1, &context);
		}

		vnode_put(vp);

		if (error == 0) {
			error = copyout(k.key, argp, k.len);
			*retval = k.len;
		}
		kfree_data(k.key, CP_MAX_WRAPPEDKEYSIZE);

		break;
	}

	case F_GETPROTECTIONLEVEL: {
		/* Forward to the filesystem; result lands in *retval. */
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}

		vp = (struct vnode*)fp_get_data(fp);
		proc_fdunlock(p);

		if (vnode_getwithref(vp)) {
			error = ENOENT;
			goto outdrop;
		}

		error = VNOP_IOCTL(vp, F_GETPROTECTIONLEVEL, (caddr_t)retval, 0, &context);

		vnode_put(vp);
		break;
	}

	case F_GETDEFAULTPROTLEVEL: {
		/* Forward to the filesystem; result lands in *retval. */
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}

		vp = (struct vnode*)fp_get_data(fp);
		proc_fdunlock(p);

		if (vnode_getwithref(vp)) {
			error = ENOENT;
			goto outdrop;
		}

		/*
		 * if cp_get_major_vers fails, error will be set to proper errno
		 * and cp_version will still be 0.
		 */

		error = VNOP_IOCTL(vp, F_GETDEFAULTPROTLEVEL, (caddr_t)retval, 0, &context);

		vnode_put(vp);
		break;
	}

#endif /* CONFIG_PROTECT */
4400
	case F_MOVEDATAEXTENTS: {
		/*
		 * Privileged SPI: exchange the data extents of this fd's
		 * file with those of the fd passed in uap->arg, via
		 * VNOP_EXCHANGE with FSOPT_EXCHANGE_DATA_ONLY.  Requires
		 * both files to be regular, writable, on the same HFS+ or
		 * APFS mount, and distinct.
		 */
		struct fileproc *fp2 = NULL;
		struct vnode *src_vp = NULLVP;
		struct vnode *dst_vp = NULLVP;
		/* We need to grab the 2nd FD out of the arguments before moving on. */
		int fd2 = CAST_DOWN_EXPLICIT(int32_t, uap->arg);

		error = priv_check_cred(kauth_cred_get(), PRIV_VFS_MOVE_DATA_EXTENTS, 0);
		if (error) {
			goto out;
		}

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}

		/*
		 * For now, special case HFS+ and APFS only, since this
		 * is SPI.
		 */
		src_vp = (struct vnode *)fp_get_data(fp);
		if (src_vp->v_tag != VT_HFS && src_vp->v_tag != VT_APFS) {
			error = ENOTSUP;
			goto out;
		}

		/*
		 * Get the references before we start acquiring iocounts on the vnodes,
		 * while we still hold the proc fd lock
		 */
		if ((error = fp_lookup(p, fd2, &fp2, 1))) {
			error = EBADF;
			goto out;
		}
		if (fp2->f_type != DTYPE_VNODE) {
			fp_drop(p, fd2, fp2, 1);
			error = EBADF;
			goto out;
		}
		dst_vp = (struct vnode *)fp_get_data(fp2);
		if (dst_vp->v_tag != VT_HFS && dst_vp->v_tag != VT_APFS) {
			fp_drop(p, fd2, fp2, 1);
			error = ENOTSUP;
			goto out;
		}

#if CONFIG_MACF
		/* Re-do MAC checks against the new FD, pass in a fake argument */
		error = mac_file_check_fcntl(kauth_cred_get(), fp2->fp_glob, cmd, 0);
		if (error) {
			fp_drop(p, fd2, fp2, 1);
			goto out;
		}
#endif
		/* Audit the 2nd FD */
		AUDIT_ARG(fd, fd2);

		/* from here on, fp_drop is called with locked == 0 (fd lock released) */
		proc_fdunlock(p);

		if (vnode_getwithref(src_vp)) {
			fp_drop(p, fd2, fp2, 0);
			error = ENOENT;
			goto outdrop;
		}
		if (vnode_getwithref(dst_vp)) {
			vnode_put(src_vp);
			fp_drop(p, fd2, fp2, 0);
			error = ENOENT;
			goto outdrop;
		}

		/*
		 * Basic asserts; validate they are not the same and that
		 * both live on the same filesystem.
		 */
		if (dst_vp == src_vp) {
			vnode_put(src_vp);
			vnode_put(dst_vp);
			fp_drop(p, fd2, fp2, 0);
			error = EINVAL;
			goto outdrop;
		}

		if (dst_vp->v_mount != src_vp->v_mount) {
			vnode_put(src_vp);
			vnode_put(dst_vp);
			fp_drop(p, fd2, fp2, 0);
			error = EXDEV;
			goto outdrop;
		}

		/* Now we have a legit pair of FDs. Go to work */

		/* Now check for write access to the target files */
		if (vnode_authorize(src_vp, NULLVP,
		    (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
			vnode_put(src_vp);
			vnode_put(dst_vp);
			fp_drop(p, fd2, fp2, 0);
			error = EBADF;
			goto outdrop;
		}

		if (vnode_authorize(dst_vp, NULLVP,
		    (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
			vnode_put(src_vp);
			vnode_put(dst_vp);
			fp_drop(p, fd2, fp2, 0);
			error = EBADF;
			goto outdrop;
		}

		/* Verify that both vps point to files and not directories */
		if (!vnode_isreg(src_vp) || !vnode_isreg(dst_vp)) {
			error = EINVAL;
			vnode_put(src_vp);
			vnode_put(dst_vp);
			fp_drop(p, fd2, fp2, 0);
			goto outdrop;
		}

		/*
		 * The exchangedata syscall handler passes in 0 for the flags to VNOP_EXCHANGE.
		 * We'll pass in our special bit indicating that the new behavior is expected
		 */

		error = VNOP_EXCHANGE(src_vp, dst_vp, FSOPT_EXCHANGE_DATA_ONLY, &context);

		vnode_put(src_vp);
		vnode_put(dst_vp);
		fp_drop(p, fd2, fp2, 0);
		break;
	}
4535
	case F_TRANSFEREXTENTS: {
		/*
		 * Transfer extents from this fd's file to the fd in
		 * uap->arg, delegated to the filesystem via VNOP_IOCTL.
		 * Same validation shape as F_MOVEDATAEXTENTS, but restricted
		 * to APFS and with no privilege check.
		 */
		struct fileproc *fp2 = NULL;
		struct vnode *src_vp = NULLVP;
		struct vnode *dst_vp = NULLVP;

		/* Get 2nd FD out of the arguments. */
		int fd2 = CAST_DOWN_EXPLICIT(int, uap->arg);
		if (fd2 < 0) {
			error = EINVAL;
			goto out;
		}

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}

		/*
		 * Only allow this for APFS
		 */
		src_vp = (struct vnode *)fp_get_data(fp);
		if (src_vp->v_tag != VT_APFS) {
			error = ENOTSUP;
			goto out;
		}

		/*
		 * Get the references before we start acquiring iocounts on the vnodes,
		 * while we still hold the proc fd lock
		 */
		if ((error = fp_lookup(p, fd2, &fp2, 1))) {
			error = EBADF;
			goto out;
		}
		if (fp2->f_type != DTYPE_VNODE) {
			fp_drop(p, fd2, fp2, 1);
			error = EBADF;
			goto out;
		}
		dst_vp = (struct vnode *)fp_get_data(fp2);
		if (dst_vp->v_tag != VT_APFS) {
			fp_drop(p, fd2, fp2, 1);
			error = ENOTSUP;
			goto out;
		}

#if CONFIG_MACF
		/* Re-do MAC checks against the new FD, pass in a fake argument */
		error = mac_file_check_fcntl(kauth_cred_get(), fp2->fp_glob, cmd, 0);
		if (error) {
			fp_drop(p, fd2, fp2, 1);
			goto out;
		}
#endif
		/* Audit the 2nd FD */
		AUDIT_ARG(fd, fd2);

		/* from here on, fp_drop is called with locked == 0 (fd lock released) */
		proc_fdunlock(p);

		if (vnode_getwithref(src_vp)) {
			fp_drop(p, fd2, fp2, 0);
			error = ENOENT;
			goto outdrop;
		}
		if (vnode_getwithref(dst_vp)) {
			vnode_put(src_vp);
			fp_drop(p, fd2, fp2, 0);
			error = ENOENT;
			goto outdrop;
		}

		/*
		 * Validate they are not the same and that
		 * both live on the same filesystem.
		 */
		if (dst_vp == src_vp) {
			vnode_put(src_vp);
			vnode_put(dst_vp);
			fp_drop(p, fd2, fp2, 0);
			error = EINVAL;
			goto outdrop;
		}
		if (dst_vp->v_mount != src_vp->v_mount) {
			vnode_put(src_vp);
			vnode_put(dst_vp);
			fp_drop(p, fd2, fp2, 0);
			error = EXDEV;
			goto outdrop;
		}

		/* Verify that both vps point to files and not directories */
		if (!vnode_isreg(src_vp) || !vnode_isreg(dst_vp)) {
			error = EINVAL;
			vnode_put(src_vp);
			vnode_put(dst_vp);
			fp_drop(p, fd2, fp2, 0);
			goto outdrop;
		}


		/*
		 * Okay, vps are legit. Check access. We'll require write access
		 * to both files.
		 */
		if (vnode_authorize(src_vp, NULLVP,
		    (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
			vnode_put(src_vp);
			vnode_put(dst_vp);
			fp_drop(p, fd2, fp2, 0);
			error = EBADF;
			goto outdrop;
		}
		if (vnode_authorize(dst_vp, NULLVP,
		    (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
			vnode_put(src_vp);
			vnode_put(dst_vp);
			fp_drop(p, fd2, fp2, 0);
			error = EBADF;
			goto outdrop;
		}

		/* Pass it on through to the fs */
		error = VNOP_IOCTL(src_vp, cmd, (caddr_t)dst_vp, 0, &context);

		vnode_put(src_vp);
		vnode_put(dst_vp);
		fp_drop(p, fd2, fp2, 0);
		break;
	}
4665
	/*
	 * SPI for making a file compressed.
	 */
	case F_MAKECOMPRESSED: {
		/*
		 * Forward a "make compressed" request to the filesystem via
		 * VNOP_IOCTL, passing the generation counter from uap->arg.
		 * Only valid on regular files or symlinks, and requires
		 * write access.
		 */
		uint32_t gcounter = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}

		vp = (struct vnode*)fp_get_data(fp);
		proc_fdunlock(p);

		/* get the vnode */
		if (vnode_getwithref(vp)) {
			error = ENOENT;
			goto outdrop;
		}

		/* Is it a file? */
		if ((vnode_isreg(vp) == 0) && (vnode_islnk(vp) == 0)) {
			vnode_put(vp);
			error = EBADF;
			goto outdrop;
		}

		/* invoke ioctl to pass off to FS */
		/* Only go forward if you have write access */
		vfs_context_t ctx = vfs_context_current();
		if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
			vnode_put(vp);
			error = EBADF;
			goto outdrop;
		}

		error = VNOP_IOCTL(vp, cmd, (caddr_t)&gcounter, 0, &context);

		vnode_put(vp);
		break;
	}
4707
	/*
	 * SPI (private) for indicating to a filesystem that subsequent writes to
	 * the open FD will be written to the Fastflow.
	 */
	case F_SET_GREEDY_MODE:
	/* intentionally drop through to the same handler as F_SETSTATIC.
	 * both fcntls should pass the argument and their selector into VNOP_IOCTL.
	 */

	/*
	 * SPI (private) for indicating to a filesystem that subsequent writes to
	 * the open FD will represent static content.
	 */
	case F_SETSTATICCONTENT: {
		/*
		 * arg is treated as a boolean and forwarded to the
		 * filesystem as (caddr_t)1 or NULL, together with the
		 * command selector.  Requires write access.
		 */
		caddr_t ioctl_arg = NULL;

		if (uap->arg) {
			ioctl_arg = (caddr_t) 1;
		}

		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		error = vnode_getwithref(vp);
		if (error) {
			error = ENOENT;
			goto outdrop;
		}

		/* Only go forward if you have write access */
		vfs_context_t ctx = vfs_context_current();
		if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
			vnode_put(vp);
			error = EBADF;
			goto outdrop;
		}

		error = VNOP_IOCTL(vp, cmd, ioctl_arg, 0, &context);
		(void)vnode_put(vp);

		break;
	}

	/*
	 * SPI (private) for indicating to the lower level storage driver that the
	 * subsequent writes should be of a particular IO type (burst, greedy, static),
	 * or other flavors that may be necessary.
	 */
	case F_SETIOTYPE: {
		caddr_t param_ptr;
		uint32_t param;

		if (uap->arg) {
			/* extract 32 bits of flags from userland */
			param_ptr = (caddr_t) uap->arg;
			param = (uint32_t) param_ptr;
		} else {
			/* If no argument is specified, error out */
			error = EINVAL;
			goto out;
		}

		/*
		 * Validate the different types of flags that can be specified:
		 * all of them are mutually exclusive for now.
		 */
		switch (param) {
		case F_IOTYPE_ISOCHRONOUS:
			break;

		default:
			error = EINVAL;
			goto out;
		}


		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}
		vp = (struct vnode *)fp_get_data(fp);
		proc_fdunlock(p);

		error = vnode_getwithref(vp);
		if (error) {
			error = ENOENT;
			goto outdrop;
		}

		/* Only go forward if you have write access */
		vfs_context_t ctx = vfs_context_current();
		if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
			vnode_put(vp);
			error = EBADF;
			goto outdrop;
		}

		/* the validated flags word is passed down by address */
		error = VNOP_IOCTL(vp, cmd, param_ptr, 0, &context);
		(void)vnode_put(vp);

		break;
	}
4814
	/*
	 * Set the vnode pointed to by 'fd'
	 * and tag it as the (potentially future) backing store
	 * for another filesystem
	 */
	case F_SETBACKINGSTORE: {
		/*
		 * HFS-only: forward the set/clear request to the filesystem
		 * via VNOP_IOCTL with a boolean argument derived from
		 * uap->arg.  Requires write access.
		 */
		if (fp->f_type != DTYPE_VNODE) {
			error = EBADF;
			goto out;
		}

		vp = (struct vnode *)fp_get_data(fp);

		if (vp->v_tag != VT_HFS) {
			error = EINVAL;
			goto out;
		}
		proc_fdunlock(p);

		if (vnode_getwithref(vp)) {
			error = ENOENT;
			goto outdrop;
		}

		/* only proceed if you have write access */
		vfs_context_t ctx = vfs_context_current();
		if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
			vnode_put(vp);
			error = EBADF;
			goto outdrop;
		}


		/* If arg != 0, set, otherwise unset */
		if (uap->arg) {
			error = VNOP_IOCTL(vp, cmd, (caddr_t)1, 0, &context);
		} else {
			error = VNOP_IOCTL(vp, cmd, (caddr_t)NULL, 0, &context);
		}

		vnode_put(vp);
		break;
	}
4858
4859 /*
4860 * like F_GETPATH, but special semantics for
4861 * the mobile time machine handler.
4862 */
4863 case F_GETPATH_MTMINFO: {
4864 char *pathbufp;
4865 int pathlen;
4866
4867 if (fp->f_type != DTYPE_VNODE) {
4868 error = EBADF;
4869 goto out;
4870 }
4871 vp = (struct vnode *)fp_get_data(fp);
4872 proc_fdunlock(p);
4873
4874 pathlen = MAXPATHLEN;
4875 pathbufp = zalloc(ZV_NAMEI);
4876
4877 if ((error = vnode_getwithref(vp)) == 0) {
4878 int backingstore = 0;
4879
4880 /* Check for error from vn_getpath before moving on */
4881 if ((error = vn_getpath(vp, pathbufp, &pathlen)) == 0) {
4882 if (vp->v_tag == VT_HFS) {
4883 error = VNOP_IOCTL(vp, cmd, (caddr_t) &backingstore, 0, &context);
4884 }
4885 (void)vnode_put(vp);
4886
4887 if (error == 0) {
4888 error = copyout((caddr_t)pathbufp, argp, pathlen);
4889 }
4890 if (error == 0) {
4891 /*
4892 * If the copyout was successful, now check to ensure
4893 * that this vnode is not a BACKINGSTORE vnode. mtmd
4894 * wants the path regardless.
4895 */
4896 if (backingstore) {
4897 error = EBUSY;
4898 }
4899 }
4900 } else {
4901 (void)vnode_put(vp);
4902 }
4903 }
4904
4905 zfree(ZV_NAMEI, pathbufp);
4906 goto outdrop;
4907 }
4908
4909 case F_RECYCLE: {
4910 #if !DEBUG && !DEVELOPMENT
4911 bool allowed = false;
4912
4913 //
4914 // non-debug and non-development kernels have restrictions
4915 // on who can all this fcntl. the process has to be marked
4916 // with the dataless-manipulator entitlement and either the
4917 // process or thread have to be marked rapid-aging.
4918 //
4919 if (!vfs_context_is_dataless_manipulator(&context)) {
4920 error = EPERM;
4921 goto out;
4922 }
4923
4924 proc_t proc = vfs_context_proc(&context);
4925 if (proc && (proc->p_lflag & P_LRAGE_VNODES)) {
4926 allowed = true;
4927 } else {
4928 thread_t thr = vfs_context_thread(&context);
4929 if (thr) {
4930 struct uthread *ut = get_bsdthread_info(thr);
4931
4932 if (ut && (ut->uu_flag & UT_RAGE_VNODES)) {
4933 allowed = true;
4934 }
4935 }
4936 }
4937 if (!allowed) {
4938 error = EPERM;
4939 goto out;
4940 }
4941 #endif
4942
4943 if (fp->f_type != DTYPE_VNODE) {
4944 error = EBADF;
4945 goto out;
4946 }
4947 vp = (struct vnode *)fp_get_data(fp);
4948 proc_fdunlock(p);
4949
4950 vnode_recycle(vp);
4951 break;
4952 }
4953
4954 #if CONFIG_FILE_LEASES
4955 case F_SETLEASE: {
4956 struct fileglob *fg;
4957 int fl_type;
4958 int expcounts;
4959
4960 if (fp->f_type != DTYPE_VNODE) {
4961 error = EBADF;
4962 goto out;
4963 }
4964 vp = (struct vnode *)fp_get_data(fp);
4965 fg = fp->fp_glob;;
4966 proc_fdunlock(p);
4967
4968 /*
4969 * In order to allow a process to avoid breaking
4970 * its own leases, the expected open count needs
4971 * to be provided to F_SETLEASE when placing write lease.
4972 * Similarly, in order to allow a process to place a read lease
4973 * after opening the file multiple times in RW mode, the expected
4974 * write count needs to be provided to F_SETLEASE when placing a
4975 * read lease.
4976 *
4977 * We use the upper 30 bits of the integer argument (way more than
4978 * enough) as the expected open/write count.
4979 *
4980 * If the caller passed 0 for the expected open count,
4981 * assume 1.
4982 */
4983 fl_type = CAST_DOWN_EXPLICIT(int, uap->arg);
4984 expcounts = (unsigned int)fl_type >> 2;
4985 fl_type &= 3;
4986
4987 if (fl_type == F_WRLCK && expcounts == 0) {
4988 expcounts = 1;
4989 }
4990
4991 AUDIT_ARG(value32, fl_type);
4992
4993 if ((error = vnode_getwithref(vp))) {
4994 goto outdrop;
4995 }
4996
4997 /*
4998 * Only support for regular file/dir mounted on local-based filesystem.
4999 */
5000 if ((vnode_vtype(vp) != VREG && vnode_vtype(vp) != VDIR) ||
5001 !(vfs_flags(vnode_mount(vp)) & MNT_LOCAL)) {
5002 error = EBADF;
5003 vnode_put(vp);
5004 goto outdrop;
5005 }
5006
5007 /* For directory, we only support read lease. */
5008 if (vnode_vtype(vp) == VDIR && fl_type == F_WRLCK) {
5009 error = ENOTSUP;
5010 vnode_put(vp);
5011 goto outdrop;
5012 }
5013
5014 switch (fl_type) {
5015 case F_RDLCK:
5016 case F_WRLCK:
5017 case F_UNLCK:
5018 error = vnode_setlease(vp, fg, fl_type, expcounts,
5019 vfs_context_current());
5020 break;
5021 default:
5022 error = EINVAL;
5023 break;
5024 }
5025
5026 vnode_put(vp);
5027 goto outdrop;
5028 }
5029
5030 case F_GETLEASE: {
5031 if (fp->f_type != DTYPE_VNODE) {
5032 error = EBADF;
5033 goto out;
5034 }
5035 vp = (struct vnode *)fp_get_data(fp);
5036 proc_fdunlock(p);
5037
5038 if ((error = vnode_getwithref(vp))) {
5039 goto outdrop;
5040 }
5041
5042 if ((vnode_vtype(vp) != VREG && vnode_vtype(vp) != VDIR) ||
5043 !(vfs_flags(vnode_mount(vp)) & MNT_LOCAL)) {
5044 error = EBADF;
5045 vnode_put(vp);
5046 goto outdrop;
5047 }
5048
5049 error = 0;
5050 *retval = vnode_getlease(vp);
5051 vnode_put(vp);
5052 goto outdrop;
5053 }
5054 #endif /* CONFIG_FILE_LEASES */
5055
5056 /* SPI (private) for asserting background access to a file */
5057 case F_ASSERT_BG_ACCESS:
5058 /* SPI (private) for releasing background access to a file */
5059 case F_RELEASE_BG_ACCESS: {
5060 /*
5061 * Check if the process is platform code, which means
5062 * that it is considered part of the Operating System.
5063 */
5064 if (!csproc_get_platform_binary(p)) {
5065 error = EPERM;
5066 goto out;
5067 }
5068
5069 if (fp->f_type != DTYPE_VNODE) {
5070 error = EBADF;
5071 goto out;
5072 }
5073
5074 vp = (struct vnode *)fp_get_data(fp);
5075 proc_fdunlock(p);
5076
5077 if (vnode_getwithref(vp)) {
5078 error = ENOENT;
5079 goto outdrop;
5080 }
5081
5082 /* Verify that vp points to a file and not a directory */
5083 if (!vnode_isreg(vp)) {
5084 vnode_put(vp);
5085 error = EINVAL;
5086 goto outdrop;
5087 }
5088
5089 /* Only proceed if you have write access */
5090 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
5091 vnode_put(vp);
5092 error = EBADF;
5093 goto outdrop;
5094 }
5095
5096 if (cmd == F_ASSERT_BG_ACCESS) {
5097 fassertbgaccess_t args;
5098
5099 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
5100 vnode_put(vp);
5101 goto outdrop;
5102 }
5103
5104 error = VNOP_IOCTL(vp, F_ASSERT_BG_ACCESS, (caddr_t)&args, 0, &context);
5105 } else {
5106 // cmd == F_RELEASE_BG_ACCESS
5107 error = VNOP_IOCTL(vp, F_RELEASE_BG_ACCESS, (caddr_t)NULL, 0, &context);
5108 }
5109
5110 vnode_put(vp);
5111
5112 goto outdrop;
5113 }
5114
5115 default:
5116 /*
5117 * This is an fcntl() that we d not recognize at this level;
5118 * if this is a vnode, we send it down into the VNOP_IOCTL
5119 * for this vnode; this can include special devices, and will
5120 * effectively overload fcntl() to send ioctl()'s.
5121 */
5122 if ((cmd & IOC_VOID) && (cmd & IOC_INOUT)) {
5123 error = EINVAL;
5124 goto out;
5125 }
5126
5127 /* Catch any now-invalid fcntl() selectors */
5128 switch (cmd) {
5129 case (int)APFSIOC_REVERT_TO_SNAPSHOT:
5130 case (int)FSIOC_FIOSEEKHOLE:
5131 case (int)FSIOC_FIOSEEKDATA:
5132 case (int)FSIOC_CAS_BSDFLAGS:
5133 case (int)FSIOC_AUTH_FS:
5134 case HFS_GET_BOOT_INFO:
5135 case HFS_SET_BOOT_INFO:
5136 case FIOPINSWAP:
5137 case F_MARKDEPENDENCY:
5138 case TIOCREVOKE:
5139 case TIOCREVOKECLEAR:
5140 error = EINVAL;
5141 goto out;
5142 default:
5143 break;
5144 }
5145
5146 if (fp->f_type != DTYPE_VNODE) {
5147 error = EBADF;
5148 goto out;
5149 }
5150 vp = (struct vnode *)fp_get_data(fp);
5151 proc_fdunlock(p);
5152
5153 if ((error = vnode_getwithref(vp)) == 0) {
5154 #define STK_PARAMS 128
5155 char stkbuf[STK_PARAMS] = {0};
5156 unsigned int size;
5157 caddr_t data, memp;
5158 /*
5159 * For this to work properly, we have to copy in the
5160 * ioctl() cmd argument if there is one; we must also
5161 * check that a command parameter, if present, does
5162 * not exceed the maximum command length dictated by
5163 * the number of bits we have available in the command
5164 * to represent a structure length. Finally, we have
5165 * to copy the results back out, if it is that type of
5166 * ioctl().
5167 */
5168 size = IOCPARM_LEN(cmd);
5169 if (size > IOCPARM_MAX) {
5170 (void)vnode_put(vp);
5171 error = EINVAL;
5172 break;
5173 }
5174
5175 memp = NULL;
5176 if (size > sizeof(stkbuf)) {
5177 memp = (caddr_t)kalloc_data(size, Z_WAITOK);
5178 if (memp == 0) {
5179 (void)vnode_put(vp);
5180 error = ENOMEM;
5181 goto outdrop;
5182 }
5183 data = memp;
5184 } else {
5185 data = &stkbuf[0];
5186 }
5187
5188 if (cmd & IOC_IN) {
5189 if (size) {
5190 /* structure */
5191 error = copyin(argp, data, size);
5192 if (error) {
5193 (void)vnode_put(vp);
5194 if (memp) {
5195 kfree_data(memp, size);
5196 }
5197 goto outdrop;
5198 }
5199
5200 /* Bzero the section beyond that which was needed */
5201 if (size <= sizeof(stkbuf)) {
5202 bzero((((uint8_t*)data) + size), (sizeof(stkbuf) - size));
5203 }
5204 } else {
5205 /* int */
5206 if (is64bit) {
5207 *(user_addr_t *)data = argp;
5208 } else {
5209 *(uint32_t *)data = (uint32_t)argp;
5210 }
5211 };
5212 } else if ((cmd & IOC_OUT) && size) {
5213 /*
5214 * Zero the buffer so the user always
5215 * gets back something deterministic.
5216 */
5217 bzero(data, size);
5218 } else if (cmd & IOC_VOID) {
5219 if (is64bit) {
5220 *(user_addr_t *)data = argp;
5221 } else {
5222 *(uint32_t *)data = (uint32_t)argp;
5223 }
5224 }
5225
5226 error = VNOP_IOCTL(vp, cmd, CAST_DOWN(caddr_t, data), 0, &context);
5227
5228 (void)vnode_put(vp);
5229
5230 /* Copy any output data to user */
5231 if (error == 0 && (cmd & IOC_OUT) && size) {
5232 error = copyout(data, argp, size);
5233 }
5234 if (memp) {
5235 kfree_data(memp, size);
5236 }
5237 }
5238 break;
5239 }
5240
5241 outdrop:
5242 return sys_fcntl_outdrop(p, fd, fp, vp, error);
5243
5244 out:
5245 return sys_fcntl_out(p, fd, fp, error);
5246 }
5247
5248
5249 /*
5250 * sys_close
5251 *
5252 * Description: The implementation of the close(2) system call
5253 *
5254 * Parameters: p Process in whose per process file table
5255 * the close is to occur
5256 * uap->fd fd to be closed
5257 * retval <unused>
5258 *
5259 * Returns: 0 Success
5260 * fp_lookup:EBADF Bad file descriptor
5261 * fp_guard_exception:??? Guarded file descriptor
5262 * close_internal:EBADF
5263 * close_internal:??? Anything returnable by a per-fileops
5264 * close function
5265 */
int
sys_close(proc_t p, struct close_args *uap, __unused int32_t *retval)
{
	/*
	 * close(2) is a cancellation point: honor any pending pthread
	 * cancellation before doing the actual close.
	 */
	__pthread_testcancel(1);
	return close_nocancel(p, uap->fd);
}
5272
int
sys_close_nocancel(proc_t p, struct close_nocancel_args *uap, __unused int32_t *retval)
{
	/* Non-cancellable variant of close(2); same path, no cancellation check. */
	return close_nocancel(p, uap->fd);
}
5278
/*
 * close_nocancel
 *
 * Description: Shared implementation for close(2) and close_nocancel(2);
 *		looks the descriptor up under the proc fd lock and closes it.
 *
 * Parameters:	p			Process whose fd table is used
 *		fd			The descriptor to close
 *
 * Returns:	0			Success
 *		EBADF			Bad file descriptor
 *		fp_guard_exception:???	Guarded file descriptor
 *		fp_close_and_unlock:???	Errors from the per-fileops close
 */
int
close_nocancel(proc_t p, int fd)
{
	struct fileproc *fp;

	AUDIT_SYSCLOSE(p, fd);

	proc_fdlock(p);
	if ((fp = fp_get_noref_locked(p, fd)) == NULL) {
		proc_fdunlock(p);
		return EBADF;
	}

	/*
	 * A descriptor guarded against close may not be closed directly;
	 * deliver the guard exception instead.
	 */
	if (fp_isguarded(fp, GUARD_CLOSE)) {
		int error = fp_guard_exception(p, fd, fp, kGUARD_EXC_CLOSE);
		proc_fdunlock(p);
		return error;
	}

	/* fp_close_and_unlock() drops the fd lock on all paths. */
	return fp_close_and_unlock(p, fd, fp, 0);
}
5300
5301
5302 /*
5303 * fstat
5304 *
5305 * Description: Return status information about a file descriptor.
5306 *
5307 * Parameters: p The process doing the fstat
5308 * fd The fd to stat
5309 * ub The user stat buffer
5310 * xsecurity The user extended security
5311 * buffer, or 0 if none
5312 * xsecurity_size The size of xsecurity, or 0
5313 * if no xsecurity
5314 * isstat64 Flag to indicate 64 bit version
5315 * for inode size, etc.
5316 *
5317 * Returns: 0 Success
5318 * EBADF
5319 * EFAULT
5320 * fp_lookup:EBADF Bad file descriptor
5321 * vnode_getwithref:???
5322 * copyout:EFAULT
5323 * vnode_getwithref:???
5324 * vn_stat:???
5325 * soo_stat:???
5326 * pipe_stat:???
5327 * pshm_stat:???
5328 * kqueue_stat:???
5329 *
5330 * Notes: Internal implementation for all other fstat() related
5331 * functions
5332 *
5333 * XXX switch on node type is bogus; need a stat in struct
5334 * XXX fileops instead.
5335 */
static int
fstat(proc_t p, int fd, user_addr_t ub, user_addr_t xsecurity,
    user_addr_t xsecurity_size, int isstat64)
{
	struct fileproc *fp;
	/* Kernel-internal stat result; which member is valid depends on isstat64. */
	union {
		struct stat sb;
		struct stat64 sb64;
	} source;
	/* User-ABI-shaped copy of the result; selected by isstat64 + process bitness. */
	union {
		struct user64_stat user64_sb;
		struct user32_stat user32_sb;
		struct user64_stat64 user64_sb64;
		struct user32_stat64 user32_sb64;
	} dest;
	int error, my_size;
	file_type_t type;
	caddr_t data;
	kauth_filesec_t fsec;
	user_size_t xsecurity_bufsize;
	vfs_context_t ctx = vfs_context_current();
	void * sbptr;


	AUDIT_ARG(fd, fd);

	if ((error = fp_lookup(p, fd, &fp, 0)) != 0) {
		return error;
	}
	type = fp->f_type;
	data = (caddr_t)fp_get_data(fp);
	fsec = KAUTH_FILESEC_NONE;

	sbptr = (void *)&source;

	/* Dispatch to the stat routine of the subsystem backing this fd. */
	switch (type) {
	case DTYPE_VNODE:
		if ((error = vnode_getwithref((vnode_t)data)) == 0) {
			/*
			 * If the caller has the file open, and is not
			 * requesting extended security information, we are
			 * going to let them get the basic stat information.
			 */
			if (xsecurity == USER_ADDR_NULL) {
				error = vn_stat_noauth((vnode_t)data, sbptr, NULL, isstat64, 0, ctx,
				    fp->fp_glob->fg_cred);
			} else {
				/* vn_stat() may allocate fsec; freed at "out" below. */
				error = vn_stat((vnode_t)data, sbptr, &fsec, isstat64, 0, ctx);
			}

			AUDIT_ARG(vnpath, (struct vnode *)data, ARG_VNODE1);
			(void)vnode_put((vnode_t)data);
		}
		break;

#if SOCKETS
	case DTYPE_SOCKET:
		error = soo_stat((struct socket *)data, sbptr, isstat64);
		break;
#endif /* SOCKETS */

	case DTYPE_PIPE:
		error = pipe_stat((void *)data, sbptr, isstat64);
		break;

	case DTYPE_PSXSHM:
		error = pshm_stat((void *)data, sbptr, isstat64);
		break;

	case DTYPE_KQUEUE:
		error = kqueue_stat((void *)data, sbptr, isstat64, p);
		break;

	default:
		error = EBADF;
		goto out;
	}
	if (error == 0) {
		caddr_t sbp;

		/*
		 * Convert the kernel stat into the user-visible layout for
		 * this process' ABI, zeroing spare fields so no kernel stack
		 * contents leak to userspace.
		 */
		if (isstat64 != 0) {
			source.sb64.st_lspare = 0;
			source.sb64.st_qspare[0] = 0LL;
			source.sb64.st_qspare[1] = 0LL;

			if (IS_64BIT_PROCESS(p)) {
				munge_user64_stat64(&source.sb64, &dest.user64_sb64);
				my_size = sizeof(dest.user64_sb64);
				sbp = (caddr_t)&dest.user64_sb64;
			} else {
				munge_user32_stat64(&source.sb64, &dest.user32_sb64);
				my_size = sizeof(dest.user32_sb64);
				sbp = (caddr_t)&dest.user32_sb64;
			}
		} else {
			source.sb.st_lspare = 0;
			source.sb.st_qspare[0] = 0LL;
			source.sb.st_qspare[1] = 0LL;
			if (IS_64BIT_PROCESS(p)) {
				munge_user64_stat(&source.sb, &dest.user64_sb);
				my_size = sizeof(dest.user64_sb);
				sbp = (caddr_t)&dest.user64_sb;
			} else {
				munge_user32_stat(&source.sb, &dest.user32_sb);
				my_size = sizeof(dest.user32_sb);
				sbp = (caddr_t)&dest.user32_sb;
			}
		}

		error = copyout(sbp, ub, my_size);
	}

	/* caller wants extended security information? */
	if (xsecurity != USER_ADDR_NULL) {
		/* did we get any? */
		if (fsec == KAUTH_FILESEC_NONE) {
			if (susize(xsecurity_size, 0) != 0) {
				error = EFAULT;
				goto out;
			}
		} else {
			/* find the user buffer size */
			xsecurity_bufsize = fusize(xsecurity_size);

			/* copy out the actual data size */
			if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
				error = EFAULT;
				goto out;
			}

			/* if the caller supplied enough room, copy out to it */
			if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec)) {
				error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
			}
		}
	}
out:
	fp_drop(p, fd, fp, 0);
	/* Release any filesec allocated by vn_stat() above. */
	if (fsec != NULL) {
		kauth_filesec_free(fsec);
	}
	return error;
}
5479
5480
5481 /*
5482 * sys_fstat_extended
5483 *
5484 * Description: Extended version of fstat supporting returning extended
5485 * security information
5486 *
5487 * Parameters: p The process doing the fstat
5488 * uap->fd The fd to stat
5489 * uap->ub The user stat buffer
5490 * uap->xsecurity The user extended security
5491 * buffer, or 0 if none
5492 * uap->xsecurity_size The size of xsecurity, or 0
5493 *
5494 * Returns: 0 Success
5495 * !0 Errno (see fstat)
5496 */
int
sys_fstat_extended(proc_t p, struct fstat_extended_args *uap, __unused int32_t *retval)
{
	/* isstat64 == 0: 32-bit-inode "struct stat" layout. */
	return fstat(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 0);
}
5502
5503
5504 /*
5505 * sys_fstat
5506 *
5507 * Description: Get file status for the file associated with fd
5508 *
5509 * Parameters: p The process doing the fstat
5510 * uap->fd The fd to stat
5511 * uap->ub The user stat buffer
5512 *
5513 * Returns: 0 Success
5514 * !0 Errno (see fstat)
5515 */
int
sys_fstat(proc_t p, struct fstat_args *uap, __unused int32_t *retval)
{
	/* No extended security buffer; 32-bit-inode "struct stat" layout. */
	return fstat(p, uap->fd, uap->ub, 0, 0, 0);
}
5521
5522
5523 /*
5524 * sys_fstat64_extended
5525 *
5526 * Description: Extended version of fstat64 supporting returning extended
5527 * security information
5528 *
5529 * Parameters: p The process doing the fstat
5530 * uap->fd The fd to stat
5531 * uap->ub The user stat buffer
5532 * uap->xsecurity The user extended security
5533 * buffer, or 0 if none
5534 * uap->xsecurity_size The size of xsecurity, or 0
5535 *
5536 * Returns: 0 Success
5537 * !0 Errno (see fstat)
5538 */
int
sys_fstat64_extended(proc_t p, struct fstat64_extended_args *uap, __unused int32_t *retval)
{
	/* isstat64 == 1: 64-bit-inode "struct stat64" layout. */
	return fstat(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 1);
}
5544
5545
5546 /*
5547 * sys_fstat64
5548 *
5549 * Description: Get 64 bit version of the file status for the file associated
5550 * with fd
5551 *
5552 * Parameters: p The process doing the fstat
5553 * uap->fd The fd to stat
5554 * uap->ub The user stat buffer
5555 *
5556 * Returns: 0 Success
5557 * !0 Errno (see fstat)
5558 */
int
sys_fstat64(proc_t p, struct fstat64_args *uap, __unused int32_t *retval)
{
	/* No extended security buffer; 64-bit-inode "struct stat64" layout. */
	return fstat(p, uap->fd, uap->ub, 0, 0, 1);
}
5564
5565
5566 /*
5567 * sys_fpathconf
5568 *
5569 * Description: Return pathconf information about a file descriptor.
5570 *
5571 * Parameters: p Process making the request
5572 * uap->fd fd to get information about
5573 * uap->name Name of information desired
5574 * retval Pointer to the call return area
5575 *
5576 * Returns: 0 Success
5577 * EINVAL
5578 * fp_lookup:EBADF Bad file descriptor
5579 * vnode_getwithref:???
5580 * vn_pathconf:???
5581 *
5582 * Implicit returns:
5583 * *retval (modified) Returned information (numeric)
5584 */
5585 int
5586 sys_fpathconf(proc_t p, struct fpathconf_args *uap, int32_t *retval)
5587 {
5588 int fd = uap->fd;
5589 struct fileproc *fp;
5590 struct vnode *vp;
5591 int error = 0;
5592 file_type_t type;
5593
5594
5595 AUDIT_ARG(fd, uap->fd);
5596 if ((error = fp_lookup(p, fd, &fp, 0))) {
5597 return error;
5598 }
5599 type = fp->f_type;
5600
5601 switch (type) {
5602 case DTYPE_SOCKET:
5603 if (uap->name != _PC_PIPE_BUF) {
5604 error = EINVAL;
5605 goto out;
5606 }
5607 *retval = PIPE_BUF;
5608 error = 0;
5609 goto out;
5610
5611 case DTYPE_PIPE:
5612 if (uap->name != _PC_PIPE_BUF) {
5613 error = EINVAL;
5614 goto out;
5615 }
5616 *retval = PIPE_BUF;
5617 error = 0;
5618 goto out;
5619
5620 case DTYPE_VNODE:
5621 vp = (struct vnode *)fp_get_data(fp);
5622
5623 if ((error = vnode_getwithref(vp)) == 0) {
5624 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5625
5626 error = vn_pathconf(vp, uap->name, retval, vfs_context_current());
5627
5628 (void)vnode_put(vp);
5629 }
5630 goto out;
5631
5632 default:
5633 error = EINVAL;
5634 goto out;
5635 }
5636 /*NOTREACHED*/
5637 out:
5638 fp_drop(p, fd, fp, 0);
5639 return error;
5640 }
5641
5642 /*
5643 * sys_flock
5644 *
5645 * Description: Apply an advisory lock on a file descriptor.
5646 *
5647 * Parameters: p Process making request
5648 * uap->fd fd on which the lock is to be
5649 * attempted
5650 * uap->how (Un)Lock bits, including type
5651 * retval Pointer to the call return area
5652 *
5653 * Returns: 0 Success
5654 * fp_getfvp:EBADF Bad file descriptor
5655 * fp_getfvp:ENOTSUP fd does not refer to a vnode
5656 * vnode_getwithref:???
5657 * VNOP_ADVLOCK:???
5658 *
5659 * Implicit returns:
5660 * *retval (modified) Size of dtable
5661 *
5662 * Notes: Just attempt to get a record lock of the requested type on
5663 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
5664 */
5665 int
5666 sys_flock(proc_t p, struct flock_args *uap, __unused int32_t *retval)
5667 {
5668 int fd = uap->fd;
5669 int how = uap->how;
5670 struct fileproc *fp;
5671 struct vnode *vp;
5672 struct flock lf;
5673 vfs_context_t ctx = vfs_context_current();
5674 int error = 0;
5675
5676 AUDIT_ARG(fd, uap->fd);
5677 if ((error = fp_getfvp(p, fd, &fp, &vp))) {
5678 return error;
5679 }
5680 if ((error = vnode_getwithref(vp))) {
5681 goto out1;
5682 }
5683 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5684
5685 lf.l_whence = SEEK_SET;
5686 lf.l_start = 0;
5687 lf.l_len = 0;
5688 if (how & LOCK_UN) {
5689 lf.l_type = F_UNLCK;
5690 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
5691 goto out;
5692 }
5693 if (how & LOCK_EX) {
5694 lf.l_type = F_WRLCK;
5695 } else if (how & LOCK_SH) {
5696 lf.l_type = F_RDLCK;
5697 } else {
5698 error = EBADF;
5699 goto out;
5700 }
5701 #if CONFIG_MACF
5702 error = mac_file_check_lock(kauth_cred_get(), fp->fp_glob, F_SETLK, &lf);
5703 if (error) {
5704 goto out;
5705 }
5706 #endif
5707 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_SETLK, &lf,
5708 (how & LOCK_NB ? F_FLOCK : F_FLOCK | F_WAIT),
5709 ctx, NULL);
5710 if (!error) {
5711 os_atomic_or(&fp->fp_glob->fg_flag, FWASLOCKED, relaxed);
5712 }
5713 out:
5714 (void)vnode_put(vp);
5715 out1:
5716 fp_drop(p, fd, fp, 0);
5717 return error;
5718 }
5719
5720 /*
5721 * sys_fileport_makeport
5722 *
5723 * Description: Obtain a Mach send right for a given file descriptor.
5724 *
5725 * Parameters: p Process calling fileport
5726 * uap->fd The fd to reference
5727 * uap->portnamep User address at which to place port name.
5728 *
5729 * Returns: 0 Success.
5730 * EBADF Bad file descriptor.
5731 * EINVAL File descriptor had type that cannot be sent, misc. other errors.
5732 * EFAULT Address at which to store port name is not valid.
5733 * EAGAIN Resource shortage.
5734 *
5735 * Implicit returns:
5736 * On success, name of send right is stored at user-specified address.
5737 */
int
sys_fileport_makeport(proc_t p, struct fileport_makeport_args *uap,
    __unused int *retval)
{
	int err;
	int fd = uap->fd;
	user_addr_t user_portaddr = uap->portnamep;
	struct fileproc *fp = FILEPROC_NULL;
	struct fileglob *fg = NULL;
	ipc_port_t fileport;
	mach_port_name_t name = MACH_PORT_NULL;

	proc_fdlock(p);
	err = fp_lookup(p, fd, &fp, 1);
	if (err != 0) {
		goto out_unlock;
	}

	/* Only descriptor types that may cross process boundaries qualify. */
	fg = fp->fp_glob;
	if (!fg_sendable(fg)) {
		err = EINVAL;
		goto out_unlock;
	}

	if (fp_isguarded(fp, GUARD_FILEPORT)) {
		err = fp_guard_exception(p, fd, fp, kGUARD_EXC_FILEPORT);
		goto out_unlock;
	}

	/* Dropped when port is deallocated */
	fg_ref(p, fg);

	proc_fdunlock(p);

	/* Allocate and initialize a port */
	fileport = fileport_alloc(fg);
	if (fileport == IPC_PORT_NULL) {
		fg_drop_live(fg);
		err = EAGAIN;
		goto out;
	}

	/* Add an entry.  Deallocates port on failure. */
	name = ipc_port_copyout_send(fileport, get_task_ipcspace(proc_task(p)));
	if (!MACH_PORT_VALID(name)) {
		err = EINVAL;
		goto out;
	}

	/* Hand the send right's name back to userspace. */
	err = copyout(&name, user_portaddr, sizeof(mach_port_name_t));
	if (err != 0) {
		goto out;
	}

	/* Tag the fileglob for debugging purposes */
	lck_mtx_lock_spin(&fg->fg_lock);
	fg->fg_lflags |= FG_PORTMADE;
	lck_mtx_unlock(&fg->fg_lock);

	fp_drop(p, fd, fp, 0);

	return 0;

out_unlock:
	proc_fdunlock(p);
out:
	/* On failure, undo the port-name insertion (if it happened). */
	if (MACH_PORT_VALID(name)) {
		/* Don't care if another thread races us to deallocate the entry */
		(void) mach_port_deallocate(get_task_ipcspace(proc_task(p)), name);
	}

	if (fp != FILEPROC_NULL) {
		fp_drop(p, fd, fp, 0);
	}

	return err;
}
5815
/*
 * fileport_releasefg
 *
 * Description: Drop the fileglob reference held by a fileport when the
 *		port goes away; no owning process is associated (PROC_NULL).
 */
void
fileport_releasefg(struct fileglob *fg)
{
	(void)fg_drop(PROC_NULL, fg);
}
5821
5822 /*
5823 * fileport_makefd
5824 *
5825 * Description: Obtain the file descriptor for a given Mach send right.
5826 *
5827 * Returns: 0 Success
5828 * EINVAL Invalid Mach port name, or port is not for a file.
5829 * fdalloc:EMFILE
5830 * fdalloc:ENOMEM Unable to allocate fileproc or extend file table.
5831 *
5832 * Implicit returns:
5833 * *retval (modified) The new descriptor
5834 */
int
fileport_makefd(proc_t p, ipc_port_t port, fileproc_flags_t fp_flags, int *retval)
{
	struct fileglob *fg;
	struct fileproc *fp = FILEPROC_NULL;
	int fd;
	int err;

	/* The port must actually be a fileport wrapping a fileglob. */
	fg = fileport_port_to_fileglob(port);
	if (fg == NULL) {
		err = EINVAL;
		goto out;
	}

	fp = fileproc_alloc_init();

	proc_fdlock(p);
	err = fdalloc(p, 0, &fd);
	if (err != 0) {
		proc_fdunlock(p);
		goto out;
	}
	/* e.g. FP_CLOEXEC when created via sys_fileport_makefd(). */
	if (fp_flags) {
		fp->fp_flags |= fp_flags;
	}

	/* Bind the new fileproc to the port's fileglob, taking a reference. */
	fp->fp_glob = fg;
	fg_ref(p, fg);

	procfdtbl_releasefd(p, fd, fp);
	proc_fdunlock(p);

	*retval = fd;
	err = 0;
out:
	/* On any failure path, free the (unpublished) fileproc. */
	if ((fp != NULL) && (0 != err)) {
		fileproc_free(fp);
	}

	return err;
}
5876
5877 /*
5878 * sys_fileport_makefd
5879 *
5880 * Description: Obtain the file descriptor for a given Mach send right.
5881 *
5882 * Parameters: p Process calling fileport
5883 * uap->port Name of send right to file port.
5884 *
5885 * Returns: 0 Success
5886 * EINVAL Invalid Mach port name, or port is not for a file.
5887 * fdalloc:EMFILE
5888 * fdalloc:ENOMEM Unable to allocate fileproc or extend file table.
5889 *
5890 * Implicit returns:
5891 * *retval (modified) The new descriptor
5892 */
int
sys_fileport_makefd(proc_t p, struct fileport_makefd_args *uap, int32_t *retval)
{
	ipc_port_t port = IPC_PORT_NULL;
	mach_port_name_t send = uap->port;
	kern_return_t res;
	int err;

	/* Convert the caller's port name into a send right we can inspect. */
	res = ipc_object_copyin(get_task_ipcspace(proc_task(p)),
	    send, MACH_MSG_TYPE_COPY_SEND, &port, 0, NULL, IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND);

	if (res == KERN_SUCCESS) {
		/* New descriptors adopted from fileports are close-on-exec. */
		err = fileport_makefd(p, port, FP_CLOEXEC, retval);
	} else {
		err = EINVAL;
	}

	/* Release the send right obtained by the copyin above. */
	if (IPC_PORT_NULL != port) {
		ipc_port_release_send(port);
	}

	return err;
}
5916
5917
5918 #pragma mark fileops wrappers
5919
5920 /*
5921 * fo_read
5922 *
5923 * Description: Generic fileops read indirected through the fileops pointer
5924 * in the fileproc structure
5925 *
5926 * Parameters: fp fileproc structure pointer
5927 * uio user I/O structure pointer
5928 * flags FOF_ flags
5929 * ctx VFS context for operation
5930 *
5931 * Returns: 0 Success
5932 * !0 Errno from read
5933 */
int
fo_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
{
	/* Indirect through the per-type fileops table. */
	return (*fp->f_ops->fo_read)(fp, uio, flags, ctx);
}
5939
5940 int
5941 fo_no_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5942 {
5943 #pragma unused(fp, uio, flags, ctx)
5944 return ENXIO;
5945 }
5946
5947
5948 /*
5949 * fo_write
5950 *
5951 * Description: Generic fileops write indirected through the fileops pointer
5952 * in the fileproc structure
5953 *
5954 * Parameters: fp fileproc structure pointer
5955 * uio user I/O structure pointer
5956 * flags FOF_ flags
5957 * ctx VFS context for operation
5958 *
5959 * Returns: 0 Success
5960 * !0 Errno from write
5961 */
int
fo_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
{
	/* Indirect through the per-type fileops table. */
	return (*fp->f_ops->fo_write)(fp, uio, flags, ctx);
}
5967
5968 int
5969 fo_no_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5970 {
5971 #pragma unused(fp, uio, flags, ctx)
5972 return ENXIO;
5973 }
5974
5975
5976 /*
5977 * fo_ioctl
5978 *
5979 * Description: Generic fileops ioctl indirected through the fileops pointer
5980 * in the fileproc structure
5981 *
5982 * Parameters: fp fileproc structure pointer
5983 * com ioctl command
5984 * data pointer to internalized copy
5985 * of user space ioctl command
5986 * parameter data in kernel space
5987 * ctx VFS context for operation
5988 *
5989 * Returns: 0 Success
5990 * !0 Errno from ioctl
5991 *
5992 * Locks: The caller is assumed to have held the proc_fdlock; this
5993 * function releases and reacquires this lock. If the caller
5994 * accesses data protected by this lock prior to calling this
5995 * function, it will need to revalidate/reacquire any cached
5996 * protected data obtained prior to the call.
5997 */
int
fo_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
{
	int error;

	/*
	 * The proc fd lock may not be held across the (potentially blocking)
	 * indirected ioctl; drop it for the call and retake it on the way out.
	 * Callers must revalidate any state cached under the lock.
	 */
	proc_fdunlock(vfs_context_proc(ctx));
	error = (*fp->f_ops->fo_ioctl)(fp, com, data, ctx);
	proc_fdlock(vfs_context_proc(ctx));
	return error;
}
6008
6009 int
6010 fo_no_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
6011 {
6012 #pragma unused(fp, com, data, ctx)
6013 return ENOTTY;
6014 }
6015
6016
6017 /*
6018 * fo_select
6019 *
6020 * Description: Generic fileops select indirected through the fileops pointer
6021 * in the fileproc structure
6022 *
6023 * Parameters: fp fileproc structure pointer
6024 * which select which
6025 * wql pointer to wait queue list
6026 * ctx VFS context for operation
6027 *
6028 * Returns: 0 Success
6029 * !0 Errno from select
6030 */
int
fo_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
{
	/* Indirect through the per-type fileops table. */
	return (*fp->f_ops->fo_select)(fp, which, wql, ctx);
}
6036
6037 int
6038 fo_no_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
6039 {
6040 #pragma unused(fp, which, wql, ctx)
6041 return ENOTSUP;
6042 }
6043
6044
6045 /*
6046 * fo_close
6047 *
6048 * Description: Generic fileops close indirected through the fileops pointer
6049 * in the fileproc structure
6050 *
6051 * Parameters: fp fileproc structure pointer for
6052 * file to close
6053 * ctx VFS context for operation
6054 *
6055 * Returns: 0 Success
6056 * !0 Errno from close
6057 */
6058 int
6059 fo_close(struct fileglob *fg, vfs_context_t ctx)
6060 {
6061 return (*fg->fg_ops->fo_close)(fg, ctx);
6062 }
6063
6064
6065 /*
6066 * fo_drain
6067 *
6068 * Description: Generic fileops kqueue filter indirected through the fileops
6069 * pointer in the fileproc structure
6070 *
6071 * Parameters: fp fileproc structure pointer
6072 * ctx VFS context for operation
6073 *
6074 * Returns: 0 Success
6075 * !0 errno from drain
6076 */
6077 int
6078 fo_drain(struct fileproc *fp, vfs_context_t ctx)
6079 {
6080 return (*fp->f_ops->fo_drain)(fp, ctx);
6081 }
6082
6083 int
6084 fo_no_drain(struct fileproc *fp, vfs_context_t ctx)
6085 {
6086 #pragma unused(fp, ctx)
6087 return ENOTSUP;
6088 }
6089
6090
6091 /*
6092 * fo_kqfilter
6093 *
6094 * Description: Generic fileops kqueue filter indirected through the fileops
6095 * pointer in the fileproc structure
6096 *
6097 * Parameters: fp fileproc structure pointer
6098 * kn pointer to knote to filter on
6099 *
6100 * Returns: (kn->kn_flags & EV_ERROR) error in kn->kn_data
6101 * 0 Filter is not active
6102 * !0 Filter is active
6103 */
int
fo_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
{
	/* Indirect through the per-type fileops table. */
	return (*fp->f_ops->fo_kqfilter)(fp, kn, kev);
}
6109
6110 int
6111 fo_no_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
6112 {
6113 #pragma unused(fp, kev)
6114 knote_set_error(kn, ENOTSUP);
6115 return 0;
6116 }
Cache object: 79e6c158e3ed6d392573e87d900d7247
|