1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 2007-2009 Google Inc. and Amit Singh
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
9 * met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following disclaimer
15 * in the documentation and/or other materials provided with the
16 * distribution.
17 * * Neither the name of Google Inc. nor the names of its
18 * contributors may be used to endorse or promote products derived from
19 * this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Copyright (C) 2005 Csaba Henk.
34 * All rights reserved.
35 *
36 * Copyright (c) 2019 The FreeBSD Foundation
37 *
38 * Portions of this software were developed by BFF Storage Systems, LLC under
39 * sponsorship from the FreeBSD Foundation.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 *
50 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 */
62
63 #include <sys/cdefs.h>
64 __FBSDID("$FreeBSD$");
65
66 #include <sys/param.h>
67 #include <sys/module.h>
68 #include <sys/systm.h>
69 #include <sys/errno.h>
70 #include <sys/kernel.h>
71 #include <sys/conf.h>
72 #include <sys/uio.h>
73 #include <sys/malloc.h>
74 #include <sys/queue.h>
75 #include <sys/limits.h>
76 #include <sys/lock.h>
77 #include <sys/rwlock.h>
78 #include <sys/sx.h>
79 #include <sys/proc.h>
80 #include <sys/mount.h>
81 #include <sys/vnode.h>
82 #include <sys/namei.h>
83 #include <sys/extattr.h>
84 #include <sys/stat.h>
85 #include <sys/unistd.h>
86 #include <sys/filedesc.h>
87 #include <sys/file.h>
88 #include <sys/fcntl.h>
89 #include <sys/dirent.h>
90 #include <sys/bio.h>
91 #include <sys/buf.h>
92 #include <sys/sysctl.h>
93 #include <sys/vmmeter.h>
94
95 #include <vm/vm.h>
96 #include <vm/vm_extern.h>
97 #include <vm/pmap.h>
98 #include <vm/vm_map.h>
99 #include <vm/vm_page.h>
100 #include <vm/vm_param.h>
101 #include <vm/vm_object.h>
102 #include <vm/vm_pager.h>
103 #include <vm/vnode_pager.h>
104 #include <vm/vm_object.h>
105
106 #include "fuse.h"
107 #include "fuse_file.h"
108 #include "fuse_internal.h"
109 #include "fuse_ipc.h"
110 #include "fuse_node.h"
111 #include "fuse_io.h"
112
113 #include <sys/priv.h>
114
/*
 * Maximum number of hardlinks to a single FUSE file.  fuse_vnop_link()
 * fails with EMLINK once the vnode's cached link count reaches this value.
 */
#define FUSE_LINK_MAX UINT32_MAX
117
/* The fusefs SDT provider is only declared here, not defined. */
SDT_PROVIDER_DECLARE(fusefs);
/*
 * Fuse trace probe (fusefs:::vnops-trace):
 * arg0: verbosity.  Higher numbers give more verbose messages
 * arg1: Textual message
 */
SDT_PROBE_DEFINE2(fusefs, , vnops, trace, "int", "char*");
125
/*
 * vnode ops
 *
 * Forward declarations of the handlers referenced by the fuse_fifoops and
 * fuse_vnops operation vectors below.
 */
static vop_access_t fuse_vnop_access;
static vop_advlock_t fuse_vnop_advlock;
static vop_bmap_t fuse_vnop_bmap;
static vop_close_t fuse_fifo_close;
static vop_close_t fuse_vnop_close;
static vop_create_t fuse_vnop_create;
static vop_deleteextattr_t fuse_vnop_deleteextattr;
static vop_fdatasync_t fuse_vnop_fdatasync;
static vop_fsync_t fuse_vnop_fsync;
static vop_getattr_t fuse_vnop_getattr;
static vop_getextattr_t fuse_vnop_getextattr;
static vop_inactive_t fuse_vnop_inactive;
static vop_link_t fuse_vnop_link;
static vop_listextattr_t fuse_vnop_listextattr;
static vop_lookup_t fuse_vnop_lookup;
static vop_mkdir_t fuse_vnop_mkdir;
static vop_mknod_t fuse_vnop_mknod;
static vop_open_t fuse_vnop_open;
static vop_pathconf_t fuse_vnop_pathconf;
static vop_read_t fuse_vnop_read;
static vop_readdir_t fuse_vnop_readdir;
static vop_readlink_t fuse_vnop_readlink;
static vop_reclaim_t fuse_vnop_reclaim;
static vop_remove_t fuse_vnop_remove;
static vop_rename_t fuse_vnop_rename;
static vop_rmdir_t fuse_vnop_rmdir;
static vop_setattr_t fuse_vnop_setattr;
static vop_setextattr_t fuse_vnop_setextattr;
static vop_strategy_t fuse_vnop_strategy;
static vop_symlink_t fuse_vnop_symlink;
static vop_write_t fuse_vnop_write;
static vop_getpages_t fuse_vnop_getpages;
static vop_print_t fuse_vnop_print;
static vop_vptofh_t fuse_vnop_vptofh;
161
/*
 * Vnode operation vector for fifos that live on a FUSE file system.
 * Operations not listed here fall through to fifo_specops.  Close goes
 * through a local wrapper, while read and write are wired to VOP_PANIC.
 */
struct vop_vector fuse_fifoops = {
	.vop_default = &fifo_specops,
	.vop_access = fuse_vnop_access,
	.vop_close = fuse_fifo_close,
	.vop_fsync = fuse_vnop_fsync,
	.vop_getattr = fuse_vnop_getattr,
	.vop_inactive = fuse_vnop_inactive,
	.vop_pathconf = fuse_vnop_pathconf,
	.vop_print = fuse_vnop_print,
	.vop_read = VOP_PANIC,
	.vop_reclaim = fuse_vnop_reclaim,
	.vop_setattr = fuse_vnop_setattr,
	.vop_write = VOP_PANIC,
	.vop_vptofh = fuse_vnop_vptofh,
};
177
/*
 * Main vnode operation vector for FUSE vnodes.  Operations not listed here
 * fall through to default_vnodeops; vop_allocate is explicitly rejected
 * with VOP_EINVAL.
 */
struct vop_vector fuse_vnops = {
	.vop_allocate = VOP_EINVAL,
	.vop_default = &default_vnodeops,
	.vop_access = fuse_vnop_access,
	.vop_advlock = fuse_vnop_advlock,
	.vop_bmap = fuse_vnop_bmap,
	.vop_close = fuse_vnop_close,
	.vop_create = fuse_vnop_create,
	.vop_deleteextattr = fuse_vnop_deleteextattr,
	.vop_fsync = fuse_vnop_fsync,
	.vop_fdatasync = fuse_vnop_fdatasync,
	.vop_getattr = fuse_vnop_getattr,
	.vop_getextattr = fuse_vnop_getextattr,
	.vop_inactive = fuse_vnop_inactive,
	/*
	 * TODO: implement vop_ioctl after upgrading to protocol 7.16.
	 * FUSE_IOCTL was added in 7.11, but 32-bit compat is broken until
	 * 7.16.
	 */
	.vop_link = fuse_vnop_link,
	.vop_listextattr = fuse_vnop_listextattr,
	.vop_lookup = fuse_vnop_lookup,
	.vop_mkdir = fuse_vnop_mkdir,
	.vop_mknod = fuse_vnop_mknod,
	.vop_open = fuse_vnop_open,
	.vop_pathconf = fuse_vnop_pathconf,
	/*
	 * TODO: implement vop_poll after upgrading to protocol 7.21.
	 * FUSE_POLL was added in protocol 7.11, but it's kind of broken until
	 * 7.21, which adds the ability for the client to choose which poll
	 * events it wants, and for a client to deregister a file handle.
	 */
	.vop_read = fuse_vnop_read,
	.vop_readdir = fuse_vnop_readdir,
	.vop_readlink = fuse_vnop_readlink,
	.vop_reclaim = fuse_vnop_reclaim,
	.vop_remove = fuse_vnop_remove,
	.vop_rename = fuse_vnop_rename,
	.vop_rmdir = fuse_vnop_rmdir,
	.vop_setattr = fuse_vnop_setattr,
	.vop_setextattr = fuse_vnop_setextattr,
	.vop_strategy = fuse_vnop_strategy,
	.vop_symlink = fuse_vnop_symlink,
	.vop_write = fuse_vnop_write,
	.vop_getpages = fuse_vnop_getpages,
	.vop_print = fuse_vnop_print,
	.vop_vptofh = fuse_vnop_vptofh,
};
226
/*
 * NOTE(review): presumably the count of free pbufs available to fusefs
 * paging I/O, with -1 as the initial value -- confirm against the users
 * of this variable.
 */
int fuse_pbuf_freecnt = -1;
228
229 /* Check permission for extattr operations, much like extattr_check_cred */
230 static int
231 fuse_extattr_check_cred(struct vnode *vp, int ns, struct ucred *cred,
232 struct thread *td, accmode_t accmode)
233 {
234 struct mount *mp = vnode_mount(vp);
235 struct fuse_data *data = fuse_get_mpdata(mp);
236 int default_permissions = data->dataflags & FSESS_DEFAULT_PERMISSIONS;
237
238 /*
239 * Kernel-invoked always succeeds.
240 */
241 if (cred == NOCRED)
242 return (0);
243
244 /*
245 * Do not allow privileged processes in jail to directly manipulate
246 * system attributes.
247 */
248 switch (ns) {
249 case EXTATTR_NAMESPACE_SYSTEM:
250 if (default_permissions) {
251 return (priv_check_cred(cred, PRIV_VFS_EXTATTR_SYSTEM,
252 0));
253 }
254 return (0);
255 case EXTATTR_NAMESPACE_USER:
256 if (default_permissions) {
257 return (fuse_internal_access(vp, accmode, td, cred));
258 }
259 return (0);
260 default:
261 return (EPERM);
262 }
263 }
264
265 /* Get a filehandle for a directory */
266 static int
267 fuse_filehandle_get_dir(struct vnode *vp, struct fuse_filehandle **fufhp,
268 struct ucred *cred, pid_t pid)
269 {
270 if (fuse_filehandle_get(vp, FREAD, fufhp, cred, pid) == 0)
271 return 0;
272 return fuse_filehandle_get(vp, FEXEC, fufhp, cred, pid);
273 }
274
275 /* Send FUSE_FLUSH for this vnode */
276 static int
277 fuse_flush(struct vnode *vp, struct ucred *cred, pid_t pid, int fflag)
278 {
279 struct fuse_flush_in *ffi;
280 struct fuse_filehandle *fufh;
281 struct fuse_dispatcher fdi;
282 struct thread *td = curthread;
283 struct mount *mp = vnode_mount(vp);
284 int err;
285
286 if (!fsess_isimpl(vnode_mount(vp), FUSE_FLUSH))
287 return 0;
288
289 err = fuse_filehandle_getrw(vp, fflag, &fufh, cred, pid);
290 if (err)
291 return err;
292
293 fdisp_init(&fdi, sizeof(*ffi));
294 fdisp_make_vp(&fdi, FUSE_FLUSH, vp, td, cred);
295 ffi = fdi.indata;
296 ffi->fh = fufh->fh_id;
297 /*
298 * If the file has a POSIX lock then we're supposed to set lock_owner.
299 * If not, then lock_owner is undefined. So we may as well always set
300 * it.
301 */
302 ffi->lock_owner = td->td_proc->p_pid;
303
304 err = fdisp_wait_answ(&fdi);
305 if (err == ENOSYS) {
306 fsess_set_notimpl(mp, FUSE_FLUSH);
307 err = 0;
308 }
309 fdisp_destroy(&fdi);
310 return err;
311 }
312
313 /* Close wrapper for fifos. */
314 static int
315 fuse_fifo_close(struct vop_close_args *ap)
316 {
317 return (fifo_specops.vop_close(ap));
318 }
319
320 /*
321 struct vnop_access_args {
322 struct vnode *a_vp;
323 #if VOP_ACCESS_TAKES_ACCMODE_T
324 accmode_t a_accmode;
325 #else
326 int a_mode;
327 #endif
328 struct ucred *a_cred;
329 struct thread *a_td;
330 };
331 */
332 static int
333 fuse_vnop_access(struct vop_access_args *ap)
334 {
335 struct vnode *vp = ap->a_vp;
336 int accmode = ap->a_accmode;
337 struct ucred *cred = ap->a_cred;
338
339 struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp));
340
341 int err;
342
343 if (fuse_isdeadfs(vp)) {
344 if (vnode_isvroot(vp)) {
345 return 0;
346 }
347 return ENXIO;
348 }
349 if (!(data->dataflags & FSESS_INITED)) {
350 if (vnode_isvroot(vp)) {
351 if (priv_check_cred(cred, PRIV_VFS_ADMIN, 0) ||
352 (fuse_match_cred(data->daemoncred, cred) == 0)) {
353 return 0;
354 }
355 }
356 return EBADF;
357 }
358 if (vnode_islnk(vp)) {
359 return 0;
360 }
361
362 err = fuse_internal_access(vp, accmode, ap->a_td, ap->a_cred);
363 return err;
364 }
365
366 /*
367 * struct vop_advlock_args {
368 * struct vop_generic_args a_gen;
369 * struct vnode *a_vp;
370 * void *a_id;
371 * int a_op;
372 * struct flock *a_fl;
373 * int a_flags;
374 * }
375 */
376 static int
377 fuse_vnop_advlock(struct vop_advlock_args *ap)
378 {
379 struct vnode *vp = ap->a_vp;
380 struct flock *fl = ap->a_fl;
381 struct thread *td = curthread;
382 struct ucred *cred = td->td_ucred;
383 pid_t pid = td->td_proc->p_pid;
384 struct fuse_filehandle *fufh;
385 struct fuse_dispatcher fdi;
386 struct fuse_lk_in *fli;
387 struct fuse_lk_out *flo;
388 struct vattr vattr;
389 enum fuse_opcode op;
390 off_t size, start;
391 int dataflags, err;
392 int flags = ap->a_flags;
393
394 dataflags = fuse_get_mpdata(vnode_mount(vp))->dataflags;
395
396 if (fuse_isdeadfs(vp)) {
397 return ENXIO;
398 }
399
400 switch(ap->a_op) {
401 case F_GETLK:
402 op = FUSE_GETLK;
403 break;
404 case F_SETLK:
405 if (flags & F_WAIT)
406 op = FUSE_SETLKW;
407 else
408 op = FUSE_SETLK;
409 break;
410 case F_UNLCK:
411 op = FUSE_SETLK;
412 break;
413 default:
414 return EINVAL;
415 }
416
417 if (!(dataflags & FSESS_POSIX_LOCKS))
418 return vop_stdadvlock(ap);
419 /* FUSE doesn't properly support flock until protocol 7.17 */
420 if (flags & F_FLOCK)
421 return vop_stdadvlock(ap);
422
423 vn_lock(vp, LK_SHARED | LK_RETRY);
424
425 switch (fl->l_whence) {
426 case SEEK_SET:
427 case SEEK_CUR:
428 /*
429 * Caller is responsible for adding any necessary offset
430 * when SEEK_CUR is used.
431 */
432 start = fl->l_start;
433 break;
434
435 case SEEK_END:
436 err = fuse_internal_getattr(vp, &vattr, cred, td);
437 if (err)
438 goto out;
439 size = vattr.va_size;
440 if (size > OFF_MAX ||
441 (fl->l_start > 0 && size > OFF_MAX - fl->l_start)) {
442 err = EOVERFLOW;
443 goto out;
444 }
445 start = size + fl->l_start;
446 break;
447
448 default:
449 return (EINVAL);
450 }
451
452 err = fuse_filehandle_get_anyflags(vp, &fufh, cred, pid);
453 if (err)
454 goto out;
455
456 fdisp_init(&fdi, sizeof(*fli));
457
458 fdisp_make_vp(&fdi, op, vp, td, cred);
459 fli = fdi.indata;
460 fli->fh = fufh->fh_id;
461 fli->owner = td->td_proc->p_pid;
462 fli->lk.start = start;
463 if (fl->l_len != 0)
464 fli->lk.end = start + fl->l_len - 1;
465 else
466 fli->lk.end = INT64_MAX;
467 fli->lk.type = fl->l_type;
468 fli->lk.pid = td->td_proc->p_pid;
469
470 err = fdisp_wait_answ(&fdi);
471 fdisp_destroy(&fdi);
472
473 if (err == 0 && op == FUSE_GETLK) {
474 flo = fdi.answ;
475 fl->l_type = flo->lk.type;
476 fl->l_whence = SEEK_SET;
477 if (flo->lk.type != F_UNLCK) {
478 fl->l_pid = flo->lk.pid;
479 fl->l_start = flo->lk.start;
480 if (flo->lk.end == INT64_MAX)
481 fl->l_len = 0;
482 else
483 fl->l_len = flo->lk.end - flo->lk.start + 1;
484 fl->l_start = flo->lk.start;
485 }
486 }
487
488 out:
489 VOP_UNLOCK(vp, 0);
490 return err;
491 }
492
493 /* {
494 struct vnode *a_vp;
495 daddr_t a_bn;
496 struct bufobj **a_bop;
497 daddr_t *a_bnp;
498 int *a_runp;
499 int *a_runb;
500 } */
501 static int
502 fuse_vnop_bmap(struct vop_bmap_args *ap)
503 {
504 struct vnode *vp = ap->a_vp;
505 struct bufobj **bo = ap->a_bop;
506 struct thread *td = curthread;
507 struct mount *mp;
508 struct fuse_dispatcher fdi;
509 struct fuse_bmap_in *fbi;
510 struct fuse_bmap_out *fbo;
511 struct fuse_data *data;
512 struct fuse_vnode_data *fvdat = VTOFUD(vp);
513 uint64_t biosize;
514 off_t fsize;
515 daddr_t lbn = ap->a_bn;
516 daddr_t *pbn = ap->a_bnp;
517 int *runp = ap->a_runp;
518 int *runb = ap->a_runb;
519 int error = 0;
520 int maxrun;
521
522 if (fuse_isdeadfs(vp)) {
523 return ENXIO;
524 }
525
526 mp = vnode_mount(vp);
527 data = fuse_get_mpdata(mp);
528 biosize = fuse_iosize(vp);
529 maxrun = MIN(vp->v_mount->mnt_iosize_max / biosize - 1,
530 data->max_readahead_blocks);
531
532 if (bo != NULL)
533 *bo = &vp->v_bufobj;
534
535 /*
536 * The FUSE_BMAP operation does not include the runp and runb
537 * variables, so we must guess. Report nonzero contiguous runs so
538 * cluster_read will combine adjacent reads. It's worthwhile to reduce
539 * upcalls even if we don't know the true physical layout of the file.
540 *
541 * FUSE file systems may opt out of read clustering in two ways:
542 * * mounting with -onoclusterr
543 * * Setting max_readahead <= maxbcachebuf during FUSE_INIT
544 */
545 if (runb != NULL)
546 *runb = MIN(lbn, maxrun);
547 if (runp != NULL && maxrun == 0)
548 *runp = 0;
549 else if (runp != NULL) {
550 /*
551 * If the file's size is cached, use that value to calculate
552 * runp, even if the cache is expired. runp is only advisory,
553 * and the risk of getting it wrong is not worth the cost of
554 * another upcall.
555 */
556 if (fvdat->cached_attrs.va_size != VNOVAL)
557 fsize = fvdat->cached_attrs.va_size;
558 else
559 error = fuse_vnode_size(vp, &fsize, td->td_ucred, td);
560 if (error == 0)
561 *runp = MIN(MAX(0, fsize / (off_t)biosize - lbn - 1),
562 maxrun);
563 else
564 *runp = 0;
565 }
566
567 if (fsess_isimpl(mp, FUSE_BMAP)) {
568 fdisp_init(&fdi, sizeof(*fbi));
569 fdisp_make_vp(&fdi, FUSE_BMAP, vp, td, td->td_ucred);
570 fbi = fdi.indata;
571 fbi->block = lbn;
572 fbi->blocksize = biosize;
573 error = fdisp_wait_answ(&fdi);
574 if (error == ENOSYS) {
575 fdisp_destroy(&fdi);
576 fsess_set_notimpl(mp, FUSE_BMAP);
577 error = 0;
578 } else {
579 fbo = fdi.answ;
580 if (error == 0 && pbn != NULL)
581 *pbn = fbo->block;
582 fdisp_destroy(&fdi);
583 return error;
584 }
585 }
586
587 /* If the daemon doesn't support BMAP, make up a sensible default */
588 if (pbn != NULL)
589 *pbn = lbn * btodb(biosize);
590 return (error);
591 }
592
593 /*
594 struct vop_close_args {
595 struct vnode *a_vp;
596 int a_fflag;
597 struct ucred *a_cred;
598 struct thread *a_td;
599 };
600 */
601 static int
602 fuse_vnop_close(struct vop_close_args *ap)
603 {
604 struct vnode *vp = ap->a_vp;
605 struct ucred *cred = ap->a_cred;
606 int fflag = ap->a_fflag;
607 struct thread *td = ap->a_td;
608 pid_t pid = td->td_proc->p_pid;
609 struct fuse_vnode_data *fvdat = VTOFUD(vp);
610 int err = 0;
611
612 if (fuse_isdeadfs(vp))
613 return 0;
614 if (vnode_isdir(vp))
615 return 0;
616 if (fflag & IO_NDELAY)
617 return 0;
618
619 err = fuse_flush(vp, cred, pid, fflag);
620 if (err == 0 && (fvdat->flag & FN_ATIMECHANGE)) {
621 struct vattr vap;
622
623 VATTR_NULL(&vap);
624 vap.va_atime = fvdat->cached_attrs.va_atime;
625 err = fuse_internal_setattr(vp, &vap, td, NULL);
626 }
627 /* TODO: close the file handle, if we're sure it's no longer used */
628 if ((fvdat->flag & FN_SIZECHANGE) != 0) {
629 fuse_vnode_savesize(vp, cred, td->td_proc->p_pid);
630 }
631 return err;
632 }
633
634 static void
635 fdisp_make_mknod_for_fallback(
636 struct fuse_dispatcher *fdip,
637 struct componentname *cnp,
638 struct vnode *dvp,
639 uint64_t parentnid,
640 struct thread *td,
641 struct ucred *cred,
642 mode_t mode,
643 enum fuse_opcode *op)
644 {
645 struct fuse_mknod_in *fmni;
646
647 fdisp_init(fdip, sizeof(*fmni) + cnp->cn_namelen + 1);
648 *op = FUSE_MKNOD;
649 fdisp_make(fdip, *op, vnode_mount(dvp), parentnid, td, cred);
650 fmni = fdip->indata;
651 fmni->mode = mode;
652 fmni->rdev = 0;
653 memcpy((char *)fdip->indata + sizeof(*fmni), cnp->cn_nameptr,
654 cnp->cn_namelen);
655 ((char *)fdip->indata)[sizeof(*fmni) + cnp->cn_namelen] = '\0';
656 }
657 /*
658 struct vnop_create_args {
659 struct vnode *a_dvp;
660 struct vnode **a_vpp;
661 struct componentname *a_cnp;
662 struct vattr *a_vap;
663 };
664 */
665 static int
666 fuse_vnop_create(struct vop_create_args *ap)
667 {
668 struct vnode *dvp = ap->a_dvp;
669 struct vnode **vpp = ap->a_vpp;
670 struct componentname *cnp = ap->a_cnp;
671 struct vattr *vap = ap->a_vap;
672 struct thread *td = cnp->cn_thread;
673 struct ucred *cred = cnp->cn_cred;
674
675 struct fuse_data *data;
676 struct fuse_create_in *fci;
677 struct fuse_entry_out *feo;
678 struct fuse_open_out *foo;
679 struct fuse_dispatcher fdi, fdi2;
680 struct fuse_dispatcher *fdip = &fdi;
681 struct fuse_dispatcher *fdip2 = NULL;
682
683 int err;
684
685 struct mount *mp = vnode_mount(dvp);
686 data = fuse_get_mpdata(mp);
687 uint64_t parentnid = VTOFUD(dvp)->nid;
688 mode_t mode = MAKEIMODE(vap->va_type, vap->va_mode);
689 enum fuse_opcode op;
690 int flags;
691
692 if (fuse_isdeadfs(dvp))
693 return ENXIO;
694
695 /* FUSE expects sockets to be created with FUSE_MKNOD */
696 if (vap->va_type == VSOCK)
697 return fuse_internal_mknod(dvp, vpp, cnp, vap);
698
699 /*
700 * VOP_CREATE doesn't tell us the open(2) flags, so we guess. Only a
701 * writable mode makes sense, and we might as well include readability
702 * too.
703 */
704 flags = O_RDWR;
705
706 bzero(&fdi, sizeof(fdi));
707
708 if (vap->va_type != VREG)
709 return (EINVAL);
710
711 if (!fsess_isimpl(mp, FUSE_CREATE) || vap->va_type == VSOCK) {
712 /* Fallback to FUSE_MKNOD/FUSE_OPEN */
713 fdisp_make_mknod_for_fallback(fdip, cnp, dvp, parentnid, td,
714 cred, mode, &op);
715 } else {
716 /* Use FUSE_CREATE */
717 size_t insize;
718
719 op = FUSE_CREATE;
720 fdisp_init(fdip, sizeof(*fci) + cnp->cn_namelen + 1);
721 fdisp_make(fdip, op, vnode_mount(dvp), parentnid, td, cred);
722 fci = fdip->indata;
723 fci->mode = mode;
724 fci->flags = O_CREAT | flags;
725 if (fuse_libabi_geq(data, 7, 12)) {
726 insize = sizeof(*fci);
727 fci->umask = td->td_proc->p_fd->fd_cmask;
728 } else {
729 insize = sizeof(struct fuse_open_in);
730 }
731
732 memcpy((char *)fdip->indata + insize, cnp->cn_nameptr,
733 cnp->cn_namelen);
734 ((char *)fdip->indata)[insize + cnp->cn_namelen] = '\0';
735 }
736
737 err = fdisp_wait_answ(fdip);
738
739 if (err) {
740 if (err == ENOSYS && op == FUSE_CREATE) {
741 fsess_set_notimpl(mp, FUSE_CREATE);
742 fdisp_destroy(fdip);
743 fdisp_make_mknod_for_fallback(fdip, cnp, dvp,
744 parentnid, td, cred, mode, &op);
745 err = fdisp_wait_answ(fdip);
746 }
747 if (err)
748 goto out;
749 }
750
751 feo = fdip->answ;
752
753 if ((err = fuse_internal_checkentry(feo, vap->va_type))) {
754 goto out;
755 }
756
757 if (op == FUSE_CREATE) {
758 if (fuse_libabi_geq(data, 7, 9))
759 foo = (struct fuse_open_out*)(feo + 1);
760 else
761 foo = (struct fuse_open_out*)((char*)feo +
762 FUSE_COMPAT_ENTRY_OUT_SIZE);
763 } else {
764 /* Issue a separate FUSE_OPEN */
765 struct fuse_open_in *foi;
766
767 fdip2 = &fdi2;
768 fdisp_init(fdip2, sizeof(*foi));
769 fdisp_make(fdip2, FUSE_OPEN, vnode_mount(dvp), feo->nodeid, td,
770 cred);
771 foi = fdip2->indata;
772 foi->flags = flags;
773 err = fdisp_wait_answ(fdip2);
774 if (err)
775 goto out;
776 foo = fdip2->answ;
777 }
778 err = fuse_vnode_get(mp, feo, feo->nodeid, dvp, vpp, cnp, vap->va_type);
779 if (err) {
780 struct fuse_release_in *fri;
781 uint64_t nodeid = feo->nodeid;
782 uint64_t fh_id = foo->fh;
783
784 fdisp_init(fdip, sizeof(*fri));
785 fdisp_make(fdip, FUSE_RELEASE, mp, nodeid, td, cred);
786 fri = fdip->indata;
787 fri->fh = fh_id;
788 fri->flags = flags;
789 fuse_insert_callback(fdip->tick, fuse_internal_forget_callback);
790 fuse_insert_message(fdip->tick, false);
791 goto out;
792 }
793 ASSERT_VOP_ELOCKED(*vpp, "fuse_vnop_create");
794 fuse_internal_cache_attrs(*vpp, &feo->attr, feo->attr_valid,
795 feo->attr_valid_nsec, NULL, true);
796
797 fuse_filehandle_init(*vpp, FUFH_RDWR, NULL, td, cred, foo);
798 fuse_vnode_open(*vpp, foo->open_flags, td);
799 /*
800 * Purge the parent's attribute cache because the daemon should've
801 * updated its mtime and ctime
802 */
803 fuse_vnode_clear_attr_cache(dvp);
804 cache_purge_negative(dvp);
805
806 out:
807 if (fdip2)
808 fdisp_destroy(fdip2);
809 fdisp_destroy(fdip);
810 return err;
811 }
812
813 /*
814 struct vnop_fdatasync_args {
815 struct vop_generic_args a_gen;
816 struct vnode * a_vp;
817 struct thread * a_td;
818 };
819 */
820 static int
821 fuse_vnop_fdatasync(struct vop_fdatasync_args *ap)
822 {
823 struct vnode *vp = ap->a_vp;
824 struct thread *td = ap->a_td;
825 int waitfor = MNT_WAIT;
826
827 int err = 0;
828
829 if (fuse_isdeadfs(vp)) {
830 return 0;
831 }
832 if ((err = vop_stdfdatasync_buf(ap)))
833 return err;
834
835 return fuse_internal_fsync(vp, td, waitfor, true);
836 }
837
838 /*
839 struct vnop_fsync_args {
840 struct vop_generic_args a_gen;
841 struct vnode * a_vp;
842 int a_waitfor;
843 struct thread * a_td;
844 };
845 */
846 static int
847 fuse_vnop_fsync(struct vop_fsync_args *ap)
848 {
849 struct vnode *vp = ap->a_vp;
850 struct thread *td = ap->a_td;
851 int waitfor = ap->a_waitfor;
852 int err = 0;
853
854 if (fuse_isdeadfs(vp)) {
855 return 0;
856 }
857 if ((err = vop_stdfsync(ap)))
858 return err;
859
860 return fuse_internal_fsync(vp, td, waitfor, false);
861 }
862
863 /*
864 struct vnop_getattr_args {
865 struct vnode *a_vp;
866 struct vattr *a_vap;
867 struct ucred *a_cred;
868 struct thread *a_td;
869 };
870 */
871 static int
872 fuse_vnop_getattr(struct vop_getattr_args *ap)
873 {
874 struct vnode *vp = ap->a_vp;
875 struct vattr *vap = ap->a_vap;
876 struct ucred *cred = ap->a_cred;
877 struct thread *td = curthread;
878
879 int err = 0;
880 int dataflags;
881
882 dataflags = fuse_get_mpdata(vnode_mount(vp))->dataflags;
883
884 /* Note that we are not bailing out on a dead file system just yet. */
885
886 if (!(dataflags & FSESS_INITED)) {
887 if (!vnode_isvroot(vp)) {
888 fdata_set_dead(fuse_get_mpdata(vnode_mount(vp)));
889 err = ENOTCONN;
890 return err;
891 } else {
892 goto fake;
893 }
894 }
895 err = fuse_internal_getattr(vp, vap, cred, td);
896 if (err == ENOTCONN && vnode_isvroot(vp)) {
897 /* see comment in fuse_vfsop_statfs() */
898 goto fake;
899 } else {
900 return err;
901 }
902
903 fake:
904 bzero(vap, sizeof(*vap));
905 vap->va_type = vnode_vtype(vp);
906
907 return 0;
908 }
909
910 /*
911 struct vnop_inactive_args {
912 struct vnode *a_vp;
913 struct thread *a_td;
914 };
915 */
916 static int
917 fuse_vnop_inactive(struct vop_inactive_args *ap)
918 {
919 struct vnode *vp = ap->a_vp;
920 struct thread *td = ap->a_td;
921
922 struct fuse_vnode_data *fvdat = VTOFUD(vp);
923 struct fuse_filehandle *fufh, *fufh_tmp;
924
925 int need_flush = 1;
926
927 LIST_FOREACH_SAFE(fufh, &fvdat->handles, next, fufh_tmp) {
928 if (need_flush && vp->v_type == VREG) {
929 if ((VTOFUD(vp)->flag & FN_SIZECHANGE) != 0) {
930 fuse_vnode_savesize(vp, NULL, 0);
931 }
932 if ((fvdat->flag & FN_REVOKED) != 0)
933 fuse_io_invalbuf(vp, td);
934 else
935 fuse_io_flushbuf(vp, MNT_WAIT, td);
936 need_flush = 0;
937 }
938 fuse_filehandle_close(vp, fufh, td, NULL);
939 }
940
941 if ((fvdat->flag & FN_REVOKED) != 0)
942 vrecycle(vp);
943
944 return 0;
945 }
946
947 /*
948 struct vnop_link_args {
949 struct vnode *a_tdvp;
950 struct vnode *a_vp;
951 struct componentname *a_cnp;
952 };
953 */
954 static int
955 fuse_vnop_link(struct vop_link_args *ap)
956 {
957 struct vnode *vp = ap->a_vp;
958 struct vnode *tdvp = ap->a_tdvp;
959 struct componentname *cnp = ap->a_cnp;
960
961 struct vattr *vap = VTOVA(vp);
962
963 struct fuse_dispatcher fdi;
964 struct fuse_entry_out *feo;
965 struct fuse_link_in fli;
966
967 int err;
968
969 if (fuse_isdeadfs(vp)) {
970 return ENXIO;
971 }
972 if (vnode_mount(tdvp) != vnode_mount(vp)) {
973 return EXDEV;
974 }
975
976 /*
977 * This is a seatbelt check to protect naive userspace filesystems from
978 * themselves and the limitations of the FUSE IPC protocol. If a
979 * filesystem does not allow attribute caching, assume it is capable of
980 * validating that nlink does not overflow.
981 */
982 if (vap != NULL && vap->va_nlink >= FUSE_LINK_MAX)
983 return EMLINK;
984 fli.oldnodeid = VTOI(vp);
985
986 fdisp_init(&fdi, 0);
987 fuse_internal_newentry_makerequest(vnode_mount(tdvp), VTOI(tdvp), cnp,
988 FUSE_LINK, &fli, sizeof(fli), &fdi);
989 if ((err = fdisp_wait_answ(&fdi))) {
990 goto out;
991 }
992 feo = fdi.answ;
993
994 if (fli.oldnodeid != feo->nodeid) {
995 struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp));
996 fuse_warn(data, FSESS_WARN_ILLEGAL_INODE,
997 "Assigned wrong inode for a hard link.");
998 fuse_vnode_clear_attr_cache(vp);
999 fuse_vnode_clear_attr_cache(tdvp);
1000 err = EIO;
1001 goto out;
1002 }
1003
1004 err = fuse_internal_checkentry(feo, vnode_vtype(vp));
1005 if (!err) {
1006 /*
1007 * Purge the parent's attribute cache because the daemon
1008 * should've updated its mtime and ctime
1009 */
1010 fuse_vnode_clear_attr_cache(tdvp);
1011 fuse_internal_cache_attrs(vp, &feo->attr, feo->attr_valid,
1012 feo->attr_valid_nsec, NULL, true);
1013 }
1014 out:
1015 fdisp_destroy(&fdi);
1016 return err;
1017 }
1018
/*
 * Argument bundle for fuse_lookup_alloc(), which forwards each member to
 * fuse_vnode_get().
 */
struct fuse_lookup_alloc_arg {
	struct fuse_entry_out *feo;	/* entry reply passed to fuse_vnode_get */
	struct componentname *cnp;	/* name component being looked up */
	uint64_t nid;			/* node id of the vnode to obtain */
	enum vtype vtyp;		/* vnode type of the target */
};
1025
1026 /* Callback for vn_get_ino */
1027 static int
1028 fuse_lookup_alloc(struct mount *mp, void *arg, int lkflags, struct vnode **vpp)
1029 {
1030 struct fuse_lookup_alloc_arg *flaa = arg;
1031
1032 return fuse_vnode_get(mp, flaa->feo, flaa->nid, NULL, vpp, flaa->cnp,
1033 flaa->vtyp);
1034 }
1035
/*
 * Probe fired by fuse_vnop_lookup() after each cache_lookup() call:
 * arg0: return value of cache_lookup()
 * arg1: timeout reported for the cache entry
 * arg2: current uptime
 */
SDT_PROBE_DEFINE3(fusefs, , vnops, cache_lookup,
    "int", "struct timespec*", "struct timespec*");
1038 /*
1039 struct vnop_lookup_args {
1040 struct vnodeop_desc *a_desc;
1041 struct vnode *a_dvp;
1042 struct vnode **a_vpp;
1043 struct componentname *a_cnp;
1044 };
1045 */
1046 int
1047 fuse_vnop_lookup(struct vop_lookup_args *ap)
1048 {
1049 struct vnode *dvp = ap->a_dvp;
1050 struct vnode **vpp = ap->a_vpp;
1051 struct componentname *cnp = ap->a_cnp;
1052 struct thread *td = cnp->cn_thread;
1053 struct ucred *cred = cnp->cn_cred;
1054 struct timespec now;
1055
1056 int nameiop = cnp->cn_nameiop;
1057 int flags = cnp->cn_flags;
1058 int wantparent = flags & (LOCKPARENT | WANTPARENT);
1059 int islastcn = flags & ISLASTCN;
1060 struct mount *mp = vnode_mount(dvp);
1061 struct fuse_data *data = fuse_get_mpdata(mp);
1062 int default_permissions = data->dataflags & FSESS_DEFAULT_PERMISSIONS;
1063 bool is_dot;
1064
1065 int err = 0;
1066 int lookup_err = 0;
1067 struct vnode *vp = NULL;
1068
1069 struct fuse_dispatcher fdi;
1070 bool did_lookup = false;
1071 struct fuse_entry_out *feo = NULL;
1072 enum vtype vtyp; /* vnode type of target */
1073 off_t filesize; /* filesize of target */
1074
1075 uint64_t nid;
1076
1077 if (fuse_isdeadfs(dvp)) {
1078 *vpp = NULL;
1079 return ENXIO;
1080 }
1081 if (!vnode_isdir(dvp))
1082 return ENOTDIR;
1083
1084 if (islastcn && vfs_isrdonly(mp) && (nameiop != LOOKUP))
1085 return EROFS;
1086
1087 if ((cnp->cn_flags & NOEXECCHECK) != 0)
1088 cnp->cn_flags &= ~NOEXECCHECK;
1089 else if ((err = fuse_internal_access(dvp, VEXEC, td, cred)))
1090 return err;
1091
1092 is_dot = cnp->cn_namelen == 1 && *(cnp->cn_nameptr) == '.';
1093 if ((flags & ISDOTDOT) && !(data->dataflags & FSESS_EXPORT_SUPPORT))
1094 {
1095 if (!(VTOFUD(dvp)->flag & FN_PARENT_NID)) {
1096 /*
1097 * Since the file system doesn't support ".." lookups,
1098 * we have no way to find this entry.
1099 */
1100 return ESTALE;
1101 }
1102 nid = VTOFUD(dvp)->parent_nid;
1103 if (nid == 0)
1104 return ENOENT;
1105 /* .. is obviously a directory */
1106 vtyp = VDIR;
1107 filesize = 0;
1108 } else if (is_dot) {
1109 nid = VTOI(dvp);
1110 /* . is obviously a directory */
1111 vtyp = VDIR;
1112 filesize = 0;
1113 } else {
1114 struct timespec timeout;
1115
1116 err = cache_lookup(dvp, vpp, cnp, &timeout, NULL);
1117 getnanouptime(&now);
1118 SDT_PROBE3(fusefs, , vnops, cache_lookup, err, &timeout, &now);
1119 switch (err) {
1120 case -1: /* positive match */
1121 if (timespeccmp(&timeout, &now, >)) {
1122 counter_u64_add(fuse_lookup_cache_hits, 1);
1123 } else {
1124 /* Cache timeout */
1125 counter_u64_add(fuse_lookup_cache_misses, 1);
1126 bintime_clear(
1127 &VTOFUD(*vpp)->entry_cache_timeout);
1128 cache_purge(*vpp);
1129 if (dvp != *vpp)
1130 vput(*vpp);
1131 else
1132 vrele(*vpp);
1133 *vpp = NULL;
1134 break;
1135 }
1136 return 0;
1137
1138 case 0: /* no match in cache */
1139 counter_u64_add(fuse_lookup_cache_misses, 1);
1140 break;
1141
1142 case ENOENT: /* negative match */
1143 getnanouptime(&now);
1144 if (timespeccmp(&timeout, &now, <=)) {
1145 /* Cache timeout */
1146 cache_purge_negative(dvp);
1147 break;
1148 }
1149 /* fall through */
1150 default:
1151 return err;
1152 }
1153
1154 fdisp_init(&fdi, cnp->cn_namelen + 1);
1155 fdisp_make(&fdi, FUSE_LOOKUP, mp, VTOI(dvp), td, cred);
1156
1157 memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen);
1158 ((char *)fdi.indata)[cnp->cn_namelen] = '\0';
1159 lookup_err = fdisp_wait_answ(&fdi);
1160 did_lookup = true;
1161
1162 if (!lookup_err) {
1163 /* lookup call succeeded */
1164 feo = (struct fuse_entry_out *)fdi.answ;
1165 nid = feo->nodeid;
1166 if (nid == 0) {
1167 /* zero nodeid means ENOENT and cache it */
1168 struct timespec timeout;
1169
1170 fdi.answ_stat = ENOENT;
1171 lookup_err = ENOENT;
1172 if (cnp->cn_flags & MAKEENTRY) {
1173 fuse_validity_2_timespec(feo, &timeout);
1174 /* Use the same entry_time for .. as for
1175 * the file itself. That doesn't honor
1176 * exactly what the fuse server tells
1177 * us, but to do otherwise would require
1178 * another cache lookup at this point.
1179 */
1180 struct timespec *dtsp = NULL;
1181 cache_enter_time(dvp, *vpp, cnp,
1182 &timeout, dtsp);
1183 }
1184 }
1185 vtyp = IFTOVT(feo->attr.mode);
1186 filesize = feo->attr.size;
1187 }
1188 if (lookup_err && (!fdi.answ_stat || lookup_err != ENOENT)) {
1189 fdisp_destroy(&fdi);
1190 return lookup_err;
1191 }
1192 }
1193 /* lookup_err, if non-zero, must be ENOENT at this point */
1194
1195 if (lookup_err) {
1196 /* Entry not found */
1197 if ((nameiop == CREATE || nameiop == RENAME) && islastcn) {
1198 if (default_permissions)
1199 err = fuse_internal_access(dvp, VWRITE, td,
1200 cred);
1201 else
1202 err = 0;
1203 if (!err) {
1204 /*
1205 * Set the SAVENAME flag to hold onto the
1206 * pathname for use later in VOP_CREATE or
1207 * VOP_RENAME.
1208 */
1209 cnp->cn_flags |= SAVENAME;
1210
1211 err = EJUSTRETURN;
1212 }
1213 } else {
1214 err = ENOENT;
1215 }
1216 } else {
1217 /* Entry was found */
1218 if (flags & ISDOTDOT) {
1219 struct fuse_lookup_alloc_arg flaa;
1220
1221 flaa.nid = nid;
1222 flaa.feo = feo;
1223 flaa.cnp = cnp;
1224 flaa.vtyp = vtyp;
1225 err = vn_vget_ino_gen(dvp, fuse_lookup_alloc, &flaa, 0,
1226 &vp);
1227 *vpp = vp;
1228 } else if (nid == VTOI(dvp)) {
1229 if (is_dot) {
1230 vref(dvp);
1231 *vpp = dvp;
1232 } else {
1233 fuse_warn(fuse_get_mpdata(mp),
1234 FSESS_WARN_ILLEGAL_INODE,
1235 "Assigned same inode to both parent and "
1236 "child.");
1237 err = EIO;
1238 }
1239
1240 } else {
1241 struct fuse_vnode_data *fvdat;
1242
1243 err = fuse_vnode_get(vnode_mount(dvp), feo, nid, dvp,
1244 &vp, cnp, vtyp);
1245 if (err)
1246 goto out;
1247 *vpp = vp;
1248 fvdat = VTOFUD(vp);
1249
1250 MPASS(feo != NULL);
1251 if (timespeccmp(&now, &fvdat->last_local_modify, >)) {
1252 /*
1253 * Attributes from the server are definitely
1254 * newer than the last attributes we sent to
1255 * the server, so cache them.
1256 */
1257 fuse_internal_cache_attrs(*vpp, &feo->attr,
1258 feo->attr_valid, feo->attr_valid_nsec,
1259 NULL, true);
1260 }
1261 fuse_validity_2_bintime(feo->entry_valid,
1262 feo->entry_valid_nsec,
1263 &fvdat->entry_cache_timeout);
1264
1265 if ((nameiop == DELETE || nameiop == RENAME) &&
1266 islastcn && default_permissions)
1267 {
1268 struct vattr dvattr;
1269
1270 err = fuse_internal_access(dvp, VWRITE, td,
1271 cred);
1272 if (err != 0)
1273 goto out;
1274 /*
1275 * if the parent's sticky bit is set, check
1276 * whether we're allowed to remove the file.
1277 * Need to figure out the vnode locking to make
1278 * this work.
1279 */
1280 fuse_internal_getattr(dvp, &dvattr, cred, td);
1281 if ((dvattr.va_mode & S_ISTXT) &&
1282 fuse_internal_access(dvp, VADMIN, td,
1283 cred) &&
1284 fuse_internal_access(*vpp, VADMIN, td,
1285 cred)) {
1286 err = EPERM;
1287 goto out;
1288 }
1289 }
1290
1291 if (islastcn && (
1292 (nameiop == DELETE) ||
1293 (nameiop == RENAME && wantparent))) {
1294 cnp->cn_flags |= SAVENAME;
1295 }
1296
1297 }
1298 }
1299 out:
1300 if (err) {
1301 if (vp != NULL && dvp != vp)
1302 vput(vp);
1303 else if (vp != NULL)
1304 vrele(vp);
1305 *vpp = NULL;
1306 }
1307 if (did_lookup)
1308 fdisp_destroy(&fdi);
1309
1310 return err;
1311 }
1312
1313 /*
1314 struct vnop_mkdir_args {
1315 struct vnode *a_dvp;
1316 struct vnode **a_vpp;
1317 struct componentname *a_cnp;
1318 struct vattr *a_vap;
1319 };
1320 */
1321 static int
1322 fuse_vnop_mkdir(struct vop_mkdir_args *ap)
1323 {
1324 struct vnode *dvp = ap->a_dvp;
1325 struct vnode **vpp = ap->a_vpp;
1326 struct componentname *cnp = ap->a_cnp;
1327 struct vattr *vap = ap->a_vap;
1328
1329 struct fuse_mkdir_in fmdi;
1330
1331 if (fuse_isdeadfs(dvp)) {
1332 return ENXIO;
1333 }
1334 fmdi.mode = MAKEIMODE(vap->va_type, vap->va_mode);
1335 fmdi.umask = curthread->td_proc->p_fd->fd_cmask;
1336
1337 return (fuse_internal_newentry(dvp, vpp, cnp, FUSE_MKDIR, &fmdi,
1338 sizeof(fmdi), VDIR));
1339 }
1340
1341 /*
1342 struct vnop_mknod_args {
1343 struct vnode *a_dvp;
1344 struct vnode **a_vpp;
1345 struct componentname *a_cnp;
1346 struct vattr *a_vap;
1347 };
1348 */
1349 static int
1350 fuse_vnop_mknod(struct vop_mknod_args *ap)
1351 {
1352
1353 struct vnode *dvp = ap->a_dvp;
1354 struct vnode **vpp = ap->a_vpp;
1355 struct componentname *cnp = ap->a_cnp;
1356 struct vattr *vap = ap->a_vap;
1357
1358 if (fuse_isdeadfs(dvp))
1359 return ENXIO;
1360
1361 return fuse_internal_mknod(dvp, vpp, cnp, vap);
1362 }
1363
1364 /*
1365 struct vop_open_args {
1366 struct vnode *a_vp;
1367 int a_mode;
1368 struct ucred *a_cred;
1369 struct thread *a_td;
1370 int a_fdidx; / struct file *a_fp;
1371 };
1372 */
1373 static int
1374 fuse_vnop_open(struct vop_open_args *ap)
1375 {
1376 struct vnode *vp = ap->a_vp;
1377 int a_mode = ap->a_mode;
1378 struct thread *td = ap->a_td;
1379 struct ucred *cred = ap->a_cred;
1380 pid_t pid = td->td_proc->p_pid;
1381 struct fuse_vnode_data *fvdat;
1382
1383 if (fuse_isdeadfs(vp))
1384 return ENXIO;
1385 if (vp->v_type == VCHR || vp->v_type == VBLK || vp->v_type == VFIFO)
1386 return (EOPNOTSUPP);
1387 if ((a_mode & (FREAD | FWRITE | FEXEC)) == 0)
1388 return EINVAL;
1389
1390 fvdat = VTOFUD(vp);
1391
1392 if (fuse_filehandle_validrw(vp, a_mode, cred, pid)) {
1393 fuse_vnode_open(vp, 0, td);
1394 return 0;
1395 }
1396
1397 return fuse_filehandle_open(vp, a_mode, NULL, td, cred);
1398 }
1399
1400 static int
1401 fuse_vnop_pathconf(struct vop_pathconf_args *ap)
1402 {
1403
1404 switch (ap->a_name) {
1405 case _PC_FILESIZEBITS:
1406 *ap->a_retval = 64;
1407 return (0);
1408 case _PC_NAME_MAX:
1409 *ap->a_retval = NAME_MAX;
1410 return (0);
1411 case _PC_LINK_MAX:
1412 *ap->a_retval = MIN(LONG_MAX, FUSE_LINK_MAX);
1413 return (0);
1414 case _PC_SYMLINK_MAX:
1415 *ap->a_retval = MAXPATHLEN;
1416 return (0);
1417 case _PC_NO_TRUNC:
1418 *ap->a_retval = 1;
1419 return (0);
1420 default:
1421 return (vop_stdpathconf(ap));
1422 }
1423 }
1424
1425 /*
1426 struct vnop_read_args {
1427 struct vnode *a_vp;
1428 struct uio *a_uio;
1429 int a_ioflag;
1430 struct ucred *a_cred;
1431 };
1432 */
1433 static int
1434 fuse_vnop_read(struct vop_read_args *ap)
1435 {
1436 struct vnode *vp = ap->a_vp;
1437 struct uio *uio = ap->a_uio;
1438 int ioflag = ap->a_ioflag;
1439 struct ucred *cred = ap->a_cred;
1440 pid_t pid = curthread->td_proc->p_pid;
1441
1442 if (fuse_isdeadfs(vp)) {
1443 return ENXIO;
1444 }
1445
1446 if (VTOFUD(vp)->flag & FN_DIRECTIO) {
1447 ioflag |= IO_DIRECT;
1448 }
1449
1450 return fuse_io_dispatch(vp, uio, ioflag, cred, pid);
1451 }
1452
1453 /*
1454 struct vnop_readdir_args {
1455 struct vnode *a_vp;
1456 struct uio *a_uio;
1457 struct ucred *a_cred;
1458 int *a_eofflag;
1459 int *a_ncookies;
1460 u_long **a_cookies;
1461 };
1462 */
1463 static int
1464 fuse_vnop_readdir(struct vop_readdir_args *ap)
1465 {
1466 struct vnode *vp = ap->a_vp;
1467 struct uio *uio = ap->a_uio;
1468 struct ucred *cred = ap->a_cred;
1469 struct fuse_filehandle *fufh = NULL;
1470 struct fuse_iov cookediov;
1471 int err = 0;
1472 u_long *cookies;
1473 off_t startoff;
1474 ssize_t tresid;
1475 int ncookies;
1476 bool closefufh = false;
1477 pid_t pid = curthread->td_proc->p_pid;
1478
1479 if (ap->a_eofflag)
1480 *ap->a_eofflag = 0;
1481 if (fuse_isdeadfs(vp)) {
1482 return ENXIO;
1483 }
1484 if ( /* XXXIP ((uio_iovcnt(uio) > 1)) || */
1485 (uio_resid(uio) < sizeof(struct dirent))) {
1486 return EINVAL;
1487 }
1488
1489 tresid = uio->uio_resid;
1490 startoff = uio->uio_offset;
1491 err = fuse_filehandle_get_dir(vp, &fufh, cred, pid);
1492 if (err == EBADF && vnode_mount(vp)->mnt_flag & MNT_EXPORTED) {
1493 /*
1494 * nfsd will do VOP_READDIR without first doing VOP_OPEN. We
1495 * must implicitly open the directory here
1496 */
1497 err = fuse_filehandle_open(vp, FREAD, &fufh, curthread, cred);
1498 if (err == 0) {
1499 /*
1500 * When a directory is opened, it must be read from
1501 * the beginning. Hopefully, the "startoff" still
1502 * exists as an offset cookie for the directory.
1503 * If not, it will read the entire directory without
1504 * returning any entries and just return eof.
1505 */
1506 uio->uio_offset = 0;
1507 }
1508 closefufh = true;
1509 }
1510 if (err)
1511 return (err);
1512 if (ap->a_ncookies != NULL) {
1513 ncookies = uio->uio_resid /
1514 (offsetof(struct dirent, d_name) + 4) + 1;
1515 cookies = malloc(ncookies * sizeof(*cookies), M_TEMP, M_WAITOK);
1516 *ap->a_ncookies = ncookies;
1517 *ap->a_cookies = cookies;
1518 } else {
1519 ncookies = 0;
1520 cookies = NULL;
1521 }
1522 #define DIRCOOKEDSIZE FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + MAXNAMLEN + 1)
1523 fiov_init(&cookediov, DIRCOOKEDSIZE);
1524
1525 err = fuse_internal_readdir(vp, uio, startoff, fufh, &cookediov,
1526 &ncookies, cookies);
1527
1528 fiov_teardown(&cookediov);
1529 if (closefufh)
1530 fuse_filehandle_close(vp, fufh, curthread, cred);
1531
1532 if (ap->a_ncookies != NULL) {
1533 if (err == 0) {
1534 *ap->a_ncookies -= ncookies;
1535 } else {
1536 free(*ap->a_cookies, M_TEMP);
1537 *ap->a_ncookies = 0;
1538 *ap->a_cookies = NULL;
1539 }
1540 }
1541 if (err == 0 && tresid == uio->uio_resid)
1542 *ap->a_eofflag = 1;
1543
1544 return err;
1545 }
1546
1547 /*
1548 struct vnop_readlink_args {
1549 struct vnode *a_vp;
1550 struct uio *a_uio;
1551 struct ucred *a_cred;
1552 };
1553 */
1554 static int
1555 fuse_vnop_readlink(struct vop_readlink_args *ap)
1556 {
1557 struct vnode *vp = ap->a_vp;
1558 struct uio *uio = ap->a_uio;
1559 struct ucred *cred = ap->a_cred;
1560
1561 struct fuse_dispatcher fdi;
1562 int err;
1563
1564 if (fuse_isdeadfs(vp)) {
1565 return ENXIO;
1566 }
1567 if (!vnode_islnk(vp)) {
1568 return EINVAL;
1569 }
1570 fdisp_init(&fdi, 0);
1571 err = fdisp_simple_putget_vp(&fdi, FUSE_READLINK, vp, curthread, cred);
1572 if (err) {
1573 goto out;
1574 }
1575 if (((char *)fdi.answ)[0] == '/' &&
1576 fuse_get_mpdata(vnode_mount(vp))->dataflags & FSESS_PUSH_SYMLINKS_IN) {
1577 char *mpth = vnode_mount(vp)->mnt_stat.f_mntonname;
1578
1579 err = uiomove(mpth, strlen(mpth), uio);
1580 }
1581 if (!err) {
1582 err = uiomove(fdi.answ, fdi.iosize, uio);
1583 }
1584 out:
1585 fdisp_destroy(&fdi);
1586 return err;
1587 }
1588
1589 /*
1590 struct vnop_reclaim_args {
1591 struct vnode *a_vp;
1592 struct thread *a_td;
1593 };
1594 */
1595 static int
1596 fuse_vnop_reclaim(struct vop_reclaim_args *ap)
1597 {
1598 struct vnode *vp = ap->a_vp;
1599 struct thread *td = ap->a_td;
1600 struct fuse_vnode_data *fvdat = VTOFUD(vp);
1601 struct fuse_filehandle *fufh, *fufh_tmp;
1602
1603 if (!fvdat) {
1604 panic("FUSE: no vnode data during recycling");
1605 }
1606 LIST_FOREACH_SAFE(fufh, &fvdat->handles, next, fufh_tmp) {
1607 printf("FUSE: vnode being reclaimed with open fufh "
1608 "(type=%#x)", fufh->fufh_type);
1609 fuse_filehandle_close(vp, fufh, td, NULL);
1610 }
1611
1612 if (VTOI(vp) == 1) {
1613 /*
1614 * Don't send FUSE_FORGET for the root inode, because
1615 * we never send FUSE_LOOKUP for it (see
1616 * fuse_vfsop_root) and we don't want the server to see
1617 * mismatched lookup counts.
1618 */
1619 struct fuse_data *data;
1620 struct vnode *vroot;
1621
1622 data = fuse_get_mpdata(vnode_mount(vp));
1623 FUSE_LOCK();
1624 vroot = data->vroot;
1625 data->vroot = NULL;
1626 FUSE_UNLOCK();
1627 if (vroot)
1628 vrele(vroot);
1629 } else if (!fuse_isdeadfs(vp) && fvdat->nlookup > 0) {
1630 fuse_internal_forget_send(vnode_mount(vp), td, NULL, VTOI(vp),
1631 fvdat->nlookup);
1632 }
1633 cache_purge(vp);
1634 vfs_hash_remove(vp);
1635 vnode_destroy_vobject(vp);
1636 fuse_vnode_destroy(vp);
1637
1638 return 0;
1639 }
1640
1641 /*
1642 struct vnop_remove_args {
1643 struct vnode *a_dvp;
1644 struct vnode *a_vp;
1645 struct componentname *a_cnp;
1646 };
1647 */
1648 static int
1649 fuse_vnop_remove(struct vop_remove_args *ap)
1650 {
1651 struct vnode *dvp = ap->a_dvp;
1652 struct vnode *vp = ap->a_vp;
1653 struct componentname *cnp = ap->a_cnp;
1654
1655 int err;
1656
1657 if (fuse_isdeadfs(vp)) {
1658 return ENXIO;
1659 }
1660 if (vnode_isdir(vp)) {
1661 return EPERM;
1662 }
1663
1664 err = fuse_internal_remove(dvp, vp, cnp, FUSE_UNLINK);
1665
1666 return err;
1667 }
1668
1669 /*
1670 struct vnop_rename_args {
1671 struct vnode *a_fdvp;
1672 struct vnode *a_fvp;
1673 struct componentname *a_fcnp;
1674 struct vnode *a_tdvp;
1675 struct vnode *a_tvp;
1676 struct componentname *a_tcnp;
1677 };
1678 */
1679 static int
1680 fuse_vnop_rename(struct vop_rename_args *ap)
1681 {
1682 struct vnode *fdvp = ap->a_fdvp;
1683 struct vnode *fvp = ap->a_fvp;
1684 struct componentname *fcnp = ap->a_fcnp;
1685 struct vnode *tdvp = ap->a_tdvp;
1686 struct vnode *tvp = ap->a_tvp;
1687 struct componentname *tcnp = ap->a_tcnp;
1688 struct fuse_data *data;
1689 bool newparent = fdvp != tdvp;
1690 bool isdir = fvp->v_type == VDIR;
1691 int err = 0;
1692
1693 if (fuse_isdeadfs(fdvp)) {
1694 return ENXIO;
1695 }
1696 if (fvp->v_mount != tdvp->v_mount ||
1697 (tvp && fvp->v_mount != tvp->v_mount)) {
1698 SDT_PROBE2(fusefs, , vnops, trace, 1, "cross-device rename");
1699 err = EXDEV;
1700 goto out;
1701 }
1702 cache_purge(fvp);
1703
1704 /*
1705 * FUSE library is expected to check if target directory is not
1706 * under the source directory in the file system tree.
1707 * Linux performs this check at VFS level.
1708 */
1709 /*
1710 * If source is a directory, and it will get a new parent, user must
1711 * have write permission to it, so ".." can be modified.
1712 */
1713 data = fuse_get_mpdata(vnode_mount(tdvp));
1714 if (data->dataflags & FSESS_DEFAULT_PERMISSIONS && isdir && newparent) {
1715 err = fuse_internal_access(fvp, VWRITE,
1716 tcnp->cn_thread, tcnp->cn_cred);
1717 if (err)
1718 goto out;
1719 }
1720 sx_xlock(&data->rename_lock);
1721 err = fuse_internal_rename(fdvp, fcnp, tdvp, tcnp);
1722 if (err == 0) {
1723 if (tdvp != fdvp)
1724 fuse_vnode_setparent(fvp, tdvp);
1725 if (tvp != NULL)
1726 fuse_vnode_setparent(tvp, NULL);
1727 }
1728 sx_unlock(&data->rename_lock);
1729
1730 if (tvp != NULL && tvp != fvp) {
1731 cache_purge(tvp);
1732 }
1733 if (vnode_isdir(fvp)) {
1734 if (((tvp != NULL) && vnode_isdir(tvp)) || vnode_isdir(fvp)) {
1735 cache_purge(tdvp);
1736 }
1737 cache_purge(fdvp);
1738 }
1739 out:
1740 if (tdvp == tvp) {
1741 vrele(tdvp);
1742 } else {
1743 vput(tdvp);
1744 }
1745 if (tvp != NULL) {
1746 vput(tvp);
1747 }
1748 vrele(fdvp);
1749 vrele(fvp);
1750
1751 return err;
1752 }
1753
1754 /*
1755 struct vnop_rmdir_args {
1756 struct vnode *a_dvp;
1757 struct vnode *a_vp;
1758 struct componentname *a_cnp;
1759 } *ap;
1760 */
1761 static int
1762 fuse_vnop_rmdir(struct vop_rmdir_args *ap)
1763 {
1764 struct vnode *dvp = ap->a_dvp;
1765 struct vnode *vp = ap->a_vp;
1766
1767 int err;
1768
1769 if (fuse_isdeadfs(vp)) {
1770 return ENXIO;
1771 }
1772 if (VTOFUD(vp) == VTOFUD(dvp)) {
1773 return EINVAL;
1774 }
1775 err = fuse_internal_remove(dvp, vp, ap->a_cnp, FUSE_RMDIR);
1776
1777 return err;
1778 }
1779
1780 /*
1781 struct vnop_setattr_args {
1782 struct vnode *a_vp;
1783 struct vattr *a_vap;
1784 struct ucred *a_cred;
1785 struct thread *a_td;
1786 };
1787 */
1788 static int
1789 fuse_vnop_setattr(struct vop_setattr_args *ap)
1790 {
1791 struct vnode *vp = ap->a_vp;
1792 struct vattr *vap = ap->a_vap;
1793 struct ucred *cred = ap->a_cred;
1794 struct thread *td = curthread;
1795 struct mount *mp;
1796 struct fuse_data *data;
1797 struct vattr old_va;
1798 int dataflags;
1799 int err = 0, err2;
1800 accmode_t accmode = 0;
1801 bool checkperm;
1802 bool drop_suid = false;
1803 gid_t cr_gid;
1804
1805 mp = vnode_mount(vp);
1806 data = fuse_get_mpdata(mp);
1807 dataflags = data->dataflags;
1808 checkperm = dataflags & FSESS_DEFAULT_PERMISSIONS;
1809 if (cred->cr_ngroups > 0)
1810 cr_gid = cred->cr_groups[0];
1811 else
1812 cr_gid = 0;
1813
1814 if (fuse_isdeadfs(vp)) {
1815 return ENXIO;
1816 }
1817
1818 if (vap->va_uid != (uid_t)VNOVAL) {
1819 if (checkperm) {
1820 /* Only root may change a file's owner */
1821 err = priv_check_cred(cred, PRIV_VFS_CHOWN, 0);
1822 if (err) {
1823 /* As a special case, allow the null chown */
1824 err2 = fuse_internal_getattr(vp, &old_va, cred,
1825 td);
1826 if (err2)
1827 return (err2);
1828 if (vap->va_uid != old_va.va_uid)
1829 return err;
1830 else
1831 accmode |= VADMIN;
1832 drop_suid = true;
1833 } else
1834 accmode |= VADMIN;
1835 } else
1836 accmode |= VADMIN;
1837 }
1838 if (vap->va_gid != (gid_t)VNOVAL) {
1839 if (checkperm && priv_check_cred(cred, PRIV_VFS_CHOWN, 0))
1840 drop_suid = true;
1841 if (checkperm && !groupmember(vap->va_gid, cred))
1842 {
1843 /*
1844 * Non-root users may only chgrp to one of their own
1845 * groups
1846 */
1847 err = priv_check_cred(cred, PRIV_VFS_CHOWN, 0);
1848 if (err) {
1849 /* As a special case, allow the null chgrp */
1850 err2 = fuse_internal_getattr(vp, &old_va, cred,
1851 td);
1852 if (err2)
1853 return (err2);
1854 if (vap->va_gid != old_va.va_gid)
1855 return err;
1856 accmode |= VADMIN;
1857 } else
1858 accmode |= VADMIN;
1859 } else
1860 accmode |= VADMIN;
1861 }
1862 if (vap->va_size != VNOVAL) {
1863 switch (vp->v_type) {
1864 case VDIR:
1865 return (EISDIR);
1866 case VLNK:
1867 case VREG:
1868 if (vfs_isrdonly(mp))
1869 return (EROFS);
1870 break;
1871 default:
1872 /*
1873 * According to POSIX, the result is unspecified
1874 * for file types other than regular files,
1875 * directories and shared memory objects. We
1876 * don't support shared memory objects in the file
1877 * system, and have dubious support for truncating
1878 * symlinks. Just ignore the request in other cases.
1879 */
1880 return (0);
1881 }
1882 /* Don't set accmode. Permission to trunc is checked upstack */
1883 }
1884 if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
1885 if (vap->va_vaflags & VA_UTIMES_NULL)
1886 accmode |= VWRITE;
1887 else
1888 accmode |= VADMIN;
1889 }
1890 if (drop_suid) {
1891 if (vap->va_mode != (mode_t)VNOVAL)
1892 vap->va_mode &= ~(S_ISUID | S_ISGID);
1893 else {
1894 err = fuse_internal_getattr(vp, &old_va, cred, td);
1895 if (err)
1896 return (err);
1897 vap->va_mode = old_va.va_mode & ~(S_ISUID | S_ISGID);
1898 }
1899 }
1900 if (vap->va_mode != (mode_t)VNOVAL) {
1901 /* Only root may set the sticky bit on non-directories */
1902 if (checkperm && vp->v_type != VDIR && (vap->va_mode & S_ISTXT)
1903 && priv_check_cred(cred, PRIV_VFS_STICKYFILE, 0))
1904 return EFTYPE;
1905 if (checkperm && (vap->va_mode & S_ISGID)) {
1906 err = fuse_internal_getattr(vp, &old_va, cred, td);
1907 if (err)
1908 return (err);
1909 if (!groupmember(old_va.va_gid, cred)) {
1910 err = priv_check_cred(cred, PRIV_VFS_SETGID, 0);
1911 if (err)
1912 return (err);
1913 }
1914 }
1915 accmode |= VADMIN;
1916 }
1917
1918 if (vfs_isrdonly(mp))
1919 return EROFS;
1920
1921 if (checkperm) {
1922 err = fuse_internal_access(vp, accmode, td, cred);
1923 } else {
1924 err = 0;
1925 }
1926 if (err)
1927 return err;
1928 else
1929 return fuse_internal_setattr(vp, vap, td, cred);
1930 }
1931
1932 /*
1933 struct vnop_strategy_args {
1934 struct vnode *a_vp;
1935 struct buf *a_bp;
1936 };
1937 */
1938 static int
1939 fuse_vnop_strategy(struct vop_strategy_args *ap)
1940 {
1941 struct vnode *vp = ap->a_vp;
1942 struct buf *bp = ap->a_bp;
1943
1944 if (!vp || fuse_isdeadfs(vp)) {
1945 bp->b_ioflags |= BIO_ERROR;
1946 bp->b_error = ENXIO;
1947 bufdone(bp);
1948 return 0;
1949 }
1950
1951 /*
1952 * VOP_STRATEGY always returns zero and signals error via bp->b_ioflags.
1953 * fuse_io_strategy sets bp's error fields
1954 */
1955 (void)fuse_io_strategy(vp, bp);
1956
1957 return 0;
1958 }
1959
1960
1961 /*
1962 struct vnop_symlink_args {
1963 struct vnode *a_dvp;
1964 struct vnode **a_vpp;
1965 struct componentname *a_cnp;
1966 struct vattr *a_vap;
1967 char *a_target;
1968 };
1969 */
1970 static int
1971 fuse_vnop_symlink(struct vop_symlink_args *ap)
1972 {
1973 struct vnode *dvp = ap->a_dvp;
1974 struct vnode **vpp = ap->a_vpp;
1975 struct componentname *cnp = ap->a_cnp;
1976 char *target = ap->a_target;
1977
1978 struct fuse_dispatcher fdi;
1979
1980 int err;
1981 size_t len;
1982
1983 if (fuse_isdeadfs(dvp)) {
1984 return ENXIO;
1985 }
1986 /*
1987 * Unlike the other creator type calls, here we have to create a message
1988 * where the name of the new entry comes first, and the data describing
1989 * the entry comes second.
1990 * Hence we can't rely on our handy fuse_internal_newentry() routine,
1991 * but put together the message manually and just call the core part.
1992 */
1993
1994 len = strlen(target) + 1;
1995 fdisp_init(&fdi, len + cnp->cn_namelen + 1);
1996 fdisp_make_vp(&fdi, FUSE_SYMLINK, dvp, curthread, NULL);
1997
1998 memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen);
1999 ((char *)fdi.indata)[cnp->cn_namelen] = '\0';
2000 memcpy((char *)fdi.indata + cnp->cn_namelen + 1, target, len);
2001
2002 err = fuse_internal_newentry_core(dvp, vpp, cnp, VLNK, &fdi);
2003 fdisp_destroy(&fdi);
2004 return err;
2005 }
2006
2007 /*
2008 struct vnop_write_args {
2009 struct vnode *a_vp;
2010 struct uio *a_uio;
2011 int a_ioflag;
2012 struct ucred *a_cred;
2013 };
2014 */
2015 static int
2016 fuse_vnop_write(struct vop_write_args *ap)
2017 {
2018 struct vnode *vp = ap->a_vp;
2019 struct uio *uio = ap->a_uio;
2020 int ioflag = ap->a_ioflag;
2021 struct ucred *cred = ap->a_cred;
2022 pid_t pid = curthread->td_proc->p_pid;
2023
2024 if (fuse_isdeadfs(vp)) {
2025 return ENXIO;
2026 }
2027
2028 if (VTOFUD(vp)->flag & FN_DIRECTIO) {
2029 ioflag |= IO_DIRECT;
2030 }
2031
2032 return fuse_io_dispatch(vp, uio, ioflag, cred, pid);
2033 }
2034
2035 static daddr_t
2036 fuse_gbp_getblkno(struct vnode *vp, vm_ooffset_t off)
2037 {
2038 const int biosize = fuse_iosize(vp);
2039
2040 return (off / biosize);
2041 }
2042
2043 static int
2044 fuse_gbp_getblksz(struct vnode *vp, daddr_t lbn, long *blksz)
2045 {
2046 off_t filesize;
2047 int err;
2048 const int biosize = fuse_iosize(vp);
2049
2050 err = fuse_vnode_size(vp, &filesize, NULL, NULL);
2051 if (err) {
2052 /* This will turn into a SIGBUS */
2053 return (EIO);
2054 } else if ((off_t)lbn * biosize >= filesize) {
2055 *blksz = 0;
2056 } else if ((off_t)(lbn + 1) * biosize > filesize) {
2057 *blksz = filesize - (off_t)lbn *biosize;
2058 } else {
2059 *blksz = biosize;
2060 }
2061 return (0);
2062 }
2063
2064 /*
2065 struct vnop_getpages_args {
2066 struct vnode *a_vp;
2067 vm_page_t *a_m;
2068 int a_count;
2069 int a_reqpage;
2070 };
2071 */
2072 static int
2073 fuse_vnop_getpages(struct vop_getpages_args *ap)
2074 {
2075 struct vnode *vp = ap->a_vp;
2076
2077 if (!fsess_opt_mmap(vnode_mount(vp))) {
2078 SDT_PROBE2(fusefs, , vnops, trace, 1,
2079 "called on non-cacheable vnode??\n");
2080 return (VM_PAGER_ERROR);
2081 }
2082
2083 return (vfs_bio_getpages(vp, ap->a_m, ap->a_count, ap->a_rbehind,
2084 ap->a_rahead, fuse_gbp_getblkno, fuse_gbp_getblksz));
2085 }
2086
/*
 * Character placed between the namespace prefix and the attribute name
 * when building or parsing FUSE extended attribute names
 * (e.g. "user" + '.' + "name").
 */
static const char extattr_namespace_separator = '.';
2088
2089 /*
2090 struct vop_getextattr_args {
2091 struct vop_generic_args a_gen;
2092 struct vnode *a_vp;
2093 int a_attrnamespace;
2094 const char *a_name;
2095 struct uio *a_uio;
2096 size_t *a_size;
2097 struct ucred *a_cred;
2098 struct thread *a_td;
2099 };
2100 */
2101 static int
2102 fuse_vnop_getextattr(struct vop_getextattr_args *ap)
2103 {
2104 struct vnode *vp = ap->a_vp;
2105 struct uio *uio = ap->a_uio;
2106 struct fuse_dispatcher fdi;
2107 struct fuse_getxattr_in *get_xattr_in;
2108 struct fuse_getxattr_out *get_xattr_out;
2109 struct mount *mp = vnode_mount(vp);
2110 struct thread *td = ap->a_td;
2111 struct ucred *cred = ap->a_cred;
2112 char *prefix;
2113 char *attr_str;
2114 size_t len;
2115 int err;
2116
2117 if (fuse_isdeadfs(vp))
2118 return (ENXIO);
2119
2120 if (!fsess_isimpl(mp, FUSE_GETXATTR))
2121 return EOPNOTSUPP;
2122
2123 err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VREAD);
2124 if (err)
2125 return err;
2126
2127 /* Default to looking for user attributes. */
2128 if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM)
2129 prefix = EXTATTR_NAMESPACE_SYSTEM_STRING;
2130 else
2131 prefix = EXTATTR_NAMESPACE_USER_STRING;
2132
2133 len = strlen(prefix) + sizeof(extattr_namespace_separator) +
2134 strlen(ap->a_name) + 1;
2135
2136 fdisp_init(&fdi, len + sizeof(*get_xattr_in));
2137 fdisp_make_vp(&fdi, FUSE_GETXATTR, vp, td, cred);
2138
2139 get_xattr_in = fdi.indata;
2140 /*
2141 * Check to see whether we're querying the available size or
2142 * issuing the actual request. If we pass in 0, we get back struct
2143 * fuse_getxattr_out. If we pass in a non-zero size, we get back
2144 * that much data, without the struct fuse_getxattr_out header.
2145 */
2146 if (uio == NULL)
2147 get_xattr_in->size = 0;
2148 else
2149 get_xattr_in->size = uio->uio_resid;
2150
2151 attr_str = (char *)fdi.indata + sizeof(*get_xattr_in);
2152 snprintf(attr_str, len, "%s%c%s", prefix, extattr_namespace_separator,
2153 ap->a_name);
2154
2155 err = fdisp_wait_answ(&fdi);
2156 if (err != 0) {
2157 if (err == ENOSYS) {
2158 fsess_set_notimpl(mp, FUSE_GETXATTR);
2159 err = EOPNOTSUPP;
2160 }
2161 goto out;
2162 }
2163
2164 get_xattr_out = fdi.answ;
2165
2166 if (ap->a_size != NULL)
2167 *ap->a_size = get_xattr_out->size;
2168
2169 if (uio != NULL)
2170 err = uiomove(fdi.answ, fdi.iosize, uio);
2171
2172 out:
2173 fdisp_destroy(&fdi);
2174 return (err);
2175 }
2176
2177 /*
2178 struct vop_setextattr_args {
2179 struct vop_generic_args a_gen;
2180 struct vnode *a_vp;
2181 int a_attrnamespace;
2182 const char *a_name;
2183 struct uio *a_uio;
2184 struct ucred *a_cred;
2185 struct thread *a_td;
2186 };
2187 */
2188 static int
2189 fuse_vnop_setextattr(struct vop_setextattr_args *ap)
2190 {
2191 struct vnode *vp = ap->a_vp;
2192 struct uio *uio = ap->a_uio;
2193 struct fuse_dispatcher fdi;
2194 struct fuse_setxattr_in *set_xattr_in;
2195 struct mount *mp = vnode_mount(vp);
2196 struct thread *td = ap->a_td;
2197 struct ucred *cred = ap->a_cred;
2198 char *prefix;
2199 size_t len;
2200 char *attr_str;
2201 int err;
2202
2203 if (fuse_isdeadfs(vp))
2204 return (ENXIO);
2205
2206 if (!fsess_isimpl(mp, FUSE_SETXATTR))
2207 return EOPNOTSUPP;
2208
2209 if (vfs_isrdonly(mp))
2210 return EROFS;
2211
2212 /* Deleting xattrs must use VOP_DELETEEXTATTR instead */
2213 if (ap->a_uio == NULL) {
2214 /*
2215 * If we got here as fallback from VOP_DELETEEXTATTR, then
2216 * return EOPNOTSUPP.
2217 */
2218 if (!fsess_isimpl(mp, FUSE_REMOVEXATTR))
2219 return (EOPNOTSUPP);
2220 else
2221 return (EINVAL);
2222 }
2223
2224 err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td,
2225 VWRITE);
2226 if (err)
2227 return err;
2228
2229 /* Default to looking for user attributes. */
2230 if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM)
2231 prefix = EXTATTR_NAMESPACE_SYSTEM_STRING;
2232 else
2233 prefix = EXTATTR_NAMESPACE_USER_STRING;
2234
2235 len = strlen(prefix) + sizeof(extattr_namespace_separator) +
2236 strlen(ap->a_name) + 1;
2237
2238 fdisp_init(&fdi, len + sizeof(*set_xattr_in) + uio->uio_resid);
2239 fdisp_make_vp(&fdi, FUSE_SETXATTR, vp, td, cred);
2240
2241 set_xattr_in = fdi.indata;
2242 set_xattr_in->size = uio->uio_resid;
2243
2244 attr_str = (char *)fdi.indata + sizeof(*set_xattr_in);
2245 snprintf(attr_str, len, "%s%c%s", prefix, extattr_namespace_separator,
2246 ap->a_name);
2247
2248 err = uiomove((char *)fdi.indata + sizeof(*set_xattr_in) + len,
2249 uio->uio_resid, uio);
2250 if (err != 0) {
2251 goto out;
2252 }
2253
2254 err = fdisp_wait_answ(&fdi);
2255
2256 if (err == ENOSYS) {
2257 fsess_set_notimpl(mp, FUSE_SETXATTR);
2258 err = EOPNOTSUPP;
2259 }
2260 if (err == ERESTART) {
2261 /* Can't restart after calling uiomove */
2262 err = EINTR;
2263 }
2264
2265 out:
2266 fdisp_destroy(&fdi);
2267 return (err);
2268 }
2269
2270 /*
2271 * The Linux / FUSE extended attribute list is simply a collection of
2272 * NUL-terminated strings. The FreeBSD extended attribute list is a single
2273 * byte length followed by a non-NUL terminated string. So, this allows
2274 * conversion of the Linux / FUSE format to the FreeBSD format in place.
2275 * Linux attribute names are reported with the namespace as a prefix (e.g.
2276 * "user.attribute_name"), but in FreeBSD they are reported without the
2277 * namespace prefix (e.g. "attribute_name"). So, we're going from:
2278 *
2279 * user.attr_name1\0user.attr_name2\0
2280 *
2281 * to:
2282 *
2283 * <num>attr_name1<num>attr_name2
2284 *
2285 * Where "<num>" is a single byte number of characters in the attribute name.
2286 *
2287 * Args:
 *	prefix - extattr namespace prefix string
2289 * list, list_len - input list with namespace prefixes
2290 * bsd_list, bsd_list_len - output list compatible with bsd vfs
2291 */
2292 static int
2293 fuse_xattrlist_convert(char *prefix, const char *list, int list_len,
2294 char *bsd_list, int *bsd_list_len)
2295 {
2296 int len, pos, dist_to_next, prefix_len;
2297
2298 pos = 0;
2299 *bsd_list_len = 0;
2300 prefix_len = strlen(prefix);
2301
2302 while (pos < list_len && list[pos] != '\0') {
2303 dist_to_next = strlen(&list[pos]) + 1;
2304 if (bcmp(&list[pos], prefix, prefix_len) == 0 &&
2305 list[pos + prefix_len] == extattr_namespace_separator) {
2306 len = dist_to_next -
2307 (prefix_len + sizeof(extattr_namespace_separator)) - 1;
2308 if (len >= EXTATTR_MAXNAMELEN)
2309 return (ENAMETOOLONG);
2310
2311 bsd_list[*bsd_list_len] = len;
2312 memcpy(&bsd_list[*bsd_list_len + 1],
2313 &list[pos + prefix_len +
2314 sizeof(extattr_namespace_separator)], len);
2315
2316 *bsd_list_len += len + 1;
2317 }
2318
2319 pos += dist_to_next;
2320 }
2321
2322 return (0);
2323 }
2324
2325 /*
2326 * List extended attributes
2327 *
2328 * The FUSE_LISTXATTR operation is based on Linux's listxattr(2) syscall, which
2329 * has a number of differences compared to its FreeBSD equivalent,
2330 * extattr_list_file:
2331 *
 * - FUSE_LISTXATTR returns all extended attributes across all namespaces,
 *   whereas extattr_list_file(2) only returns attributes for a single
 *   namespace
 * - FUSE_LISTXATTR prepends each attribute name with "namespace."
 * - If the provided buffer is not large enough to hold the result,
 *   FUSE_LISTXATTR should return ERANGE, whereas extattr_list_file(2) is
 *   expected to return as many results as will fit.
2338 */
2339 /*
2340 struct vop_listextattr_args {
2341 struct vop_generic_args a_gen;
2342 struct vnode *a_vp;
2343 int a_attrnamespace;
2344 struct uio *a_uio;
2345 size_t *a_size;
2346 struct ucred *a_cred;
2347 struct thread *a_td;
2348 };
2349 */
2350 static int
2351 fuse_vnop_listextattr(struct vop_listextattr_args *ap)
2352 {
2353 struct vnode *vp = ap->a_vp;
2354 struct uio *uio = ap->a_uio;
2355 struct fuse_dispatcher fdi;
2356 struct fuse_listxattr_in *list_xattr_in;
2357 struct fuse_listxattr_out *list_xattr_out;
2358 struct mount *mp = vnode_mount(vp);
2359 struct thread *td = ap->a_td;
2360 struct ucred *cred = ap->a_cred;
2361 char *prefix;
2362 char *bsd_list = NULL;
2363 char *linux_list;
2364 int bsd_list_len;
2365 int linux_list_len;
2366 int err;
2367
2368 if (fuse_isdeadfs(vp))
2369 return (ENXIO);
2370
2371 if (!fsess_isimpl(mp, FUSE_LISTXATTR))
2372 return EOPNOTSUPP;
2373
2374 err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VREAD);
2375 if (err)
2376 return err;
2377
2378 /*
2379 * Add space for a NUL and the period separator if enabled.
2380 * Default to looking for user attributes.
2381 */
2382 if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM)
2383 prefix = EXTATTR_NAMESPACE_SYSTEM_STRING;
2384 else
2385 prefix = EXTATTR_NAMESPACE_USER_STRING;
2386
2387 fdisp_init(&fdi, sizeof(*list_xattr_in));
2388 fdisp_make_vp(&fdi, FUSE_LISTXATTR, vp, td, cred);
2389
2390 /*
2391 * Retrieve Linux / FUSE compatible list size.
2392 */
2393 list_xattr_in = fdi.indata;
2394 list_xattr_in->size = 0;
2395
2396 err = fdisp_wait_answ(&fdi);
2397 if (err != 0) {
2398 if (err == ENOSYS) {
2399 fsess_set_notimpl(mp, FUSE_LISTXATTR);
2400 err = EOPNOTSUPP;
2401 }
2402 goto out;
2403 }
2404
2405 list_xattr_out = fdi.answ;
2406 linux_list_len = list_xattr_out->size;
2407 if (linux_list_len == 0) {
2408 if (ap->a_size != NULL)
2409 *ap->a_size = linux_list_len;
2410 goto out;
2411 }
2412
2413 /*
2414 * Retrieve Linux / FUSE compatible list values.
2415 */
2416 fdisp_refresh_vp(&fdi, FUSE_LISTXATTR, vp, td, cred);
2417 list_xattr_in = fdi.indata;
2418 list_xattr_in->size = linux_list_len;
2419
2420 err = fdisp_wait_answ(&fdi);
2421 if (err == ERANGE) {
2422 /*
2423 * Race detected. The attribute list must've grown since the
2424 * first FUSE_LISTXATTR call. Start over. Go all the way back
2425 * to userland so we can process signals, if necessary, before
2426 * restarting.
2427 */
2428 err = ERESTART;
2429 goto out;
2430 } else if (err != 0)
2431 goto out;
2432
2433 linux_list = fdi.answ;
2434 /* FUSE doesn't allow the server to return more data than requested */
2435 if (fdi.iosize > linux_list_len) {
2436 struct fuse_data *data = fuse_get_mpdata(mp);
2437
2438 fuse_warn(data, FSESS_WARN_LSEXTATTR_LONG,
2439 "server returned "
2440 "more extended attribute data than requested; "
2441 "should've returned ERANGE instead.");
2442 } else {
2443 /* But returning less data is fine */
2444 linux_list_len = fdi.iosize;
2445 }
2446
2447 /*
2448 * Retrieve the BSD compatible list values.
2449 * The Linux / FUSE attribute list format isn't the same
2450 * as FreeBSD's format. So we need to transform it into
2451 * FreeBSD's format before giving it to the user.
2452 */
2453 bsd_list = malloc(linux_list_len, M_TEMP, M_WAITOK);
2454 err = fuse_xattrlist_convert(prefix, linux_list, linux_list_len,
2455 bsd_list, &bsd_list_len);
2456 if (err != 0)
2457 goto out;
2458
2459 if (ap->a_size != NULL)
2460 *ap->a_size = bsd_list_len;
2461
2462 if (uio != NULL)
2463 err = uiomove(bsd_list, bsd_list_len, uio);
2464
2465 out:
2466 free(bsd_list, M_TEMP);
2467 fdisp_destroy(&fdi);
2468 return (err);
2469 }
2470
2471 /*
2472 struct vop_deleteextattr_args {
2473 struct vop_generic_args a_gen;
2474 struct vnode *a_vp;
2475 int a_attrnamespace;
2476 const char *a_name;
2477 struct ucred *a_cred;
2478 struct thread *a_td;
2479 };
2480 */
2481 static int
2482 fuse_vnop_deleteextattr(struct vop_deleteextattr_args *ap)
2483 {
2484 struct vnode *vp = ap->a_vp;
2485 struct fuse_dispatcher fdi;
2486 struct mount *mp = vnode_mount(vp);
2487 struct thread *td = ap->a_td;
2488 struct ucred *cred = ap->a_cred;
2489 char *prefix;
2490 size_t len;
2491 char *attr_str;
2492 int err;
2493
2494 if (fuse_isdeadfs(vp))
2495 return (ENXIO);
2496
2497 if (!fsess_isimpl(mp, FUSE_REMOVEXATTR))
2498 return EOPNOTSUPP;
2499
2500 if (vfs_isrdonly(mp))
2501 return EROFS;
2502
2503 err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td,
2504 VWRITE);
2505 if (err)
2506 return err;
2507
2508 /* Default to looking for user attributes. */
2509 if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM)
2510 prefix = EXTATTR_NAMESPACE_SYSTEM_STRING;
2511 else
2512 prefix = EXTATTR_NAMESPACE_USER_STRING;
2513
2514 len = strlen(prefix) + sizeof(extattr_namespace_separator) +
2515 strlen(ap->a_name) + 1;
2516
2517 fdisp_init(&fdi, len);
2518 fdisp_make_vp(&fdi, FUSE_REMOVEXATTR, vp, td, cred);
2519
2520 attr_str = fdi.indata;
2521 snprintf(attr_str, len, "%s%c%s", prefix, extattr_namespace_separator,
2522 ap->a_name);
2523
2524 err = fdisp_wait_answ(&fdi);
2525 if (err == ENOSYS) {
2526 fsess_set_notimpl(mp, FUSE_REMOVEXATTR);
2527 err = EOPNOTSUPP;
2528 }
2529
2530 fdisp_destroy(&fdi);
2531 return (err);
2532 }
2533
2534 /*
struct vop_print_args {
2536 struct vnode *a_vp;
2537 };
2538 */
2539 static int
2540 fuse_vnop_print(struct vop_print_args *ap)
2541 {
2542 struct fuse_vnode_data *fvdat = VTOFUD(ap->a_vp);
2543
2544 printf("nodeid: %ju, parent nodeid: %ju, nlookup: %ju, flag: %#x\n",
2545 (uintmax_t)VTOILLU(ap->a_vp), (uintmax_t)fvdat->parent_nid,
2546 (uintmax_t)fvdat->nlookup,
2547 fvdat->flag);
2548
2549 return 0;
2550 }
2551
2552 /*
2553 * Get an NFS filehandle for a FUSE file.
2554 *
2555 * This will only work for FUSE file systems that guarantee the uniqueness of
2556 * nodeid:generation, which most don't.
2557 */
2558 /*
2559 vop_vptofh {
2560 IN struct vnode *a_vp;
2561 IN struct fid *a_fhp;
2562 };
2563 */
2564 static int
2565 fuse_vnop_vptofh(struct vop_vptofh_args *ap)
2566 {
2567 struct vnode *vp = ap->a_vp;
2568 struct fuse_vnode_data *fvdat = VTOFUD(vp);
2569 struct fuse_fid *fhp = (struct fuse_fid *)(ap->a_fhp);
2570 _Static_assert(sizeof(struct fuse_fid) <= sizeof(struct fid),
2571 "FUSE fid type is too big");
2572 struct mount *mp = vnode_mount(vp);
2573 struct fuse_data *data = fuse_get_mpdata(mp);
2574 struct vattr va;
2575 int err;
2576
2577 if (!(data->dataflags & FSESS_EXPORT_SUPPORT))
2578 return EOPNOTSUPP;
2579
2580 err = fuse_internal_getattr(vp, &va, curthread->td_ucred, curthread);
2581 if (err)
2582 return err;
2583
2584 /*ip = VTOI(ap->a_vp);*/
2585 /*ufhp = (struct ufid *)ap->a_fhp;*/
2586 fhp->len = sizeof(struct fuse_fid);
2587 fhp->nid = fvdat->nid;
2588 if (fvdat->generation <= UINT32_MAX)
2589 fhp->gen = fvdat->generation;
2590 else
2591 return EOVERFLOW;
2592 return (0);
2593 }
2594
2595
Cache object: 8c54bb44fa6ee7e39c619111bbc28182
|