/* Listing source: FreeBSD/Linux Kernel Cross Reference -- sys/dev/vn/vn.c */
1 /*
2 * Copyright (c) 1988 University of Utah.
3 * Copyright (c) 1990, 1993
4 * The Regents of the University of California. All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * the Systems Programming Group of the University of Utah Computer
8 * Science Department.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * from: Utah Hdr: vn.c 1.13 94/04/02
39 *
40 * from: @(#)vn.c 8.6 (Berkeley) 4/1/94
41 * $FreeBSD: src/sys/dev/vn/vn.c,v 1.41.2.4 1999/09/05 08:09:30 peter Exp $
42 */
43
44 /*
45 * Vnode disk driver.
46 *
47 * Block/character interface to a vnode. Allows one to treat a file
48 * as a disk (e.g. build a filesystem in it, mount it, etc.).
49 *
 * NOTE 1: Paging requests (B_PAGING) use the VOP_BMAP/VOP_STRATEGY
 * interface to the vnode instead of a simple read/write, to avoid
 * distorting the local buffer cache.  All other requests go through
 * VOP_READ/VOP_WRITE.
53 *
54 * NOTE 2: There is a security issue involved with this driver.
55 * Once mounted all access to the contents of the "mapped" file via
56 * the special file is controlled by the permissions on the special
57 * file, the protection of the mapped file is ignored (effectively,
58 * by using root credentials in all transactions).
59 *
60 * NOTE 3: Doesn't interact with leases, should it?
61 */
62 #include "vn.h"
63 #if NVN > 0
64
/*
 * Enforce a floor of 8 vn units: a configured NVN below 8 is raised
 * to 8, a larger value is left alone.
 */
#if NVN < 8
#undef NVN
#define NVN 8
#endif
70
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/kernel.h>
74 #include <sys/namei.h>
75 #include <sys/proc.h>
76 #include <sys/errno.h>
77 #include <sys/buf.h>
78 #include <sys/malloc.h>
79 #include <sys/ioctl.h>
80 #include <sys/mount.h>
81 #include <sys/vnode.h>
82 #include <sys/file.h>
83 #include <sys/uio.h>
84 #include <sys/disklabel.h>
85 #include <sys/diskslice.h>
86 #include <sys/stat.h>
87 #include <sys/conf.h>
88 #ifdef DEVFS
89 #include <sys/devfsext.h>
90 #endif /*DEVFS*/
91
92 #include <miscfs/specfs/specdev.h>
93
94 #include <sys/vnioctl.h>
95
/*
 * Driver entry points.  Each is declared through its d_*_t function
 * type (declared outside this file, presumably in <sys/conf.h>) so the
 * definitions below must match those signatures.
 */
static d_open_t vnopen;
static d_close_t vnclose;
static d_ioctl_t vnioctl;
static d_dump_t vndump;
static d_psize_t vnsize;
static d_strategy_t vnstrategy;
102
/* Major numbers handed to bdevsw_add_generic() by vn_drvinit(). */
#define CDEV_MAJOR 43
#define BDEV_MAJOR 15
/* Character switch; only declared here, never explicitly initialized. */
static struct cdevsw vn_cdevsw;
/*
 * Block switch.  The initializer is positional, so the order of the
 * entries must follow the field order of struct bdevsw.
 */
static struct bdevsw vn_bdevsw =
	{ vnopen, vnclose, vnstrategy, vnioctl, /*15*/
	  vndump, vnsize, 0, "vn", &vn_cdevsw, -1 };
109
110
#ifdef DEBUG
/*
 * Debug-only knobs and flag bits.  Nothing in the visible part of this
 * file reads them; runtime tracing is driven by the VN_* option bits
 * tested through IFOPT() instead.
 */
int dovncluster = 1;
int vndebug = 0x00;
#define VDB_FOLLOW 0x01
#define VDB_INIT 0x02
#define VDB_IO 0x04
#endif
118
/* Unit number of a vn device, extracted from its dev_t by dkunit(). */
#define vnunit(dev) dkunit(dev)

/*
 * Allocate a scratch struct buf from M_DEVBUF with M_WAITOK.  The
 * result is the raw malloc() return value; nothing here clears it.
 */
#define getvnbuf() \
	((struct buf *)malloc(sizeof(struct buf), M_DEVBUF, M_WAITOK))

/* Release a struct buf obtained from getvnbuf(). */
#define putvnbuf(bp) \
	free((caddr_t)(bp), M_DEVBUF)
126
/*
 * Per-unit driver state.  Instances are allocated and zeroed on the
 * first vnopen() of a unit and kept in the vn_softc[] table.
 */
struct vn_softc {
	int sc_flags; /* VNF_* state bits */
	size_t sc_size; /* size of vn, in DEV_BSIZE blocks */
#if defined(DEVFS) && defined(notyet)
	void *sc_bdev; /* devfs token for whole disk */
	void *sc_cdev; /* devfs token for raw whole disk */
#endif
	struct vnode *sc_vp; /* backing vnode, set by VNIOCATTACH */
	struct ucred *sc_cred; /* credentials used for I/O on sc_vp */
	int sc_maxactive; /* max # of active requests (unused in the visible code) */
	struct buf sc_tab; /* transfer queue (unused in the visible code) */
	u_long sc_options; /* per-unit VN_* options, OR'ed with vn_options */
	struct diskslices *sc_slices; /* slice state handed to the ds*() routines */
};
141
/* sc_flags: set once a backing vnode is attached, cleared by vnclear() */
#define VNF_INITED 0x01

/* Unit table, indexed by vnunit(dev); entries stay NULL until first open. */
static struct vn_softc *vn_softc[NVN];
/* Driver-wide option bits, changed by VNIOCGSET / VNIOCGCLEAR. */
static u_long vn_options;

/*
 * Expands to a bare `if' that is true when `opt' is set in either the
 * unit's options or the global ones.  Because it is an unbraced `if',
 * a following `else' binds to it (vnstrategy() relies on that); brace
 * the controlled statement when nesting to avoid dangling-else surprises.
 */
#define IFOPT(vn,opt) if (((vn)->sc_options|vn_options) & (opt))
149
/*
 * Internal helpers.  They are declared static here; the definitions
 * further down omit the keyword but inherit internal linkage from
 * these declarations.
 */
static void vniodone (struct buf *bp);
static int vnsetcred (struct vn_softc *vn, struct ucred *cred);
static void vnshutdown (int, void *);
static void vnclear (struct vn_softc *vn);
154
155 static int
156 vnclose(dev_t dev, int flags, int mode, struct proc *p)
157 {
158 struct vn_softc *vn = vn_softc[vnunit(dev)];
159
160 IFOPT(vn, VN_LABELS)
161 if (vn->sc_slices != NULL)
162 dsclose(dev, mode, vn->sc_slices);
163 return (0);
164 }
165
166 static int
167 vnopen(dev_t dev, int flags, int mode, struct proc *p)
168 {
169 int unit = vnunit(dev);
170 struct vn_softc *vn;
171
172 if (unit >= NVN) {
173 if (vn_options & VN_FOLLOW)
174 printf("vnopen(0x%lx, 0x%x, 0x%x, %p)\n",
175 dev, flags, mode, p);
176 return(ENOENT);
177 }
178
179 vn = vn_softc[unit];
180 if (!vn) {
181 vn = malloc(sizeof *vn, M_DEVBUF, M_WAITOK);
182 if (!vn)
183 return (ENOMEM);
184 bzero(vn, sizeof *vn);
185 vn_softc[unit] = vn;
186 }
187
188 IFOPT(vn, VN_FOLLOW)
189 printf("vnopen(0x%lx, 0x%x, 0x%x, %p)\n", dev, flags, mode, p);
190
191 IFOPT(vn, VN_LABELS) {
192 if (vn->sc_flags & VNF_INITED) {
193 struct disklabel label;
194
195 /* Build label for whole disk. */
196 bzero(&label, sizeof label);
197 label.d_secsize = DEV_BSIZE;
198 label.d_nsectors = 32;
199 label.d_ntracks = 64;
200 label.d_ncylinders = vn->sc_size / (32 * 64);
201 label.d_secpercyl = 32 * 64;
202 label.d_secperunit =
203 label.d_partitions[RAW_PART].p_size =
204 vn->sc_size;
205
206 return (dsopen("vn", dev, mode, &vn->sc_slices, &label,
207 vnstrategy, (ds_setgeom_t *)NULL,
208 &vn_bdevsw, &vn_cdevsw));
209 }
210 if (dkslice(dev) != WHOLE_DISK_SLICE ||
211 dkpart(dev) != RAW_PART ||
212 mode != S_IFCHR)
213 return (ENXIO);
214 }
215 return(0);
216 }
217
218 /*
219 * this code does I/O calls through the appropriate VOP entry point...
220 * unless a swap_pager I/O request is being done. This strategy (-))
221 * allows for coherency with mmap except in the case of paging. This
222 * is necessary, because the VOP calls use lots of memory (and actually
223 * are not extremely efficient -- but we want to keep semantics correct),
224 * and the pageout daemon gets really unhappy (and so does the rest of the
225 * system) when it runs out of memory.
226 */
227 static void
228 vnstrategy(struct buf *bp)
229 {
230 int unit = vnunit(bp->b_dev);
231 register struct vn_softc *vn = vn_softc[unit];
232 register daddr_t bn;
233 int error;
234 int isvplocked = 0;
235 long sz;
236 struct uio auio;
237 struct iovec aiov;
238
239 IFOPT(vn, VN_DEBUG)
240 printf("vnstrategy(%p): unit %d\n", bp, unit);
241
242 if ((vn->sc_flags & VNF_INITED) == 0) {
243 bp->b_error = ENXIO;
244 bp->b_flags |= B_ERROR;
245 biodone(bp);
246 return;
247 }
248 IFOPT(vn, VN_LABELS) {
249 bp->b_resid = bp->b_bcount;/* XXX best place to set this? */
250 if (vn->sc_slices != NULL && dscheck(bp, vn->sc_slices) <= 0) {
251 biodone(bp);
252 return;
253 }
254 bn = bp->b_pblkno;
255 bp->b_resid = bp->b_bcount;/* XXX best place to set this? */
256 } else {
257 bn = bp->b_blkno;
258 sz = howmany(bp->b_bcount, DEV_BSIZE);
259 bp->b_resid = bp->b_bcount;
260 if (bn < 0 || bn + sz > vn->sc_size) {
261 if (bn != vn->sc_size) {
262 bp->b_error = EINVAL;
263 bp->b_flags |= B_ERROR;
264 }
265 biodone(bp);
266 return;
267 }
268 }
269
270 if( (bp->b_flags & B_PAGING) == 0) {
271 aiov.iov_base = bp->b_data;
272 aiov.iov_len = bp->b_bcount;
273 auio.uio_iov = &aiov;
274 auio.uio_iovcnt = 1;
275 auio.uio_offset = dbtob(bn);
276 auio.uio_segflg = UIO_SYSSPACE;
277 if( bp->b_flags & B_READ)
278 auio.uio_rw = UIO_READ;
279 else
280 auio.uio_rw = UIO_WRITE;
281 auio.uio_resid = bp->b_bcount;
282 auio.uio_procp = curproc;
283 if (!VOP_ISLOCKED(vn->sc_vp)) {
284 isvplocked = 1;
285 VOP_LOCK(vn->sc_vp);
286 }
287 if( bp->b_flags & B_READ)
288 error = VOP_READ(vn->sc_vp, &auio, 0, vn->sc_cred);
289 else
290 error = VOP_WRITE(vn->sc_vp, &auio, 0, vn->sc_cred);
291 if (isvplocked) {
292 VOP_UNLOCK(vn->sc_vp);
293 isvplocked = 0;
294 }
295
296 bp->b_resid = auio.uio_resid;
297
298 if( error )
299 bp->b_flags |= B_ERROR;
300 biodone(bp);
301 } else {
302 long bsize, resid;
303 off_t byten;
304 int flags;
305 caddr_t addr;
306 struct buf *nbp;
307
308 nbp = getvnbuf();
309 byten = dbtob(bn);
310 bsize = vn->sc_vp->v_mount->mnt_stat.f_iosize;
311 addr = bp->b_data;
312 flags = bp->b_flags | B_CALL;
313 for (resid = bp->b_resid; resid; ) {
314 struct vnode *vp;
315 daddr_t nbn;
316 int off, s, nra;
317
318 nra = 0;
319 if (!VOP_ISLOCKED(vn->sc_vp)) {
320 isvplocked = 1;
321 VOP_LOCK(vn->sc_vp);
322 }
323 error = VOP_BMAP(vn->sc_vp, (daddr_t)(byten / bsize),
324 &vp, &nbn, &nra, NULL);
325 if (isvplocked) {
326 VOP_UNLOCK(vn->sc_vp);
327 isvplocked = 0;
328 }
329 if (error == 0 && nbn == -1)
330 error = EIO;
331
332 IFOPT(vn, VN_DONTCLUSTER)
333 nra = 0;
334
335 off = byten % bsize;
336 if (off)
337 sz = bsize - off;
338 else
339 sz = (1 + nra) * bsize;
340 if (resid < sz)
341 sz = resid;
342
343 if (error) {
344 bp->b_resid -= (resid - sz);
345 bp->b_flags |= B_ERROR;
346 biodone(bp);
347 putvnbuf(nbp);
348 return;
349 }
350
351 IFOPT(vn,VN_IO)
352 printf(
353 /* XXX no %qx in kernel. Synthesize it. */
354 "vnstrategy: vp %p/%p bn 0x%lx%08lx/0x%lx sz 0x%x\n",
355 vn->sc_vp, vp, (long)(byten >> 32),
356 (u_long)byten, nbn, sz);
357
358 nbp->b_flags = flags;
359 nbp->b_bcount = sz;
360 nbp->b_bufsize = sz;
361 nbp->b_error = 0;
362 if (vp->v_type == VBLK || vp->v_type == VCHR)
363 nbp->b_dev = vp->v_rdev;
364 else
365 nbp->b_dev = NODEV;
366 nbp->b_data = addr;
367 nbp->b_blkno = nbn + btodb(off);
368 nbp->b_proc = bp->b_proc;
369 nbp->b_iodone = vniodone;
370 nbp->b_vp = vp;
371 nbp->b_rcred = vn->sc_cred; /* XXX crdup? */
372 nbp->b_wcred = vn->sc_cred; /* XXX crdup? */
373 nbp->b_dirtyoff = bp->b_dirtyoff;
374 nbp->b_dirtyend = bp->b_dirtyend;
375 nbp->b_validoff = bp->b_validoff;
376 nbp->b_validend = bp->b_validend;
377
378 if ((nbp->b_flags & B_READ) == 0)
379 nbp->b_vp->v_numoutput++;
380
381 VOP_STRATEGY(nbp);
382
383 s = splbio();
384 while ((nbp->b_flags & B_DONE) == 0) {
385 nbp->b_flags |= B_WANTED;
386 tsleep(nbp, PRIBIO, "vnwait", 0);
387 }
388 splx(s);
389
390 if( nbp->b_flags & B_ERROR) {
391 bp->b_flags |= B_ERROR;
392 bp->b_resid -= (resid - sz);
393 biodone(bp);
394 putvnbuf(nbp);
395 return;
396 }
397
398 byten += sz;
399 addr += sz;
400 resid -= sz;
401 }
402 biodone(bp);
403 putvnbuf(nbp);
404 }
405 }
406
407 void
408 vniodone( struct buf *bp) {
409 bp->b_flags |= B_DONE;
410 wakeup((caddr_t) bp);
411 }
412
413 /* ARGSUSED */
414 static int
415 vnioctl(dev_t dev, int cmd, caddr_t data, int flag, struct proc *p)
416 {
417 struct vn_softc *vn = vn_softc[vnunit(dev)];
418 struct vn_ioctl *vio;
419 struct vattr vattr;
420 struct nameidata nd;
421 int error;
422 u_long *f;
423
424
425 IFOPT(vn,VN_FOLLOW)
426 printf("vnioctl(0x%lx, 0x%x, %p, 0x%x, %p): unit %d\n",
427 dev, cmd, data, flag, p, vnunit(dev));
428
429 switch (cmd) {
430 case VNIOCATTACH:
431 case VNIOCDETACH:
432 case VNIOCGSET:
433 case VNIOCGCLEAR:
434 case VNIOCUSET:
435 case VNIOCUCLEAR:
436 goto vn_specific;
437 }
438
439 IFOPT(vn,VN_LABELS) {
440 if (vn->sc_slices != NULL) {
441 error = dsioctl("vn", dev, cmd, data, flag,
442 &vn->sc_slices, vnstrategy,
443 (ds_setgeom_t *)NULL);
444 if (error != -1)
445 return (error);
446 }
447 if (dkslice(dev) != WHOLE_DISK_SLICE ||
448 dkpart(dev) != RAW_PART)
449 return (ENOTTY);
450 }
451
452 vn_specific:
453
454 error = suser(p->p_ucred, &p->p_acflag);
455 if (error)
456 return (error);
457
458 vio = (struct vn_ioctl *)data;
459 f = (u_long*)data;
460 switch (cmd) {
461
462 case VNIOCATTACH:
463 if (vn->sc_flags & VNF_INITED)
464 return(EBUSY);
465 /*
466 * Always open for read and write.
467 * This is probably bogus, but it lets vn_open()
468 * weed out directories, sockets, etc. so we don't
469 * have to worry about them.
470 */
471 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vn_file, p);
472 error = vn_open(&nd, FREAD|FWRITE, 0);
473 if (error)
474 return(error);
475 error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p);
476 if (error) {
477 VOP_UNLOCK(nd.ni_vp);
478 (void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p);
479 return(error);
480 }
481 VOP_UNLOCK(nd.ni_vp);
482 vn->sc_vp = nd.ni_vp;
483 vn->sc_size = btodb(vattr.va_size); /* note truncation */
484 error = vnsetcred(vn, p->p_ucred);
485 if (error) {
486 (void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p);
487 return(error);
488 }
489 vio->vn_size = dbtob(vn->sc_size);
490 vn->sc_flags |= VNF_INITED;
491 IFOPT(vn, VN_LABELS) {
492 /*
493 * Reopen so that `ds' knows which devices are open.
494 * If this is the first VNIOCSET, then we've
495 * guaranteed that the device is the cdev and that
496 * no other slices or labels are open. Otherwise,
497 * we rely on VNIOCCLR not being abused.
498 */
499 error = vnopen(dev, flag, S_IFCHR, p);
500 if (error)
501 vnclear(vn);
502 }
503 IFOPT(vn, VN_FOLLOW)
504 printf("vnioctl: SET vp %p size %x\n",
505 vn->sc_vp, vn->sc_size);
506 break;
507
508 case VNIOCDETACH:
509 if ((vn->sc_flags & VNF_INITED) == 0)
510 return(ENXIO);
511 /*
512 * XXX handle i/o in progress. Return EBUSY, or wait, or
513 * flush the i/o.
514 * XXX handle multiple opens of the device. Return EBUSY,
515 * or revoke the fd's.
516 * How are these problems handled for removable and failing
517 * hardware devices?
518 */
519 vnclear(vn);
520 IFOPT(vn, VN_FOLLOW)
521 printf("vnioctl: CLRed\n");
522 break;
523
524 case VNIOCGSET:
525 vn_options |= *f;
526 *f = vn_options;
527 break;
528
529 case VNIOCGCLEAR:
530 vn_options &= ~(*f);
531 *f = vn_options;
532 break;
533
534 case VNIOCUSET:
535 vn->sc_options |= *f;
536 *f = vn->sc_options;
537 break;
538
539 case VNIOCUCLEAR:
540 vn->sc_options &= ~(*f);
541 *f = vn->sc_options;
542 break;
543
544 default:
545 return (ENOTTY);
546 }
547 return(0);
548 }
549
550 /*
551 * Duplicate the current processes' credentials. Since we are called only
552 * as the result of a SET ioctl and only root can do that, any future access
553 * to this "disk" is essentially as root. Note that credentials may change
554 * if some other uid can write directly to the mapped file (NFS).
555 */
556 int
557 vnsetcred(struct vn_softc *vn, struct ucred *cred)
558 {
559 struct uio auio;
560 struct iovec aiov;
561 char *tmpbuf;
562 int error;
563
564 vn->sc_cred = crdup(cred);
565 tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);
566
567 /* XXX: Horrible kludge to establish credentials for NFS */
568 aiov.iov_base = tmpbuf;
569 aiov.iov_len = min(DEV_BSIZE, dbtob(vn->sc_size));
570 auio.uio_iov = &aiov;
571 auio.uio_iovcnt = 1;
572 auio.uio_offset = 0;
573 auio.uio_rw = UIO_READ;
574 auio.uio_segflg = UIO_SYSSPACE;
575 auio.uio_resid = aiov.iov_len;
576 VOP_LOCK(vn->sc_vp);
577 error = VOP_READ(vn->sc_vp, &auio, 0, vn->sc_cred);
578 VOP_UNLOCK(vn->sc_vp);
579
580 free(tmpbuf, M_TEMP);
581 return (error);
582 }
583
584 void
585 vnshutdown(int howto, void *ignored)
586 {
587 int i;
588
589 for (i = 0; i < NVN; i++)
590 if (vn_softc[i] && vn_softc[i]->sc_flags & VNF_INITED)
591 vnclear(vn_softc[i]);
592 }
593
594 void
595 vnclear(struct vn_softc *vn)
596 {
597 register struct vnode *vp = vn->sc_vp;
598 struct proc *p = curproc; /* XXX */
599
600 IFOPT(vn, VN_FOLLOW)
601 printf("vnclear(%p): vp=%p\n", vn, vp);
602 vn->sc_flags &= ~VNF_INITED;
603 if (vp == (struct vnode *)0)
604 panic("vnclear: null vp");
605 (void) vn_close(vp, FREAD|FWRITE, vn->sc_cred, p);
606 crfree(vn->sc_cred);
607 vn->sc_vp = (struct vnode *)0;
608 vn->sc_cred = (struct ucred *)0;
609 vn->sc_size = 0;
610 if (vn->sc_slices != NULL)
611 dsgone(&vn->sc_slices);
612 }
613
614 static int
615 vnsize(dev_t dev)
616 {
617 int unit = vnunit(dev);
618
619 if (unit >= NVN || (!vn_softc[unit]) ||
620 (vn_softc[unit]->sc_flags & VNF_INITED) == 0)
621 return(-1);
622 return(vn_softc[unit]->sc_size);
623 }
624
625 static int
626 vndump(dev_t dev)
627 {
628 return (ENODEV);
629 }
/* Non-zero once vn_drvinit() has registered the driver. */
static int vn_devsw_installed = 0;
631
632 static void
633 vn_drvinit(void *unused)
634 {
635 #ifdef DEVFS
636 int mynor;
637 int unit;
638 struct vn_softc *vn;
639 #endif
640
641 if( ! vn_devsw_installed ) {
642 if (at_shutdown(&vnshutdown, NULL, SHUTDOWN_POST_SYNC)) {
643 printf("vn: could not install shutdown hook\n");
644 return;
645 }
646 bdevsw_add_generic(BDEV_MAJOR, CDEV_MAJOR, &vn_bdevsw);
647 #ifdef DEVFS
648 for (unit = 0; unit < NVN; unit++) {
649 vn = vn_softc[unit];
650 mynor = dkmakeminor(unit, WHOLE_DISK_SLICE, RAW_PART);
651 /*
652 * XXX not saving tokens yet. The vn devices don't
653 * exist until after they have been opened :-).
654 */
655 devfs_add_devswf(&vn_bdevsw, mynor, DV_BLK,
656 UID_ROOT, GID_OPERATOR, 0640,
657 "vn%d", unit);
658 devfs_add_devswf(&vn_cdevsw, mynor, DV_CHR,
659 UID_ROOT, GID_OPERATOR, 0640,
660 "rvn%d", unit);
661 }
662 #endif
663 vn_devsw_installed = 1;
664 }
665 }
666
/*
 * Hook vn_drvinit() into the SI_SUB_DRIVERS start-up stage; the order
 * within that stage is offset by the character major number.
 */
SYSINIT(vndev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,vn_drvinit,NULL)


#endif /* NVN > 0 */
/* Cache object: bbb54c98ef560122fa2c03e5b83b2b8b */