sys/dev/md/md.c
1 /*-
2 * SPDX-License-Identifier: (Beerware AND BSD-3-Clause)
3 *
4 * ----------------------------------------------------------------------------
5 * "THE BEER-WARE LICENSE" (Revision 42):
6 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you
7 * can do whatever you want with this stuff. If we meet some day, and you think
8 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
9 * ----------------------------------------------------------------------------
10 *
11 * $FreeBSD$
12 *
13 */
14
15 /*-
16 * The following functions are based on the vn(4) driver: mdstart_swap(),
17 * mdstart_vnode(), mdcreate_swap(), mdcreate_vnode() and mddestroy(),
18 * and as such under the following copyright:
19 *
20 * Copyright (c) 1988 University of Utah.
21 * Copyright (c) 1990, 1993
22 * The Regents of the University of California. All rights reserved.
23 * Copyright (c) 2013 The FreeBSD Foundation
24 * All rights reserved.
25 *
26 * This code is derived from software contributed to Berkeley by
27 * the Systems Programming Group of the University of Utah Computer
28 * Science Department.
29 *
30 * Portions of this software were developed by Konstantin Belousov
31 * under sponsorship from the FreeBSD Foundation.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. Neither the name of the University nor the names of its contributors
42 * may be used to endorse or promote products derived from this software
43 * without specific prior written permission.
44 *
45 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55 * SUCH DAMAGE.
56 *
57 * from: Utah Hdr: vn.c 1.13 94/04/02
58 *
59 * from: @(#)vn.c 8.6 (Berkeley) 4/1/94
60 * From: src/sys/dev/vn/vn.c,v 1.122 2000/12/16 16:06:03
61 */
62
63 #include "opt_rootdevname.h"
64 #include "opt_geom.h"
65 #include "opt_md.h"
66
67 #include <sys/param.h>
68 #include <sys/systm.h>
69 #include <sys/bio.h>
70 #include <sys/buf.h>
71 #include <sys/conf.h>
72 #include <sys/devicestat.h>
73 #include <sys/fcntl.h>
74 #include <sys/kernel.h>
75 #include <sys/kthread.h>
76 #include <sys/limits.h>
77 #include <sys/linker.h>
78 #include <sys/lock.h>
79 #include <sys/malloc.h>
80 #include <sys/mdioctl.h>
81 #include <sys/mount.h>
82 #include <sys/mutex.h>
83 #include <sys/sx.h>
84 #include <sys/namei.h>
85 #include <sys/proc.h>
86 #include <sys/queue.h>
87 #include <sys/rwlock.h>
88 #include <sys/sbuf.h>
89 #include <sys/sched.h>
90 #include <sys/sf_buf.h>
91 #include <sys/sysctl.h>
92 #include <sys/uio.h>
93 #include <sys/vnode.h>
94 #include <sys/disk.h>
95
96 #include <geom/geom.h>
97 #include <geom/geom_int.h>
98
99 #include <vm/vm.h>
100 #include <vm/vm_param.h>
101 #include <vm/vm_object.h>
102 #include <vm/vm_page.h>
103 #include <vm/vm_pager.h>
104 #include <vm/swap_pager.h>
105 #include <vm/uma.h>
106
107 #include <machine/bus.h>
108
109 #define MD_MODVER 1
110
111 #define MD_SHUTDOWN 0x10000 /* Tell worker thread to terminate. */
112 #define MD_EXITING 0x20000 /* Worker thread is exiting. */
113 #define MD_PROVIDERGONE 0x40000 /* Safe to free the softc */
114
115 #ifndef MD_NSECT
116 #define MD_NSECT (10000 * 2)
117 #endif
118
119 struct md_req {
120 unsigned md_unit; /* unit number */
121 enum md_types md_type; /* type of disk */
122 off_t md_mediasize; /* size of disk in bytes */
123 unsigned md_sectorsize; /* sectorsize */
124 unsigned md_options; /* options */
125 int md_fwheads; /* firmware heads */
126 int md_fwsectors; /* firmware sectors */
127 char *md_file; /* pathname of file to mount */
128 enum uio_seg md_file_seg; /* location of md_file */
129 char *md_label; /* label of the device (userspace) */
130 int *md_units; /* pointer to units array (kernel) */
131 size_t md_units_nitems; /* items in md_units array */
132 };
133
134 #ifdef COMPAT_FREEBSD32
135 struct md_ioctl32 {
136 unsigned md_version;
137 unsigned md_unit;
138 enum md_types md_type;
139 uint32_t md_file;
140 off_t md_mediasize;
141 unsigned md_sectorsize;
142 unsigned md_options;
143 uint64_t md_base;
144 int md_fwheads;
145 int md_fwsectors;
146 uint32_t md_label;
147 int md_pad[MDNPAD];
148 } __attribute__((__packed__));
149 CTASSERT((sizeof(struct md_ioctl32)) == 436);
150
151 #define MDIOCATTACH_32 _IOC_NEWTYPE(MDIOCATTACH, struct md_ioctl32)
152 #define MDIOCDETACH_32 _IOC_NEWTYPE(MDIOCDETACH, struct md_ioctl32)
153 #define MDIOCQUERY_32 _IOC_NEWTYPE(MDIOCQUERY, struct md_ioctl32)
154 #define MDIOCRESIZE_32 _IOC_NEWTYPE(MDIOCRESIZE, struct md_ioctl32)
155 #endif /* COMPAT_FREEBSD32 */
156
157 static MALLOC_DEFINE(M_MD, "md_disk", "Memory Disk");
158 static MALLOC_DEFINE(M_MDSECT, "md_sectors", "Memory Disk Sectors");
159
160 static int md_debug;
161 SYSCTL_INT(_debug, OID_AUTO, mddebug, CTLFLAG_RW, &md_debug, 0,
162 "Enable md(4) debug messages");
163 static int md_malloc_wait;
164 SYSCTL_INT(_vm, OID_AUTO, md_malloc_wait, CTLFLAG_RW, &md_malloc_wait, 0,
165 "Allow malloc to wait for memory allocations");
166
167 #if defined(MD_ROOT) && !defined(MD_ROOT_FSTYPE)
168 #define MD_ROOT_FSTYPE "ufs"
169 #endif
170
171 #if defined(MD_ROOT)
172 /*
173 * Preloaded image gets put here.
174 */
175 #if defined(MD_ROOT_SIZE)
176 /*
177 * We put the mfs_root symbol into the oldmfs section of the kernel object file.
178 * Applications that patch the object with the image can determine
179  * the size by looking at the oldmfs section size within the kernel.
180 */
181 u_char mfs_root[MD_ROOT_SIZE*1024] __attribute__ ((section ("oldmfs")));
182 const int mfs_root_size = sizeof(mfs_root);
183 #elif defined(MD_ROOT_MEM)
184 /* MD region already mapped in the memory */
185 u_char *mfs_root;
186 int mfs_root_size;
187 #else
188 extern volatile u_char __weak_symbol mfs_root;
189 extern volatile u_char __weak_symbol mfs_root_end;
190 #define mfs_root_size ((uintptr_t)(&mfs_root_end - &mfs_root))
191 #endif
192 #endif
193
194 static g_init_t g_md_init;
195 static g_fini_t g_md_fini;
196 static g_start_t g_md_start;
197 static g_access_t g_md_access;
198 static void g_md_dumpconf(struct sbuf *sb, const char *indent,
199 struct g_geom *gp, struct g_consumer *cp __unused, struct g_provider *pp);
200 static g_provgone_t g_md_providergone;
201
202 static struct cdev *status_dev = NULL;
203 static struct sx md_sx;
204 static struct unrhdr *md_uh;
205
206 static d_ioctl_t mdctlioctl;
207
208 static struct cdevsw mdctl_cdevsw = {
209 .d_version = D_VERSION,
210 .d_ioctl = mdctlioctl,
211 .d_name = MD_NAME,
212 };
213
214 struct g_class g_md_class = {
215 .name = "MD",
216 .version = G_VERSION,
217 .init = g_md_init,
218 .fini = g_md_fini,
219 .start = g_md_start,
220 .access = g_md_access,
221 .dumpconf = g_md_dumpconf,
222 .providergone = g_md_providergone,
223 };
224
225 DECLARE_GEOM_CLASS(g_md_class, g_md);
226 MODULE_VERSION(geom_md, 0);
227
228 static LIST_HEAD(, md_s) md_softc_list = LIST_HEAD_INITIALIZER(md_softc_list);
229
230 #define NINDIR (PAGE_SIZE / sizeof(uintptr_t))
231 #define NMASK (NINDIR-1)
232 static int nshift;
233
234 static uma_zone_t md_pbuf_zone;
235
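/*
 * Sector map for MD_MALLOC devices: a radix tree of "indir" nodes with
 * NINDIR slots each.  Interior slots point at child nodes; leaf slots are
 * 0 (unallocated, reads as zeros), a value in 1..255 (a sector filled
 * entirely with that byte) or a pointer to a sector buffer allocated from
 * the per-device UMA zone.
 */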
236 struct indir {
237 uintptr_t *array;
238 u_int total;
239 u_int used;
240 u_int shift;
241 };
242
243 struct md_s {
244 int unit;
245 LIST_ENTRY(md_s) list;
246 struct bio_queue_head bio_queue;
247 struct mtx queue_mtx;
248 struct cdev *dev;
249 enum md_types type;
250 off_t mediasize;
251 unsigned sectorsize;
252 unsigned opencount;
253 unsigned fwheads;
254 unsigned fwsectors;
255 char ident[32];
256 unsigned flags;
257 char name[20];
258 struct proc *procp;
259 struct g_geom *gp;
260 struct g_provider *pp;
261 int (*start)(struct md_s *sc, struct bio *bp);
262 struct devstat *devstat;
263
264 /* MD_MALLOC related fields */
265 struct indir *indir;
266 uma_zone_t uma;
267
268 /* MD_PRELOAD related fields */
269 u_char *pl_ptr;
270 size_t pl_len;
271
272 /* MD_VNODE related fields */
273 struct vnode *vnode;
274 char file[PATH_MAX];
275 char label[PATH_MAX];
276 struct ucred *cred;
277
278 /* MD_SWAP related fields */
279 vm_object_t object;
280 };
281
282 static struct indir *
283 new_indir(u_int shift)
284 {
285 struct indir *ip;
286
287 ip = malloc(sizeof *ip, M_MD, (md_malloc_wait ? M_WAITOK : M_NOWAIT)
288 | M_ZERO);
289 if (ip == NULL)
290 return (NULL);
291 ip->array = malloc(sizeof(uintptr_t) * NINDIR,
292 M_MDSECT, (md_malloc_wait ? M_WAITOK : M_NOWAIT) | M_ZERO);
293 if (ip->array == NULL) {
294 free(ip, M_MD);
295 return (NULL);
296 }
297 ip->total = NINDIR;
298 ip->shift = shift;
299 return (ip);
300 }
301
302 static void
303 del_indir(struct indir *ip)
304 {
305
306 free(ip->array, M_MDSECT);
307 free(ip, M_MD);
308 }
309
310 static void
311 destroy_indir(struct md_s *sc, struct indir *ip)
312 {
313 int i;
314
315 for (i = 0; i < NINDIR; i++) {
316 if (!ip->array[i])
317 continue;
318 if (ip->shift)
319 destroy_indir(sc, (struct indir*)(ip->array[i]));
320 else if (ip->array[i] > 255)
321 uma_zfree(sc->uma, (void *)(ip->array[i]));
322 }
323 del_indir(ip);
324 }
325
326 /*
327 * This function does the math and allocates the top level "indir" structure
328 * for a device of "size" sectors.
329 */
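/*
 * For example, on a typical 64-bit platform (4 KB pages, 8-byte pointers)
 * NINDIR is 512 and nshift is 9, so a 1 GB device with 512-byte sectors
 * (2^21 sectors) gets a three-level tree with shifts 18, 9 and 0.
 */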
330
331 static struct indir *
332 dimension(off_t size)
333 {
334 off_t rcnt;
335 struct indir *ip;
336 int layer;
337
338 rcnt = size;
339 layer = 0;
340 while (rcnt > NINDIR) {
341 rcnt /= NINDIR;
342 layer++;
343 }
344
345 /*
346 * XXX: the top layer is probably not fully populated, so we allocate
347 * too much space for ip->array in here.
348 */
349 ip = malloc(sizeof *ip, M_MD, M_WAITOK | M_ZERO);
350 ip->array = malloc(sizeof(uintptr_t) * NINDIR,
351 M_MDSECT, M_WAITOK | M_ZERO);
352 ip->total = NINDIR;
353 ip->shift = layer * nshift;
354 return (ip);
355 }
356
357 /*
358 * Read a given sector
359 */
360
361 static uintptr_t
362 s_read(struct indir *ip, off_t offset)
363 {
364 struct indir *cip;
365 int idx;
366 uintptr_t up;
367
368 if (md_debug > 1)
369 printf("s_read(%jd)\n", (intmax_t)offset);
370 up = 0;
371 for (cip = ip; cip != NULL;) {
372 if (cip->shift) {
373 idx = (offset >> cip->shift) & NMASK;
374 up = cip->array[idx];
375 cip = (struct indir *)up;
376 continue;
377 }
378 idx = offset & NMASK;
379 return (cip->array[idx]);
380 }
381 return (0);
382 }
383
384 /*
385 * Write a given sector, prune the tree if the value is 0
386 */
387
388 static int
389 s_write(struct indir *ip, off_t offset, uintptr_t ptr)
390 {
391 struct indir *cip, *lip[10];
392 int idx, li;
393 uintptr_t up;
394
395 if (md_debug > 1)
396 printf("s_write(%jd, %p)\n", (intmax_t)offset, (void *)ptr);
397 up = 0;
398 li = 0;
399 cip = ip;
400 for (;;) {
401 lip[li++] = cip;
402 if (cip->shift) {
403 idx = (offset >> cip->shift) & NMASK;
404 up = cip->array[idx];
405 if (up != 0) {
406 cip = (struct indir *)up;
407 continue;
408 }
409 /* Allocate branch */
410 cip->array[idx] =
411 (uintptr_t)new_indir(cip->shift - nshift);
412 if (cip->array[idx] == 0)
413 return (ENOSPC);
414 cip->used++;
415 up = cip->array[idx];
416 cip = (struct indir *)up;
417 continue;
418 }
419 /* leafnode */
420 idx = offset & NMASK;
421 up = cip->array[idx];
422 if (up != 0)
423 cip->used--;
424 cip->array[idx] = ptr;
425 if (ptr != 0)
426 cip->used++;
427 break;
428 }
429 if (cip->used != 0 || li == 1)
430 return (0);
431 li--;
432 while (cip->used == 0 && cip != ip) {
433 li--;
434 idx = (offset >> lip[li]->shift) & NMASK;
435 up = lip[li]->array[idx];
436 KASSERT(up == (uintptr_t)cip, ("md screwed up"));
437 del_indir(cip);
438 lip[li]->array[idx] = 0;
439 lip[li]->used--;
440 cip = lip[li];
441 }
442 return (0);
443 }
444
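/*
 * GEOM access method: refuse new writers on read-only devices and track
 * whether the provider is open at all via sc->opencount.
 */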
445 static int
446 g_md_access(struct g_provider *pp, int r, int w, int e)
447 {
448 struct md_s *sc;
449
450 sc = pp->geom->softc;
451 if (sc == NULL) {
452 if (r <= 0 && w <= 0 && e <= 0)
453 return (0);
454 return (ENXIO);
455 }
456 r += pp->acr;
457 w += pp->acw;
458 e += pp->ace;
459 if ((sc->flags & MD_READONLY) != 0 && w > 0)
460 return (EROFS);
461 if ((pp->acr + pp->acw + pp->ace) == 0 && (r + w + e) > 0) {
462 sc->opencount = 1;
463 } else if ((pp->acr + pp->acw + pp->ace) > 0 && (r + w + e) == 0) {
464 sc->opencount = 0;
465 }
466 return (0);
467 }
468
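/*
 * GEOM start method: begin devstat accounting for reads and writes, then
 * queue the bio for the per-device worker thread and wake it up.
 */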
469 static void
470 g_md_start(struct bio *bp)
471 {
472 struct md_s *sc;
473
474 sc = bp->bio_to->geom->softc;
475 if ((bp->bio_cmd == BIO_READ) || (bp->bio_cmd == BIO_WRITE)) {
476 devstat_start_transaction_bio(sc->devstat, bp);
477 }
478 mtx_lock(&sc->queue_mtx);
479 bioq_disksort(&sc->bio_queue, bp);
480 wakeup(sc);
481 mtx_unlock(&sc->queue_mtx);
482 }
483
484 #define MD_MALLOC_MOVE_ZERO 1
485 #define MD_MALLOC_MOVE_FILL 2
486 #define MD_MALLOC_MOVE_READ 3
487 #define MD_MALLOC_MOVE_WRITE 4
488 #define MD_MALLOC_MOVE_CMP 5
489
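/*
 * Move one sector's worth of data between an MD_MALLOC backing buffer and
 * a request described either by an array of vm pages (unmapped bio) or by
 * a bus_dma segment list.  The op selects zeroing, filling with a constant
 * byte, copying in or out, or checking whether every byte in the sector is
 * identical (used to detect compressible writes).
 */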
490 static int
491 md_malloc_move_ma(vm_page_t **mp, int *ma_offs, unsigned sectorsize,
492 void *ptr, u_char fill, int op)
493 {
494 struct sf_buf *sf;
495 vm_page_t m, *mp1;
496 char *p, first;
497 off_t *uc;
498 unsigned n;
499 int error, i, ma_offs1, sz, first_read;
500
501 m = NULL;
502 error = 0;
503 sf = NULL;
504 /* if (op == MD_MALLOC_MOVE_CMP) { gcc */
505 first = 0;
506 first_read = 0;
507 uc = ptr;
508 mp1 = *mp;
509 ma_offs1 = *ma_offs;
510 /* } */
511 sched_pin();
512 for (n = sectorsize; n != 0; n -= sz) {
513 sz = imin(PAGE_SIZE - *ma_offs, n);
514 if (m != **mp) {
515 if (sf != NULL)
516 sf_buf_free(sf);
517 m = **mp;
518 sf = sf_buf_alloc(m, SFB_CPUPRIVATE |
519 (md_malloc_wait ? 0 : SFB_NOWAIT));
520 if (sf == NULL) {
521 error = ENOMEM;
522 break;
523 }
524 }
525 p = (char *)sf_buf_kva(sf) + *ma_offs;
526 switch (op) {
527 case MD_MALLOC_MOVE_ZERO:
528 bzero(p, sz);
529 break;
530 case MD_MALLOC_MOVE_FILL:
531 memset(p, fill, sz);
532 break;
533 case MD_MALLOC_MOVE_READ:
534 bcopy(ptr, p, sz);
535 cpu_flush_dcache(p, sz);
536 break;
537 case MD_MALLOC_MOVE_WRITE:
538 bcopy(p, ptr, sz);
539 break;
540 case MD_MALLOC_MOVE_CMP:
541 for (i = 0; i < sz; i++, p++) {
542 if (!first_read) {
543 *uc = (u_char)*p;
544 first = *p;
545 first_read = 1;
546 } else if (*p != first) {
547 error = EDOOFUS;
548 break;
549 }
550 }
551 break;
552 default:
553 KASSERT(0, ("md_malloc_move_ma unknown op %d\n", op));
554 break;
555 }
556 if (error != 0)
557 break;
558 *ma_offs += sz;
559 *ma_offs %= PAGE_SIZE;
560 if (*ma_offs == 0)
561 (*mp)++;
562 ptr = (char *)ptr + sz;
563 }
564
565 if (sf != NULL)
566 sf_buf_free(sf);
567 sched_unpin();
568 if (op == MD_MALLOC_MOVE_CMP && error != 0) {
569 *mp = mp1;
570 *ma_offs = ma_offs1;
571 }
572 return (error);
573 }
574
575 static int
576 md_malloc_move_vlist(bus_dma_segment_t **pvlist, int *pma_offs,
577 unsigned len, void *ptr, u_char fill, int op)
578 {
579 bus_dma_segment_t *vlist;
580 uint8_t *p, *end, first;
581 off_t *uc;
582 int ma_offs, seg_len;
583
584 vlist = *pvlist;
585 ma_offs = *pma_offs;
586 uc = ptr;
587
588 for (; len != 0; len -= seg_len) {
589 seg_len = imin(vlist->ds_len - ma_offs, len);
590 p = (uint8_t *)(uintptr_t)vlist->ds_addr + ma_offs;
591 switch (op) {
592 case MD_MALLOC_MOVE_ZERO:
593 bzero(p, seg_len);
594 break;
595 case MD_MALLOC_MOVE_FILL:
596 memset(p, fill, seg_len);
597 break;
598 case MD_MALLOC_MOVE_READ:
599 bcopy(ptr, p, seg_len);
600 cpu_flush_dcache(p, seg_len);
601 break;
602 case MD_MALLOC_MOVE_WRITE:
603 bcopy(p, ptr, seg_len);
604 break;
605 case MD_MALLOC_MOVE_CMP:
606 end = p + seg_len;
607 first = *uc = *p;
608 /* Confirm all following bytes match the first */
609 while (++p < end) {
610 if (*p != first)
611 return (EDOOFUS);
612 }
613 break;
614 default:
615 KASSERT(0, ("md_malloc_move_vlist unknown op %d\n", op));
616 break;
617 }
618
619 ma_offs += seg_len;
620 if (ma_offs == vlist->ds_len) {
621 ma_offs = 0;
622 vlist++;
623 }
624 ptr = (uint8_t *)ptr + seg_len;
625 }
626 *pvlist = vlist;
627 *pma_offs = ma_offs;
628
629 return (0);
630 }
631
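/*
 * I/O for malloc-backed devices: walk the request one sector at a time
 * through the indirection tree.  BIO_DELETE drops the sector, BIO_READ
 * materializes zeros, a fill byte or the stored data, and BIO_WRITE stores
 * uniform sectors as a single byte value (when MD_COMPRESS is set) or in a
 * buffer from the per-device UMA zone.
 */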
632 static int
633 mdstart_malloc(struct md_s *sc, struct bio *bp)
634 {
635 u_char *dst;
636 vm_page_t *m;
637 bus_dma_segment_t *vlist;
638 int i, error, error1, ma_offs, notmapped;
639 off_t secno, nsec, uc;
640 uintptr_t sp, osp;
641
642 switch (bp->bio_cmd) {
643 case BIO_READ:
644 case BIO_WRITE:
645 case BIO_DELETE:
646 break;
647 case BIO_FLUSH:
648 return (0);
649 default:
650 return (EOPNOTSUPP);
651 }
652
653 notmapped = (bp->bio_flags & BIO_UNMAPPED) != 0;
654 vlist = (bp->bio_flags & BIO_VLIST) != 0 ?
655 (bus_dma_segment_t *)bp->bio_data : NULL;
656 if (notmapped) {
657 m = bp->bio_ma;
658 ma_offs = bp->bio_ma_offset;
659 dst = NULL;
660 KASSERT(vlist == NULL, ("vlists cannot be unmapped"));
661 } else if (vlist != NULL) {
662 ma_offs = bp->bio_ma_offset;
663 dst = NULL;
664 } else {
665 dst = bp->bio_data;
666 }
667
668 nsec = bp->bio_length / sc->sectorsize;
669 secno = bp->bio_offset / sc->sectorsize;
670 error = 0;
671 while (nsec--) {
672 osp = s_read(sc->indir, secno);
673 if (bp->bio_cmd == BIO_DELETE) {
674 if (osp != 0)
675 error = s_write(sc->indir, secno, 0);
676 } else if (bp->bio_cmd == BIO_READ) {
677 if (osp == 0) {
678 if (notmapped) {
679 error = md_malloc_move_ma(&m, &ma_offs,
680 sc->sectorsize, NULL, 0,
681 MD_MALLOC_MOVE_ZERO);
682 } else if (vlist != NULL) {
683 error = md_malloc_move_vlist(&vlist,
684 &ma_offs, sc->sectorsize, NULL, 0,
685 MD_MALLOC_MOVE_ZERO);
686 } else
687 bzero(dst, sc->sectorsize);
688 } else if (osp <= 255) {
689 if (notmapped) {
690 error = md_malloc_move_ma(&m, &ma_offs,
691 sc->sectorsize, NULL, osp,
692 MD_MALLOC_MOVE_FILL);
693 } else if (vlist != NULL) {
694 error = md_malloc_move_vlist(&vlist,
695 &ma_offs, sc->sectorsize, NULL, osp,
696 MD_MALLOC_MOVE_FILL);
697 } else
698 memset(dst, osp, sc->sectorsize);
699 } else {
700 if (notmapped) {
701 error = md_malloc_move_ma(&m, &ma_offs,
702 sc->sectorsize, (void *)osp, 0,
703 MD_MALLOC_MOVE_READ);
704 } else if (vlist != NULL) {
705 error = md_malloc_move_vlist(&vlist,
706 &ma_offs, sc->sectorsize,
707 (void *)osp, 0,
708 MD_MALLOC_MOVE_READ);
709 } else {
710 bcopy((void *)osp, dst, sc->sectorsize);
711 cpu_flush_dcache(dst, sc->sectorsize);
712 }
713 }
714 osp = 0;
715 } else if (bp->bio_cmd == BIO_WRITE) {
716 if (sc->flags & MD_COMPRESS) {
717 if (notmapped) {
718 error1 = md_malloc_move_ma(&m, &ma_offs,
719 sc->sectorsize, &uc, 0,
720 MD_MALLOC_MOVE_CMP);
721 i = error1 == 0 ? sc->sectorsize : 0;
722 } else if (vlist != NULL) {
723 error1 = md_malloc_move_vlist(&vlist,
724 &ma_offs, sc->sectorsize, &uc, 0,
725 MD_MALLOC_MOVE_CMP);
726 i = error1 == 0 ? sc->sectorsize : 0;
727 } else {
728 uc = dst[0];
729 for (i = 1; i < sc->sectorsize; i++) {
730 if (dst[i] != uc)
731 break;
732 }
733 }
734 } else {
735 i = 0;
736 uc = 0;
737 }
738 if (i == sc->sectorsize) {
739 if (osp != uc)
740 error = s_write(sc->indir, secno, uc);
741 } else {
742 if (osp <= 255) {
743 sp = (uintptr_t)uma_zalloc(sc->uma,
744 md_malloc_wait ? M_WAITOK :
745 M_NOWAIT);
746 if (sp == 0) {
747 error = ENOSPC;
748 break;
749 }
750 if (notmapped) {
751 error = md_malloc_move_ma(&m,
752 &ma_offs, sc->sectorsize,
753 (void *)sp, 0,
754 MD_MALLOC_MOVE_WRITE);
755 } else if (vlist != NULL) {
756 error = md_malloc_move_vlist(
757 &vlist, &ma_offs,
758 sc->sectorsize, (void *)sp,
759 0, MD_MALLOC_MOVE_WRITE);
760 } else {
761 bcopy(dst, (void *)sp,
762 sc->sectorsize);
763 }
764 error = s_write(sc->indir, secno, sp);
765 } else {
766 if (notmapped) {
767 error = md_malloc_move_ma(&m,
768 &ma_offs, sc->sectorsize,
769 (void *)osp, 0,
770 MD_MALLOC_MOVE_WRITE);
771 } else if (vlist != NULL) {
772 error = md_malloc_move_vlist(
773 &vlist, &ma_offs,
774 sc->sectorsize, (void *)osp,
775 0, MD_MALLOC_MOVE_WRITE);
776 } else {
777 bcopy(dst, (void *)osp,
778 sc->sectorsize);
779 }
780 osp = 0;
781 }
782 }
783 } else {
784 error = EOPNOTSUPP;
785 }
786 if (osp > 255)
787 uma_zfree(sc->uma, (void*)osp);
788 if (error != 0)
789 break;
790 secno++;
791 if (!notmapped && vlist == NULL)
792 dst += sc->sectorsize;
793 }
794 bp->bio_resid = 0;
795 return (error);
796 }
797
798 static void
799 mdcopyto_vlist(void *src, bus_dma_segment_t *vlist, off_t offset, off_t len)
800 {
801 off_t seg_len;
802
803 while (offset >= vlist->ds_len) {
804 offset -= vlist->ds_len;
805 vlist++;
806 }
807
808 while (len != 0) {
809 seg_len = omin(len, vlist->ds_len - offset);
810 bcopy(src, (void *)(uintptr_t)(vlist->ds_addr + offset),
811 seg_len);
812 offset = 0;
813 src = (uint8_t *)src + seg_len;
814 len -= seg_len;
815 vlist++;
816 }
817 }
818
819 static void
820 mdcopyfrom_vlist(bus_dma_segment_t *vlist, off_t offset, void *dst, off_t len)
821 {
822 off_t seg_len;
823
824 while (offset >= vlist->ds_len) {
825 offset -= vlist->ds_len;
826 vlist++;
827 }
828
829 while (len != 0) {
830 seg_len = omin(len, vlist->ds_len - offset);
831 bcopy((void *)(uintptr_t)(vlist->ds_addr + offset), dst,
832 seg_len);
833 offset = 0;
834 dst = (uint8_t *)dst + seg_len;
835 len -= seg_len;
836 vlist++;
837 }
838 }
839
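/*
 * I/O for preloaded images is a straight copy to or from the in-core
 * image at sc->pl_ptr.
 */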
840 static int
841 mdstart_preload(struct md_s *sc, struct bio *bp)
842 {
843 uint8_t *p;
844
845 p = sc->pl_ptr + bp->bio_offset;
846 switch (bp->bio_cmd) {
847 case BIO_READ:
848 if ((bp->bio_flags & BIO_VLIST) != 0) {
849 mdcopyto_vlist(p, (bus_dma_segment_t *)bp->bio_data,
850 bp->bio_ma_offset, bp->bio_length);
851 } else {
852 bcopy(p, bp->bio_data, bp->bio_length);
853 }
854 cpu_flush_dcache(bp->bio_data, bp->bio_length);
855 break;
856 case BIO_WRITE:
857 if ((bp->bio_flags & BIO_VLIST) != 0) {
858 mdcopyfrom_vlist((bus_dma_segment_t *)bp->bio_data,
859 bp->bio_ma_offset, p, bp->bio_length);
860 } else {
861 bcopy(bp->bio_data, p, bp->bio_length);
862 }
863 break;
864 }
865 bp->bio_resid = 0;
866 return (0);
867 }
868
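/*
 * I/O for vnode-backed devices: translate the bio into a uio and issue
 * VOP_READ/VOP_WRITE against the backing vnode.  BIO_FLUSH maps to
 * VOP_FSYNC and BIO_DELETE is emulated by writing from the zero region.
 * Unmapped bios are mapped through a pbuf one chunk at a time.
 */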
869 static int
870 mdstart_vnode(struct md_s *sc, struct bio *bp)
871 {
872 int error;
873 struct uio auio;
874 struct iovec aiov;
875 struct iovec *piov;
876 struct mount *mp;
877 struct vnode *vp;
878 struct buf *pb;
879 bus_dma_segment_t *vlist;
880 struct thread *td;
881 off_t iolen, iostart, len, zerosize;
882 int ma_offs, npages;
883
884 switch (bp->bio_cmd) {
885 case BIO_READ:
886 auio.uio_rw = UIO_READ;
887 break;
888 case BIO_WRITE:
889 case BIO_DELETE:
890 auio.uio_rw = UIO_WRITE;
891 break;
892 case BIO_FLUSH:
893 break;
894 default:
895 return (EOPNOTSUPP);
896 }
897
898 td = curthread;
899 vp = sc->vnode;
900 pb = NULL;
901 piov = NULL;
902 ma_offs = bp->bio_ma_offset;
903 len = bp->bio_length;
904
905 /*
906 * VNODE I/O
907 *
908 * If an error occurs, we set BIO_ERROR but we do not set
909  * B_INVAL because (for a write, anyway) the buffer is
910 * still valid.
911 */
912
913 if (bp->bio_cmd == BIO_FLUSH) {
914 do {
915 (void)vn_start_write(vp, &mp, V_WAIT);
916 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
917 error = VOP_FSYNC(vp, MNT_WAIT, td);
918 VOP_UNLOCK(vp);
919 vn_finished_write(mp);
920 } while (error == ERELOOKUP);
921 return (error);
922 }
923
924 auio.uio_offset = (vm_ooffset_t)bp->bio_offset;
925 auio.uio_resid = bp->bio_length;
926 auio.uio_segflg = UIO_SYSSPACE;
927 auio.uio_td = td;
928
929 if (bp->bio_cmd == BIO_DELETE) {
930 /*
931 * Emulate BIO_DELETE by writing zeros.
932 */
933 zerosize = ZERO_REGION_SIZE -
934 (ZERO_REGION_SIZE % sc->sectorsize);
935 auio.uio_iovcnt = howmany(bp->bio_length, zerosize);
936 piov = malloc(sizeof(*piov) * auio.uio_iovcnt, M_MD, M_WAITOK);
937 auio.uio_iov = piov;
938 while (len > 0) {
939 piov->iov_base = __DECONST(void *, zero_region);
940 piov->iov_len = len;
941 if (len > zerosize)
942 piov->iov_len = zerosize;
943 len -= piov->iov_len;
944 piov++;
945 }
946 piov = auio.uio_iov;
947 } else if ((bp->bio_flags & BIO_VLIST) != 0) {
948 piov = malloc(sizeof(*piov) * bp->bio_ma_n, M_MD, M_WAITOK);
949 auio.uio_iov = piov;
950 vlist = (bus_dma_segment_t *)bp->bio_data;
951 while (len > 0) {
952 piov->iov_base = (void *)(uintptr_t)(vlist->ds_addr +
953 ma_offs);
954 piov->iov_len = vlist->ds_len - ma_offs;
955 if (piov->iov_len > len)
956 piov->iov_len = len;
957 len -= piov->iov_len;
958 ma_offs = 0;
959 vlist++;
960 piov++;
961 }
962 auio.uio_iovcnt = piov - auio.uio_iov;
963 piov = auio.uio_iov;
964 } else if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
965 pb = uma_zalloc(md_pbuf_zone, M_WAITOK);
966 MPASS((pb->b_flags & B_MAXPHYS) != 0);
967 bp->bio_resid = len;
968 unmapped_step:
969 npages = atop(min(maxphys, round_page(len + (ma_offs &
970 PAGE_MASK))));
971 iolen = min(ptoa(npages) - (ma_offs & PAGE_MASK), len);
972 KASSERT(iolen > 0, ("zero iolen"));
973 pmap_qenter((vm_offset_t)pb->b_data,
974 &bp->bio_ma[atop(ma_offs)], npages);
975 aiov.iov_base = (void *)((vm_offset_t)pb->b_data +
976 (ma_offs & PAGE_MASK));
977 aiov.iov_len = iolen;
978 auio.uio_iov = &aiov;
979 auio.uio_iovcnt = 1;
980 auio.uio_resid = iolen;
981 } else {
982 aiov.iov_base = bp->bio_data;
983 aiov.iov_len = bp->bio_length;
984 auio.uio_iov = &aiov;
985 auio.uio_iovcnt = 1;
986 }
987 iostart = auio.uio_offset;
988 if (auio.uio_rw == UIO_READ) {
989 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
990 error = VOP_READ(vp, &auio, 0, sc->cred);
991 VOP_UNLOCK(vp);
992 } else {
993 (void) vn_start_write(vp, &mp, V_WAIT);
994 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
995 error = VOP_WRITE(vp, &auio, sc->flags & MD_ASYNC ? 0 : IO_SYNC,
996 sc->cred);
997 VOP_UNLOCK(vp);
998 vn_finished_write(mp);
999 if (error == 0)
1000 sc->flags &= ~MD_VERIFY;
1001 }
1002
1003 /* When MD_CACHE is set, try to avoid double-caching the data. */
1004 if (error == 0 && (sc->flags & MD_CACHE) == 0)
1005 VOP_ADVISE(vp, iostart, auio.uio_offset - 1,
1006 POSIX_FADV_DONTNEED);
1007
1008 if (pb != NULL) {
1009 pmap_qremove((vm_offset_t)pb->b_data, npages);
1010 if (error == 0) {
1011 len -= iolen;
1012 bp->bio_resid -= iolen;
1013 ma_offs += iolen;
1014 if (len > 0)
1015 goto unmapped_step;
1016 }
1017 uma_zfree(md_pbuf_zone, pb);
1018 } else {
1019 bp->bio_resid = auio.uio_resid;
1020 }
1021
1022 free(piov, M_MD);
1023 return (error);
1024 }
1025
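/*
 * I/O for swap-backed devices: operate on the backing swap object one page
 * at a time, paging existing data in when needed.  Reads of pages that were
 * never written return zeros; BIO_DELETE zeroes partial pages and frees
 * whole pages so their swap space can be released.
 */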
1026 static int
1027 mdstart_swap(struct md_s *sc, struct bio *bp)
1028 {
1029 vm_page_t m;
1030 u_char *p;
1031 vm_pindex_t i, lastp;
1032 bus_dma_segment_t *vlist;
1033 int rv, ma_offs, offs, len, lastend;
1034
1035 switch (bp->bio_cmd) {
1036 case BIO_READ:
1037 case BIO_WRITE:
1038 case BIO_DELETE:
1039 break;
1040 case BIO_FLUSH:
1041 return (0);
1042 default:
1043 return (EOPNOTSUPP);
1044 }
1045
1046 p = bp->bio_data;
1047 ma_offs = (bp->bio_flags & (BIO_UNMAPPED|BIO_VLIST)) != 0 ?
1048 bp->bio_ma_offset : 0;
1049 vlist = (bp->bio_flags & BIO_VLIST) != 0 ?
1050 (bus_dma_segment_t *)bp->bio_data : NULL;
1051
1052 /*
1053 * offs is the offset at which to start operating on the
1054 * next (ie, first) page. lastp is the last page on
1055 * which we're going to operate. lastend is the ending
1056 * position within that last page (ie, PAGE_SIZE if
1057 * we're operating on complete aligned pages).
1058 */
1059 offs = bp->bio_offset % PAGE_SIZE;
1060 lastp = (bp->bio_offset + bp->bio_length - 1) / PAGE_SIZE;
1061 lastend = (bp->bio_offset + bp->bio_length - 1) % PAGE_SIZE + 1;
1062
1063 rv = VM_PAGER_OK;
1064 vm_object_pip_add(sc->object, 1);
1065 for (i = bp->bio_offset / PAGE_SIZE; i <= lastp; i++) {
1066 len = ((i == lastp) ? lastend : PAGE_SIZE) - offs;
1067 m = vm_page_grab_unlocked(sc->object, i, VM_ALLOC_SYSTEM);
1068 if (bp->bio_cmd == BIO_READ) {
1069 if (vm_page_all_valid(m))
1070 rv = VM_PAGER_OK;
1071 else
1072 rv = vm_pager_get_pages(sc->object, &m, 1,
1073 NULL, NULL);
1074 if (rv == VM_PAGER_ERROR) {
1075 VM_OBJECT_WLOCK(sc->object);
1076 vm_page_free(m);
1077 VM_OBJECT_WUNLOCK(sc->object);
1078 break;
1079 } else if (rv == VM_PAGER_FAIL) {
1080 /*
1081 * Pager does not have the page. Zero
1082 * the allocated page, and mark it as
1083  * valid. Do not mark it dirty; the page
1084  * can be recreated if thrown out.
1085 */
1086 pmap_zero_page(m);
1087 vm_page_valid(m);
1088 }
1089 if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
1090 pmap_copy_pages(&m, offs, bp->bio_ma,
1091 ma_offs, len);
1092 } else if ((bp->bio_flags & BIO_VLIST) != 0) {
1093 physcopyout_vlist(VM_PAGE_TO_PHYS(m) + offs,
1094 vlist, ma_offs, len);
1095 cpu_flush_dcache(p, len);
1096 } else {
1097 physcopyout(VM_PAGE_TO_PHYS(m) + offs, p, len);
1098 cpu_flush_dcache(p, len);
1099 }
1100 } else if (bp->bio_cmd == BIO_WRITE) {
1101 if (len == PAGE_SIZE || vm_page_all_valid(m))
1102 rv = VM_PAGER_OK;
1103 else
1104 rv = vm_pager_get_pages(sc->object, &m, 1,
1105 NULL, NULL);
1106 if (rv == VM_PAGER_ERROR) {
1107 VM_OBJECT_WLOCK(sc->object);
1108 vm_page_free(m);
1109 VM_OBJECT_WUNLOCK(sc->object);
1110 break;
1111 } else if (rv == VM_PAGER_FAIL)
1112 pmap_zero_page(m);
1113
1114 if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
1115 pmap_copy_pages(bp->bio_ma, ma_offs, &m,
1116 offs, len);
1117 } else if ((bp->bio_flags & BIO_VLIST) != 0) {
1118 physcopyin_vlist(vlist, ma_offs,
1119 VM_PAGE_TO_PHYS(m) + offs, len);
1120 } else {
1121 physcopyin(p, VM_PAGE_TO_PHYS(m) + offs, len);
1122 }
1123
1124 vm_page_valid(m);
1125 vm_page_set_dirty(m);
1126 } else if (bp->bio_cmd == BIO_DELETE) {
1127 if (len == PAGE_SIZE || vm_page_all_valid(m))
1128 rv = VM_PAGER_OK;
1129 else
1130 rv = vm_pager_get_pages(sc->object, &m, 1,
1131 NULL, NULL);
1132 VM_OBJECT_WLOCK(sc->object);
1133 if (rv == VM_PAGER_ERROR) {
1134 vm_page_free(m);
1135 VM_OBJECT_WUNLOCK(sc->object);
1136 break;
1137 } else if (rv == VM_PAGER_FAIL) {
1138 vm_page_free(m);
1139 m = NULL;
1140 } else {
1141 /* Page is valid. */
1142 if (len != PAGE_SIZE) {
1143 pmap_zero_page_area(m, offs, len);
1144 vm_page_set_dirty(m);
1145 } else {
1146 vm_pager_page_unswapped(m);
1147 vm_page_free(m);
1148 m = NULL;
1149 }
1150 }
1151 VM_OBJECT_WUNLOCK(sc->object);
1152 }
1153 if (m != NULL) {
1154 /*
1155 * The page may be deactivated prior to setting
1156 * PGA_REFERENCED, but in this case it will be
1157 * reactivated by the page daemon.
1158 */
1159 if (vm_page_active(m))
1160 vm_page_reference(m);
1161 else
1162 vm_page_activate(m);
1163 vm_page_xunbusy(m);
1164 }
1165
1166 /* Actions on further pages start at offset 0 */
1167 p += PAGE_SIZE - offs;
1168 offs = 0;
1169 ma_offs += len;
1170 }
1171 vm_object_pip_wakeup(sc->object);
1172 return (rv != VM_PAGER_ERROR ? 0 : ENOSPC);
1173 }
1174
1175 static int
1176 mdstart_null(struct md_s *sc, struct bio *bp)
1177 {
1178
1179 switch (bp->bio_cmd) {
1180 case BIO_READ:
1181 bzero(bp->bio_data, bp->bio_length);
1182 cpu_flush_dcache(bp->bio_data, bp->bio_length);
1183 break;
1184 case BIO_WRITE:
1185 break;
1186 }
1187 bp->bio_resid = 0;
1188 return (0);
1189 }
1190
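/*
 * Answer BIO_GETATTR requests for the attributes md supports (geometry,
 * candelete, ident and MNT::verified).
 */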
1191 static void
1192 md_handleattr(struct md_s *sc, struct bio *bp)
1193 {
1194 if (sc->fwsectors && sc->fwheads &&
1195 (g_handleattr_int(bp, "GEOM::fwsectors", sc->fwsectors) != 0 ||
1196 g_handleattr_int(bp, "GEOM::fwheads", sc->fwheads) != 0))
1197 return;
1198 if (g_handleattr_int(bp, "GEOM::candelete", 1) != 0)
1199 return;
1200 if (sc->ident[0] != '\0' &&
1201 g_handleattr_str(bp, "GEOM::ident", sc->ident) != 0)
1202 return;
1203 if (g_handleattr_int(bp, "MNT::verified", (sc->flags & MD_VERIFY) != 0))
1204 return;
1205 g_io_deliver(bp, EOPNOTSUPP);
1206 }
1207
1208 static void
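/*
 * Per-device worker thread: take bios off the queue, hand them to the
 * type-specific start routine and complete them, exiting once MD_SHUTDOWN
 * is set.
 */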
1209 md_kthread(void *arg)
1210 {
1211 struct md_s *sc;
1212 struct bio *bp;
1213 int error;
1214
1215 sc = arg;
1216 thread_lock(curthread);
1217 sched_prio(curthread, PRIBIO);
1218 thread_unlock(curthread);
1219 if (sc->type == MD_VNODE)
1220 curthread->td_pflags |= TDP_NORUNNINGBUF;
1221
1222 for (;;) {
1223 mtx_lock(&sc->queue_mtx);
1224 if (sc->flags & MD_SHUTDOWN) {
1225 sc->flags |= MD_EXITING;
1226 mtx_unlock(&sc->queue_mtx);
1227 kproc_exit(0);
1228 }
1229 bp = bioq_takefirst(&sc->bio_queue);
1230 if (!bp) {
1231 msleep(sc, &sc->queue_mtx, PRIBIO | PDROP, "mdwait", 0);
1232 continue;
1233 }
1234 mtx_unlock(&sc->queue_mtx);
1235 if (bp->bio_cmd == BIO_GETATTR) {
1236 md_handleattr(sc, bp);
1237 } else {
1238 error = sc->start(sc, bp);
1239 if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
1240 /*
1241 * Devstat uses (bio_bcount, bio_resid) for
1242 * determining the length of the completed part
1243 * of the i/o. g_io_deliver() will translate
1244 * from bio_completed to that, but it also
1245 * destroys the bio so we must do our own
1246 * translation.
1247 */
1248 bp->bio_bcount = bp->bio_length;
1249 devstat_end_transaction_bio(sc->devstat, bp);
1250 }
1251 bp->bio_completed = bp->bio_length - bp->bio_resid;
1252 g_io_deliver(bp, error);
1253 }
1254 }
1255 }
1256
1257 static struct md_s *
1258 mdfind(int unit)
1259 {
1260 struct md_s *sc;
1261
1262 LIST_FOREACH(sc, &md_softc_list, list) {
1263 if (sc->unit == unit)
1264 break;
1265 }
1266 return (sc);
1267 }
1268
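/*
 * Allocate a unit number and softc, link it onto md_softc_list and start
 * the worker thread; everything is undone if the thread cannot be created.
 */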
1269 static struct md_s *
1270 mdnew(int unit, int *errp, enum md_types type)
1271 {
1272 struct md_s *sc;
1273 int error;
1274
1275 *errp = 0;
1276 if (unit == -1)
1277 unit = alloc_unr(md_uh);
1278 else
1279 unit = alloc_unr_specific(md_uh, unit);
1280
1281 if (unit == -1) {
1282 *errp = EBUSY;
1283 return (NULL);
1284 }
1285
1286 sc = (struct md_s *)malloc(sizeof *sc, M_MD, M_WAITOK | M_ZERO);
1287 sc->type = type;
1288 bioq_init(&sc->bio_queue);
1289 mtx_init(&sc->queue_mtx, "md bio queue", NULL, MTX_DEF);
1290 sc->unit = unit;
1291 sprintf(sc->name, "md%d", unit);
1292 LIST_INSERT_HEAD(&md_softc_list, sc, list);
1293 error = kproc_create(md_kthread, sc, &sc->procp, 0, 0,"%s", sc->name);
1294 if (error == 0)
1295 return (sc);
1296 LIST_REMOVE(sc, list);
1297 mtx_destroy(&sc->queue_mtx);
1298 free_unr(md_uh, sc->unit);
1299 free(sc, M_MD);
1300 *errp = error;
1301 return (NULL);
1302 }
1303
1304 static void
1305 mdinit(struct md_s *sc)
1306 {
1307 struct g_geom *gp;
1308 struct g_provider *pp;
1309
1310 g_topology_lock();
1311 gp = g_new_geomf(&g_md_class, "md%d", sc->unit);
1312 gp->softc = sc;
1313 pp = g_new_providerf(gp, "md%d", sc->unit);
1314 devstat_remove_entry(pp->stat);
1315 pp->stat = NULL;
1316 pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE;
1317 pp->mediasize = sc->mediasize;
1318 pp->sectorsize = sc->sectorsize;
1319 switch (sc->type) {
1320 case MD_MALLOC:
1321 case MD_VNODE:
1322 case MD_SWAP:
1323 pp->flags |= G_PF_ACCEPT_UNMAPPED;
1324 break;
1325 case MD_PRELOAD:
1326 case MD_NULL:
1327 break;
1328 }
1329 sc->gp = gp;
1330 sc->pp = pp;
1331 sc->devstat = devstat_new_entry("md", sc->unit, sc->sectorsize,
1332 DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX);
1333 sc->devstat->id = pp;
1334 g_error_provider(pp, 0);
1335 g_topology_unlock();
1336 }
1337
1338 static int
1339 mdcreate_malloc(struct md_s *sc, struct md_req *mdr)
1340 {
1341 uintptr_t sp;
1342 int error;
1343 off_t u;
1344
1345 error = 0;
1346 if (mdr->md_options & ~(MD_AUTOUNIT | MD_COMPRESS | MD_RESERVE))
1347 return (EINVAL);
1348 if (mdr->md_sectorsize != 0 && !powerof2(mdr->md_sectorsize))
1349 return (EINVAL);
1350 /* Compression doesn't make sense if we have reserved space */
1351 if (mdr->md_options & MD_RESERVE)
1352 mdr->md_options &= ~MD_COMPRESS;
1353 if (mdr->md_fwsectors != 0)
1354 sc->fwsectors = mdr->md_fwsectors;
1355 if (mdr->md_fwheads != 0)
1356 sc->fwheads = mdr->md_fwheads;
1357 sc->flags = mdr->md_options & (MD_COMPRESS | MD_FORCE);
1358 sc->indir = dimension(sc->mediasize / sc->sectorsize);
1359 sc->uma = uma_zcreate(sc->name, sc->sectorsize, NULL, NULL, NULL, NULL,
1360 0x1ff, 0);
1361 if (mdr->md_options & MD_RESERVE) {
1362 off_t nsectors;
1363
1364 nsectors = sc->mediasize / sc->sectorsize;
1365 for (u = 0; u < nsectors; u++) {
1366 sp = (uintptr_t)uma_zalloc(sc->uma, (md_malloc_wait ?
1367 M_WAITOK : M_NOWAIT) | M_ZERO);
1368 if (sp != 0)
1369 error = s_write(sc->indir, u, sp);
1370 else
1371 error = ENOMEM;
1372 if (error != 0)
1373 break;
1374 }
1375 }
1376 return (error);
1377 }
1378
1379 static int
1380 mdsetcred(struct md_s *sc, struct ucred *cred)
1381 {
1382 char *tmpbuf;
1383 int error = 0;
1384
1385 /*
1386  * Set credentials in our softc
1387 */
1388
1389 if (sc->cred)
1390 crfree(sc->cred);
1391 sc->cred = crhold(cred);
1392
1393 /*
1394 * Horrible kludge to establish credentials for NFS XXX.
1395 */
1396
1397 if (sc->vnode) {
1398 struct uio auio;
1399 struct iovec aiov;
1400
1401 tmpbuf = malloc(sc->sectorsize, M_TEMP, M_WAITOK);
1402 bzero(&auio, sizeof(auio));
1403
1404 aiov.iov_base = tmpbuf;
1405 aiov.iov_len = sc->sectorsize;
1406 auio.uio_iov = &aiov;
1407 auio.uio_iovcnt = 1;
1408 auio.uio_offset = 0;
1409 auio.uio_rw = UIO_READ;
1410 auio.uio_segflg = UIO_SYSSPACE;
1411 auio.uio_resid = aiov.iov_len;
1412 vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY);
1413 error = VOP_READ(sc->vnode, &auio, 0, sc->cred);
1414 VOP_UNLOCK(sc->vnode);
1415 free(tmpbuf, M_TEMP);
1416 }
1417 return (error);
1418 }
1419
1420 static int
1421 mdcreate_vnode(struct md_s *sc, struct md_req *mdr, struct thread *td)
1422 {
1423 struct vattr vattr;
1424 struct nameidata nd;
1425 char *fname;
1426 int error, flags;
1427
1428 fname = mdr->md_file;
1429 if (mdr->md_file_seg == UIO_USERSPACE) {
1430 error = copyinstr(fname, sc->file, sizeof(sc->file), NULL);
1431 if (error != 0)
1432 return (error);
1433 } else if (mdr->md_file_seg == UIO_SYSSPACE)
1434 strlcpy(sc->file, fname, sizeof(sc->file));
1435 else
1436 return (EDOOFUS);
1437
1438 /*
1439 * If the user specified that this is a read only device, don't
1440 * set the FWRITE mask before trying to open the backing store.
1441 */
1442 flags = FREAD | ((mdr->md_options & MD_READONLY) ? 0 : FWRITE) \
1443 | ((mdr->md_options & MD_VERIFY) ? O_VERIFY : 0);
1444 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, sc->file, td);
1445 error = vn_open(&nd, &flags, 0, NULL);
1446 if (error != 0)
1447 return (error);
1448 NDFREE(&nd, NDF_ONLY_PNBUF);
1449 if (nd.ni_vp->v_type != VREG) {
1450 error = EINVAL;
1451 goto bad;
1452 }
1453 error = VOP_GETATTR(nd.ni_vp, &vattr, td->td_ucred);
1454 if (error != 0)
1455 goto bad;
1456 if (VOP_ISLOCKED(nd.ni_vp) != LK_EXCLUSIVE) {
1457 vn_lock(nd.ni_vp, LK_UPGRADE | LK_RETRY);
1458 if (VN_IS_DOOMED(nd.ni_vp)) {
1459 /* Forced unmount. */
1460 error = EBADF;
1461 goto bad;
1462 }
1463 }
1464 nd.ni_vp->v_vflag |= VV_MD;
1465 VOP_UNLOCK(nd.ni_vp);
1466
1467 if (mdr->md_fwsectors != 0)
1468 sc->fwsectors = mdr->md_fwsectors;
1469 if (mdr->md_fwheads != 0)
1470 sc->fwheads = mdr->md_fwheads;
1471 snprintf(sc->ident, sizeof(sc->ident), "MD-DEV%ju-INO%ju",
1472 (uintmax_t)vattr.va_fsid, (uintmax_t)vattr.va_fileid);
1473 sc->flags = mdr->md_options & (MD_ASYNC | MD_CACHE | MD_FORCE |
1474 MD_VERIFY);
1475 if (!(flags & FWRITE))
1476 sc->flags |= MD_READONLY;
1477 sc->vnode = nd.ni_vp;
1478
1479 error = mdsetcred(sc, td->td_ucred);
1480 if (error != 0) {
1481 sc->vnode = NULL;
1482 vn_lock(nd.ni_vp, LK_EXCLUSIVE | LK_RETRY);
1483 nd.ni_vp->v_vflag &= ~VV_MD;
1484 goto bad;
1485 }
1486 return (0);
1487 bad:
1488 VOP_UNLOCK(nd.ni_vp);
1489 (void)vn_close(nd.ni_vp, flags, td->td_ucred, td);
1490 return (error);
1491 }
1492
1493 static void
1494 g_md_providergone(struct g_provider *pp)
1495 {
1496 struct md_s *sc = pp->geom->softc;
1497
1498 mtx_lock(&sc->queue_mtx);
1499 sc->flags |= MD_PROVIDERGONE;
1500 wakeup(&sc->flags);
1501 mtx_unlock(&sc->queue_mtx);
1502 }
1503
1504 static int
1505 mddestroy(struct md_s *sc, struct thread *td)
1506 {
1507
1508 if (sc->gp) {
1509 g_topology_lock();
1510 g_wither_geom(sc->gp, ENXIO);
1511 g_topology_unlock();
1512
1513 mtx_lock(&sc->queue_mtx);
1514 while (!(sc->flags & MD_PROVIDERGONE))
1515 msleep(&sc->flags, &sc->queue_mtx, PRIBIO, "mddestroy", 0);
1516 mtx_unlock(&sc->queue_mtx);
1517 }
1518 if (sc->devstat) {
1519 devstat_remove_entry(sc->devstat);
1520 sc->devstat = NULL;
1521 }
1522 mtx_lock(&sc->queue_mtx);
1523 sc->flags |= MD_SHUTDOWN;
1524 wakeup(sc);
1525 while (!(sc->flags & MD_EXITING))
1526 msleep(sc->procp, &sc->queue_mtx, PRIBIO, "mddestroy", hz / 10);
1527 mtx_unlock(&sc->queue_mtx);
1528 mtx_destroy(&sc->queue_mtx);
1529 if (sc->vnode != NULL) {
1530 vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY);
1531 sc->vnode->v_vflag &= ~VV_MD;
1532 VOP_UNLOCK(sc->vnode);
1533 (void)vn_close(sc->vnode, sc->flags & MD_READONLY ?
1534 FREAD : (FREAD|FWRITE), sc->cred, td);
1535 }
1536 if (sc->cred != NULL)
1537 crfree(sc->cred);
1538 if (sc->object != NULL)
1539 vm_object_deallocate(sc->object);
1540 if (sc->indir)
1541 destroy_indir(sc, sc->indir);
1542 if (sc->uma)
1543 uma_zdestroy(sc->uma);
1544
1545 LIST_REMOVE(sc, list);
1546 free_unr(md_uh, sc->unit);
1547 free(sc, M_MD);
1548 return (0);
1549 }
1550
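/*
 * Resize a vnode-, swap- or null-backed device.  For swap-backed devices
 * the backing VM object and its swap reservation are grown or shrunk to
 * match before the GEOM provider is resized.
 */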
1551 static int
1552 mdresize(struct md_s *sc, struct md_req *mdr)
1553 {
1554 int error, res;
1555 vm_pindex_t oldpages, newpages;
1556
1557 switch (sc->type) {
1558 case MD_VNODE:
1559 case MD_NULL:
1560 break;
1561 case MD_SWAP:
1562 if (mdr->md_mediasize <= 0 ||
1563 (mdr->md_mediasize % PAGE_SIZE) != 0)
1564 return (EDOM);
1565 oldpages = OFF_TO_IDX(sc->mediasize);
1566 newpages = OFF_TO_IDX(mdr->md_mediasize);
1567 if (newpages < oldpages) {
1568 VM_OBJECT_WLOCK(sc->object);
1569 vm_object_page_remove(sc->object, newpages, 0, 0);
1570 swap_release_by_cred(IDX_TO_OFF(oldpages -
1571 newpages), sc->cred);
1572 sc->object->charge = IDX_TO_OFF(newpages);
1573 sc->object->size = newpages;
1574 VM_OBJECT_WUNLOCK(sc->object);
1575 } else if (newpages > oldpages) {
1576 res = swap_reserve_by_cred(IDX_TO_OFF(newpages -
1577 oldpages), sc->cred);
1578 if (!res)
1579 return (ENOMEM);
1580 if ((mdr->md_options & MD_RESERVE) ||
1581 (sc->flags & MD_RESERVE)) {
1582 error = swap_pager_reserve(sc->object,
1583 oldpages, newpages - oldpages);
1584 if (error < 0) {
1585 swap_release_by_cred(
1586 IDX_TO_OFF(newpages - oldpages),
1587 sc->cred);
1588 return (EDOM);
1589 }
1590 }
1591 VM_OBJECT_WLOCK(sc->object);
1592 sc->object->charge = IDX_TO_OFF(newpages);
1593 sc->object->size = newpages;
1594 VM_OBJECT_WUNLOCK(sc->object);
1595 }
1596 break;
1597 default:
1598 return (EOPNOTSUPP);
1599 }
1600
1601 sc->mediasize = mdr->md_mediasize;
1602
1603 g_topology_lock();
1604 g_resize_provider(sc->pp, sc->mediasize);
1605 g_topology_unlock();
1606 return (0);
1607 }
1608
1609 static int
1610 mdcreate_swap(struct md_s *sc, struct md_req *mdr, struct thread *td)
1611 {
1612 vm_ooffset_t npage;
1613 int error;
1614
1615 /*
1616  * Range check. Disallow negative sizes and sizes that are not
1617  * a multiple of the page size.
1618 */
1619 if (sc->mediasize <= 0 || (sc->mediasize % PAGE_SIZE) != 0)
1620 return (EDOM);
1621
1622 /*
1623 * Allocate an OBJT_SWAP object.
1624 *
1625 * Note the truncation.
1626 */
1627
1628 if ((mdr->md_options & MD_VERIFY) != 0)
1629 return (EINVAL);
1630 npage = mdr->md_mediasize / PAGE_SIZE;
1631 if (mdr->md_fwsectors != 0)
1632 sc->fwsectors = mdr->md_fwsectors;
1633 if (mdr->md_fwheads != 0)
1634 sc->fwheads = mdr->md_fwheads;
1635 sc->object = vm_pager_allocate(OBJT_SWAP, NULL, PAGE_SIZE * npage,
1636 VM_PROT_DEFAULT, 0, td->td_ucred);
1637 if (sc->object == NULL)
1638 return (ENOMEM);
1639 sc->flags = mdr->md_options & (MD_FORCE | MD_RESERVE);
1640 if (mdr->md_options & MD_RESERVE) {
1641 if (swap_pager_reserve(sc->object, 0, npage) < 0) {
1642 error = EDOM;
1643 goto finish;
1644 }
1645 }
1646 error = mdsetcred(sc, td->td_ucred);
1647 finish:
1648 if (error != 0) {
1649 vm_object_deallocate(sc->object);
1650 sc->object = NULL;
1651 }
1652 return (error);
1653 }
1654
1655 static int
1656 mdcreate_null(struct md_s *sc, struct md_req *mdr, struct thread *td)
1657 {
1658
1659 /*
1660  * Range check. Disallow negative sizes and sizes that are not
1661  * a multiple of the page size.
1662 */
1663 if (sc->mediasize <= 0 || (sc->mediasize % PAGE_SIZE) != 0)
1664 return (EDOM);
1665
1666 return (0);
1667 }
1668
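/*
 * Common attach path for MDIOCATTACH: validate the request, allocate a
 * unit and dispatch to the type-specific creation routine, then trim the
 * media size to a whole number of sectors and publish the device.
 */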
1669 static int
1670 kern_mdattach_locked(struct thread *td, struct md_req *mdr)
1671 {
1672 struct md_s *sc;
1673 unsigned sectsize;
1674 int error, i;
1675
1676 sx_assert(&md_sx, SA_XLOCKED);
1677
1678 switch (mdr->md_type) {
1679 case MD_MALLOC:
1680 case MD_PRELOAD:
1681 case MD_VNODE:
1682 case MD_SWAP:
1683 case MD_NULL:
1684 break;
1685 default:
1686 return (EINVAL);
1687 }
1688 if (mdr->md_sectorsize == 0)
1689 sectsize = DEV_BSIZE;
1690 else
1691 sectsize = mdr->md_sectorsize;
1692 if (sectsize > maxphys || mdr->md_mediasize < sectsize)
1693 return (EINVAL);
1694 if (mdr->md_options & MD_AUTOUNIT)
1695 sc = mdnew(-1, &error, mdr->md_type);
1696 else {
1697 if (mdr->md_unit > INT_MAX)
1698 return (EINVAL);
1699 sc = mdnew(mdr->md_unit, &error, mdr->md_type);
1700 }
1701 if (sc == NULL)
1702 return (error);
1703 if (mdr->md_label != NULL)
1704 error = copyinstr(mdr->md_label, sc->label,
1705 sizeof(sc->label), NULL);
1706 if (error != 0)
1707 goto err_after_new;
1708 if (mdr->md_options & MD_AUTOUNIT)
1709 mdr->md_unit = sc->unit;
1710 sc->mediasize = mdr->md_mediasize;
1711 sc->sectorsize = sectsize;
1712 error = EDOOFUS;
1713 switch (sc->type) {
1714 case MD_MALLOC:
1715 sc->start = mdstart_malloc;
1716 error = mdcreate_malloc(sc, mdr);
1717 break;
1718 case MD_PRELOAD:
1719 /*
1720 * We disallow attaching preloaded memory disks via
1721 * ioctl. Preloaded memory disks are automatically
1722 * attached in g_md_init().
1723 */
1724 error = EOPNOTSUPP;
1725 break;
1726 case MD_VNODE:
1727 sc->start = mdstart_vnode;
1728 error = mdcreate_vnode(sc, mdr, td);
1729 break;
1730 case MD_SWAP:
1731 sc->start = mdstart_swap;
1732 error = mdcreate_swap(sc, mdr, td);
1733 break;
1734 case MD_NULL:
1735 sc->start = mdstart_null;
1736 error = mdcreate_null(sc, mdr, td);
1737 break;
1738 }
1739 err_after_new:
1740 if (error != 0) {
1741 mddestroy(sc, td);
1742 return (error);
1743 }
1744
1745 /* Prune off any residual fractional sector */
1746 i = sc->mediasize % sc->sectorsize;
1747 sc->mediasize -= i;
1748
1749 mdinit(sc);
1750 return (0);
1751 }
1752
1753 static int
1754 kern_mdattach(struct thread *td, struct md_req *mdr)
1755 {
1756 int error;
1757
1758 sx_xlock(&md_sx);
1759 error = kern_mdattach_locked(td, mdr);
1760 sx_xunlock(&md_sx);
1761 return (error);
1762 }
1763
1764 static int
1765 kern_mddetach_locked(struct thread *td, struct md_req *mdr)
1766 {
1767 struct md_s *sc;
1768
1769 sx_assert(&md_sx, SA_XLOCKED);
1770
1771 if (mdr->md_mediasize != 0 ||
1772 (mdr->md_options & ~MD_FORCE) != 0)
1773 return (EINVAL);
1774
1775 sc = mdfind(mdr->md_unit);
1776 if (sc == NULL)
1777 return (ENOENT);
1778 if (sc->opencount != 0 && !(sc->flags & MD_FORCE) &&
1779 !(mdr->md_options & MD_FORCE))
1780 return (EBUSY);
1781 return (mddestroy(sc, td));
1782 }
1783
1784 static int
1785 kern_mddetach(struct thread *td, struct md_req *mdr)
1786 {
1787 int error;
1788
1789 sx_xlock(&md_sx);
1790 error = kern_mddetach_locked(td, mdr);
1791 sx_xunlock(&md_sx);
1792 return (error);
1793 }
1794
1795 static int
1796 kern_mdresize_locked(struct md_req *mdr)
1797 {
1798 struct md_s *sc;
1799
1800 sx_assert(&md_sx, SA_XLOCKED);
1801
1802 if ((mdr->md_options & ~(MD_FORCE | MD_RESERVE)) != 0)
1803 return (EINVAL);
1804
1805 sc = mdfind(mdr->md_unit);
1806 if (sc == NULL)
1807 return (ENOENT);
1808 if (mdr->md_mediasize < sc->sectorsize)
1809 return (EINVAL);
1810 mdr->md_mediasize -= mdr->md_mediasize % sc->sectorsize;
1811 if (mdr->md_mediasize < sc->mediasize &&
1812 !(sc->flags & MD_FORCE) &&
1813 !(mdr->md_options & MD_FORCE))
1814 return (EBUSY);
1815 return (mdresize(sc, mdr));
1816 }
1817
1818 static int
1819 kern_mdresize(struct md_req *mdr)
1820 {
1821 int error;
1822
1823 sx_xlock(&md_sx);
1824 error = kern_mdresize_locked(mdr);
1825 sx_xunlock(&md_sx);
1826 return (error);
1827 }
1828
1829 static int
1830 kern_mdquery_locked(struct md_req *mdr)
1831 {
1832 struct md_s *sc;
1833 int error;
1834
1835 sx_assert(&md_sx, SA_XLOCKED);
1836
1837 sc = mdfind(mdr->md_unit);
1838 if (sc == NULL)
1839 return (ENOENT);
1840 mdr->md_type = sc->type;
1841 mdr->md_options = sc->flags;
1842 mdr->md_mediasize = sc->mediasize;
1843 mdr->md_sectorsize = sc->sectorsize;
1844 error = 0;
1845 if (mdr->md_label != NULL) {
1846 error = copyout(sc->label, mdr->md_label,
1847 strlen(sc->label) + 1);
1848 if (error != 0)
1849 return (error);
1850 }
1851 if (sc->type == MD_VNODE ||
1852 (sc->type == MD_PRELOAD && mdr->md_file != NULL))
1853 error = copyout(sc->file, mdr->md_file,
1854 strlen(sc->file) + 1);
1855 return (error);
1856 }
1857
1858 static int
1859 kern_mdquery(struct md_req *mdr)
1860 {
1861 int error;
1862
1863 sx_xlock(&md_sx);
1864 error = kern_mdquery_locked(mdr);
1865 sx_xunlock(&md_sx);
1866 return (error);
1867 }
1868
1869 /* Copy members that are not userspace pointers. */
1870 #define MD_IOCTL2REQ(mdio, mdr) do { \
1871 (mdr)->md_unit = (mdio)->md_unit; \
1872 (mdr)->md_type = (mdio)->md_type; \
1873 (mdr)->md_mediasize = (mdio)->md_mediasize; \
1874 (mdr)->md_sectorsize = (mdio)->md_sectorsize; \
1875 (mdr)->md_options = (mdio)->md_options; \
1876 (mdr)->md_fwheads = (mdio)->md_fwheads; \
1877 (mdr)->md_fwsectors = (mdio)->md_fwsectors; \
1878 (mdr)->md_units = &(mdio)->md_pad[0]; \
1879 (mdr)->md_units_nitems = nitems((mdio)->md_pad); \
1880 } while(0)
1881
1882 /* Copy members that might have been updated */
1883 #define MD_REQ2IOCTL(mdr, mdio) do { \
1884 (mdio)->md_unit = (mdr)->md_unit; \
1885 (mdio)->md_type = (mdr)->md_type; \
1886 (mdio)->md_mediasize = (mdr)->md_mediasize; \
1887 (mdio)->md_sectorsize = (mdr)->md_sectorsize; \
1888 (mdio)->md_options = (mdr)->md_options; \
1889 (mdio)->md_fwheads = (mdr)->md_fwheads; \
1890 (mdio)->md_fwsectors = (mdr)->md_fwsectors; \
1891 } while(0)
1892
1893 static int
1894 mdctlioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
1895 struct thread *td)
1896 {
1897 struct md_req mdr;
1898 int error;
1899
1900 if (md_debug)
1901 printf("mdctlioctl(%s %lx %p %x %p)\n",
1902 devtoname(dev), cmd, addr, flags, td);
1903
1904 bzero(&mdr, sizeof(mdr));
1905 switch (cmd) {
1906 case MDIOCATTACH:
1907 case MDIOCDETACH:
1908 case MDIOCRESIZE:
1909 case MDIOCQUERY: {
1910 struct md_ioctl *mdio = (struct md_ioctl *)addr;
1911 if (mdio->md_version != MDIOVERSION)
1912 return (EINVAL);
1913 MD_IOCTL2REQ(mdio, &mdr);
1914 mdr.md_file = mdio->md_file;
1915 mdr.md_file_seg = UIO_USERSPACE;
1916 		/* If the file is adjacent to the md_ioctl, it is in kernel memory. */
1917 if ((void *)mdio->md_file == (void *)(mdio + 1))
1918 mdr.md_file_seg = UIO_SYSSPACE;
1919 mdr.md_label = mdio->md_label;
1920 break;
1921 }
1922 #ifdef COMPAT_FREEBSD32
1923 case MDIOCATTACH_32:
1924 case MDIOCDETACH_32:
1925 case MDIOCRESIZE_32:
1926 case MDIOCQUERY_32: {
1927 struct md_ioctl32 *mdio = (struct md_ioctl32 *)addr;
1928 if (mdio->md_version != MDIOVERSION)
1929 return (EINVAL);
1930 MD_IOCTL2REQ(mdio, &mdr);
1931 mdr.md_file = (void *)(uintptr_t)mdio->md_file;
1932 mdr.md_file_seg = UIO_USERSPACE;
1933 mdr.md_label = (void *)(uintptr_t)mdio->md_label;
1934 break;
1935 }
1936 #endif
1937 default:
1938 /* Fall through to handler switch. */
1939 break;
1940 }
1941
1942 error = 0;
1943 switch (cmd) {
1944 case MDIOCATTACH:
1945 #ifdef COMPAT_FREEBSD32
1946 case MDIOCATTACH_32:
1947 #endif
1948 error = kern_mdattach(td, &mdr);
1949 break;
1950 case MDIOCDETACH:
1951 #ifdef COMPAT_FREEBSD32
1952 case MDIOCDETACH_32:
1953 #endif
1954 error = kern_mddetach(td, &mdr);
1955 break;
1956 case MDIOCRESIZE:
1957 #ifdef COMPAT_FREEBSD32
1958 case MDIOCRESIZE_32:
1959 #endif
1960 error = kern_mdresize(&mdr);
1961 break;
1962 case MDIOCQUERY:
1963 #ifdef COMPAT_FREEBSD32
1964 case MDIOCQUERY_32:
1965 #endif
1966 error = kern_mdquery(&mdr);
1967 break;
1968 default:
1969 error = ENOIOCTL;
1970 }
1971
1972 switch (cmd) {
1973 case MDIOCATTACH:
1974 case MDIOCQUERY: {
1975 struct md_ioctl *mdio = (struct md_ioctl *)addr;
1976 MD_REQ2IOCTL(&mdr, mdio);
1977 break;
1978 }
1979 #ifdef COMPAT_FREEBSD32
1980 case MDIOCATTACH_32:
1981 case MDIOCQUERY_32: {
1982 struct md_ioctl32 *mdio = (struct md_ioctl32 *)addr;
1983 MD_REQ2IOCTL(&mdr, mdio);
1984 break;
1985 }
1986 #endif
1987 default:
1988 		/* Other commands do not alter mdr. */
1989 break;
1990 }
1991
1992 return (error);
1993 }
1994
1995 static void
1996 md_preloaded(u_char *image, size_t length, const char *name)
1997 {
1998 struct md_s *sc;
1999 int error;
2000
2001 sc = mdnew(-1, &error, MD_PRELOAD);
2002 if (sc == NULL)
2003 return;
2004 sc->mediasize = length;
2005 sc->sectorsize = DEV_BSIZE;
2006 sc->pl_ptr = image;
2007 sc->pl_len = length;
2008 sc->start = mdstart_preload;
2009 if (name != NULL)
2010 strlcpy(sc->file, name, sizeof(sc->file));
2011 #ifdef MD_ROOT
2012 if (sc->unit == 0) {
2013 #ifndef ROOTDEVNAME
2014 rootdevnames[0] = MD_ROOT_FSTYPE ":/dev/md0";
2015 #endif
2016 #ifdef MD_ROOT_READONLY
2017 sc->flags |= MD_READONLY;
2018 #endif
2019 }
2020 #endif
2021 mdinit(sc);
2022 if (name != NULL) {
2023 printf("%s%d: Preloaded image <%s> %zd bytes at %p\n",
2024 MD_NAME, sc->unit, name, length, image);
2025 } else {
2026 printf("%s%d: Embedded image %zd bytes at %p\n",
2027 MD_NAME, sc->unit, length, image);
2028 }
2029 }
2030
2031 static void
2032 g_md_init(struct g_class *mp __unused)
2033 {
2034 caddr_t mod;
2035 u_char *ptr, *name, *type;
2036 unsigned len;
2037 int i;
2038
2039 /* figure out log2(NINDIR) */
2040 for (i = NINDIR, nshift = -1; i; nshift++)
2041 i >>= 1;
2042
2043 mod = NULL;
2044 sx_init(&md_sx, "MD config lock");
2045 g_topology_unlock();
2046 md_uh = new_unrhdr(0, INT_MAX, NULL);
2047 #ifdef MD_ROOT
2048 if (mfs_root_size != 0) {
2049 sx_xlock(&md_sx);
2050 #ifdef MD_ROOT_MEM
2051 md_preloaded(mfs_root, mfs_root_size, NULL);
2052 #else
2053 md_preloaded(__DEVOLATILE(u_char *, &mfs_root), mfs_root_size,
2054 NULL);
2055 #endif
2056 sx_xunlock(&md_sx);
2057 }
2058 #endif
2059 /* XXX: are preload_* static or do they need Giant ? */
2060 while ((mod = preload_search_next_name(mod)) != NULL) {
2061 name = (char *)preload_search_info(mod, MODINFO_NAME);
2062 if (name == NULL)
2063 continue;
2064 type = (char *)preload_search_info(mod, MODINFO_TYPE);
2065 if (type == NULL)
2066 continue;
2067 if (strcmp(type, "md_image") && strcmp(type, "mfs_root"))
2068 continue;
2069 ptr = preload_fetch_addr(mod);
2070 len = preload_fetch_size(mod);
2071 if (ptr != NULL && len != 0) {
2072 sx_xlock(&md_sx);
2073 md_preloaded(ptr, len, name);
2074 sx_xunlock(&md_sx);
2075 }
2076 }
2077 md_pbuf_zone = pbuf_zsecond_create("mdpbuf", nswbuf / 10);
2078 status_dev = make_dev(&mdctl_cdevsw, INT_MAX, UID_ROOT, GID_WHEEL,
2079 0600, MDCTL_NAME);
2080 g_topology_lock();
2081 }
2082
2083 static void
2084 g_md_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
2085 struct g_consumer *cp __unused, struct g_provider *pp)
2086 {
2087 struct md_s *mp;
2088 char *type;
2089
2090 mp = gp->softc;
2091 if (mp == NULL)
2092 return;
2093
2094 switch (mp->type) {
2095 case MD_MALLOC:
2096 type = "malloc";
2097 break;
2098 case MD_PRELOAD:
2099 type = "preload";
2100 break;
2101 case MD_VNODE:
2102 type = "vnode";
2103 break;
2104 case MD_SWAP:
2105 type = "swap";
2106 break;
2107 case MD_NULL:
2108 type = "null";
2109 break;
2110 default:
2111 type = "unknown";
2112 break;
2113 }
2114
2115 if (pp != NULL) {
2116 if (indent == NULL) {
2117 sbuf_printf(sb, " u %d", mp->unit);
2118 sbuf_printf(sb, " s %ju", (uintmax_t) mp->sectorsize);
2119 sbuf_printf(sb, " f %ju", (uintmax_t) mp->fwheads);
2120 sbuf_printf(sb, " fs %ju", (uintmax_t) mp->fwsectors);
2121 sbuf_printf(sb, " l %ju", (uintmax_t) mp->mediasize);
2122 sbuf_printf(sb, " t %s", type);
2123 if ((mp->type == MD_VNODE && mp->vnode != NULL) ||
2124 (mp->type == MD_PRELOAD && mp->file[0] != '\0'))
2125 sbuf_printf(sb, " file %s", mp->file);
2126 sbuf_printf(sb, " label %s", mp->label);
2127 } else {
2128 sbuf_printf(sb, "%s<unit>%d</unit>\n", indent,
2129 mp->unit);
2130 sbuf_printf(sb, "%s<sectorsize>%ju</sectorsize>\n",
2131 indent, (uintmax_t) mp->sectorsize);
2132 sbuf_printf(sb, "%s<fwheads>%ju</fwheads>\n",
2133 indent, (uintmax_t) mp->fwheads);
2134 sbuf_printf(sb, "%s<fwsectors>%ju</fwsectors>\n",
2135 indent, (uintmax_t) mp->fwsectors);
2136 if (mp->ident[0] != '\0') {
2137 sbuf_printf(sb, "%s<ident>", indent);
2138 g_conf_printf_escaped(sb, "%s", mp->ident);
2139 sbuf_printf(sb, "</ident>\n");
2140 }
2141 sbuf_printf(sb, "%s<length>%ju</length>\n",
2142 indent, (uintmax_t) mp->mediasize);
2143 sbuf_printf(sb, "%s<compression>%s</compression>\n", indent,
2144 (mp->flags & MD_COMPRESS) == 0 ? "off": "on");
2145 sbuf_printf(sb, "%s<access>%s</access>\n", indent,
2146 (mp->flags & MD_READONLY) == 0 ? "read-write":
2147 "read-only");
2148 sbuf_printf(sb, "%s<type>%s</type>\n", indent,
2149 type);
2150 if ((mp->type == MD_VNODE && mp->vnode != NULL) ||
2151 (mp->type == MD_PRELOAD && mp->file[0] != '\0')) {
2152 sbuf_printf(sb, "%s<file>", indent);
2153 g_conf_printf_escaped(sb, "%s", mp->file);
2154 sbuf_printf(sb, "</file>\n");
2155 }
2156 if (mp->type == MD_VNODE)
2157 sbuf_printf(sb, "%s<cache>%s</cache>\n", indent,
2158 (mp->flags & MD_CACHE) == 0 ? "off": "on");
2159 sbuf_printf(sb, "%s<label>", indent);
2160 g_conf_printf_escaped(sb, "%s", mp->label);
2161 sbuf_printf(sb, "</label>\n");
2162 }
2163 }
2164 }
2165
2166 static void
2167 g_md_fini(struct g_class *mp __unused)
2168 {
2169
2170 sx_destroy(&md_sx);
2171 if (status_dev != NULL)
2172 destroy_dev(status_dev);
2173 uma_zdestroy(md_pbuf_zone);
2174 delete_unrhdr(md_uh);
2175 }