FreeBSD/Linux Kernel Cross Reference
sys/geom/geom_bsd.c
1 /*-
2 * Copyright (c) 2002 Poul-Henning Kamp
3 * Copyright (c) 2002 Networks Associates Technology, Inc.
4 * All rights reserved.
5 *
6 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
7 * and NAI Labs, the Security Research Division of Network Associates, Inc.
8 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
9 * DARPA CHATS research program.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. The names of the authors may not be used to endorse or promote
20 * products derived from this software without specific prior written
21 * permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36 /*
37 * This is the method for dealing with BSD disklabels. It has been
38 * extensively (by my standards at least) commented, in the vain hope that
39 * it will serve as the source in future copy&paste operations.
40 */
41
42 #include <sys/cdefs.h>
43 __FBSDID("$FreeBSD: releng/6.0/sys/geom/geom_bsd.c 143719 2005-03-16 20:48:13Z pjd $");
44
45 #include <sys/param.h>
46 #include <sys/endian.h>
47 #include <sys/systm.h>
48 #include <sys/kernel.h>
49 #include <sys/fcntl.h>
50 #include <sys/conf.h>
51 #include <sys/bio.h>
52 #include <sys/malloc.h>
53 #include <sys/lock.h>
54 #include <sys/mutex.h>
55 #include <sys/md5.h>
56 #include <sys/errno.h>
57 #include <sys/disklabel.h>
58 #include <geom/geom.h>
59 #include <geom/geom_slice.h>
60
61 #define BSD_CLASS_NAME "BSD" /* name stored in g_bsd_class.name */
62
63 #define ALPHA_LABEL_OFFSET 64 /* second byte offset probed for a label; a label found here gets the Alpha checksum on write */
64
65 #define LABELSIZE (148 + 16 * MAXPARTITIONS) /* bytes of raw label image kept in the softc: 148 plus 16 per partition slot */
66
67 static void g_bsd_hotwrite(void *arg, int flag); /* forward declaration; installed as gsp->hot by g_bsd_taste() */
68 /*
69 * Our private data about one instance. All the rest is handled by the
70 * slice code and stored in its softc, so this is just the stuff
71 * specific to BSD disklabels.
72 */
73 struct g_bsd_softc {
74 off_t labeloffset; /* byte offset on the consumer where g_bsd_try() last looked for the label */
75 off_t mbroffset; /* value of the MBR::offset or PC98::offset attribute, when the provider has one */
76 off_t rawoffset; /* byte offset subtracted from partition starts; computed by g_bsd_modify() */
77 struct disklabel ondisk; /* the label decoded into native format */
78 u_char label[LABELSIZE]; /* raw little-endian label image */
79 u_char labelsum[16]; /* MD5 digest of label[], answered as the BSD::labelsum attribute */
80 };
81
82 /*
83 * Modify our slicer to match proposed disklabel, if possible.
84 * This is where we make sure we don't do something stupid.
85 */
86 static int
87 g_bsd_modify(struct g_geom *gp, u_char *label)
88 {
89 int i, error;
90 struct partition *ppp;
91 struct g_slicer *gsp;
92 struct g_consumer *cp;
93 struct g_bsd_softc *ms;
94 u_int secsize, u;
95 off_t rawoffset, o;
96 struct disklabel dl;
97 MD5_CTX md5sum;
98
99 g_topology_assert();
100 gsp = gp->softc;
101 ms = gsp->softc;
102
103 error = bsd_disklabel_le_dec(label, &dl, MAXPARTITIONS);
104 if (error) {
105 return (error);
106 }
107
108 /* Get dimensions of our device. */
109 cp = LIST_FIRST(&gp->consumer);
110 secsize = cp->provider->sectorsize;
111
112 /* ... or a smaller sector size. */
113 if (dl.d_secsize < secsize) {
114 return (EINVAL);
115 }
116
117 /* ... or a non-multiple sector size. */
118 if (dl.d_secsize % secsize != 0) {
119 return (EINVAL);
120 }
121
122 /* Historical braindamage... */
123 rawoffset = (off_t)dl.d_partitions[RAW_PART].p_offset * dl.d_secsize;
124
125 for (i = 0; i < dl.d_npartitions; i++) {
126 ppp = &dl.d_partitions[i];
127 if (ppp->p_size == 0)
128 continue;
129 o = (off_t)ppp->p_offset * dl.d_secsize;
130
131 if (o < rawoffset)
132 rawoffset = 0;
133 }
134
135 if (rawoffset != 0 && (off_t)rawoffset != ms->mbroffset)
136 printf("WARNING: Expected rawoffset %jd, found %jd\n",
137 (intmax_t)ms->mbroffset/dl.d_secsize,
138 (intmax_t)rawoffset/dl.d_secsize);
139
140 /* Don't munge open partitions. */
141 for (i = 0; i < dl.d_npartitions; i++) {
142 ppp = &dl.d_partitions[i];
143
144 o = (off_t)ppp->p_offset * dl.d_secsize;
145 if (o == 0)
146 o = rawoffset;
147 error = g_slice_config(gp, i, G_SLICE_CONFIG_CHECK,
148 o - rawoffset,
149 (off_t)ppp->p_size * dl.d_secsize,
150 dl.d_secsize,
151 "%s%c", gp->name, 'a' + i);
152 if (error)
153 return (error);
154 }
155
156 /* Look good, go for it... */
157 for (u = 0; u < gsp->nslice; u++) {
158 ppp = &dl.d_partitions[u];
159 o = (off_t)ppp->p_offset * dl.d_secsize;
160 if (o == 0)
161 o = rawoffset;
162 g_slice_config(gp, u, G_SLICE_CONFIG_SET,
163 o - rawoffset,
164 (off_t)ppp->p_size * dl.d_secsize,
165 dl.d_secsize,
166 "%s%c", gp->name, 'a' + u);
167 }
168
169 /* Update our softc */
170 ms->ondisk = dl;
171 if (label != ms->label)
172 bcopy(label, ms->label, LABELSIZE);
173 ms->rawoffset = rawoffset;
174
175 /*
176 * In order to avoid recursively attaching to the same
177 * on-disk label (it's usually visible through the 'c'
178 * partition) we calculate an MD5 and ask if other BSD's
179 * below us love that label. If they do, we don't.
180 */
181 MD5Init(&md5sum);
182 MD5Update(&md5sum, ms->label, sizeof(ms->label));
183 MD5Final(ms->labelsum, &md5sum);
184
185 return (0);
186 }
187
188 /*
189 * This is an internal helper function, called multiple times from the taste
190 * function to try to locate a disklabel on the disk. More civilized formats
191 * will not need this, as there is only one possible place on disk to look
192 * for the magic spot.
193 */
194
195 static int
196 g_bsd_try(struct g_geom *gp, struct g_slicer *gsp, struct g_consumer *cp, int secsize, struct g_bsd_softc *ms, off_t offset)
197 {
198 int error;
199 u_char *buf;
200 struct disklabel *dl;
201 off_t secoff;
202
203 /*
204 * We need to read entire aligned sectors, and we assume that the
205 * disklabel does not span sectors, so one sector is enough.
206 */
207 error = 0;
208 secoff = offset % secsize;
209 buf = g_read_data(cp, offset - secoff, secsize, &error);
210 if (buf == NULL || error != 0)
211 return (ENOENT);
212
213 /* Decode into our native format. */
214 dl = &ms->ondisk;
215 error = bsd_disklabel_le_dec(buf + secoff, dl, MAXPARTITIONS);
216 if (!error)
217 bcopy(buf + secoff, ms->label, LABELSIZE);
218
219 /* Remember to free the buffer g_read_data() gave us. */
220 g_free(buf);
221
222 ms->labeloffset = offset;
223 return (error);
224 }
225
226 /*
227 * This function writes the current label to disk, possibly updating
228 * the alpha SRM checksum.
229 */
230
231 static int
232 g_bsd_writelabel(struct g_geom *gp, u_char *bootcode)
233 {
234 off_t secoff;
235 u_int secsize;
236 struct g_consumer *cp;
237 struct g_slicer *gsp;
238 struct g_bsd_softc *ms;
239 u_char *buf;
240 uint64_t sum;
241 int error, i;
242
243 gsp = gp->softc;
244 ms = gsp->softc;
245 cp = LIST_FIRST(&gp->consumer);
246 /* Get sector size, we need it to read data. */
247 secsize = cp->provider->sectorsize;
248 secoff = ms->labeloffset % secsize;
249 if (bootcode == NULL) {
250 buf = g_read_data(cp, ms->labeloffset - secoff, secsize, &error);
251 if (buf == NULL || error != 0)
252 return (error);
253 bcopy(ms->label, buf + secoff, sizeof(ms->label));
254 } else {
255 buf = bootcode;
256 bcopy(ms->label, buf + ms->labeloffset, sizeof(ms->label));
257 }
258 if (ms->labeloffset == ALPHA_LABEL_OFFSET) {
259 sum = 0;
260 for (i = 0; i < 63; i++)
261 sum += le64dec(buf + i * 8);
262 le64enc(buf + 504, sum);
263 }
264 if (bootcode == NULL) {
265 error = g_write_data(cp, ms->labeloffset - secoff, buf, secsize);
266 g_free(buf);
267 } else {
268 error = g_write_data(cp, 0, bootcode, BBSIZE);
269 }
270 return(error);
271 }
272
273 /*
274 * If the user tries to overwrite our disklabel through an open partition
275 * or via a magicwrite config call, we end up here and try to prevent
276 * footshooting as best we can.
277 */
278 static void
279 g_bsd_hotwrite(void *arg, int flag)
280 {
281 struct bio *bp;
282 struct g_geom *gp;
283 struct g_slicer *gsp;
284 struct g_slice *gsl;
285 struct g_bsd_softc *ms;
286 u_char *p;
287 int error;
288
289 g_topology_assert();
290 /*
291 * We should never get canceled, because that would amount to a removal
292 * of the geom while there was outstanding I/O requests.
293 */
294 KASSERT(flag != EV_CANCEL, ("g_bsd_hotwrite cancelled"));
295 bp = arg;
296 gp = bp->bio_to->geom;
297 gsp = gp->softc;
298 ms = gsp->softc;
299 gsl = &gsp->slices[bp->bio_to->index];
300 p = (u_char*)bp->bio_data + ms->labeloffset
301 - (bp->bio_offset + gsl->offset);
302 error = g_bsd_modify(gp, p);
303 if (error) {
304 g_io_deliver(bp, EPERM);
305 return;
306 }
307 g_slice_finish_hot(bp);
308 }
309
310 /*-
311 * The start routine, g_bsd_start() (defined after g_bsd_ioctl() below), is
312 * only called for non-trivial requests; all the trivial ones are handled
313 * autonomously by the slice code. For requests we handle there, we must call
314 * g_io_deliver() on the bio, and return non-zero to tell the slice code so.
315 * That code executes in the "DOWN" I/O path, which means:
316 * * No sleeping.
317 * * Don't grab the topology lock.
318 * * Don't call biowait(), g_getattr(), g_setattr() or g_read_data().
319 */
320 static int
321 g_bsd_ioctl(struct g_provider *pp, u_long cmd, void *data, int fflag, struct thread *td)
322 {
323 struct g_geom *gp;
324 struct g_bsd_softc *ms;
325 struct g_slicer *gsp;
326 u_char *label;
327 int error;
328
329 gp = pp->geom;
330 gsp = gp->softc;
331 ms = gsp->softc;
332
333 switch(cmd) {
334 case DIOCGDINFO:
335 /* Return a copy of the disklabel to userland. */
336 bsd_disklabel_le_dec(ms->label, data, MAXPARTITIONS);
337 return(0);
338 case DIOCBSDBB: {
339 struct g_consumer *cp;
340 u_char *buf;
341 void *p;
342 int error, i;
343 uint64_t sum;
344
345 if (!(fflag & FWRITE))
346 return (EPERM);
347 /* The disklabel to set is the ioctl argument. */
348 buf = g_malloc(BBSIZE, M_WAITOK);
349 p = *(void **)data;
350 error = copyin(p, buf, BBSIZE);
351 if (!error) {
352 /* XXX: Rude, but supposedly safe */
353 DROP_GIANT();
354 g_topology_lock();
355 /* Validate and modify our slice instance to match. */
356 error = g_bsd_modify(gp, buf + ms->labeloffset);
357 if (!error) {
358 cp = LIST_FIRST(&gp->consumer);
359 if (ms->labeloffset == ALPHA_LABEL_OFFSET) {
360 sum = 0;
361 for (i = 0; i < 63; i++)
362 sum += le64dec(buf + i * 8);
363 le64enc(buf + 504, sum);
364 }
365 error = g_write_data(cp, 0, buf, BBSIZE);
366 }
367 g_topology_unlock();
368 PICKUP_GIANT();
369 }
370 g_free(buf);
371 return (error);
372 }
373 case DIOCSDINFO:
374 case DIOCWDINFO: {
375 if (!(fflag & FWRITE))
376 return (EPERM);
377 label = g_malloc(LABELSIZE, M_WAITOK);
378 /* The disklabel to set is the ioctl argument. */
379 bsd_disklabel_le_enc(label, data);
380
381 DROP_GIANT();
382 g_topology_lock();
383 /* Validate and modify our slice instance to match. */
384 error = g_bsd_modify(gp, label);
385 if (error == 0 && cmd == DIOCWDINFO)
386 error = g_bsd_writelabel(gp, NULL);
387 g_topology_unlock();
388 PICKUP_GIANT();
389 g_free(label);
390 return(error);
391 }
392 default:
393 return (ENOIOCTL);
394 }
395 }
396
397 static int
398 g_bsd_start(struct bio *bp)
399 {
400 struct g_geom *gp;
401 struct g_bsd_softc *ms;
402 struct g_slicer *gsp;
403
404 gp = bp->bio_to->geom;
405 gsp = gp->softc;
406 ms = gsp->softc;
407 if (bp->bio_cmd == BIO_GETATTR) {
408 if (g_handleattr(bp, "BSD::labelsum", ms->labelsum,
409 sizeof(ms->labelsum)))
410 return (1);
411 }
412 return (0);
413 }
414
415 /*
416 * Dump configuration information in XML format.
417 * Notice that the function is called once for the geom and once for each
418 * consumer and provider. We let g_slice_dumpconf() do most of the work.
419 */
420 static void
421 g_bsd_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp)
422 {
423 struct g_bsd_softc *ms;
424 struct g_slicer *gsp;
425
426 gsp = gp->softc;
427 ms = gsp->softc;
428 g_slice_dumpconf(sb, indent, gp, cp, pp);
429 if (indent != NULL && pp == NULL && cp == NULL) {
430 sbuf_printf(sb, "%s<labeloffset>%jd</labeloffset>\n",
431 indent, (intmax_t)ms->labeloffset);
432 sbuf_printf(sb, "%s<rawoffset>%jd</rawoffset>\n",
433 indent, (intmax_t)ms->rawoffset);
434 sbuf_printf(sb, "%s<mbroffset>%jd</mbroffset>\n",
435 indent, (intmax_t)ms->mbroffset);
436 } else if (pp != NULL) {
437 if (indent == NULL)
438 sbuf_printf(sb, " ty %d",
439 ms->ondisk.d_partitions[pp->index].p_fstype);
440 else
441 sbuf_printf(sb, "%s<type>%d</type>\n", indent,
442 ms->ondisk.d_partitions[pp->index].p_fstype);
443 }
444 }
445
446 /*
447 * The taste function is called from the event-handler, with the topology
448 * lock already held, a provider to examine and flags selecting the mode.
449 *
450 * If flags == G_TF_NORMAL, the idea is to take a bite of the provider and,
451 * if we find valid, consistent magic on it (that is, any magic bits which
452 * indicate that we should automatically put a BSD geom on it), build a
453 * geom on it.
454 *
455 * There may be cases where the operator would like to put a BSD-geom on
456 * providers which do not meet all of the requirements. This can be done
457 * by instead passing the G_TF_INSIST flag, which will override these
458 * checks.
459 *
460 * The final flags value is G_TF_TRANSPARENT, which instructs the method
461 * to put a geom on top of the provider and configure it to be as transparent
462 * as possible. This is not really relevant to the BSD method and therefore
463 * not implemented here.
464 */
465
466 static struct g_geom *
467 g_bsd_taste(struct g_class *mp, struct g_provider *pp, int flags)
468 {
469 struct g_geom *gp;
470 struct g_consumer *cp;
471 int error, i;
472 struct g_bsd_softc *ms;
473 u_int secsize;
474 struct g_slicer *gsp;
475 u_char hash[16];
476 MD5_CTX md5sum;
477
478 g_trace(G_T_TOPOLOGY, "bsd_taste(%s,%s)", mp->name, pp->name);
479 g_topology_assert();
480
481 /* We don't implement transparent inserts. */
482 if (flags == G_TF_TRANSPARENT)
483 return (NULL);
484
485 /*
486 * BSD labels are a subclass of the general "slicing" topology so
487 * a lot of the work can be done by the common "slice" code.
488 * Create a geom with space for MAXPARTITIONS providers, one consumer
489 * and a softc structure for us. Specify the provider to attach
490 * the consumer to and our "start" routine for special requests.
491 * The provider is opened with mode (1,0,0) so we can do reads
492 * from it.
493 */
494 gp = g_slice_new(mp, MAXPARTITIONS, pp, &cp, &ms,
495 sizeof(*ms), g_bsd_start);
496 if (gp == NULL)
497 return (NULL);
498
499 /* Get the geom_slicer softc from the geom. */
500 gsp = gp->softc;
501
502 /*
503 * The do...while loop here allows us to have multiple escapes
504 * using a simple "break". This improves code clarity without
505 * ending up in deep nesting and without using goto or come from.
506 */
507 do {
508 /*
509 * If the provider is an MBR we will only auto attach
510 * to type 165 slices in the G_TF_NORMAL case. We will
511 * attach to any other type.
512 */
513 error = g_getattr("MBR::type", cp, &i);
514 if (!error) {
515 if (i != 165 && flags == G_TF_NORMAL)
516 break;
517 error = g_getattr("MBR::offset", cp, &ms->mbroffset);
518 if (error)
519 break;
520 }
521
522 /* Same thing if we are inside a PC98 */
523 error = g_getattr("PC98::type", cp, &i);
524 if (!error) {
525 if (i != 0xc494 && flags == G_TF_NORMAL)
526 break;
527 error = g_getattr("PC98::offset", cp, &ms->mbroffset);
528 if (error)
529 break;
530 }
531
532 /* Get sector size, we need it to read data. */
533 secsize = cp->provider->sectorsize;
534 if (secsize < 512)
535 break;
536
537 /* First look for a label at the start of the second sector. */
538 error = g_bsd_try(gp, gsp, cp, secsize, ms, secsize);
539
540 /* Next, look for alpha labels */
541 if (error)
542 error = g_bsd_try(gp, gsp, cp, secsize, ms,
543 ALPHA_LABEL_OFFSET);
544
545 /* If we didn't find a label, punt. */
546 if (error)
547 break;
548
549 /*
550 * In order to avoid recursively attaching to the same
551 * on-disk label (it's usually visible through the 'c'
552 * partition) we calculate an MD5 and ask if other BSD's
553 * below us love that label. If they do, we don't.
554 */
555 MD5Init(&md5sum);
556 MD5Update(&md5sum, ms->label, sizeof(ms->label));
557 MD5Final(ms->labelsum, &md5sum);
558
559 error = g_getattr("BSD::labelsum", cp, &hash);
560 if (!error && !bcmp(ms->labelsum, hash, sizeof(hash)))
561 break;
562
563 /*
564 * Process the found disklabel, and modify our "slice"
565 * instance to match it, if possible.
566 */
567 error = g_bsd_modify(gp, ms->label);
568 } while (0);
569
570 /* Success or failure, we can close our provider now. */
571 g_access(cp, -1, 0, 0);
572
573 /* If we have configured any providers, return the new geom. */
574 if (gsp->nprovider > 0) {
575 g_slice_conf_hot(gp, 0, ms->labeloffset, LABELSIZE,
576 G_SLICE_HOT_ALLOW, G_SLICE_HOT_DENY, G_SLICE_HOT_CALL);
577 gsp->hot = g_bsd_hotwrite;
578 return (gp);
579 }
580 /*
581 * ...else push the "self-destruct" button, by spoiling our own
582 * consumer. This triggers a call to g_slice_spoiled which will
583 * dismantle what was setup.
584 */
585 g_slice_spoiled(cp);
586 return (NULL);
587 }
588
589 struct h0h0 { /* argument bundle passed to g_bsd_callconfig() */
590 struct g_geom *gp; /* geom whose label is being replaced */
591 struct g_bsd_softc *ms; /* that geom's BSD softc; filled in by g_bsd_config() */
592 u_char *label; /* new raw label handed to g_bsd_modify() */
593 int error; /* result stored by g_bsd_callconfig() */
594 };
595
596 static void
597 g_bsd_callconfig(void *arg, int flag)
598 {
599 struct h0h0 *hp;
600
601 hp = arg;
602 hp->error = g_bsd_modify(hp->gp, hp->label);
603 if (!hp->error)
604 hp->error = g_bsd_writelabel(hp->gp, NULL);
605 }
606
607 /*
608 * NB! curthread is user process which GCTL'ed.
609 */
610 static void
611 g_bsd_config(struct gctl_req *req, struct g_class *mp, char const *verb)
612 {
613 u_char *label;
614 int error;
615 struct h0h0 h0h0;
616 struct g_geom *gp;
617 struct g_slicer *gsp;
618 struct g_consumer *cp;
619 struct g_bsd_softc *ms;
620
621 g_topology_assert();
622 gp = gctl_get_geom(req, mp, "geom");
623 if (gp == NULL)
624 return;
625 cp = LIST_FIRST(&gp->consumer);
626 gsp = gp->softc;
627 ms = gsp->softc;
628 if (!strcmp(verb, "read mbroffset")) {
629 gctl_set_param(req, "mbroffset",
630 &ms->mbroffset, sizeof(ms->mbroffset));
631 return;
632 } else if (!strcmp(verb, "write label")) {
633 label = gctl_get_paraml(req, "label", LABELSIZE);
634 if (label == NULL)
635 return;
636 h0h0.gp = gp;
637 h0h0.ms = gsp->softc;
638 h0h0.label = label;
639 h0h0.error = -1;
640 /* XXX: Does this reference register with our selfdestruct code ? */
641 error = g_access(cp, 1, 1, 1);
642 if (error) {
643 gctl_error(req, "could not access consumer");
644 return;
645 }
646 g_bsd_callconfig(&h0h0, 0);
647 error = h0h0.error;
648 g_access(cp, -1, -1, -1);
649 } else if (!strcmp(verb, "write bootcode")) {
650 label = gctl_get_paraml(req, "bootcode", BBSIZE);
651 if (label == NULL)
652 return;
653 /* XXX: Does this reference register with our selfdestruct code ? */
654 error = g_access(cp, 1, 1, 1);
655 if (error) {
656 gctl_error(req, "could not access consumer");
657 return;
658 }
659 error = g_bsd_writelabel(gp, label);
660 g_access(cp, -1, -1, -1);
661 } else {
662 gctl_error(req, "Unknown verb parameter");
663 }
664
665 return;
666 }
667
668 /* Finally, register with GEOM infrastructure. */
669 static struct g_class g_bsd_class = { /* method table describing the BSD class */
670 .name = BSD_CLASS_NAME,
671 .version = G_VERSION,
672 .taste = g_bsd_taste, /* probes a provider for a disklabel */
673 .ctlreq = g_bsd_config, /* "read mbroffset", "write label", "write bootcode" verbs */
674 .dumpconf = g_bsd_dumpconf, /* configuration dump */
675 .ioctl = g_bsd_ioctl, /* DIOCGDINFO, DIOCBSDBB, DIOCSDINFO, DIOCWDINFO */
676 };
677
678 DECLARE_GEOM_CLASS(g_bsd_class, g_bsd);
Cache object: 3edeba414e6761b5d527a28ac4822646
|