1 /*-
2 * Copyright (c) 1997, 1998
3 * Nan Yang Computer Services Limited. All rights reserved.
4 *
5 * This software is distributed under the so-called ``Berkeley
6 * License'':
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by Nan Yang Computer
19 * Services Limited.
20 * 4. Neither the name of the Company nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * This software is provided ``as is'', and any express or implied
25 * warranties, including, but not limited to, the implied warranties of
26 * merchantability and fitness for a particular purpose are disclaimed.
27 * In no event shall the company or contributors be liable for any
28 * direct, indirect, incidental, special, exemplary, or consequential
29 * damages (including, but not limited to, procurement of substitute
30 * goods or services; loss of use, data, or profits; or business
31 * interruption) however caused and on any theory of liability, whether
32 * in contract, strict liability, or tort (including negligence or
33 * otherwise) arising in any way out of the use of this software, even if
34 * advised of the possibility of such damage.
35 *
36 * $Id: vinumio.c,v 1.30 2000/05/10 23:23:30 grog Exp grog $
37 * $FreeBSD$
38 */
39
40 #include <dev/vinum/vinumhdr.h>
41 #include <dev/vinum/request.h>
42 #include <vm/vm_zone.h>
43
/* Helpers private to this file; both are defined further down. */
static int drivecmp(const void *va, const void *vb);
static char *sappend(char *txt, char *s);
46
47 /*
48 * Open the device associated with the drive, and set drive's vp.
49 * Return an error number
50 */
51 int
52 open_drive(struct drive *drive, struct proc *p, int verbose)
53 {
54 int devmajor; /* major devs for disk device */
55 int devminor; /* minor devs for disk device */
56 int unit;
57 char *dname;
58
59 if (bcmp(drive->devicename, "/dev/", 5)) /* device name doesn't start with /dev */
60 return ENOENT; /* give up */
61 if (drive->flags & VF_OPEN) /* open already, */
62 return EBUSY; /* don't do it again */
63
64 /*
65 * Yes, Bruce, I know this is horrible, but we
66 * don't have a root file system when we first
67 * try to do this. If you can come up with a
68 * better solution, I'd really like it. I'm
69 * just putting it in now to add ammuntion to
70 * moving the system to devfs.
71 */
72 dname = &drive->devicename[5];
73 drive->dev = NULL; /* no device yet */
74
75 /* Find the device */
76 if (bcmp(dname, "wd", 2) == 0) /* IDE disk */
77 devmajor = 0;
78 else if (bcmp(dname, "da", 2) == 0)
79 devmajor = 4;
80 else if (bcmp(dname, "vn", 2) == 0)
81 devmajor = 15;
82 else
83 return ENODEV;
84 dname += 2; /* point past */
85
86 /*
87 * Found the device. We can expect one of
88 * two formats for the rest: a unit number,
89 * then either a partition letter for the
90 * compatiblity partition (e.g. h) or a
91 * slice ID and partition (e.g. s2e).
92 * Create a minor number for each of them.
93 */
94 unit = 0;
95 while ((*dname >= '') /* unit number */
96 &&(*dname <= '9')) {
97 unit = unit * 10 + *dname - '';
98 dname++;
99 }
100
101 if (*dname == 's') { /* slice */
102 if (((dname[1] < '1') || (dname[1] > '4')) /* invalid slice */
103 ||((dname[2] < 'a') || (dname[2] > 'h'))) /* or invalid partition */
104 return ENODEV;
105 devminor = (unit << 3) /* unit */
106 +(dname[2] - 'a') /* partition */
107 +((dname[1] - '' + 1) << 16); /* slice */
108 } else { /* compatibility partition */
109 if ((*dname < 'a') || (*dname > 'h')) /* or invalid partition */
110 return ENODEV;
111 devminor = (*dname - 'a') /* partition */
112 +(unit << 3); /* unit */
113 }
114
115 drive->dev = makedev(devmajor, devminor); /* find the device */
116 if (drive->dev == NULL) /* didn't find anything */
117 return ENODEV;
118
119 drive->lasterror = (*bdevsw[major(drive->dev)]->d_open) (drive->dev, FWRITE, 0, NULL);
120
121 if (drive->lasterror != 0) { /* failed */
122 drive->state = drive_down; /* just force it down */
123 if (verbose)
124 log(LOG_WARNING,
125 "vinum open_drive %s: failed with error %d\n",
126 drive->devicename, drive->lasterror);
127 } else
128 drive->flags |= VF_OPEN; /* we're open now */
129
130 return drive->lasterror;
131 }
132
133 /*
134 * Set some variables in the drive struct
135 * in more convenient form. Return error indication
136 */
137 int
138 set_drive_parms(struct drive *drive)
139 {
140 drive->blocksize = BLKDEV_IOSIZE; /* do we need this? */
141 drive->secsperblock = drive->blocksize /* number of sectors per block */
142 / drive->partinfo.disklab->d_secsize;
143
144 /* Now update the label part */
145 bcopy(hostname, drive->label.sysname, VINUMHOSTNAMELEN); /* put in host name */
146 getmicrotime(&drive->label.date_of_birth); /* and current time */
147 drive->label.drive_size = ((u_int64_t) drive->partinfo.part->p_size) /* size of the drive in bytes */
148 *((u_int64_t) drive->partinfo.disklab->d_secsize);
149 #if VINUMDEBUG
150 if (debug & DEBUG_BIGDRIVE) /* pretend we're 100 times as big */
151 drive->label.drive_size *= 100;
152 #endif
153
154 /* number of sectors available for subdisks */
155 drive->sectors_available = drive->label.drive_size / DEV_BSIZE - DATASTART;
156
157 /*
158 * Bug in 3.0 as of January 1998: you can open
159 * non-existent slices. They have a length of 0.
160 */
161 if (drive->label.drive_size < MINVINUMSLICE) { /* too small to worry about */
162 set_drive_state(drive->driveno, drive_down, setstate_force);
163 drive->lasterror = ENOSPC;
164 return ENOSPC;
165 }
166 drive->freelist_size = INITIAL_DRIVE_FREELIST; /* initial number of entries */
167 drive->freelist = (struct drive_freelist *)
168 Malloc(INITIAL_DRIVE_FREELIST * sizeof(struct drive_freelist));
169 if (drive->freelist == NULL) /* can't malloc, dammit */
170 return ENOSPC;
171 drive->freelist_entries = 1; /* just (almost) the complete drive */
172 drive->freelist[0].offset = DATASTART; /* starts here */
173 drive->freelist[0].sectors = (drive->label.drive_size >> DEV_BSHIFT) - DATASTART; /* and it's this long */
174 if (drive->label.name[0] != '\0') /* got a name */
175 set_drive_state(drive->driveno, drive_up, setstate_force); /* our drive is accessible */
176 else /* we know about it, but that's all */
177 drive->state = drive_referenced;
178 return 0;
179 }
180
181 /*
182 * Initialize a drive: open the device and add device
183 * information
184 */
185 int
186 init_drive(struct drive *drive, int verbose)
187 {
188 int error;
189
190 if (drive->devicename[0] != '/') {
191 drive->lasterror = EINVAL;
192 log(LOG_ERR, "vinum: Can't open drive without drive name\n");
193 return EINVAL;
194 }
195 error = open_drive(drive, curproc, verbose); /* open the drive */
196 if (error)
197 return error;
198
199 error = (*bdevsw[major(drive->dev)]->d_ioctl) (drive->dev,
200 DIOCGPART,
201 (caddr_t) & drive->partinfo,
202 FREAD,
203 curproc);
204 if (error) {
205 if (verbose)
206 log(LOG_WARNING,
207 "vinum open_drive %s: Can't get partition information, error %d\n",
208 drive->devicename,
209 error);
210 close_drive(drive);
211 drive->lasterror = error;
212 return error;
213 }
214 if (drive->partinfo.part->p_fstype != FS_VINUM) { /* not Vinum */
215 drive->lasterror = EFTYPE;
216 if (verbose)
217 log(LOG_WARNING,
218 "vinum open_drive %s: Wrong partition type for vinum\n",
219 drive->devicename);
220 close_drive(drive);
221 return EFTYPE;
222 }
223 return set_drive_parms(drive); /* set various odds and ends */
224 }
225
226 /* Close a drive if it's open. */
227 void
228 close_drive(struct drive *drive)
229 {
230 LOCKDRIVE(drive); /* keep the daemon out */
231 if (drive->flags & VF_OPEN)
232 close_locked_drive(drive); /* and close it */
233 if (drive->state > drive_down) /* if it's up */
234 drive->state = drive_down; /* make sure it's down */
235 unlockdrive(drive);
236 }
237
238 /*
239 * Real drive close code, called with drive already locked.
240 * We have also checked that the drive is open. No errors.
241 */
242 void
243 close_locked_drive(struct drive *drive)
244 {
245 /*
246 * If we can't access the drive, we can't flush
247 * the queues, which spec_close() will try to
248 * do. Get rid of them here first.
249 */
250 drive->lasterror = (*bdevsw[major(drive->dev)]->d_close) (drive->dev, 0, 0, NULL);
251 drive->flags &= ~VF_OPEN; /* no longer open */
252 }
253
254 /*
255 * Remove drive from the configuration.
256 * Caller must ensure that it isn't active.
257 */
258 void
259 remove_drive(int driveno)
260 {
261 struct drive *drive = &vinum_conf.drive[driveno];
262 struct vinum_hdr *vhdr; /* buffer for header */
263 int error;
264
265 if (drive->state > drive_referenced) { /* real drive */
266 if (drive->state == drive_up) {
267 vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN); /* allocate buffer */
268 CHECKALLOC(vhdr, "Can't allocate memory");
269 error = read_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
270 if (error)
271 drive->lasterror = error;
272 else {
273 vhdr->magic = VINUM_NOMAGIC; /* obliterate the magic, but leave the rest */
274 write_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
275 }
276 Free(vhdr);
277 }
278 free_drive(drive); /* close it and free resources */
279 save_config(); /* and save the updated configuration */
280 }
281 }
282
283 /*
284 * Transfer drive data. Usually called from one of these defines;
285 * #define read_drive(a, b, c, d) driveio (a, b, c, d, B_READ)
286 * #define write_drive(a, b, c, d) driveio (a, b, c, d, B_WRITE)
287 *
288 * length and offset are in bytes, but must be multiples of sector
289 * size. The function *does not check* for this condition, and
290 * truncates ruthlessly.
291 * Return error number
292 */
293 int
294 driveio(struct drive *drive, char *buf, size_t length, off_t offset, int flag)
295 {
296 int error;
297 struct buf *bp;
298
299 error = 0; /* to keep the compiler happy */
300 while (length) { /* divide into small enough blocks */
301 int len = min(length, MAXBSIZE); /* maximum block device transfer is MAXBSIZE */
302
303 bp = geteblk(len); /* get a buffer header */
304 bp->b_flags = flag | B_BUSY;
305 bp->b_dev = drive->dev; /* device */
306 bp->b_blkno = offset / drive->partinfo.disklab->d_secsize; /* block number */
307 bp->b_saveaddr = bp->b_data;
308 bp->b_data = buf;
309 bp->b_bcount = len;
310 (*bdevsw[major(bp->b_dev)]->d_strategy) (bp);
311 error = biowait(bp);
312 bp->b_data = bp->b_saveaddr;
313 bp->b_flags |= B_INVAL | B_AGE;
314 bp->b_flags &= ~B_ERROR;
315 brelse(bp);
316 if (error)
317 break;
318 length -= len; /* update pointers */
319 buf += len;
320 offset += len;
321 }
322 return error;
323 }
324
325 /*
326 * Check a drive for a vinum header. If found,
327 * update the drive information. We come here
328 * with a partially populated drive structure
329 * which includes the device name.
330 *
331 * Return information on what we found.
332 *
333 * This function is called from two places: check_drive,
334 * which wants to find out whether the drive is a
335 * Vinum drive, and config_drive, which asserts that
336 * it is a vinum drive. In the first case, we don't
337 * print error messages (verbose==0), in the second
338 * we do (verbose==1).
339 */
340 enum drive_label_info
341 read_drive_label(struct drive *drive, int verbose)
342 {
343 int error;
344 int result; /* result of our search */
345 struct vinum_hdr *vhdr; /* and as header */
346
347 error = init_drive(drive, 0); /* find the drive */
348 if (error) /* find the drive */
349 return DL_CANT_OPEN; /* not ours */
350
351 vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN); /* allocate buffers */
352 CHECKALLOC(vhdr, "Can't allocate memory");
353
354 error = read_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
355 if (vhdr->magic == VINUM_MAGIC) { /* ours! */
356 if (drive->label.name[0] /* we have a name for this drive */
357 &&(strcmp(drive->label.name, vhdr->label.name))) { /* but it doesn't match the real name */
358 drive->lasterror = EINVAL;
359 result = DL_WRONG_DRIVE; /* it's the wrong drive */
360 } else {
361 drive->state = drive_up; /* it's OK by us */
362 result = DL_OURS;
363 }
364 /*
365 * We copy the drive anyway so that we have
366 * the correct name in the drive info. This
367 * may not be the name specified
368 */
369 drive->label = vhdr->label; /* put in the label information */
370 } else if (vhdr->magic == VINUM_NOMAGIC) /* was ours, but we gave it away */
371 result = DL_DELETED_LABEL; /* and return the info */
372 else
373 result = DL_NOT_OURS; /* we could have it, but we don't yet */
374 Free(vhdr); /* that's all. */
375 return result;
376 }
377
378 /*
379 * Check a drive for a vinum header. If found,
380 * read configuration information from the drive and
381 * incorporate the data into the configuration.
382 *
383 * Return drive number.
384 */
385 struct drive *
386 check_drive(char *devicename)
387 {
388 int driveno;
389 int i;
390 struct drive *drive;
391
392 driveno = find_drive_by_dev(devicename, 1); /* if entry doesn't exist, create it */
393 drive = &vinum_conf.drive[driveno]; /* and get a pointer */
394
395 if (read_drive_label(drive, 0) == DL_OURS) { /* one of ours */
396 for (i = 0; i < vinum_conf.drives_allocated; i++) { /* see if the name already exists */
397 if ((i != driveno) /* not this drive */
398 &&(DRIVE[i].state != drive_unallocated) /* and it's allocated */
399 &&(strcmp(DRIVE[i].label.name,
400 DRIVE[driveno].label.name) == 0)) { /* and it has the same name */
401 struct drive *mydrive = &DRIVE[i];
402
403 if (mydrive->devicename[0] == '/') { /* we know a device name for it */
404 /*
405 * set an error, but don't take the
406 * drive down: that would cause unneeded
407 * error messages.
408 */
409 drive->lasterror = EEXIST;
410 break;
411 } else { /* it's just a place holder, */
412 int sdno;
413
414 for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) { /* look at each subdisk */
415 if ((SD[sdno].driveno == i) /* it's pointing to this one, */
416 &&(SD[sdno].state != sd_unallocated)) { /* and it's a real subdisk */
417 SD[sdno].driveno = drive->driveno; /* point to the one we found */
418 update_sd_state(sdno); /* and update its state */
419 }
420 }
421 bzero(mydrive, sizeof(struct drive)); /* don't deallocate it, just remove it */
422 }
423 }
424 }
425 } else {
426 if (drive->lasterror == 0)
427 drive->lasterror = ENODEV;
428 close_drive(drive);
429 drive->state = drive_down;
430 }
431 return drive;
432 }
433
/*
 * Copy the string txt, including its terminating NUL, to s.
 * Return a pointer to the NUL just written, so that the result
 * can be used as the destination of the next append.
 */
static char *
sappend(char *txt, char *s)
{
    for (;;) {
	*s = *txt;
	if (*s == '\0')
	    return s;					    /* point at the terminator */
	s++;
	txt++;
    }
}
440
/* Kludge: kernel printf doesn't handle quads correctly XXX */
static char *lltoa(long long l, char *s);

/*
 * Write the decimal representation of l to s, with a leading
 * '-' for negative values.  No terminating NUL is written.
 * At most 20 characters are stored.
 *
 * Return a pointer to the character after the last one written.
 */
static char *
lltoa(long long l, char *s)
{
    char digits[20];					    /* 2^63 has 19 decimal digits */
    unsigned long long magnitude;
    int ndigits = 0;

    if (l < 0) {
	*s++ = '-';
	/* negate as unsigned: -l overflows for the most negative value */
	magnitude = 0ULL - (unsigned long long) l;
    } else
	magnitude = (unsigned long long) l;

    do {						    /* collect digits, least significant first */
	digits[ndigits++] = (char) ('0' + (int) (magnitude % 10));
	magnitude /= 10;
    } while (magnitude != 0);

    while (ndigits > 0)					    /* and store them the right way round */
	*s++ = digits[--ndigits];
    return s;
}
458 /*
459 * Format the configuration in text form into the buffer
460 * at config. Don't go beyond len bytes
461 * XXX this stinks. Fix soon.
462 */
463 void
464 format_config(char *config, int len)
465 {
466 int i;
467 int j;
468 char *s = config;
469
470 bzero(config, len);
471
472 /* First, the volume configuration */
473 for (i = 0; i < vinum_conf.volumes_allocated; i++) {
474 struct volume *vol;
475
476 vol = &vinum_conf.volume[i];
477 if ((vol->state > volume_uninit)
478 && (vol->name[0] != '\0')) { /* paranoia */
479 if (vol->preferred_plex >= 0) /* preferences, */
480 sprintf(s,
481 "volume %s state %s readpol prefer %s",
482 vol->name,
483 volume_state(vol->state),
484 vinum_conf.plex[vol->preferred_plex].name);
485 else /* default round-robin */
486 sprintf(s,
487 "volume %s state %s",
488 vol->name,
489 volume_state(vol->state));
490 while (*s)
491 s++; /* find the end */
492 s = sappend("\n", s);
493 if (s > &config[len - 80]) {
494 log(LOG_ERR, "vinum: configuration data overflow\n");
495 return;
496 }
497 }
498 }
499
500 /* Then the plex configuration */
501 for (i = 0; i < vinum_conf.plexes_allocated; i++) {
502 struct plex *plex;
503
504 plex = &vinum_conf.plex[i];
505 if ((plex->state != plex_referenced)
506 && (plex->name[0] != '\0')) { /* paranoia */
507 sprintf(s, "plex name %s state %s org %s ",
508 plex->name,
509 plex_state(plex->state),
510 plex_org(plex->organization));
511 while (*s)
512 s++; /* find the end */
513 if ((plex->organization == plex_striped)
514 || (plex->organization == plex_raid5)) {
515 sprintf(s, "%db ", (int) plex->stripesize);
516 while (*s)
517 s++; /* find the end */
518 }
519 if (plex->volno >= 0) /* we have a volume */
520 sprintf(s, "vol %s ", vinum_conf.volume[plex->volno].name);
521 while (*s)
522 s++; /* find the end */
523 for (j = 0; j < plex->subdisks; j++) {
524 sprintf(s, " sd %s", vinum_conf.sd[plex->sdnos[j]].name);
525 }
526 s = sappend("\n", s);
527 if (s > &config[len - 80]) {
528 log(LOG_ERR, "vinum: configuration data overflow\n");
529 return;
530 }
531 }
532 }
533
534 /* And finally the subdisk configuration */
535 for (i = 0; i < vinum_conf.subdisks_allocated; i++) {
536 struct sd *sd;
537
538 sd = &SD[i];
539 if ((sd->state != sd_referenced)
540 && (sd->name[0] != '\0')) { /* paranoia */
541 sprintf(s,
542 "sd name %s drive %s plex %s state %s len ",
543 sd->name,
544 vinum_conf.drive[sd->driveno].label.name,
545 vinum_conf.plex[sd->plexno].name,
546 sd_state(sd->state));
547 while (*s)
548 s++; /* find the end */
549 s = lltoa(sd->sectors, s);
550 s = sappend("b driveoffset ", s);
551 s = lltoa(sd->driveoffset, s);
552 s = sappend("b plexoffset ", s);
553 s = lltoa(sd->plexoffset, s);
554 s = sappend("b\n", s);
555 if (s > &config[len - 80]) {
556 log(LOG_ERR, "vinum: configuration data overflow\n");
557 return;
558 }
559 }
560 }
561 }
562
563 /*
564 * issue a save config request to the dæmon. The actual work
565 * is done in process context by daemon_save_config
566 */
567 void
568 save_config(void)
569 {
570 queue_daemon_request(daemonrq_saveconfig, (union daemoninfo) NULL);
571 }
572
573 /*
574 * Write the configuration to all vinum slices. This
575 * is performed by the dæmon only
576 */
577 void
578 daemon_save_config(void)
579 {
580 int error;
581 int written_config; /* set when we first write the config to disk */
582 int driveno;
583 struct drive *drive; /* point to current drive info */
584 struct vinum_hdr *vhdr; /* and as header */
585 char *config; /* point to config data */
586 int wlabel_on; /* to set writing label on/off */
587
588 /* don't save the configuration while we're still working on it */
589 if (vinum_conf.flags & VF_CONFIGURING)
590 return;
591 written_config = 0; /* no config written yet */
592 /* Build a volume header */
593 vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN); /* get space for the config data */
594 CHECKALLOC(vhdr, "Can't allocate config data");
595 vhdr->magic = VINUM_MAGIC; /* magic number */
596 vhdr->config_length = MAXCONFIG; /* length of following config info */
597
598 config = Malloc(MAXCONFIG); /* get space for the config data */
599 CHECKALLOC(config, "Can't allocate config data");
600
601 format_config(config, MAXCONFIG);
602 error = 0; /* no errors yet */
603 for (driveno = 0; driveno < vinum_conf.drives_allocated; driveno++) {
604 drive = &vinum_conf.drive[driveno]; /* point to drive */
605 if (drive->state > drive_referenced) {
606 LOCKDRIVE(drive); /* don't let it change */
607
608 /*
609 * First, do some drive consistency checks. Some
610 * of these are kludges, others require a process
611 * context and couldn't be done before
612 */
613 if ((drive->devicename[0] == '\0')
614 || (drive->label.name[0] == '\0')) {
615 unlockdrive(drive);
616 free_drive(drive); /* get rid of it */
617 break;
618 }
619 if (((drive->flags & VF_OPEN) == 0) /* drive not open */
620 &&(drive->state > drive_down)) { /* and it thinks it's not down */
621 unlockdrive(drive);
622 set_drive_state(driveno, drive_down, setstate_force); /* tell it what's what */
623 continue;
624 }
625 if ((drive->state == drive_down) /* it's down */
626 &&(drive->flags & VF_OPEN)) { /* but open, */
627 unlockdrive(drive);
628 close_drive(drive); /* close it */
629 } else if (drive->state > drive_down) {
630 getmicrotime(&drive->label.last_update); /* time of last update is now */
631 bcopy((char *) &drive->label, /* and the label info from the drive structure */
632 (char *) &vhdr->label,
633 sizeof(vhdr->label));
634 if ((drive->state != drive_unallocated)
635 && (drive->state != drive_referenced)) { /* and it's a real drive */
636 wlabel_on = 1; /* enable writing the label */
637 error = (*bdevsw[major(drive->dev)]->d_ioctl) (drive->dev, /* make the label writeable */
638 DIOCWLABEL,
639 (caddr_t) & wlabel_on,
640 FWRITE,
641 curproc);
642 if (error == 0)
643 error = write_drive(drive, (char *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
644 if (error == 0)
645 error = write_drive(drive, config, MAXCONFIG, VINUM_CONFIG_OFFSET); /* first config copy */
646 if (error == 0)
647 error = write_drive(drive, config, MAXCONFIG, VINUM_CONFIG_OFFSET + MAXCONFIG); /* second copy */
648 wlabel_on = 0; /* enable writing the label */
649 if (error == 0)
650 /* make the label non-writeable again */
651 error = (*bdevsw[major(drive->dev)]->d_ioctl) (drive->dev,
652 DIOCWLABEL,
653 (caddr_t) & wlabel_on,
654 FWRITE,
655 curproc);
656 unlockdrive(drive);
657 if (error) {
658 log(LOG_ERR,
659 "vinum: Can't write config to %s, error %d\n",
660 drive->devicename,
661 error);
662 set_drive_state(drive->driveno, drive_down, setstate_force);
663 } else
664 written_config = 1; /* we've written it on at least one drive */
665 }
666 } else /* not worth looking at, */
667 unlockdrive(drive); /* just unlock it again */
668 }
669 }
670 Free(vhdr);
671 Free(config);
672 }
673
674 /*
675 * Disk labels are a mess. The correct way to
676 * access them is with the DIOC[GSW]DINFO ioctls,
677 * but some programs, such as newfs, access the
678 * disk directly, so we have to write things
679 * there. We do this only on request. If a user
680 * request tries to read it directly, we fake up
681 * one on the fly.
682 */
683
684 /*
685 * get_volume_label returns a label structure to lp, which
686 * is allocated by the caller
687 */
688 void
689 get_volume_label(char *name, int plexes, u_int64_t size, struct disklabel *lp)
690 {
691 bzero(lp, sizeof(struct disklabel));
692
693 strncpy(lp->d_typename, "vinum", sizeof(lp->d_typename));
694 lp->d_type = DTYPE_VINUM;
695 strncpy(lp->d_packname, name, min(sizeof(lp->d_packname), sizeof(name)));
696 lp->d_rpm = 14400 * plexes; /* to keep them guessing */
697 lp->d_interleave = 1;
698 lp->d_flags = 0;
699
700 /*
701 * A Vinum volume has a single track with all
702 * its sectors.
703 */
704 lp->d_secsize = DEV_BSIZE; /* bytes per sector */
705 lp->d_nsectors = size; /* data sectors per track */
706 lp->d_ntracks = 1; /* tracks per cylinder */
707 lp->d_ncylinders = 1; /* data cylinders per unit */
708 lp->d_secpercyl = size; /* data sectors per cylinder */
709 lp->d_secperunit = size; /* data sectors per unit */
710
711 lp->d_bbsize = BBSIZE;
712 lp->d_sbsize = SBSIZE;
713
714 lp->d_magic = DISKMAGIC;
715 lp->d_magic2 = DISKMAGIC;
716
717 /*
718 * Set up partitions a, b and c to be identical
719 * and the size of the volume. a is UFS, b is
720 * swap, c is nothing.
721 */
722 lp->d_partitions[0].p_size = size;
723 lp->d_partitions[0].p_fsize = 1024;
724 lp->d_partitions[0].p_fstype = FS_BSDFFS; /* FreeBSD File System :-) */
725 lp->d_partitions[0].p_fsize = 1024; /* FS fragment size */
726 lp->d_partitions[0].p_frag = 8; /* and fragments per block */
727 lp->d_partitions[SWAP_PART].p_size = size;
728 lp->d_partitions[SWAP_PART].p_fstype = FS_SWAP; /* swap partition */
729 lp->d_partitions[LABEL_PART].p_size = size;
730 lp->d_npartitions = LABEL_PART + 1;
731 strncpy(lp->d_packname, name, min(sizeof(lp->d_packname), sizeof(name)));
732 lp->d_checksum = dkcksum(lp);
733 }
734
735 /* Write a volume label. This implements the VINUM_LABEL ioctl. */
736 int
737 write_volume_label(int volno)
738 {
739 struct disklabel *lp;
740 struct buf *bp;
741 struct disklabel *dlp;
742 struct volume *vol;
743 int error;
744
745 lp = (struct disklabel *) Malloc((sizeof(struct disklabel) + (DEV_BSIZE - 1)) & (DEV_BSIZE - 1));
746 if (lp == 0)
747 return ENOMEM;
748
749 if ((unsigned) (volno) >= (unsigned) vinum_conf.volumes_allocated) /* invalid volume */
750 return ENOENT;
751
752 vol = &VOL[volno]; /* volume in question */
753 if (vol->state <= volume_uninit) /* nothing there */
754 return ENXIO;
755 else if (vol->state < volume_up) /* not accessible */
756 return EIO; /* I/O error */
757
758 get_volume_label(vol->name, vol->plexes, vol->size, lp); /* get the label */
759
760 /*
761 * Now write to disk. This code is derived from the
762 * system writedisklabel (), which does silly things
763 * like reading the label and refusing to write
764 * unless it's already there.
765 */
766 bp = geteblk((int) lp->d_secsize); /* get a buffer */
767 bp->b_dev = makedev(VINUM_CDEV_MAJOR, vol->volno); /* our own raw volume */
768 bp->b_blkno = LABELSECTOR * ((int) lp->d_secsize / DEV_BSIZE);
769 bp->b_bcount = lp->d_secsize;
770 bzero(bp->b_data, lp->d_secsize);
771 dlp = (struct disklabel *) bp->b_data;
772 *dlp = *lp;
773 bp->b_flags &= ~B_INVAL;
774 bp->b_flags |= B_BUSY | B_WRITE;
775 vinumstrategy(bp);
776 error = biowait(bp);
777 bp->b_flags |= B_INVAL | B_AGE;
778 bp->b_flags &= ~B_ERROR;
779
780 brelse(bp);
781 return error;
782 }
783
784 /* Look at all disks on the system for vinum slices */
785 int
786 vinum_scandisk(char *devicename[], int drives)
787 {
788 struct drive *volatile drive;
789 volatile int driveno;
790 int firstdrive; /* first drive in this list */
791 volatile int gooddrives; /* number of usable drives found */
792 int firsttime; /* set if we have never configured before */
793 int error;
794 char *config_text; /* read the config info from disk into here */
795 char *volatile cptr; /* pointer into config information */
796 char *eptr; /* end pointer into config information */
797 char *config_line; /* copy the config line to */
798 volatile int status;
799 int *volatile drivelist; /* list of drive indices */
800 #define DRIVENAMELEN 64
801 #define DRIVEPARTS 35 /* max partitions per drive, excluding c */
802 char partname[DRIVENAMELEN]; /* for creating partition names */
803
804 status = 0; /* success indication */
805 vinum_conf.flags |= VF_READING_CONFIG; /* reading config from disk */
806
807 gooddrives = 0; /* number of usable drives found */
808 firstdrive = vinum_conf.drives_used; /* the first drive */
809 firsttime = vinum_conf.drives_used == 0; /* are we a virgin? */
810
811 /* allocate a drive pointer list */
812 drivelist = (int *) Malloc(drives * DRIVEPARTS * sizeof(int));
813 CHECKALLOC(drivelist, "Can't allocate memory");
814
815 /* Open all drives and find which was modified most recently */
816 for (driveno = 0; driveno < drives; driveno++) {
817 char part; /* UNIX partition */
818 int slice;
819 int founddrive; /* flag when we find a vinum drive */
820
821 founddrive = 0; /* no vinum drive found yet on this spindle */
822 /* first try the partition table */
823 for (slice = 1; slice < 5; slice++)
824 for (part = 'a'; part < 'i'; part++) {
825 if (part != 'c') { /* don't do the c partition */
826 snprintf(partname,
827 DRIVENAMELEN,
828 "%ss%d%c",
829 devicename[driveno],
830 slice,
831 part);
832 drive = check_drive(partname); /* try to open it */
833 if (drive->lasterror != 0) /* didn't work, */
834 free_drive(drive); /* get rid of it */
835 else if (drive->flags & VF_CONFIGURED) /* already read this config, */
836 log(LOG_WARNING,
837 "vinum: already read config from %s\n", /* say so */
838 drive->label.name);
839 else {
840 drivelist[gooddrives] = drive->driveno; /* keep the drive index */
841 drive->flags &= ~VF_NEWBORN; /* which is no longer newly born */
842 gooddrives++;
843 founddrive++;
844 }
845 }
846 }
847 if (founddrive == 0) { /* didn't find anything, */
848 for (part = 'a'; part < 'i'; part++) /* try the compatibility partition */
849 if (part != 'c') { /* don't do the c partition */
850 snprintf(partname, /* /dev/sd0a */
851 DRIVENAMELEN,
852 "%s%c",
853 devicename[driveno],
854 part);
855 drive = check_drive(partname); /* try to open it */
856 if ((drive->lasterror != 0) /* didn't work, */
857 ||(drive->state != drive_up))
858 free_drive(drive); /* get rid of it */
859 else if (drive->flags & VF_CONFIGURED) /* already read this config, */
860 log(LOG_WARNING,
861 "vinum: already read config from %s\n", /* say so */
862 drive->label.name);
863 else {
864 drivelist[gooddrives] = drive->driveno; /* keep the drive index */
865 drive->flags &= ~VF_NEWBORN; /* which is no longer newly born */
866 gooddrives++;
867 }
868 }
869 }
870 }
871
872 if (gooddrives == 0) {
873 log(LOG_WARNING, "vinum: no drives found\n");
874 return ENOENT;
875 }
876 /*
877 * We now have at least one drive
878 * open. Sort them in order of config time
879 * and merge the config info with what we
880 * have already.
881 */
882 qsort(drivelist, gooddrives, sizeof(int), drivecmp);
883 config_text = (char *) Malloc(MAXCONFIG * 2); /* allocate buffers */
884 CHECKALLOC(config_text, "Can't allocate memory");
885 config_line = (char *) Malloc(MAXCONFIGLINE * 2); /* allocate buffers */
886 CHECKALLOC(config_line, "Can't allocate memory");
887 for (driveno = 0; driveno < gooddrives; driveno++) { /* now include the config */
888 drive = &DRIVE[drivelist[driveno]]; /* point to the drive */
889
890 if (firsttime && (driveno == 0)) /* we've never configured before, */
891 log(LOG_INFO, "vinum: reading configuration from %s\n", drive->devicename);
892 else
893 log(LOG_INFO, "vinum: updating configuration from %s\n", drive->devicename);
894
895 if (drive->state == drive_up)
896 /* Read in both copies of the configuration information */
897 error = read_drive(drive, config_text, MAXCONFIG * 2, VINUM_CONFIG_OFFSET);
898 else {
899 error = EIO;
900 printf("vinum_scandisk: %s is %s\n", drive->devicename, drive_state(drive->state));
901 }
902
903 if (error != 0) {
904 log(LOG_ERR, "vinum: Can't read device %s, error %d\n", drive->devicename, error);
905 free_drive(drive); /* give it back */
906 status = error;
907 }
908 /*
909 * At this point, check that the two copies
910 * are the same, and do something useful if
911 * not. In particular, consider which is
912 * newer, and what this means for the
913 * integrity of the data on the drive.
914 */
915 else {
916 vinum_conf.drives_used++; /* another drive in use */
917 /* Parse the configuration, and add it to the global configuration */
918 for (cptr = config_text; *cptr != '\0';) { /* love this style(9) */
919 volatile int parse_status; /* return value from parse_config */
920
921 for (eptr = config_line; (*cptr != '\n') && (*cptr != '\0');) /* until the end of the line */
922 *eptr++ = *cptr++;
923 *eptr = '\0'; /* and delimit */
924 if (setjmp(command_fail) == 0) { /* come back here on error and continue */
925 parse_status = parse_config(config_line, &keyword_set, 1); /* parse the config line */
926 if (parse_status < 0) { /* error in config */
927 /*
928 * This config should have been parsed in user
929 * space. If we run into problems here, something
930 * serious is afoot. Complain and let the user
931 * snarf the config to see what's wrong.
932 */
933 log(LOG_ERR,
934 "vinum: Config error on %s, aborting integration\n",
935 drive->devicename);
936 free_drive(drive); /* give it back */
937 status = EINVAL;
938 }
939 }
940 while (*cptr == '\n')
941 cptr++; /* skip to next line */
942 }
943 }
944 drive->flags |= VF_CONFIGURED; /* read this drive's configuration */
945 }
946
947 Free(config_text);
948 Free(drivelist);
949 vinum_conf.flags &= ~VF_READING_CONFIG; /* no longer reading from disk */
950 if (status != 0)
951 printf("vinum: couldn't read configuration");
952 else
953 updateconfig(VF_READING_CONFIG); /* update from disk config */
954 return status;
955 }
956
957 /*
958 * Compare the modification dates of the drives, for qsort.
959 * Return 1 if a < b, 0 if a == b, 01 if a > b: in other
960 * words, sort backwards.
961 */
962 int
963 drivecmp(const void *va, const void *vb)
964 {
965 const struct drive *a = &DRIVE[*(const int *) va];
966 const struct drive *b = &DRIVE[*(const int *) vb];
967
968 if ((a->label.last_update.tv_sec == b->label.last_update.tv_sec)
969 && (a->label.last_update.tv_usec == b->label.last_update.tv_usec))
970 return 0;
971 else if ((a->label.last_update.tv_sec > b->label.last_update.tv_sec)
972 || ((a->label.last_update.tv_sec == b->label.last_update.tv_sec)
973 && (a->label.last_update.tv_usec > b->label.last_update.tv_usec)))
974 return -1;
975 else
976 return 1;
977 }
978 /* Local Variables: */
979 /* fill-column: 50 */
980 /* End: */
/* Cache object: e9354d8f21a1308cc44a643e0fff81c4 */