1 /*-
2 * Copyright (c) 1997, 1998
3 * Nan Yang Computer Services Limited. All rights reserved.
4 *
5 * This software is distributed under the so-called ``Berkeley
6 * License'':
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by Nan Yang Computer
19 * Services Limited.
20 * 4. Neither the name of the Company nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * This software is provided ``as is'', and any express or implied
25 * warranties, including, but not limited to, the implied warranties of
26 * merchantability and fitness for a particular purpose are disclaimed.
27 * In no event shall the company or contributors be liable for any
28 * direct, indirect, incidental, special, exemplary, or consequential
29 * damages (including, but not limited to, procurement of substitute
30 * goods or services; loss of use, data, or profits; or business
31 * interruption) however caused and on any theory of liability, whether
32 * in contract, strict liability, or tort (including negligence or
33 * otherwise) arising in any way out of the use of this software, even if
34 * advised of the possibility of such damage.
35 *
36 * $Id: vinumio.c,v 1.30 2000/05/10 23:23:30 grog Exp grog $
37 * $FreeBSD$
38 */
39
40 #include <dev/vinum/vinumhdr.h>
41 #include <dev/vinum/request.h>
42 #include <vm/vm_zone.h>
43
44 static char *sappend(char *txt, char *s);
45 static int drivecmp(const void *va, const void *vb);
46
47 /*
48 * Open the device associated with the drive, and set drive's vp.
49 * Return an error number
50 */
51 int
52 open_drive(struct drive *drive, struct proc *p, int verbose)
53 {
54 int devmajor; /* major devs for disk device */
55 int devminor; /* minor devs for disk device */
56 int unit;
57 char *dname;
58 struct cdevsw *dsw; /* pointer to cdevsw entry */
59
60 if (bcmp(drive->devicename, "/dev/", 5)) /* device name doesn't start with /dev */
61 return ENOENT; /* give up */
62 if (drive->flags & VF_OPEN) /* open already, */
63 return EBUSY; /* don't do it again */
64
65 /*
66 * Yes, Bruce, I know this is horrible, but we
67 * don't have a root file system when we first
68 * try to do this. If you can come up with a
69 * better solution, I'd really like it. I'm
70 * just putting it in now to add ammuntion to
71 * moving the system to devfs.
72 */
73 dname = &drive->devicename[5];
74 drive->dev = NULL; /* no device yet */
75
76 /* Find the device */
77 if (bcmp(dname, "ad", 2) == 0) /* IDE disk */
78 devmajor = 116;
79 else if (bcmp(dname, "wd", 2) == 0) /* IDE disk */
80 devmajor = 3;
81 else if (bcmp(dname, "da", 2) == 0)
82 devmajor = 13;
83 else if (bcmp(dname, "vn", 2) == 0)
84 devmajor = 43;
85 else if (bcmp(dname, "md", 2) == 0)
86 devmajor = 95;
87 else if (bcmp(dname, "amrd", 4) == 0) {
88 devmajor = 133;
89 dname += 2;
90 } else if (bcmp(dname, "mlxd", 4) == 0) {
91 devmajor = 131;
92 dname += 2;
93 } else if (bcmp(dname, "idad", 4) == 0) {
94 devmajor = 109;
95 dname += 2;
96 } else if (bcmp(dname, "twed", 4) == 0) { /* 3ware raid */
97 devmajor = 147;
98 dname += 2;
99 } else
100 return ENODEV;
101 dname += 2; /* point past */
102
103 /*
104 * Found the device. We can expect one of
105 * two formats for the rest: a unit number,
106 * then either a partition letter for the
107 * compatiblity partition (e.g. h) or a
108 * slice ID and partition (e.g. s2e).
109 * Create a minor number for each of them.
110 */
111 unit = 0;
112 while ((*dname >= '') /* unit number */
113 &&(*dname <= '9')) {
114 unit = unit * 10 + *dname - '';
115 dname++;
116 }
117
118 if (*dname == 's') { /* slice */
119 if (((dname[1] < '1') || (dname[1] > '4')) /* invalid slice */
120 ||((dname[2] < 'a') || (dname[2] > 'h'))) /* or invalid partition */
121 return ENODEV;
122 devminor = ((unit & 31) << 3) /* unit */
123 +(dname[2] - 'a') /* partition */
124 +((dname[1] - '' + 1) << 16) /* slice */
125 +((unit & ~31) << 16); /* high-order unit bits */
126 } else { /* compatibility partition */
127 if ((*dname < 'a') || (*dname > 'h')) /* or invalid partition */
128 return ENODEV;
129 devminor = (*dname - 'a') /* partition */
130 +((unit & 31) << 3) /* unit */
131 +((unit & ~31) << 16); /* high-order unit bits */
132 }
133
134 if ((devminor & 7) == 2) /* partition c */
135 return ENOTTY; /* not buying that */
136
137 drive->dev = makedev(devmajor, devminor); /* find the device */
138 if (drive->dev == NULL) /* didn't find anything */
139 return ENODEV;
140
141 drive->dev->si_iosize_max = DFLTPHYS;
142 dsw = devsw(drive->dev);
143 if (dsw == NULL)
144 drive->lasterror = ENOENT;
145 else
146 drive->lasterror = (dsw->d_open) (drive->dev, FWRITE, 0, NULL);
147
148 if (drive->lasterror != 0) { /* failed */
149 drive->state = drive_down; /* just force it down */
150 if (verbose)
151 log(LOG_WARNING,
152 "vinum open_drive %s: failed with error %d\n",
153 drive->devicename, drive->lasterror);
154 } else
155 drive->flags |= VF_OPEN; /* we're open now */
156
157 return drive->lasterror;
158 }
159
160 /*
161 * Set some variables in the drive struct
162 * in more convenient form. Return error indication
163 */
164 int
165 set_drive_parms(struct drive *drive)
166 {
167 drive->blocksize = BLKDEV_IOSIZE; /* do we need this? */
168 drive->secsperblock = drive->blocksize /* number of sectors per block */
169 / drive->partinfo.disklab->d_secsize;
170
171 /* Now update the label part */
172 bcopy(hostname, drive->label.sysname, VINUMHOSTNAMELEN); /* put in host name */
173 getmicrotime(&drive->label.date_of_birth); /* and current time */
174 drive->label.drive_size = ((u_int64_t) drive->partinfo.part->p_size) /* size of the drive in bytes */
175 *((u_int64_t) drive->partinfo.disklab->d_secsize);
176 #if VINUMDEBUG
177 if (debug & DEBUG_BIGDRIVE) /* pretend we're 100 times as big */
178 drive->label.drive_size *= 100;
179 #endif
180
181 /* number of sectors available for subdisks */
182 drive->sectors_available = drive->label.drive_size / DEV_BSIZE - DATASTART;
183
184 /*
185 * Bug in 3.0 as of January 1998: you can open
186 * non-existent slices. They have a length of 0.
187 */
188 if (drive->label.drive_size < MINVINUMSLICE) { /* too small to worry about */
189 set_drive_state(drive->driveno, drive_down, setstate_force);
190 drive->lasterror = ENOSPC;
191 return ENOSPC;
192 }
193 drive->freelist_size = INITIAL_DRIVE_FREELIST; /* initial number of entries */
194 drive->freelist = (struct drive_freelist *)
195 Malloc(INITIAL_DRIVE_FREELIST * sizeof(struct drive_freelist));
196 if (drive->freelist == NULL) /* can't malloc, dammit */
197 return ENOSPC;
198 drive->freelist_entries = 1; /* just (almost) the complete drive */
199 drive->freelist[0].offset = DATASTART; /* starts here */
200 drive->freelist[0].sectors = (drive->label.drive_size >> DEV_BSHIFT) - DATASTART; /* and it's this long */
201 if (drive->label.name[0] != '\0') /* got a name */
202 set_drive_state(drive->driveno, drive_up, setstate_force); /* our drive is accessible */
203 else /* we know about it, but that's all */
204 drive->state = drive_referenced;
205 return 0;
206 }
207
208 /*
209 * Initialize a drive: open the device and add device
210 * information
211 */
212 int
213 init_drive(struct drive *drive, int verbose)
214 {
215 if (drive->devicename[0] != '/') {
216 drive->lasterror = EINVAL;
217 log(LOG_ERR, "vinum: Can't open drive without drive name\n");
218 return EINVAL;
219 }
220 drive->lasterror = open_drive(drive, curproc, verbose); /* open the drive */
221 if (drive->lasterror)
222 return drive->lasterror;
223
224 drive->lasterror = (*devsw(drive->dev)->d_ioctl) (drive->dev,
225 DIOCGPART,
226 (caddr_t) & drive->partinfo,
227 FREAD,
228 curproc);
229 if (drive->lasterror) {
230 if (verbose)
231 log(LOG_WARNING,
232 "vinum open_drive %s: Can't get partition information, drive->lasterror %d\n",
233 drive->devicename,
234 drive->lasterror);
235 close_drive(drive);
236 return drive->lasterror;
237 }
238 if (drive->partinfo.part->p_fstype != FS_VINUM) { /* not Vinum */
239 drive->lasterror = EFTYPE;
240 if (verbose)
241 log(LOG_WARNING,
242 "vinum open_drive %s: Wrong partition type for vinum\n",
243 drive->devicename);
244 close_drive(drive);
245 return EFTYPE;
246 }
247 return set_drive_parms(drive); /* set various odds and ends */
248 }
249
250 /* Close a drive if it's open. */
251 void
252 close_drive(struct drive *drive)
253 {
254 LOCKDRIVE(drive); /* keep the daemon out */
255 if (drive->flags & VF_OPEN)
256 close_locked_drive(drive); /* and close it */
257 if (drive->state > drive_down) /* if it's up */
258 drive->state = drive_down; /* make sure it's down */
259 unlockdrive(drive);
260 }
261
262 /*
263 * Real drive close code, called with drive already locked.
264 * We have also checked that the drive is open. No errors.
265 */
266 void
267 close_locked_drive(struct drive *drive)
268 {
269 /*
270 * If we can't access the drive, we can't flush
271 * the queues, which spec_close() will try to
272 * do. Get rid of them here first.
273 */
274 drive->lasterror = (*devsw(drive->dev)->d_close) (drive->dev, 0, 0, NULL);
275 drive->flags &= ~VF_OPEN; /* no longer open */
276 }
277
278 /*
279 * Remove drive from the configuration.
280 * Caller must ensure that it isn't active.
281 */
282 void
283 remove_drive(int driveno)
284 {
285 struct drive *drive = &vinum_conf.drive[driveno];
286 struct vinum_hdr *vhdr; /* buffer for header */
287 int error;
288
289 if (drive->state > drive_referenced) { /* real drive */
290 if (drive->state == drive_up) {
291 vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN); /* allocate buffer */
292 CHECKALLOC(vhdr, "Can't allocate memory");
293 error = read_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
294 if (error)
295 drive->lasterror = error;
296 else {
297 vhdr->magic = VINUM_NOMAGIC; /* obliterate the magic, but leave the rest */
298 write_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
299 }
300 Free(vhdr);
301 }
302 free_drive(drive); /* close it and free resources */
303 save_config(); /* and save the updated configuration */
304 }
305 }
306
307 /*
308 * Transfer drive data. Usually called from one of these defines;
309 * #define read_drive(a, b, c, d) driveio (a, b, c, d, B_READ)
310 * #define write_drive(a, b, c, d) driveio (a, b, c, d, B_WRITE)
311 *
312 * length and offset are in bytes, but must be multiples of sector
313 * size. The function *does not check* for this condition, and
314 * truncates ruthlessly.
315 * Return error number
316 */
317 int
318 driveio(struct drive *drive, char *buf, size_t length, off_t offset, int flag)
319 {
320 int error;
321 struct buf *bp;
322
323 error = 0; /* to keep the compiler happy */
324 while (length) { /* divide into small enough blocks */
325 int len = min(length, MAXBSIZE); /* maximum block device transfer is MAXBSIZE */
326
327 bp = geteblk(len); /* get a buffer header */
328 bp->b_flags = flag;
329 bp->b_dev = drive->dev; /* device */
330 bp->b_blkno = offset / drive->partinfo.disklab->d_secsize; /* block number */
331 bp->b_saveaddr = bp->b_data;
332 bp->b_data = buf;
333 bp->b_bcount = len;
334 BUF_STRATEGY(bp, 0); /* initiate the transfer */
335 error = biowait(bp);
336 bp->b_data = bp->b_saveaddr;
337 bp->b_flags |= B_INVAL | B_AGE;
338 bp->b_flags &= ~B_ERROR;
339 brelse(bp);
340 if (error)
341 break;
342 length -= len; /* update pointers */
343 buf += len;
344 offset += len;
345 }
346 return error;
347 }
348
349 /*
350 * Check a drive for a vinum header. If found,
351 * update the drive information. We come here
352 * with a partially populated drive structure
353 * which includes the device name.
354 *
355 * Return information on what we found.
356 *
357 * This function is called from two places: check_drive,
358 * which wants to find out whether the drive is a
359 * Vinum drive, and config_drive, which asserts that
360 * it is a vinum drive. In the first case, we don't
361 * print error messages (verbose==0), in the second
362 * we do (verbose==1).
363 */
364 enum drive_label_info
365 read_drive_label(struct drive *drive, int verbose)
366 {
367 int error;
368 int result; /* result of our search */
369 struct vinum_hdr *vhdr; /* and as header */
370
371 error = init_drive(drive, 0); /* find the drive */
372 if (error) /* find the drive */
373 return DL_CANT_OPEN; /* not ours */
374
375 vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN); /* allocate buffers */
376 CHECKALLOC(vhdr, "Can't allocate memory");
377
378 drive->state = drive_up; /* be optimistic */
379 error = read_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
380 if (vhdr->magic == VINUM_MAGIC) { /* ours! */
381 if (drive->label.name[0] /* we have a name for this drive */
382 &&(strcmp(drive->label.name, vhdr->label.name))) { /* but it doesn't match the real name */
383 drive->lasterror = EINVAL;
384 result = DL_WRONG_DRIVE; /* it's the wrong drive */
385 drive->state = drive_unallocated; /* put it back, it's not ours */
386 } else
387 result = DL_OURS;
388 /*
389 * We copy the drive anyway so that we have
390 * the correct name in the drive info. This
391 * may not be the name specified
392 */
393 drive->label = vhdr->label; /* put in the label information */
394 } else if (vhdr->magic == VINUM_NOMAGIC) /* was ours, but we gave it away */
395 result = DL_DELETED_LABEL; /* and return the info */
396 else
397 result = DL_NOT_OURS; /* we could have it, but we don't yet */
398 Free(vhdr); /* that's all. */
399 return result;
400 }
401
402 /*
403 * Check a drive for a vinum header. If found,
404 * read configuration information from the drive and
405 * incorporate the data into the configuration.
406 *
407 * Return drive number.
408 */
409 struct drive *
410 check_drive(char *devicename)
411 {
412 int driveno;
413 int i;
414 struct drive *drive;
415
416 driveno = find_drive_by_dev(devicename, 1); /* if entry doesn't exist, create it */
417 drive = &vinum_conf.drive[driveno]; /* and get a pointer */
418
419 if (read_drive_label(drive, 0) == DL_OURS) { /* one of ours */
420 for (i = 0; i < vinum_conf.drives_allocated; i++) { /* see if the name already exists */
421 if ((i != driveno) /* not this drive */
422 &&(DRIVE[i].state != drive_unallocated) /* and it's allocated */
423 &&(strcmp(DRIVE[i].label.name,
424 DRIVE[driveno].label.name) == 0)) { /* and it has the same name */
425 struct drive *mydrive = &DRIVE[i];
426
427 if (mydrive->devicename[0] == '/') { /* we know a device name for it */
428 /*
429 * set an error, but don't take the
430 * drive down: that would cause unneeded
431 * error messages.
432 */
433 drive->lasterror = EEXIST;
434 break;
435 } else { /* it's just a place holder, */
436 int sdno;
437
438 for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) { /* look at each subdisk */
439 if ((SD[sdno].driveno == i) /* it's pointing to this one, */
440 &&(SD[sdno].state != sd_unallocated)) { /* and it's a real subdisk */
441 SD[sdno].driveno = drive->driveno; /* point to the one we found */
442 update_sd_state(sdno); /* and update its state */
443 }
444 }
445 bzero(mydrive, sizeof(struct drive)); /* don't deallocate it, just remove it */
446 }
447 }
448 }
449 } else {
450 if (drive->lasterror == 0)
451 drive->lasterror = ENODEV;
452 close_drive(drive);
453 drive->state = drive_down;
454 }
455 return drive;
456 }
457
/*
 * Copy the NUL-terminated string txt to s, including the
 * terminator, and return a pointer to the terminator just
 * written, so that a further call can append at that point.
 * No bounds are checked.
 */
static char *
sappend(char *txt, char *s)
{
    for (;;) {
        char c = *txt++;

        *s = c;
        if (c == '\0')
            return s;
        s++;
    }
}
464
465 void
466 format_config(char *config, int len)
467 {
468 int i;
469 int j;
470 char *s = config;
471 char *configend = &config[len];
472
473 bzero(config, len);
474
475 /* First write the volume configuration */
476 for (i = 0; i < vinum_conf.volumes_allocated; i++) {
477 struct volume *vol;
478
479 vol = &vinum_conf.volume[i];
480 if ((vol->state > volume_uninit)
481 && (vol->name[0] != '\0')) { /* paranoia */
482 snprintf(s,
483 configend - s,
484 "volume %s state %s",
485 vol->name,
486 volume_state(vol->state));
487 while (*s)
488 s++; /* find the end */
489 if (vol->preferred_plex >= 0) /* preferences, */
490 snprintf(s,
491 configend - s,
492 " readpol prefer %s",
493 vinum_conf.plex[vol->preferred_plex].name);
494 while (*s)
495 s++; /* find the end */
496 s = sappend("\n", s);
497 }
498 }
499
500 /* Then the plex configuration */
501 for (i = 0; i < vinum_conf.plexes_allocated; i++) {
502 struct plex *plex;
503
504 plex = &vinum_conf.plex[i];
505 if ((plex->state > plex_referenced)
506 && (plex->name[0] != '\0')) { /* paranoia */
507 snprintf(s,
508 configend - s,
509 "plex name %s state %s org %s ",
510 plex->name,
511 plex_state(plex->state),
512 plex_org(plex->organization));
513 while (*s)
514 s++; /* find the end */
515 if (isstriped(plex)) {
516 snprintf(s,
517 configend - s,
518 "%ds ",
519 (int) plex->stripesize);
520 while (*s)
521 s++; /* find the end */
522 }
523 if (plex->volno >= 0) /* we have a volume */
524 snprintf(s,
525 configend - s,
526 "vol %s ",
527 vinum_conf.volume[plex->volno].name);
528 while (*s)
529 s++; /* find the end */
530 for (j = 0; j < plex->subdisks; j++) {
531 snprintf(s,
532 configend - s,
533 " sd %s",
534 vinum_conf.sd[plex->sdnos[j]].name);
535 }
536 s = sappend("\n", s);
537 }
538 }
539
540 /* And finally the subdisk configuration */
541 for (i = 0; i < vinum_conf.subdisks_allocated; i++) {
542 struct sd *sd;
543 char *drivename;
544
545 sd = &SD[i];
546 if ((sd->state != sd_referenced)
547 && (sd->state != sd_unallocated)
548 && (sd->name[0] != '\0')) { /* paranoia */
549 drivename = vinum_conf.drive[sd->driveno].label.name;
550 /*
551 * XXX We've seen cases of dead subdisks
552 * which don't have a drive. If we let them
553 * through here, the drive name is null, so
554 * they get the drive named 'plex'.
555 *
556 * This is a breakage limiter, not a fix.
557 */
558 if (drivename[0] == '\0')
559 drivename = "*invalid*";
560 snprintf(s,
561 configend - s,
562 "sd name %s drive %s len %llus driveoffset %llus state %s",
563 sd->name,
564 drivename,
565 (unsigned long long) sd->sectors,
566 (unsigned long long) sd->driveoffset,
567 sd_state(sd->state));
568 while (*s)
569 s++; /* find the end */
570 if (sd->plexno >= 0)
571 snprintf(s,
572 configend - s,
573 " plex %s plexoffset %llds",
574 vinum_conf.plex[sd->plexno].name,
575 (long long) sd->plexoffset);
576 else
577 snprintf(s, configend - s, " detached");
578 while (*s)
579 s++; /* find the end */
580 if (sd->flags & VF_RETRYERRORS) {
581 snprintf(s, configend - s, " retryerrors");
582 while (*s)
583 s++; /* find the end */
584 }
585 snprintf(s, configend - s, " \n");
586 while (*s)
587 s++; /* find the end */
588 }
589 }
590 if (s > &config[len - 2])
591 panic("vinum: configuration data overflow");
592 }
593
594 /*
595 * issue a save config request to the dæmon. The actual work
596 * is done in process context by daemon_save_config
597 */
598 void
599 save_config(void)
600 {
601 queue_daemon_request(daemonrq_saveconfig, (union daemoninfo) NULL);
602 }
603
604 /*
605 * Write the configuration to all vinum slices. This
606 * is performed by the dæmon only
607 */
608 void
609 daemon_save_config(void)
610 {
611 int error;
612 int written_config; /* set when we first write the config to disk */
613 int driveno;
614 struct drive *drive; /* point to current drive info */
615 struct vinum_hdr *vhdr; /* and as header */
616 char *config; /* point to config data */
617 int wlabel_on; /* to set writing label on/off */
618
619 /* don't save the configuration while we're still working on it */
620 if (vinum_conf.flags & VF_CONFIGURING)
621 return;
622 written_config = 0; /* no config written yet */
623 /* Build a volume header */
624 vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN); /* get space for the config data */
625 CHECKALLOC(vhdr, "Can't allocate config data");
626 vhdr->magic = VINUM_MAGIC; /* magic number */
627 vhdr->config_length = MAXCONFIG; /* length of following config info */
628
629 config = Malloc(MAXCONFIG); /* get space for the config data */
630 CHECKALLOC(config, "Can't allocate config data");
631
632 format_config(config, MAXCONFIG);
633 error = 0; /* no errors yet */
634 for (driveno = 0; driveno < vinum_conf.drives_allocated; driveno++) {
635 drive = &vinum_conf.drive[driveno]; /* point to drive */
636 if (drive->state > drive_referenced) {
637 LOCKDRIVE(drive); /* don't let it change */
638
639 /*
640 * First, do some drive consistency checks. Some
641 * of these are kludges, others require a process
642 * context and couldn't be done before
643 */
644 if ((drive->devicename[0] == '\0')
645 || (drive->label.name[0] == '\0')) {
646 unlockdrive(drive);
647 free_drive(drive); /* get rid of it */
648 break;
649 }
650 if (((drive->flags & VF_OPEN) == 0) /* drive not open */
651 &&(drive->state > drive_down)) { /* and it thinks it's not down */
652 unlockdrive(drive);
653 set_drive_state(driveno, drive_down, setstate_force); /* tell it what's what */
654 continue;
655 }
656 if ((drive->state == drive_down) /* it's down */
657 &&(drive->flags & VF_OPEN)) { /* but open, */
658 unlockdrive(drive);
659 close_drive(drive); /* close it */
660 } else if (drive->state > drive_down) {
661 getmicrotime(&drive->label.last_update); /* time of last update is now */
662 bcopy((char *) &drive->label, /* and the label info from the drive structure */
663 (char *) &vhdr->label,
664 sizeof(vhdr->label));
665 if ((drive->state != drive_unallocated)
666 && (drive->state != drive_referenced)) { /* and it's a real drive */
667 wlabel_on = 1; /* enable writing the label */
668 error = (*devsw(drive->dev)->d_ioctl) (drive->dev, /* make the label writeable */
669 DIOCWLABEL,
670 (caddr_t) & wlabel_on,
671 FWRITE,
672 curproc);
673 if (error == 0)
674 error = write_drive(drive, (char *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
675 if (error == 0)
676 error = write_drive(drive, config, MAXCONFIG, VINUM_CONFIG_OFFSET); /* first config copy */
677 if (error == 0)
678 error = write_drive(drive, config, MAXCONFIG, VINUM_CONFIG_OFFSET + MAXCONFIG); /* second copy */
679 wlabel_on = 0; /* enable writing the label */
680 if (error == 0)
681 error = (*devsw(drive->dev)->d_ioctl) (drive->dev, /* make the label non-writeable again */
682 DIOCWLABEL,
683 (caddr_t) & wlabel_on,
684 FWRITE,
685 curproc);
686 unlockdrive(drive);
687 if (error) {
688 log(LOG_ERR,
689 "vinum: Can't write config to %s, error %d\n",
690 drive->devicename,
691 error);
692 set_drive_state(drive->driveno, drive_down, setstate_force);
693 } else
694 written_config = 1; /* we've written it on at least one drive */
695 }
696 } else /* not worth looking at, */
697 unlockdrive(drive); /* just unlock it again */
698 }
699 }
700 Free(vhdr);
701 Free(config);
702 }
703
704 /*
705 * Disk labels are a mess. The correct way to
706 * access them is with the DIOC[GSW]DINFO ioctls,
707 * but some programs, such as newfs, access the
708 * disk directly, so we have to write things
709 * there. We do this only on request. If a user
710 * request tries to read it directly, we fake up
711 * one on the fly.
712 */
713
714 /*
715 * get_volume_label returns a label structure to lp, which
716 * is allocated by the caller
717 */
718 void
719 get_volume_label(char *name, int plexes, u_int64_t size, struct disklabel *lp)
720 {
721 bzero(lp, sizeof(struct disklabel));
722
723 strncpy(lp->d_typename, "vinum", sizeof(lp->d_typename));
724 lp->d_type = DTYPE_VINUM;
725 strncpy(lp->d_packname, name, min(sizeof(lp->d_packname), sizeof(name)));
726 lp->d_rpm = 14400 * plexes; /* to keep them guessing */
727 lp->d_interleave = 1;
728 lp->d_flags = 0;
729
730 /*
731 * A Vinum volume has a single track with all
732 * its sectors.
733 */
734 lp->d_secsize = DEV_BSIZE; /* bytes per sector */
735 lp->d_nsectors = size; /* data sectors per track */
736 lp->d_ntracks = 1; /* tracks per cylinder */
737 lp->d_ncylinders = 1; /* data cylinders per unit */
738 lp->d_secpercyl = size; /* data sectors per cylinder */
739 lp->d_secperunit = size; /* data sectors per unit */
740
741 lp->d_bbsize = BBSIZE;
742 lp->d_sbsize = SBSIZE;
743
744 lp->d_magic = DISKMAGIC;
745 lp->d_magic2 = DISKMAGIC;
746
747 /*
748 * Set up partitions a, b and c to be identical
749 * and the size of the volume. a is UFS, b is
750 * swap, c is nothing.
751 */
752 lp->d_partitions[0].p_size = size;
753 lp->d_partitions[0].p_fstype = FS_BSDFFS; /* FreeBSD File System :-) */
754 lp->d_partitions[0].p_fsize = 2048; /* FS fragment size */
755 lp->d_partitions[0].p_frag = 8; /* and fragments per block */
756 lp->d_partitions[SWAP_PART].p_size = size;
757 lp->d_partitions[SWAP_PART].p_fstype = FS_SWAP; /* swap partition */
758 lp->d_partitions[LABEL_PART].p_size = size;
759 lp->d_npartitions = LABEL_PART + 1;
760 strncpy(lp->d_packname, name, min(sizeof(lp->d_packname), sizeof(name)));
761 lp->d_checksum = dkcksum(lp);
762 }
763
764 /* Write a volume label. This implements the VINUM_LABEL ioctl. */
765 int
766 write_volume_label(int volno)
767 {
768 struct disklabel *lp;
769 struct buf *bp;
770 struct disklabel *dlp;
771 struct volume *vol;
772 int error;
773
774 lp = (struct disklabel *) Malloc((sizeof(struct disklabel) + (DEV_BSIZE - 1)) & (DEV_BSIZE - 1));
775 if (lp == 0)
776 return ENOMEM;
777
778 if ((unsigned) (volno) >= (unsigned) vinum_conf.volumes_allocated) /* invalid volume */
779 return ENOENT;
780
781 vol = &VOL[volno]; /* volume in question */
782 if (vol->state <= volume_uninit) /* nothing there */
783 return ENXIO;
784 else if (vol->state < volume_up) /* not accessible */
785 return EIO; /* I/O error */
786
787 get_volume_label(vol->name, vol->plexes, vol->size, lp); /* get the label */
788
789 /*
790 * Now write to disk. This code is derived from the
791 * system writedisklabel (), which does silly things
792 * like reading the label and refusing to write
793 * unless it's already there.
794 */
795 bp = geteblk((int) lp->d_secsize); /* get a buffer */
796 bp->b_dev = makedev(VINUM_CDEV_MAJOR, vol->volno); /* our own raw volume */
797 bp->b_blkno = LABELSECTOR * ((int) lp->d_secsize / DEV_BSIZE);
798 bp->b_bcount = lp->d_secsize;
799 bzero(bp->b_data, lp->d_secsize);
800 dlp = (struct disklabel *) bp->b_data;
801 *dlp = *lp;
802 bp->b_flags &= ~B_INVAL;
803 bp->b_flags |= B_WRITE;
804
805 /*
806 * This should read:
807 *
808 * vinumstrategy (bp);
809 *
810 * Negotiate with phk to get it fixed.
811 */
812 BUF_STRATEGY(bp, 0);
813 error = biowait(bp);
814 bp->b_flags |= B_INVAL | B_AGE;
815 bp->b_flags &= ~B_ERROR;
816
817 brelse(bp);
818 return error;
819 }
820
821 /* Look at all disks on the system for vinum slices */
822 int
823 vinum_scandisk(char *devicename[], int drives)
824 {
825 struct drive *volatile drive;
826 volatile int driveno;
827 int firstdrive; /* first drive in this list */
828 volatile int gooddrives; /* number of usable drives found */
829 int firsttime; /* set if we have never configured before */
830 int error;
831 char *config_text; /* read the config info from disk into here */
832 char *volatile cptr; /* pointer into config information */
833 char *eptr; /* end pointer into config information */
834 char *config_line; /* copy the config line to */
835 volatile int status;
836 int *volatile drivelist; /* list of drive indices */
837 #define DRIVENAMELEN 64
838 #define DRIVEPARTS 35 /* max partitions per drive, excluding c */
839 char partname[DRIVENAMELEN]; /* for creating partition names */
840
841 status = 0; /* success indication */
842 vinum_conf.flags |= VF_READING_CONFIG; /* reading config from disk */
843
844 gooddrives = 0; /* number of usable drives found */
845 firstdrive = vinum_conf.drives_used; /* the first drive */
846 firsttime = vinum_conf.drives_used == 0; /* are we a virgin? */
847
848 /* allocate a drive pointer list */
849 drivelist = (int *) Malloc(drives * DRIVEPARTS * sizeof(int));
850 CHECKALLOC(drivelist, "Can't allocate memory");
851
852 /* Open all drives and find which was modified most recently */
853 for (driveno = 0; driveno < drives; driveno++) {
854 char part; /* UNIX partition */
855 int slice;
856 int founddrive; /* flag when we find a vinum drive */
857
858 founddrive = 0; /* no vinum drive found yet on this spindle */
859 /* first try the partition table */
860 for (slice = 1; slice < 5; slice++)
861 for (part = 'a'; part < 'i'; part++) {
862 if (part != 'c') { /* don't do the c partition */
863 snprintf(partname,
864 DRIVENAMELEN,
865 "%ss%d%c",
866 devicename[driveno],
867 slice,
868 part);
869 drive = check_drive(partname); /* try to open it */
870 if ((drive->lasterror != 0) /* didn't work, */
871 ||(drive->state != drive_up))
872 free_drive(drive); /* get rid of it */
873 else if (drive->flags & VF_CONFIGURED) /* already read this config, */
874 log(LOG_WARNING,
875 "vinum: already read config from %s\n", /* say so */
876 drive->label.name);
877 else {
878 drivelist[gooddrives] = drive->driveno; /* keep the drive index */
879 drive->flags &= ~VF_NEWBORN; /* which is no longer newly born */
880 gooddrives++;
881 founddrive++;
882 }
883 }
884 }
885 if (founddrive == 0) { /* didn't find anything, */
886 for (part = 'a'; part < 'i'; part++) /* try the compatibility partition */
887 if (part != 'c') { /* don't do the c partition */
888 snprintf(partname, /* /dev/sd0a */
889 DRIVENAMELEN,
890 "%s%c",
891 devicename[driveno],
892 part);
893 drive = check_drive(partname); /* try to open it */
894 if ((drive->lasterror != 0) /* didn't work, */
895 ||(drive->state != drive_up))
896 free_drive(drive); /* get rid of it */
897 else if (drive->flags & VF_CONFIGURED) /* already read this config, */
898 log(LOG_WARNING,
899 "vinum: already read config from %s\n", /* say so */
900 drive->label.name);
901 else {
902 drivelist[gooddrives] = drive->driveno; /* keep the drive index */
903 drive->flags &= ~VF_NEWBORN; /* which is no longer newly born */
904 gooddrives++;
905 }
906 }
907 }
908 }
909
910 if (gooddrives == 0) {
911 if (firsttime)
912 log(LOG_WARNING, "vinum: no drives found\n");
913 else
914 log(LOG_INFO, "vinum: no additional drives found\n");
915 return ENOENT;
916 }
917 /*
918 * We now have at least one drive
919 * open. Sort them in order of config time
920 * and merge the config info with what we
921 * have already.
922 */
923 qsort(drivelist, gooddrives, sizeof(int), drivecmp);
924 config_text = (char *) Malloc(MAXCONFIG * 2); /* allocate buffers */
925 CHECKALLOC(config_text, "Can't allocate memory");
926 config_line = (char *) Malloc(MAXCONFIGLINE * 2); /* allocate buffers */
927 CHECKALLOC(config_line, "Can't allocate memory");
928 for (driveno = 0; driveno < gooddrives; driveno++) { /* now include the config */
929 drive = &DRIVE[drivelist[driveno]]; /* point to the drive */
930
931 if (firsttime && (driveno == 0)) /* we've never configured before, */
932 log(LOG_INFO, "vinum: reading configuration from %s\n", drive->devicename);
933 else
934 log(LOG_INFO, "vinum: updating configuration from %s\n", drive->devicename);
935
936 if (drive->state == drive_up)
937 /* Read in both copies of the configuration information */
938 error = read_drive(drive, config_text, MAXCONFIG * 2, VINUM_CONFIG_OFFSET);
939 else {
940 error = EIO;
941 printf("vinum_scandisk: %s is %s\n", drive->devicename, drive_state(drive->state));
942 }
943
944 if (error != 0) {
945 log(LOG_ERR, "vinum: Can't read device %s, error %d\n", drive->devicename, error);
946 free_drive(drive); /* give it back */
947 status = error;
948 }
949 /*
950 * At this point, check that the two copies
951 * are the same, and do something useful if
952 * not. In particular, consider which is
953 * newer, and what this means for the
954 * integrity of the data on the drive.
955 */
956 else {
957 vinum_conf.drives_used++; /* another drive in use */
958 /* Parse the configuration, and add it to the global configuration */
959 for (cptr = config_text; *cptr != '\0';) { /* love this style(9) */
960 volatile int parse_status; /* return value from parse_config */
961
962 for (eptr = config_line; (*cptr != '\n') && (*cptr != '\0');) /* until the end of the line */
963 *eptr++ = *cptr++;
964 *eptr = '\0'; /* and delimit */
965 if (setjmp(command_fail) == 0) { /* come back here on error and continue */
966 parse_status = parse_config(config_line, &keyword_set, 1); /* parse the config line */
967 if (parse_status < 0) { /* error in config */
968 /*
969 * This config should have been parsed in user
970 * space. If we run into problems here, something
971 * serious is afoot. Complain and let the user
972 * snarf the config to see what's wrong.
973 */
974 log(LOG_ERR,
975 "vinum: Config error on %s, aborting integration\n",
976 drive->devicename);
977 free_drive(drive); /* give it back */
978 status = EINVAL;
979 }
980 }
981 while (*cptr == '\n')
982 cptr++; /* skip to next line */
983 }
984 }
985 drive->flags |= VF_CONFIGURED; /* read this drive's configuration */
986 }
987
988 Free(config_line);
989 Free(config_text);
990 Free(drivelist);
991 vinum_conf.flags &= ~VF_READING_CONFIG; /* no longer reading from disk */
992 if (status != 0)
993 printf("vinum: couldn't read configuration");
994 else
995 updateconfig(VF_READING_CONFIG); /* update from disk config */
996 return status;
997 }
998
999 /*
1000 * Compare the modification dates of the drives, for qsort.
1001 * Return 1 if a < b, 0 if a == b, 01 if a > b: in other
1002 * words, sort backwards.
1003 */
1004 int
1005 drivecmp(const void *va, const void *vb)
1006 {
1007 const struct drive *a = &DRIVE[*(const int *) va];
1008 const struct drive *b = &DRIVE[*(const int *) vb];
1009
1010 if ((a->label.last_update.tv_sec == b->label.last_update.tv_sec)
1011 && (a->label.last_update.tv_usec == b->label.last_update.tv_usec))
1012 return 0;
1013 else if ((a->label.last_update.tv_sec > b->label.last_update.tv_sec)
1014 || ((a->label.last_update.tv_sec == b->label.last_update.tv_sec)
1015 && (a->label.last_update.tv_usec > b->label.last_update.tv_usec)))
1016 return -1;
1017 else
1018 return 1;
1019 }
1020 /* Local Variables: */
1021 /* fill-column: 50 */
1022 /* End: */
Cache object: aee5eb3bf590077dc3204d022dc8463b
|