FreeBSD/Linux Kernel Cross Reference
sys/port/devfs.c
1 /*
2 * File system devices.
3 * Follows device config in Ken's file server.
4 * Builds mirrors, concatenations, interleavings, and partitions
5 * of devices out of other (inner) devices.
6 */
7
8 #include "u.h"
9 #include "../port/lib.h"
10 #include "mem.h"
11 #include "dat.h"
12 #include "fns.h"
13 #include "io.h"
14 #include "ureg.h"
15 #include "../port/error.h"
16
/* kinds of fs device; also the command indices used in configs[] */
enum {
	Fmirror	= 0,		/* mirror of others */
	Fcat	= 1,		/* catenation of others */
	Finter	= 2,		/* interleaving of others */
	Fpart	= 3,		/* part of other */
	Fclear	= 4,		/* start over */
};

enum {
	Blksize	= 8*1024,	/* for Finter only */
};

/* qid paths; device i is Qfirst+i */
enum {
	Qtop	= 0,		/* top dir (contains "fs") */
	Qdir	= 1,		/* actual dir */
	Qctl	= 2,		/* ctl file */
	Qfirst	= 3,		/* first fs file */
};

/* direction argument of io, catio and interio */
enum {
	Iswrite	= 0,
	Isread	= 1,
};

/* second argument of path2dev */
enum {
	Optional	= 0,
	Mustexist	= 1,
};

/* tunable parameters */
enum {
	Maxconf		= 4*1024,	/* max length for config */
	Ndevs		= 32,		/* max. inner devs per command */
	Nfsdevs		= 128,		/* max. created devs, total */
	Maxretries	= 3,		/* max. retries of i/o errors */
	Retrypause	= 5000,		/* ms. to pause between retries */
};

/* first line of the textual configuration */
#define Cfgstr "fsdev:\n"
46
47 typedef struct Inner Inner;
48 struct Inner
49 {
50 char *iname; /* inner device name */
51 vlong isize; /* size of inner device */
52 Chan *idev; /* inner device */
53 };
54
55 typedef struct Fsdev Fsdev;
56 struct Fsdev
57 {
58 int type;
59 char *name; /* name for this fsdev */
60 vlong size; /* min(inner[X].isize) */
61 vlong start; /* start address (for Fpart) */
62 int ndevs; /* number of inner devices */
63 Inner inner[Ndevs];
64 };
65
66 extern Dev fsdevtab; /* forward */
67
68 /*
69 * Once configured, a fsdev is never removed. The name of those
70 * configured is never nil. We have no locks here.
71 */
72 static Fsdev fsdev[Nfsdevs]; /* internal representation of config */
73 static char confstr[Maxconf]; /* textual configuration */
74
75 static Qid tqid = {Qtop, 0, QTDIR};
76 static Qid dqid = {Qdir, 0, QTDIR};
77 static Qid cqid = {Qctl, 0, 0};
78
79 static Cmdtab configs[] = {
80 Fmirror,"mirror", 0,
81 Fcat, "cat", 0,
82 Finter, "inter", 0,
83 Fpart, "part", 5,
84 Fclear, "clear", 1,
85 };
86
87 static Fsdev*
88 path2dev(int i, int mustexist)
89 {
90 if (i < 0 || i >= nelem(fsdev))
91 error("bug: bad index in devfsdev");
92 if (mustexist && fsdev[i].name == nil)
93 error(Enonexist);
94
95 if (fsdev[i].name == nil)
96 return nil;
97 else
98 return &fsdev[i];
99 }
100
101 static Fsdev*
102 devalloc(void)
103 {
104 int i;
105
106 for (i = 0; i < nelem(fsdev); i++)
107 if (fsdev[i].name == nil)
108 break;
109 if (i == nelem(fsdev))
110 error(Enodev);
111
112 return &fsdev[i];
113 }
114
115 static void
116 setdsize(Fsdev* mp)
117 {
118 int i;
119 long l;
120 vlong inlen;
121 uchar buf[128]; /* old DIRLEN plus a little should be plenty */
122 Dir d;
123 Inner *in;
124
125 if (mp->type != Fpart){
126 mp->start = 0;
127 mp->size = 0;
128 }
129 for (i = 0; i < mp->ndevs; i++){
130 in = &mp->inner[i];
131 l = devtab[in->idev->type]->stat(in->idev, buf, sizeof buf);
132 convM2D(buf, l, &d, nil);
133 inlen = d.length;
134 in->isize = inlen;
135 switch(mp->type){
136 case Finter:
137 /* truncate to multiple of Blksize */
138 inlen &= ~(Blksize-1);
139 in->isize = inlen;
140 /* fall through */
141 case Fmirror:
142 /* use size of smallest inner device */
143 if (mp->size == 0 || mp->size > inlen)
144 mp->size = inlen;
145 break;
146 case Fcat:
147 mp->size += inlen;
148 break;
149 case Fpart:
150 /* should raise errors here? */
151 if (mp->start > inlen) {
152 print("#k/%s: partition start truncated from "
153 "%lld to %lld bytes\n", mp->name,
154 mp->start, inlen);
155 mp->start = inlen; /* empty partition */
156 }
157 /* truncate partition to keep it within inner device */
158 if (inlen < mp->start + mp->size) {
159 print("#k/%s: partition truncated from "
160 "%lld to %lld bytes\n", mp->name,
161 mp->size, inlen - mp->start);
162 mp->size = inlen - mp->start;
163 }
164 break;
165 }
166 }
167 if(mp->type == Finter)
168 mp->size *= mp->ndevs;
169 }
170
171 static void
172 mpshut(Fsdev *mp)
173 {
174 int i;
175 char *nm;
176
177 nm = mp->name;
178 mp->name = nil; /* prevent others from using this. */
179 if (nm)
180 free(nm);
181 for (i = 0; i < mp->ndevs; i++){
182 if (mp->inner[i].idev != nil)
183 cclose(mp->inner[i].idev);
184 if (mp->inner[i].iname)
185 free(mp->inner[i].iname);
186 }
187 memset(mp, 0, sizeof *mp);
188 }
189
190
191 /*
192 * process a single line of configuration,
193 * often of the form "cmd newname idev0 idev1".
194 */
195 static void
196 mconfig(char* a, long n)
197 {
198 int i;
199 vlong size, start;
200 char *c, *oldc;
201 Cmdbuf *cb;
202 Cmdtab *ct;
203 Fsdev *mp;
204 Inner *inprv;
205 static QLock lck;
206
207 /* ignore comments & empty lines */
208 if (*a == '\0' || *a == '#' || *a == '\n')
209 return;
210
211 size = 0;
212 start = 0;
213 /* insert header if config is empty */
214 if (confstr[0] == 0)
215 seprint(confstr, confstr + sizeof confstr, Cfgstr);
216 mp = nil;
217 cb = nil;
218 oldc = confstr + strlen(confstr);
219
220 qlock(&lck);
221 if (waserror()){
222 *oldc = 0;
223 if (mp != nil)
224 mpshut(mp);
225 qunlock(&lck);
226 if (cb)
227 free(cb);
228 nexterror();
229 }
230
231 /* append this command after parsing to confstr */
232 cb = parsecmd(a, n);
233 c = oldc;
234 for (i = 0; i < cb->nf; i++)
235 c = seprint(c, confstr + sizeof confstr - 1, "%s ", cb->f[i]);
236 if (c > oldc) {
237 c[-1] = '\n';
238 c[0] = '\0';
239 }
240
241 /* lookup command, execute special cases */
242 ct = lookupcmd(cb, configs, nelem(configs));
243 cb->f++; /* skip command */
244 cb->nf--;
245 if (cb->nf < 0) /* nothing to see here, move along */
246 ct->index = -1;
247 switch (ct->index) {
248 case Fpart:
249 if (cb->nf < 4)
250 error("too few fields in fs config");
251 start = strtoll(cb->f[2], nil, 10);
252 size = strtoll(cb->f[3], nil, 10);
253 cb->nf -= 2;
254 break;
255 case Fclear:
256 /* clear both internal & textual representations of config */
257 for (mp = fsdev; mp < fsdev + nelem(fsdev); mp++)
258 mpshut(mp);
259 *confstr = '\0';
260 /* FALL THROUGH */
261 case -1:
262 poperror();
263 qunlock(&lck);
264 free(cb);
265 return;
266 }
267 if (cb->nf < 2)
268 error("too few fields in fs config");
269 else if (cb->nf - 1 > Ndevs)
270 error("too many devices; fix #k: increase Ndevs");
271
272 /* reject new name if already in use, validate old ones */
273 for (i = 0; i < nelem(fsdev); i++)
274 if (fsdev[i].name != nil && strcmp(fsdev[i].name, cb->f[0])==0)
275 error(Eexist);
276 for (i = 0; i < cb->nf; i++)
277 validname(cb->f[i], (i != 0));
278
279 /* populate new Fsdev with parsed command */
280 mp = devalloc();
281 mp->type = ct->index;
282 if (mp->type == Fpart){
283 mp->start = start;
284 mp->size = size;
285 }
286 kstrdup(&mp->name, cb->f[0]);
287 if (waserror()){
288 mpshut(mp);
289 nexterror();
290 }
291 for (i = 1; i < cb->nf; i++){
292 inprv = &mp->inner[i-1];
293 kstrdup(&inprv->iname, cb->f[i]);
294 inprv->idev = namec(inprv->iname, Aopen, ORDWR, 0);
295 if (inprv->idev == nil)
296 error(Egreg);
297 mp->ndevs++;
298 }
299 poperror();
300 setdsize(mp);
301
302 poperror();
303 qunlock(&lck);
304 free(cb);
305 }
306
307 static void
308 rdconf(void)
309 {
310 int mustrd;
311 char *c, *e, *p, *s;
312 Chan *cc;
313 static int configed;
314
315 /* only read config file once */
316 if (configed)
317 return;
318 configed = 1;
319
320 /* identify the config file */
321 s = getconf("fsconfig");
322 if (s == nil){
323 mustrd = 0;
324 s = "/dev/sdC0/fscfg";
325 } else
326 mustrd = 1;
327
328 /* read it */
329 cc = nil;
330 c = nil;
331 if (waserror()){
332 if (cc != nil)
333 cclose(cc);
334 if (c)
335 free(c);
336 if (!mustrd)
337 return;
338 nexterror();
339 }
340 cc = namec(s, Aopen, OREAD, 0);
341 devtab[cc->type]->read(cc, confstr, sizeof confstr, 0);
342 cclose(cc);
343 cc = nil;
344
345 /* validate, copy and erase config; mconfig will repopulate confstr */
346 if (strncmp(confstr, Cfgstr, strlen(Cfgstr)) != 0)
347 error("bad #k config, first line must be: 'fsdev:\\n'");
348 kstrdup(&c, confstr + strlen(Cfgstr));
349 memset(confstr, 0, sizeof confstr);
350
351 /* process config copy one line at a time */
352 for (p = c; p != nil && *p != '\0'; p = e){
353 e = strchr(p, '\n');
354 if (e == nil)
355 e = p + strlen(p);
356 else
357 e++;
358 mconfig(p, e - p);
359 }
360 USED(cc); /* until now, can be used in waserror clause */
361 poperror();
362 }
363
364
365 static int
366 mgen(Chan *c, char*, Dirtab*, int, int i, Dir *dp)
367 {
368 Qid qid;
369 Fsdev *mp;
370
371 if (c->qid.path == Qtop)
372 switch(i){
373 case DEVDOTDOT:
374 devdir(c, tqid, "#k", 0, eve, DMDIR|0775, dp);
375 return 1;
376 case 0:
377 devdir(c, dqid, "fs", 0, eve, DMDIR|0775, dp);
378 return 1;
379 default:
380 return -1;
381 }
382 if (c->qid.path != Qdir)
383 switch(i){
384 case DEVDOTDOT:
385 devdir(c, dqid, "fs", 0, eve, DMDIR|0775, dp);
386 return 1;
387 default:
388 return -1;
389 }
390 switch(i){
391 case DEVDOTDOT:
392 devdir(c, tqid, "#k", 0, eve, DMDIR|0775, dp);
393 return 1;
394 case 0:
395 devdir(c, cqid, "ctl", 0, eve, 0664, dp);
396 return 1;
397 }
398 i--; /* for ctl */
399 qid.path = Qfirst + i;
400 qid.vers = 0;
401 qid.type = 0;
402 mp = path2dev(i, Optional);
403 if (mp == nil)
404 return -1;
405 kstrcpy(up->genbuf, mp->name, sizeof(up->genbuf));
406 devdir(c, qid, up->genbuf, mp->size, eve, 0664, dp);
407 return 1;
408 }
409
410 static Chan*
411 mattach(char *spec)
412 {
413 return devattach(fsdevtab.dc, spec);
414 }
415
416 static Walkqid*
417 mwalk(Chan *c, Chan *nc, char **name, int nname)
418 {
419 rdconf();
420 return devwalk(c, nc, name, nname, 0, 0, mgen);
421 }
422
423 static int
424 mstat(Chan *c, uchar *db, int n)
425 {
426 Dir d;
427 Fsdev *mp;
428 int p;
429
430 p = c->qid.path;
431 memset(&d, 0, sizeof d);
432 switch(p){
433 case Qtop:
434 devdir(c, tqid, "#k", 0, eve, DMDIR|0775, &d);
435 break;
436 case Qdir:
437 devdir(c, dqid, "fs", 0, eve, DMDIR|0775, &d);
438 break;
439 case Qctl:
440 devdir(c, cqid, "ctl", 0, eve, 0664, &d);
441 break;
442 default:
443 mp = path2dev(p - Qfirst, Mustexist);
444 devdir(c, c->qid, mp->name, mp->size, eve, 0664, &d);
445 }
446 n = convD2M(&d, db, n);
447 if (n == 0)
448 error(Ebadarg);
449 return n;
450 }
451
452 static Chan*
453 mopen(Chan *c, int omode)
454 {
455 // TODO: call devopen()?
456 if((c->qid.type & QTDIR) && omode != OREAD)
457 error(Eperm);
458 // if (c->flag & COPEN)
459 // return c;
460 c->mode = openmode(omode & ~OTRUNC);
461 c->flag |= COPEN;
462 c->offset = 0;
463 return c;
464 }
465
466 static void
467 mclose(Chan*)
468 {
469 /* that's easy */
470 }
471
472
473 static long
474 io(Fsdev *mp, Inner *in, int isread, void *a, long l, vlong off)
475 {
476 long wl;
477 Chan *mc = in->idev;
478
479 if (waserror()) {
480 print("#k: %s: byte %,lld count %ld (of #k/%s): %s error: %s\n",
481 in->iname, off, l, mp->name, (isread? "read": "write"),
482 (up && up->errstr? up->errstr: ""));
483 nexterror();
484 }
485 if (isread)
486 wl = devtab[mc->type]->read(mc, a, l, off);
487 else
488 wl = devtab[mc->type]->write(mc, a, l, off);
489 poperror();
490 return wl;
491 }
492
493 /* NB: a transfer could span multiple inner devices */
494 static long
495 catio(Fsdev *mp, int isread, void *a, long n, vlong off)
496 {
497 int i;
498 long l, res;
499 Inner *in;
500
501 // print("catio %d %p %ld %lld\n", isread, a, n, off);
502 res = n;
503 for (i = 0; n > 0 && i < mp->ndevs; i++){
504 in = &mp->inner[i];
505 if (off >= in->isize){
506 off -= in->isize;
507 continue; /* not there yet */
508 }
509 if (off + n > in->isize)
510 l = in->isize - off;
511 else
512 l = n;
513 // print("\tdev %d %p %ld %lld\n", i, a, l, off);
514
515 if (io(mp, in, isread, a, l, off) != l)
516 error(Eio);
517
518 a = (char*)a + l;
519 off = 0;
520 n -= l;
521 }
522 // print("\tres %ld\n", res - n);
523 return res - n;
524 }
525
526 static long
527 interio(Fsdev *mp, int isread, void *a, long n, vlong off)
528 {
529 int i;
530 long boff, res, l, wl, wsz;
531 vlong woff, blk, mblk;
532
533 blk = off / Blksize;
534 boff = off % Blksize;
535 wsz = Blksize - boff;
536 res = n;
537 while(n > 0){
538 mblk = blk / mp->ndevs;
539 i = blk % mp->ndevs;
540 woff = mblk*Blksize + boff;
541 if (n > wsz)
542 l = wsz;
543 else
544 l = n;
545
546 wl = io(mp, &mp->inner[i], isread, a, l, woff);
547 if (wl != l)
548 error(Eio);
549
550 blk++;
551 boff = 0;
552 wsz = Blksize;
553 a = (char*)a + l;
554 n -= l;
555 }
556 return res;
557 }
558
559 static long
560 mread(Chan *c, void *a, long n, vlong off)
561 {
562 int i, retry;
563 long l, res;
564 Fsdev *mp;
565
566 if (c->qid.type & QTDIR)
567 return devdirread(c, a, n, 0, 0, mgen);
568 if (c->qid.path == Qctl) {
569 i = strlen(Cfgstr);
570 if (strlen(confstr) >= i) /* skip header if present */
571 return readstr((long)off, a, n, confstr + i);
572 else
573 return readstr((long)off, a, n, confstr);
574 }
575 i = c->qid.path - Qfirst;
576 mp = path2dev(i, Mustexist);
577
578 if (off >= mp->size)
579 return 0;
580 if (off + n > mp->size)
581 n = mp->size - off;
582 if (n == 0)
583 return 0;
584
585 res = -1;
586 switch(mp->type){
587 case Fcat:
588 res = catio(mp, Isread, a, n, off);
589 break;
590 case Finter:
591 res = interio(mp, Isread, a, n, off);
592 break;
593 case Fpart:
594 res = io(mp, &mp->inner[0], Isread, a, n, mp->start + off);
595 break;
596 case Fmirror:
597 retry = 0;
598 do {
599 if (retry > 0) {
600 print("#k/%s: retry %d read for byte %,lld "
601 "count %ld: %s\n", mp->name, retry, off,
602 n, (up && up->errstr? up->errstr: ""));
603 /*
604 * pause before retrying in case it's due to
605 * a transient bus or controller problem.
606 */
607 tsleep(&up->sleep, return0, 0, Retrypause);
608 }
609 for (i = 0; i < mp->ndevs; i++){
610 if (waserror())
611 continue;
612 l = io(mp, &mp->inner[i], Isread, a, n, off);
613 poperror();
614 if (l >= 0){
615 res = l;
616 break; /* read a good copy */
617 }
618 }
619 } while (i == mp->ndevs && ++retry <= Maxretries);
620 if (retry > Maxretries) {
621 /* no mirror had a good copy of the block */
622 print("#k/%s: byte %,lld count %ld: CAN'T READ "
623 "from mirror: %s\n", mp->name, off, n,
624 (up && up->errstr? up->errstr: ""));
625 error(Eio);
626 } else if (retry > 0)
627 print("#k/%s: byte %,lld count %ld: retry read OK "
628 "from mirror: %s\n", mp->name, off, n,
629 (up && up->errstr? up->errstr: ""));
630 break;
631 }
632 return res;
633 }
634
635 static long
636 mwrite(Chan *c, void *a, long n, vlong off)
637 {
638 int i, allbad, anybad, retry;
639 long l, res;
640 Fsdev *mp;
641
642 if (c->qid.type & QTDIR)
643 error(Eperm);
644 if (c->qid.path == Qctl){
645 mconfig(a, n);
646 return n;
647 }
648 mp = path2dev(c->qid.path - Qfirst, Mustexist);
649
650 if (off >= mp->size)
651 return 0;
652 if (off + n > mp->size)
653 n = mp->size - off;
654 if (n == 0)
655 return 0;
656 res = n;
657 switch(mp->type){
658 case Fcat:
659 res = catio(mp, Iswrite, a, n, off);
660 break;
661 case Finter:
662 res = interio(mp, Iswrite, a, n, off);
663 break;
664 case Fpart:
665 res = io(mp, &mp->inner[0], Iswrite, a, n, mp->start + off);
666 if (res != n)
667 error(Eio);
668 break;
669 case Fmirror:
670 retry = 0;
671 do {
672 if (retry > 0) {
673 print("#k/%s: retry %d write for byte %,lld "
674 "count %ld: %s\n", mp->name, retry, off,
675 n, (up && up->errstr? up->errstr: ""));
676 /*
677 * pause before retrying in case it's due to
678 * a transient bus or controller problem.
679 */
680 tsleep(&up->sleep, return0, 0, Retrypause);
681 }
682 allbad = 1;
683 anybad = 0;
684 for (i = mp->ndevs - 1; i >= 0; i--){
685 if (waserror()) {
686 anybad = 1;
687 continue;
688 }
689 l = io(mp, &mp->inner[i], Iswrite, a, n, off);
690 poperror();
691 if (l == n)
692 allbad = 0; /* wrote a good copy */
693 else
694 anybad = 1;
695 }
696 } while (anybad && ++retry <= Maxretries);
697 if (allbad) {
698 /* no mirror took a good copy of the block */
699 print("#k/%s: byte %,lld count %ld: CAN'T WRITE "
700 "to mirror: %s\n", mp->name, off, n,
701 (up && up->errstr? up->errstr: ""));
702 error(Eio);
703 } else if (retry > 0)
704 print("#k/%s: byte %,lld count %ld: retry wrote OK "
705 "to mirror: %s\n", mp->name, off, n,
706 (up && up->errstr? up->errstr: ""));
707
708 break;
709 }
710 return res;
711 }
712
713 Dev fsdevtab = {
714 'k',
715 "devfs",
716
717 devreset,
718 devinit,
719 devshutdown,
720 mattach,
721 mwalk,
722 mstat,
723 mopen,
724 devcreate,
725 mclose,
726 mread,
727 devbread,
728 mwrite,
729 devbwrite,
730 devremove,
731 devwstat,
732 devpower,
733 devconfig,
734 };
Cache object: 595a1529ddb0d5d1b5c0298fe8dbe415
|