FreeBSD/Linux Kernel Cross Reference
sys/port/devbridge.c
1 /*
2 * IPv4 Ethernet bridge
3 */
4 #include "u.h"
5 #include "../port/lib.h"
6 #include "mem.h"
7 #include "dat.h"
8 #include "fns.h"
9 #include "../ip/ip.h"
10 #include "../port/netif.h"
11 #include "../port/error.h"
12
/* short type names for the structures declared below */
typedef struct Bridge	Bridge;
typedef struct Centry	Centry;
typedef struct Port	Port;
typedef struct Iphdr	Iphdr;
typedef struct Tcphdr	Tcphdr;
18
/*
 * File types; the value is kept in the low byte of qid.path
 * (see TYPE).  The order matters: MaxQ sizes dirtab[].
 */
enum
{
	Qtopdir=	1,	/* top level directory */

	Qbridgedir,		/* bridge* directory */
	Qbctl,
	Qstats,
	Qcache,
	Qlog,

	Qportdir,		/* directory for a protocol */
	Qpctl,
	Qlocal,
	Qstatus,

	MaxQ,
};

/* table sizes and tuning constants */
enum
{
	Maxbridge=	4,
	Maxport=	128,	/* power of 2 (used as a mask by PORT) */
	CacheHash=	257,	/* prime */
	CacheLook=	5,	/* how many cache entries to examine */
	CacheSize=	(CacheHash+CacheLook-1),
	CacheTimeout=	5*60,	/* timeout for cache entry in seconds */

	TcpMssMax=	1300,	/* max desirable Tcp MSS value */
	TunnelMtu=	1400,
};
46
47 static Dirtab bridgedirtab[]={
48 "ctl", {Qbctl}, 0, 0666,
49 "stats", {Qstats}, 0, 0444,
50 "cache", {Qcache}, 0, 0444,
51 "log", {Qlog}, 0, 0666,
52 };
53
54 static Dirtab portdirtab[]={
55 "ctl", {Qpctl}, 0, 0666,
56 "local", {Qlocal}, 0, 0444,
57 "status", {Qstatus}, 0, 0444,
58 };
59
/* log categories; one bit each, named in logflags[] */
enum {
	Logcache=	1<<0,
	Logmcast=	1<<1,
};
64
/* kinds of interface a port can be bound to (Port.type) */
enum
{
	Tether,		/* "bind ether ..." */
	Ttun,		/* "bind tunnel ..." */
};
71
72 static Logflag logflags[] =
73 {
74 { "cache", Logcache, },
75 { "multicast", Logmcast, },
76 { nil, 0, },
77 };
78
79 static Dirtab *dirtab[MaxQ];
80
81 #define TYPE(x) (((ulong)(x).path) & 0xff)
82 #define PORT(x) ((((ulong)(x).path) >> 8)&(Maxport-1))
83 #define QID(x, y) (((x)<<8) | (y))
84
85 struct Centry
86 {
87 uchar d[Eaddrlen];
88 int port;
89 long expire; // entry expires this many seconds after bootime
90 long src;
91 long dst;
92 };
93
94 struct Bridge
95 {
96 QLock;
97 int nport;
98 Port *port[Maxport];
99 Centry cache[CacheSize];
100 ulong hit;
101 ulong miss;
102 ulong copy;
103 long delay0; // constant microsecond delay per packet
104 long delayn; // microsecond delay per byte
105 int tcpmss; // modify tcpmss value
106
107 Log;
108 };
109
110 struct Port
111 {
112 int id;
113 Bridge *bridge;
114 int ref;
115 int closed;
116
117 Chan *data[2]; // channel to data
118
119 Proc *readp; // read proc
120
121 // the following uniquely identifies the port
122 int type;
123 char name[KNAMELEN];
124
125 // owner hash - avoids bind/unbind races
126 ulong ownhash;
127
128 // various stats
129 int in; // number of packets read
130 int inmulti; // multicast or broadcast
131 int inunknown; // unknown address
132 int out; // number of packets read
133 int outmulti; // multicast or broadcast
134 int outunknown; // unknown address
135 int outfrag; // fragmented the packet
136 int nentry; // number of cache entries for this port
137 };
138
/* IPv4/TCP constants used by the MSS rewriting and fragmenting code */
enum {
	IP_TCPPROTO	= 6,	/* value of Iphdr.proto for TCP */
	EOLOPT		= 0,	/* TCP option kind: end of option list */
	NOOPOPT		= 1,	/* TCP option kind: no-op (padding) */
	MSSOPT		= 2,	/* TCP option kind: maximum segment size */
	MSS_LENGTH	= 4,	/* total length of the MSS option */
	SYN		= 0x02,	/* Pkt. is synchronise */
	IPHDR		= 20,	/* sizeof(Iphdr) */
};
148
149 struct Iphdr
150 {
151 uchar vihl; /* Version and header length */
152 uchar tos; /* Type of service */
153 uchar length[2]; /* packet length */
154 uchar id[2]; /* ip->identification */
155 uchar frag[2]; /* Fragment information */
156 uchar ttl; /* Time to live */
157 uchar proto; /* Protocol */
158 uchar cksum[2]; /* Header checksum */
159 uchar src[4]; /* IP source */
160 uchar dst[4]; /* IP destination */
161 };
162
163 struct Tcphdr
164 {
165 uchar sport[2];
166 uchar dport[2];
167 uchar seq[4];
168 uchar ack[4];
169 uchar flag[2];
170 uchar win[2];
171 uchar cksum[2];
172 uchar urg[2];
173 };
174
175 static Bridge bridgetab[Maxbridge];
176
177 static int m2p[] = {
178 [OREAD] 4,
179 [OWRITE] 2,
180 [ORDWR] 6
181 };
182
183 static int bridgegen(Chan *c, char*, Dirtab*, int, int s, Dir *dp);
184 static void portbind(Bridge *b, int argc, char *argv[]);
185 static void portunbind(Bridge *b, int argc, char *argv[]);
186 static void etherread(void *a);
187 static char *cachedump(Bridge *b);
188 static void portfree(Port *port);
189 static void cacheflushport(Bridge *b, int port);
190 static void etherwrite(Port *port, Block *bp);
191
192 static void
193 bridgeinit(void)
194 {
195 int i;
196 Dirtab *dt;
197
198 // setup dirtab with non directory entries
199 for(i=0; i<nelem(bridgedirtab); i++) {
200 dt = bridgedirtab + i;
201 dirtab[TYPE(dt->qid)] = dt;
202 }
203 for(i=0; i<nelem(portdirtab); i++) {
204 dt = portdirtab + i;
205 dirtab[TYPE(dt->qid)] = dt;
206 }
207 }
208
209 static Chan*
210 bridgeattach(char* spec)
211 {
212 Chan *c;
213 int dev;
214
215 dev = atoi(spec);
216 if(dev<0 || dev >= Maxbridge)
217 error("bad specification");
218
219 c = devattach('B', spec);
220 mkqid(&c->qid, QID(0, Qtopdir), 0, QTDIR);
221 c->dev = dev;
222 return c;
223 }
224
225 static Walkqid*
226 bridgewalk(Chan *c, Chan *nc, char **name, int nname)
227 {
228 return devwalk(c, nc, name, nname, (Dirtab*)0, 0, bridgegen);
229 }
230
231 static int
232 bridgestat(Chan* c, uchar* db, int n)
233 {
234 return devstat(c, db, n, (Dirtab *)0, 0L, bridgegen);
235 }
236
237 static Chan*
238 bridgeopen(Chan* c, int omode)
239 {
240 int perm;
241 Bridge *b;
242
243 omode &= 3;
244 perm = m2p[omode];
245 USED(perm);
246
247 b = bridgetab + c->dev;
248 USED(b);
249
250 switch(TYPE(c->qid)) {
251 default:
252 break;
253 case Qlog:
254 logopen(b);
255 break;
256 case Qcache:
257 c->aux = cachedump(b);
258 break;
259 }
260 c->mode = openmode(omode);
261 c->flag |= COPEN;
262 c->offset = 0;
263 return c;
264 }
265
266 static void
267 bridgeclose(Chan* c)
268 {
269 Bridge *b = bridgetab + c->dev;
270
271 switch(TYPE(c->qid)) {
272 case Qcache:
273 if(c->flag & COPEN)
274 free(c->aux);
275 break;
276 case Qlog:
277 if(c->flag & COPEN)
278 logclose(b);
279 break;
280 }
281 }
282
283 static long
284 bridgeread(Chan *c, void *a, long n, vlong off)
285 {
286 char buf[256];
287 Bridge *b = bridgetab + c->dev;
288 Port *port;
289 int i, ingood, outgood;
290
291 USED(off);
292 switch(TYPE(c->qid)) {
293 default:
294 error(Eperm);
295 case Qtopdir:
296 case Qbridgedir:
297 case Qportdir:
298 return devdirread(c, a, n, 0, 0, bridgegen);
299 case Qlog:
300 return logread(b, a, off, n);
301 case Qstatus:
302 qlock(b);
303 port = b->port[PORT(c->qid)];
304 if(port == 0)
305 strcpy(buf, "unbound\n");
306 else {
307 i = 0;
308 switch(port->type) {
309 default:
310 panic("bridgeread: unknown port type: %d",
311 port->type);
312 case Tether:
313 i += snprint(buf+i, sizeof(buf)-i, "ether %s: ", port->name);
314 break;
315 case Ttun:
316 i += snprint(buf+i, sizeof(buf)-i, "tunnel %s: ", port->name);
317 break;
318 }
319 ingood = port->in - port->inmulti - port->inunknown;
320 outgood = port->out - port->outmulti - port->outunknown;
321 i += snprint(buf+i, sizeof(buf)-i,
322 "in=%d(%d:%d:%d) out=%d(%d:%d:%d:%d)\n",
323 port->in, ingood, port->inmulti, port->inunknown,
324 port->out, outgood, port->outmulti,
325 port->outunknown, port->outfrag);
326 USED(i);
327 }
328 n = readstr(off, a, n, buf);
329 qunlock(b);
330 return n;
331 case Qbctl:
332 snprint(buf, sizeof(buf), "%s tcpmss\ndelay %ld %ld\n",
333 b->tcpmss ? "set" : "clear", b->delay0, b->delayn);
334 n = readstr(off, a, n, buf);
335 return n;
336 case Qcache:
337 n = readstr(off, a, n, c->aux);
338 return n;
339 case Qstats:
340 snprint(buf, sizeof(buf), "hit=%uld miss=%uld copy=%uld\n",
341 b->hit, b->miss, b->copy);
342 n = readstr(off, a, n, buf);
343 return n;
344 }
345 }
346
347 static void
348 bridgeoption(Bridge *b, char *option, int value)
349 {
350 if(strcmp(option, "tcpmss") == 0)
351 b->tcpmss = value;
352 else
353 error("unknown bridge option");
354 }
355
356
357 static long
358 bridgewrite(Chan *c, void *a, long n, vlong off)
359 {
360 Bridge *b = bridgetab + c->dev;
361 Cmdbuf *cb;
362 char *arg0, *p;
363
364 USED(off);
365 switch(TYPE(c->qid)) {
366 default:
367 error(Eperm);
368 case Qbctl:
369 cb = parsecmd(a, n);
370 qlock(b);
371 if(waserror()) {
372 qunlock(b);
373 free(cb);
374 nexterror();
375 }
376 if(cb->nf == 0)
377 error("short write");
378 arg0 = cb->f[0];
379 if(strcmp(arg0, "bind") == 0) {
380 portbind(b, cb->nf-1, cb->f+1);
381 } else if(strcmp(arg0, "unbind") == 0) {
382 portunbind(b, cb->nf-1, cb->f+1);
383 } else if(strcmp(arg0, "cacheflush") == 0) {
384 log(b, Logcache, "cache flush\n");
385 memset(b->cache, 0, CacheSize*sizeof(Centry));
386 } else if(strcmp(arg0, "set") == 0) {
387 if(cb->nf != 2)
388 error("usage: set option");
389 bridgeoption(b, cb->f[1], 1);
390 } else if(strcmp(arg0, "clear") == 0) {
391 if(cb->nf != 2)
392 error("usage: clear option");
393 bridgeoption(b, cb->f[1], 0);
394 } else if(strcmp(arg0, "delay") == 0) {
395 if(cb->nf != 3)
396 error("usage: delay delay0 delayn");
397 b->delay0 = strtol(cb->f[1], nil, 10);
398 b->delayn = strtol(cb->f[2], nil, 10);
399 } else
400 error("unknown control request");
401 poperror();
402 qunlock(b);
403 free(cb);
404 return n;
405 case Qlog:
406 cb = parsecmd(a, n);
407 p = logctl(b, cb->nf, cb->f, logflags);
408 free(cb);
409 if(p != nil)
410 error(p);
411 return n;
412 }
413 }
414
415 static int
416 bridgegen(Chan *c, char *, Dirtab*, int, int s, Dir *dp)
417 {
418 Bridge *b = bridgetab + c->dev;
419 int type = TYPE(c->qid);
420 Dirtab *dt;
421 Qid qid;
422
423 if(s == DEVDOTDOT){
424 switch(TYPE(c->qid)){
425 case Qtopdir:
426 case Qbridgedir:
427 snprint(up->genbuf, sizeof(up->genbuf), "#B%ld", c->dev);
428 mkqid(&qid, Qtopdir, 0, QTDIR);
429 devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
430 break;
431 case Qportdir:
432 snprint(up->genbuf, sizeof(up->genbuf), "bridge%ld", c->dev);
433 mkqid(&qid, Qbridgedir, 0, QTDIR);
434 devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
435 break;
436 default:
437 panic("bridgewalk %llux", c->qid.path);
438 }
439 return 1;
440 }
441
442 switch(type) {
443 default:
444 /* non-directory entries end up here */
445 if(c->qid.type & QTDIR)
446 panic("bridgegen: unexpected directory");
447 if(s != 0)
448 return -1;
449 dt = dirtab[TYPE(c->qid)];
450 if(dt == nil)
451 panic("bridgegen: unknown type: %lud", TYPE(c->qid));
452 devdir(c, c->qid, dt->name, dt->length, eve, dt->perm, dp);
453 return 1;
454 case Qtopdir:
455 if(s != 0)
456 return -1;
457 snprint(up->genbuf, sizeof(up->genbuf), "bridge%ld", c->dev);
458 mkqid(&qid, QID(0, Qbridgedir), 0, QTDIR);
459 devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
460 return 1;
461 case Qbridgedir:
462 if(s<nelem(bridgedirtab)) {
463 dt = bridgedirtab+s;
464 devdir(c, dt->qid, dt->name, dt->length, eve, dt->perm, dp);
465 return 1;
466 }
467 s -= nelem(bridgedirtab);
468 if(s >= b->nport)
469 return -1;
470 mkqid(&qid, QID(s, Qportdir), 0, QTDIR);
471 snprint(up->genbuf, sizeof(up->genbuf), "%d", s);
472 devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
473 return 1;
474 case Qportdir:
475 if(s>=nelem(portdirtab))
476 return -1;
477 dt = portdirtab+s;
478 mkqid(&qid, QID(PORT(c->qid),TYPE(dt->qid)), 0, QTFILE);
479 devdir(c, qid, dt->name, dt->length, eve, dt->perm, dp);
480 return 1;
481 }
482 }
483
484 // parse mac address; also in netif.c
485 static int
486 parseaddr(uchar *to, char *from, int alen)
487 {
488 char nip[4];
489 char *p;
490 int i;
491
492 p = from;
493 for(i = 0; i < alen; i++){
494 if(*p == 0)
495 return -1;
496 nip[0] = *p++;
497 if(*p == 0)
498 return -1;
499 nip[1] = *p++;
500 nip[2] = 0;
501 to[i] = strtoul(nip, 0, 16);
502 if(*p == ':')
503 p++;
504 }
505 return 0;
506 }
507
508 // assumes b is locked
509 static void
510 portbind(Bridge *b, int argc, char *argv[])
511 {
512 Port *port;
513 Chan *ctl;
514 int type = 0, i, n;
515 ulong ownhash;
516 char *dev, *dev2 = nil, *p;
517 char buf[100], name[KNAMELEN], path[8*KNAMELEN];
518 static char usage[] = "usage: bind ether|tunnel name ownhash dev [dev2]";
519
520 memset(name, 0, KNAMELEN);
521 if(argc < 4)
522 error(usage);
523 if(strcmp(argv[0], "ether") == 0) {
524 if(argc != 4)
525 error(usage);
526 type = Tether;
527 strncpy(name, argv[1], KNAMELEN);
528 name[KNAMELEN-1] = 0;
529 // parseaddr(addr, argv[1], Eaddrlen);
530 } else if(strcmp(argv[0], "tunnel") == 0) {
531 if(argc != 5)
532 error(usage);
533 type = Ttun;
534 strncpy(name, argv[1], KNAMELEN);
535 name[KNAMELEN-1] = 0;
536 // parseip(addr, argv[1]);
537 dev2 = argv[4];
538 } else
539 error(usage);
540 ownhash = atoi(argv[2]);
541 dev = argv[3];
542 for(i=0; i<b->nport; i++) {
543 port = b->port[i];
544 if(port != nil && port->type == type &&
545 memcmp(port->name, name, KNAMELEN) == 0)
546 error("port in use");
547 }
548 for(i=0; i<Maxport; i++)
549 if(b->port[i] == nil)
550 break;
551 if(i == Maxport)
552 error("no more ports");
553 port = smalloc(sizeof(Port));
554 port->ref = 1;
555 port->id = i;
556 port->ownhash = ownhash;
557
558 if(waserror()) {
559 portfree(port);
560 nexterror();
561 }
562 port->type = type;
563 memmove(port->name, name, KNAMELEN);
564 switch(port->type) {
565 default:
566 panic("portbind: unknown port type: %d", type);
567 case Tether:
568 snprint(path, sizeof(path), "%s/clone", dev);
569 ctl = namec(path, Aopen, ORDWR, 0);
570 if(waserror()) {
571 cclose(ctl);
572 nexterror();
573 }
574 // check addr?
575
576 // get directory name
577 n = devtab[ctl->type]->read(ctl, buf, sizeof(buf), 0);
578 buf[n] = 0;
579 for(p = buf; *p == ' '; p++)
580 ;
581 snprint(path, sizeof(path), "%s/%lud/data", dev, strtoul(p, 0, 0));
582
583 // setup connection to be promiscuous
584 snprint(buf, sizeof(buf), "connect -1");
585 devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
586 snprint(buf, sizeof(buf), "promiscuous");
587 devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
588 snprint(buf, sizeof(buf), "bridge");
589 devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
590
591 // open data port
592 port->data[0] = namec(path, Aopen, ORDWR, 0);
593 // dup it
594 incref(port->data[0]);
595 port->data[1] = port->data[0];
596
597 poperror();
598 cclose(ctl);
599
600 break;
601 case Ttun:
602 port->data[0] = namec(dev, Aopen, OREAD, 0);
603 port->data[1] = namec(dev2, Aopen, OWRITE, 0);
604 break;
605 }
606
607 poperror();
608
609 /* committed to binding port */
610 b->port[port->id] = port;
611 port->bridge = b;
612 if(b->nport <= port->id)
613 b->nport = port->id+1;
614
615 // assumes kproc always succeeds
616 kproc("etherread", etherread, port); // poperror must be next
617 port->ref++;
618 }
619
620 // assumes b is locked
621 static void
622 portunbind(Bridge *b, int argc, char *argv[])
623 {
624 int type = 0, i;
625 char name[KNAMELEN];
626 ulong ownhash;
627 Port *port = nil;
628 static char usage[] = "usage: unbind ether|tunnel addr [ownhash]";
629
630 memset(name, 0, KNAMELEN);
631 if(argc < 2 || argc > 3)
632 error(usage);
633 if(strcmp(argv[0], "ether") == 0) {
634 type = Tether;
635 strncpy(name, argv[1], KNAMELEN);
636 name[KNAMELEN-1] = 0;
637 // parseaddr(addr, argv[1], Eaddrlen);
638 } else if(strcmp(argv[0], "tunnel") == 0) {
639 type = Ttun;
640 strncpy(name, argv[1], KNAMELEN);
641 name[KNAMELEN-1] = 0;
642 // parseip(addr, argv[1]);
643 } else
644 error(usage);
645 if(argc == 3)
646 ownhash = atoi(argv[2]);
647 else
648 ownhash = 0;
649 for(i=0; i<b->nport; i++) {
650 port = b->port[i];
651 if(port != nil && port->type == type &&
652 memcmp(port->name, name, KNAMELEN) == 0)
653 break;
654 }
655 if(i == b->nport)
656 error("port not found");
657 if(ownhash != 0 && port->ownhash != 0 && ownhash != port->ownhash)
658 error("bad owner hash");
659
660 port->closed = 1;
661 b->port[i] = nil; // port is now unbound
662 cacheflushport(b, i);
663
664 // try and stop reader
665 if(port->readp)
666 postnote(port->readp, 1, "unbind", 0);
667 portfree(port);
668 }
669
670 // assumes b is locked
671 static Centry *
672 cachelookup(Bridge *b, uchar d[Eaddrlen])
673 {
674 int i;
675 uint h;
676 Centry *p;
677 long sec;
678
679 // dont cache multicast or broadcast
680 if(d[0] & 1)
681 return 0;
682
683 h = 0;
684 for(i=0; i<Eaddrlen; i++) {
685 h *= 7;
686 h += d[i];
687 }
688 h %= CacheHash;
689 p = b->cache + h;
690 sec = TK2SEC(m->ticks);
691 for(i=0; i<CacheLook; i++,p++) {
692 if(memcmp(d, p->d, Eaddrlen) == 0) {
693 p->dst++;
694 if(sec >= p->expire) {
695 log(b, Logcache, "expired cache entry: %E %d\n",
696 d, p->port);
697 return nil;
698 }
699 p->expire = sec + CacheTimeout;
700 return p;
701 }
702 }
703 log(b, Logcache, "cache miss: %E\n", d);
704 return nil;
705 }
706
707 // assumes b is locked
708 static void
709 cacheupdate(Bridge *b, uchar d[Eaddrlen], int port)
710 {
711 int i;
712 uint h;
713 Centry *p, *pp;
714 long sec;
715
716 // dont cache multicast or broadcast
717 if(d[0] & 1) {
718 log(b, Logcache, "bad source address: %E\n", d);
719 return;
720 }
721
722 h = 0;
723 for(i=0; i<Eaddrlen; i++) {
724 h *= 7;
725 h += d[i];
726 }
727 h %= CacheHash;
728 p = b->cache + h;
729 pp = p;
730 sec = p->expire;
731
732 // look for oldest entry
733 for(i=0; i<CacheLook; i++,p++) {
734 if(memcmp(p->d, d, Eaddrlen) == 0) {
735 p->expire = TK2SEC(m->ticks) + CacheTimeout;
736 if(p->port != port) {
737 log(b, Logcache, "NIC changed port %d->%d: %E\n",
738 p->port, port, d);
739 p->port = port;
740 }
741 p->src++;
742 return;
743 }
744 if(p->expire < sec) {
745 sec = p->expire;
746 pp = p;
747 }
748 }
749 if(pp->expire != 0)
750 log(b, Logcache, "bumping from cache: %E %d\n", pp->d, pp->port);
751 pp->expire = TK2SEC(m->ticks) + CacheTimeout;
752 memmove(pp->d, d, Eaddrlen);
753 pp->port = port;
754 pp->src = 1;
755 pp->dst = 0;
756 log(b, Logcache, "adding to cache: %E %d\n", pp->d, pp->port);
757 }
758
759 // assumes b is locked
760 static void
761 cacheflushport(Bridge *b, int port)
762 {
763 Centry *ce;
764 int i;
765
766 ce = b->cache;
767 for(i=0; i<CacheSize; i++,ce++) {
768 if(ce->port != port)
769 continue;
770 memset(ce, 0, sizeof(Centry));
771 }
772 }
773
774 static char *
775 cachedump(Bridge *b)
776 {
777 int i, n;
778 long sec, off;
779 char *buf, *p, *ep;
780 Centry *ce;
781 char c;
782
783 qlock(b);
784 if(waserror()) {
785 qunlock(b);
786 nexterror();
787 }
788 sec = TK2SEC(m->ticks);
789 n = 0;
790 for(i=0; i<CacheSize; i++)
791 if(b->cache[i].expire != 0)
792 n++;
793
794 n *= 51; // change if print format is changed
795 n += 10; // some slop at the end
796 buf = malloc(n);
797 p = buf;
798 ep = buf + n;
799 ce = b->cache;
800 off = seconds() - sec;
801 for(i=0; i<CacheSize; i++,ce++) {
802 if(ce->expire == 0)
803 continue;
804 c = (sec < ce->expire)?'v':'e';
805 p += snprint(p, ep-p, "%E %2d %10ld %10ld %10ld %c\n", ce->d,
806 ce->port, ce->src, ce->dst, ce->expire+off, c);
807 }
808 *p = 0;
809 poperror();
810 qunlock(b);
811
812 return buf;
813 }
814
815
816
817 // assumes b is locked
818 static void
819 ethermultiwrite(Bridge *b, Block *bp, Port *port)
820 {
821 Port *oport;
822 Block *bp2;
823 Etherpkt *ep;
824 int i, mcast;
825
826 if(waserror()) {
827 if(bp)
828 freeb(bp);
829 nexterror();
830 }
831
832 ep = (Etherpkt*)bp->rp;
833 mcast = ep->d[0] & 1; /* multicast bit of ethernet address */
834
835 oport = nil;
836 for(i=0; i<b->nport; i++) {
837 if(i == port->id || b->port[i] == nil)
838 continue;
839 /*
840 * we need to forward multicast packets for ipv6,
841 * so always do it.
842 */
843 if(mcast)
844 b->port[i]->outmulti++;
845 else
846 b->port[i]->outunknown++;
847
848 // delay one so that the last write does not copy
849 if(oport != nil) {
850 b->copy++;
851 bp2 = copyblock(bp, blocklen(bp));
852 if(!waserror()) {
853 etherwrite(oport, bp2);
854 poperror();
855 }
856 }
857 oport = b->port[i];
858 }
859
860 // last write free block
861 if(oport) {
862 bp2 = bp; bp = nil; USED(bp);
863 if(!waserror()) {
864 etherwrite(oport, bp2);
865 poperror();
866 }
867 } else
868 freeb(bp);
869
870 poperror();
871 }
872
873 static void
874 tcpmsshack(Etherpkt *epkt, int n)
875 {
876 int hl, optlen;
877 Iphdr *iphdr;
878 Tcphdr *tcphdr;
879 ulong mss, cksum;
880 uchar *optr;
881
882 /* ignore non-ipv4 packets */
883 if(nhgets(epkt->type) != ETIP4)
884 return;
885 iphdr = (Iphdr*)(epkt->data);
886 n -= ETHERHDRSIZE;
887 if(n < IPHDR)
888 return;
889
890 /* ignore bad packets */
891 if(iphdr->vihl != (IP_VER4|IP_HLEN4)) {
892 hl = (iphdr->vihl&0xF)<<2;
893 if((iphdr->vihl&0xF0) != IP_VER4 || hl < (IP_HLEN4<<2))
894 return;
895 } else
896 hl = IP_HLEN4<<2;
897
898 /* ignore non-tcp packets */
899 if(iphdr->proto != IP_TCPPROTO)
900 return;
901 n -= hl;
902 if(n < sizeof(Tcphdr))
903 return;
904 tcphdr = (Tcphdr*)((uchar*)(iphdr) + hl);
905 // MSS can only appear in SYN packet
906 if(!(tcphdr->flag[1] & SYN))
907 return;
908 hl = (tcphdr->flag[0] & 0xf0)>>2;
909 if(n < hl)
910 return;
911
912 // check for MSS option
913 optr = (uchar*)tcphdr + sizeof(Tcphdr);
914 n = hl - sizeof(Tcphdr);
915 for(;;) {
916 if(n <= 0 || *optr == EOLOPT)
917 return;
918 if(*optr == NOOPOPT) {
919 n--;
920 optr++;
921 continue;
922 }
923 optlen = optr[1];
924 if(optlen < 2 || optlen > n)
925 return;
926 if(*optr == MSSOPT && optlen == MSS_LENGTH)
927 break;
928 n -= optlen;
929 optr += optlen;
930 }
931
932 mss = nhgets(optr+2);
933 if(mss <= TcpMssMax)
934 return;
935 // fit checksum
936 cksum = nhgets(tcphdr->cksum);
937 if(optr-(uchar*)tcphdr & 1) {
938 print("tcpmsshack: odd alignment!\n");
939 // odd alignments are a pain
940 cksum += nhgets(optr+1);
941 cksum -= (optr[1]<<8)|(TcpMssMax>>8);
942 cksum += (cksum>>16);
943 cksum &= 0xffff;
944 cksum += nhgets(optr+3);
945 cksum -= ((TcpMssMax&0xff)<<8)|optr[4];
946 cksum += (cksum>>16);
947 } else {
948 cksum += mss;
949 cksum -= TcpMssMax;
950 cksum += (cksum>>16);
951 }
952 hnputs(tcphdr->cksum, cksum);
953 hnputs(optr+2, TcpMssMax);
954 }
955
956 /*
957 * process to read from the ethernet
958 */
959 static void
960 etherread(void *a)
961 {
962 Port *port = a;
963 Bridge *b = port->bridge;
964 Block *bp, *bp2;
965 Etherpkt *ep;
966 Centry *ce;
967 long md;
968
969 qlock(b);
970 port->readp = up; /* hide identity under a rock for unbind */
971
972 while(!port->closed){
973 // release lock to read - error means it is time to quit
974 qunlock(b);
975 if(waserror()) {
976 print("etherread read error: %s\n", up->errstr);
977 qlock(b);
978 break;
979 }
980 if(0)
981 print("devbridge: etherread: reading\n");
982 bp = devtab[port->data[0]->type]->bread(port->data[0],
983 ETHERMAXTU, 0);
984 if(0)
985 print("devbridge: etherread: blocklen = %d\n",
986 blocklen(bp));
987 poperror();
988 qlock(b);
989 if(bp == nil || port->closed)
990 break;
991 if(waserror()) {
992 // print("etherread bridge error\n");
993 if(bp)
994 freeb(bp);
995 continue;
996 }
997 if(blocklen(bp) < ETHERMINTU)
998 error("short packet");
999 port->in++;
1000
1001 ep = (Etherpkt*)bp->rp;
1002 cacheupdate(b, ep->s, port->id);
1003 if(b->tcpmss)
1004 tcpmsshack(ep, BLEN(bp));
1005
1006 /*
1007 * delay packets to simulate a slow link
1008 */
1009 if(b->delay0 || b->delayn){
1010 md = b->delay0 + b->delayn * BLEN(bp);
1011 if(md > 0)
1012 microdelay(md);
1013 }
1014
1015 if(ep->d[0] & 1) {
1016 log(b, Logmcast, "multicast: port=%d src=%E dst=%E type=%#.4ux\n",
1017 port->id, ep->s, ep->d, ep->type[0]<<8|ep->type[1]);
1018 port->inmulti++;
1019 bp2 = bp; bp = nil;
1020 ethermultiwrite(b, bp2, port);
1021 } else {
1022 ce = cachelookup(b, ep->d);
1023 if(ce == nil) {
1024 b->miss++;
1025 port->inunknown++;
1026 bp2 = bp; bp = nil;
1027 ethermultiwrite(b, bp2, port);
1028 }else if(ce->port != port->id){
1029 b->hit++;
1030 bp2 = bp; bp = nil;
1031 etherwrite(b->port[ce->port], bp2);
1032 }
1033 }
1034
1035 poperror();
1036 if(bp)
1037 freeb(bp);
1038 }
1039 // print("etherread: trying to exit\n");
1040 port->readp = nil;
1041 portfree(port);
1042 qunlock(b);
1043 pexit("hangup", 1);
1044 }
1045
1046 static int
1047 fragment(Etherpkt *epkt, int n)
1048 {
1049 Iphdr *iphdr;
1050
1051 if(n <= TunnelMtu)
1052 return 0;
1053
1054 /* ignore non-ipv4 packets */
1055 if(nhgets(epkt->type) != ETIP4)
1056 return 0;
1057 iphdr = (Iphdr*)(epkt->data);
1058 n -= ETHERHDRSIZE;
1059 /*
1060 * ignore: IP runt packets, bad packets (I don't handle IP
1061 * options for the moment), packets with don't-fragment set,
1062 * and short blocks.
1063 */
1064 if(n < IPHDR || iphdr->vihl != (IP_VER4|IP_HLEN4) ||
1065 iphdr->frag[0] & (IP_DF>>8) || nhgets(iphdr->length) > n)
1066 return 0;
1067
1068 return 1;
1069 }
1070
1071
1072 static void
1073 etherwrite(Port *port, Block *bp)
1074 {
1075 Iphdr *eh, *feh;
1076 Etherpkt *epkt;
1077 int n, lid, len, seglen, chunk, dlen, blklen, offset, mf;
1078 Block *xp, *nb;
1079 ushort fragoff, frag;
1080
1081 port->out++;
1082 epkt = (Etherpkt*)bp->rp;
1083 n = blocklen(bp);
1084 if(port->type != Ttun || !fragment(epkt, n)) {
1085 devtab[port->data[1]->type]->bwrite(port->data[1], bp, 0);
1086 return;
1087 }
1088 port->outfrag++;
1089 if(waserror()){
1090 freeblist(bp);
1091 nexterror();
1092 }
1093
1094 seglen = (TunnelMtu - ETHERHDRSIZE - IPHDR) & ~7;
1095 eh = (Iphdr*)(epkt->data);
1096 len = nhgets(eh->length);
1097 frag = nhgets(eh->frag);
1098 mf = frag & IP_MF;
1099 frag <<= 3;
1100 dlen = len - IPHDR;
1101 xp = bp;
1102 lid = nhgets(eh->id);
1103 offset = ETHERHDRSIZE+IPHDR;
1104 while(xp != nil && offset && offset >= BLEN(xp)) {
1105 offset -= BLEN(xp);
1106 xp = xp->next;
1107 }
1108 xp->rp += offset;
1109
1110 if(0)
1111 print("seglen=%d, dlen=%d, mf=%x, frag=%d\n",
1112 seglen, dlen, mf, frag);
1113 for(fragoff = 0; fragoff < dlen; fragoff += seglen) {
1114 nb = allocb(ETHERHDRSIZE+IPHDR+seglen);
1115
1116 feh = (Iphdr*)(nb->wp+ETHERHDRSIZE);
1117
1118 memmove(nb->wp, epkt, ETHERHDRSIZE+IPHDR);
1119 nb->wp += ETHERHDRSIZE+IPHDR;
1120
1121 if((fragoff + seglen) >= dlen) {
1122 seglen = dlen - fragoff;
1123 hnputs(feh->frag, (frag+fragoff)>>3 | mf);
1124 }
1125 else
1126 hnputs(feh->frag, (frag+fragoff>>3) | IP_MF);
1127
1128 hnputs(feh->length, seglen + IPHDR);
1129 hnputs(feh->id, lid);
1130
1131 /* Copy up the data area */
1132 chunk = seglen;
1133 while(chunk) {
1134 blklen = chunk;
1135 if(BLEN(xp) < chunk)
1136 blklen = BLEN(xp);
1137 memmove(nb->wp, xp->rp, blklen);
1138 nb->wp += blklen;
1139 xp->rp += blklen;
1140 chunk -= blklen;
1141 if(xp->rp == xp->wp)
1142 xp = xp->next;
1143 }
1144
1145 feh->cksum[0] = 0;
1146 feh->cksum[1] = 0;
1147 hnputs(feh->cksum, ipcsum(&feh->vihl));
1148
1149 /* don't generate small packets */
1150 if(BLEN(nb) < ETHERMINTU)
1151 nb->wp = nb->rp + ETHERMINTU;
1152 devtab[port->data[1]->type]->bwrite(port->data[1], nb, 0);
1153 }
1154 poperror();
1155 freeblist(bp);
1156 }
1157
1158 // hold b lock
1159 static void
1160 portfree(Port *port)
1161 {
1162 port->ref--;
1163 if(port->ref < 0)
1164 panic("portfree: bad ref");
1165 if(port->ref > 0)
1166 return;
1167
1168 if(port->data[0])
1169 cclose(port->data[0]);
1170 if(port->data[1])
1171 cclose(port->data[1]);
1172 memset(port, 0, sizeof(Port));
1173 free(port);
1174 }
1175
1176 Dev bridgedevtab = {
1177 'B',
1178 "bridge",
1179
1180 devreset,
1181 bridgeinit,
1182 devshutdown,
1183 bridgeattach,
1184 bridgewalk,
1185 bridgestat,
1186 bridgeopen,
1187 devcreate,
1188 bridgeclose,
1189 bridgeread,
1190 devbread,
1191 bridgewrite,
1192 devbwrite,
1193 devremove,
1194 devwstat,
1195 };
Cache object: d7ef25c68c4f03c149c9cacd60b3c04e
|