FreeBSD/Linux Kernel Cross Reference
sys/pc/etherm10g.c
1 /*
2 * myricom 10 Gb ethernet driver
3 * © 2007 erik quanstrom, coraid
4 *
5 * the card is big endian.
6 * we use uvlong rather than uintptr to hold addresses so that
7 * we don't get "warning: stupid shift" on 32-bit architectures.
8 */
9 #include "u.h"
10 #include "../port/lib.h"
11 #include "mem.h"
12 #include "dat.h"
13 #include "fns.h"
14 #include "io.h"
15 #include "../port/error.h"
16 #include "../port/netif.h"
17
18 #include "../pc/etherif.h"
19
20 #ifndef KiB
21 #define KiB 1024u /* Kibi 0x0000000000000400 */
22 #define MiB 1048576u /* Mebi 0x0000000000100000 */
23 #endif /* KiB */
24
/*
 * dprint: debug print, enabled by the file-scope flag `debug'.
 * wrapped in do/while so that it is a single statement and can't
 * capture the else of an enclosing if.
 * pcicapdbg: compiled-out tracing for pcicap().
 * malign: 4KiB-aligned allocation.
 */
#define dprint(...)	do{ if(debug) print(__VA_ARGS__); }while(0)
#define pcicapdbg(...)
#define malign(n)	mallocalign((n), 4*KiB, 0, 0)
28
29 #include "etherm10g2k.i"
30 #include "etherm10g4k.i"
31
/* run-time switch for dprint(); toggled by the "debug" ctl message */
static int	debug = 0;
/* error string raised when the card doesn't answer a command */
static char	Etimeout[] = "timeout";
34
35 enum {
36 Epromsz = 256,
37 Maxslots= 1024,
38 Align = 4096,
39 Maxmtu = 9000,
40 Noconf = 0xffffffff,
41
42 Fwoffset= 1*MiB,
43 Cmdoff = 0xf80000, /* command port offset */
44 Fwsubmt = 0xfc0000, /* firmware submission command port offset */
45 Rdmaoff = 0xfc01c0, /* rdma command port offset */
46 };
47
/*
 * firmware command numbers, passed as the `type' argument of
 * cmd() and maccmd().  the values are positional; do not reorder.
 */
enum {
	CZero,
	Creset,
	Cversion,

	/* issue these before Cetherup */
	CSintrqdma,
	CSbigsz,		/* in bytes bigsize = 2^n */
	CSsmallsz,

	CGsendoff,
	CGsmallrxoff,
	CGbigrxoff,
	CGirqackoff,
	CGirqdeassoff,
	CGsendrgsz,
	CGrxrgsz,

	CSintrqsz,		/* 2^n */
	Cetherup,		/* above parameters + mtu/mac addr must be set first. */
	Cetherdn,

	/* below may be issued live */
	CSmtu,
	CGcoaloff,		/* in µs */
	CSstatsrate,		/* in µs */
	CSstatsdma,

	Cpromisc,
	Cnopromisc,
	CSmac,

	Cenablefc,
	Cdisablefc,

	Cdmatest,		/* address in d[0-1], d[2]=length */

	Cenableallmc,
	Cdisableallmc,

	CSjoinmc,
	CSleavemc,
	Cleaveallmc,

	CSstatsdma2,		/* adds (unused) multicast stats */
};
92
93 typedef union {
94 uint i[2];
95 uchar c[8];
96 } Cmd;
97
98 typedef ulong Slot;
99 typedef struct {
100 ushort cksum;
101 ushort len;
102 } Slotparts;
103
104 enum {
105 SFsmall = 1,
106 SFfirst = 2,
107 SFalign = 4,
108 SFnotso = 16,
109 };
110
111 typedef struct {
112 ulong high;
113 ulong low;
114 ushort hdroff;
115 ushort len;
116 uchar pad;
117 uchar nrdma;
118 uchar chkoff;
119 uchar flags;
120 } Send;
121
122 typedef struct {
123 QLock;
124 Send *lanai; /* tx ring (cksum+len in lanai memory) */
125 Send *host; /* tx ring (data in our memory) */
126 Block **bring;
127 // uchar *wcfifo; /* what the heck is a w/c fifo? */
128 int size; /* of buffers in the z8's memory */
129 ulong segsz;
130 uint n; /* rxslots */
131 uint m; /* mask; rxslots must be a power of two */
132 uint i; /* number of segments (not frames) queued */
133 uint cnt; /* number of segments sent by the card */
134
135 ulong npkt;
136 vlong nbytes;
137 } Tx;
138
139 typedef struct {
140 Lock;
141 Block *head;
142 uint size; /* buffer size of each block */
143 uint n; /* n free buffers */
144 uint cnt;
145 } Bpool;
146
147 static Bpool smpool = { .size = 128, };
148 static Bpool bgpool = { .size = Maxmtu, };
149
150 typedef struct {
151 Bpool *pool; /* free buffers */
152 ulong *lanai; /* rx ring; we have no permanent host shadow */
153 Block **host; /* called "info" in myricom driver */
154 // uchar *wcfifo; /* cmd submission fifo */
155 uint m;
156 uint n; /* rxslots */
157 uint i;
158 uint cnt; /* number of buffers allocated (lifetime) */
159 uint allocfail;
160 } Rx;
161
162 /* dma mapped. unix network byte order. */
163 typedef struct {
164 uchar txcnt[4];
165 uchar linkstat[4];
166 uchar dlink[4];
167 uchar derror[4];
168 uchar drunt[4];
169 uchar doverrun[4];
170 uchar dnosm[4];
171 uchar dnobg[4];
172 uchar nrdma[4];
173 uchar txstopped;
174 uchar down;
175 uchar updated;
176 uchar valid;
177 } Stats;
178
179 enum {
180 Detached,
181 Attached,
182 Runed,
183 };
184
185 typedef struct {
186 Slot *entry;
187 uvlong busaddr;
188 uint m;
189 uint n;
190 uint i;
191 } Done;
192
193 typedef struct Ctlr Ctlr;
194 typedef struct Ctlr {
195 QLock;
196 int state;
197 int kprocs;
198 uvlong port;
199 Pcidev* pcidev;
200 Ctlr* next;
201 int active;
202 int id; /* do we need this? */
203
204 uchar ra[Eaddrlen];
205
206 int ramsz;
207 uchar *ram;
208
209 ulong *irqack;
210 ulong *irqdeass;
211 ulong *coal;
212
213 char eprom[Epromsz];
214 ulong serial; /* unit serial number */
215
216 QLock cmdl;
217 Cmd *cmd; /* address of command return */
218 uvlong cprt; /* bus address of command */
219
220 uvlong boot; /* boot address */
221
222 Done done;
223 Tx tx;
224 Rx sm;
225 Rx bg;
226 Stats *stats;
227 uvlong statsprt;
228
229 Rendez rxrendez;
230 Rendez txrendez;
231
232 int msi;
233 ulong linkstat;
234 ulong nrdma;
235 } Ctlr;
236
237 static Ctlr *ctlrs;
238
/* pci capability ids */
enum {
	PciCapPMG	= 0x01,		/* power management */
	PciCapAGP	= 0x02,
	PciCapVPD	= 0x03,		/* vital product data */
	PciCapSID	= 0x04,		/* slot id */
	PciCapMSI	= 0x05,
	PciCapCHS	= 0x06,		/* compact pci hot swap */
	PciCapPCIX	= 0x07,
	PciCapHTC	= 0x08,		/* hypertransport irq conf */
	PciCapVND	= 0x09,		/* vendor specific information */
	PciCapHSW	= 0x0C,		/* hot swap */
	PciCapPCIe	= 0x10,
	PciCapMSIX	= 0x11,
};

/* pcie extended capability ids */
enum {
	PcieAERC	= 1,
	PcieVC,
	PcieSNC,
	PciePBC,
};

/* offsets within the AERC capability */
enum {
	AercCCR		= 0x18,		/* control register */
};

/* offsets within the pcie capability, and a field mask */
enum {
	PcieCTL		= 8,
	PcieLCR		= 12,
	PcieMRD		= 0x7000,	/* maximum read size */
};
270
271 static int
272 pcicap(Pcidev *p, int cap)
273 {
274 int i, c, off;
275
276 pcicapdbg("pcicap: %x:%d\n", p->vid, p->did);
277 off = 0x34; /* 0x14 for cardbus */
278 for(i = 48; i--; ){
279 pcicapdbg("\t" "loop %x\n", off);
280 off = pcicfgr8(p, off);
281 pcicapdbg("\t" "pcicfgr8 %x\n", off);
282 if(off < 0x40)
283 break;
284 off &= ~3;
285 c = pcicfgr8(p, off);
286 pcicapdbg("\t" "pcicfgr8 %x\n", c);
287 if(c == 0xff)
288 break;
289 if(c == cap)
290 return off;
291 off++;
292 }
293 return 0;
294 }
295
296 /*
297 * this function doesn't work because pcicgr32 doesn't have access
298 * to the pcie extended configuration space.
299 */
300 static int
301 pciecap(Pcidev *p, int cap)
302 {
303 uint off, i;
304
305 off = 0x100;
306 while(((i = pcicfgr32(p, off))&0xffff) != cap){
307 off = i >> 20;
308 print("pciecap offset = %ud\n", off);
309 if(off < 0x100 || off >= 4*KiB - 1)
310 return 0;
311 }
312 print("pciecap found = %ud\n", off);
313 return off;
314 }
315
316 static int
317 setpcie(Pcidev *p)
318 {
319 int off;
320
321 /* set 4k writes */
322 off = pcicap(p, PciCapPCIe);
323 if(off < 64)
324 return -1;
325 off += PcieCTL;
326 pcicfgw16(p, off, (pcicfgr16(p, off) & ~PcieMRD) | 5<<12);
327 return 0;
328 }
329
330 static int
331 whichfw(Pcidev *p)
332 {
333 char *s;
334 int i, off, lanes, ecrc;
335 ulong cap;
336
337 /* check the number of configured lanes. */
338 off = pcicap(p, PciCapPCIe);
339 if(off < 64)
340 return -1;
341 off += PcieLCR;
342 cap = pcicfgr16(p, off);
343 lanes = (cap>>4) & 0x3f;
344
345 /* check AERC register. we need it on. */
346 off = pciecap(p, PcieAERC);
347 print("%d offset\n", off);
348 cap = 0;
349 if(off != 0){
350 off += AercCCR;
351 cap = pcicfgr32(p, off);
352 print("%lud cap\n", cap);
353 }
354 ecrc = (cap>>4) & 0xf;
355 /* if we don't like the aerc, kick it here. */
356
357 print("m10g %d lanes; ecrc=%d; ", lanes, ecrc);
358 if(s = getconf("myriforce")){
359 i = atoi(s);
360 if(i != 4*KiB || i != 2*KiB)
361 i = 2*KiB;
362 print("fw=%d [forced]\n", i);
363 return i;
364 }
365 if(lanes <= 4){
366 print("fw = 4096 [lanes]\n");
367 return 4*KiB;
368 }
369 if(ecrc & 10){
370 print("fw = 4096 [ecrc set]\n");
371 return 4*KiB;
372 }
373 print("fw = 4096 [default]\n");
374 return 4*KiB;
375 }
376
377 static int
378 parseeprom(Ctlr *c)
379 {
380 int i, j, k, l, bits;
381 char *s;
382
383 dprint("m10g eprom:\n");
384 s = c->eprom;
385 bits = 3;
386 for(i = 0; s[i] && i < Epromsz; i++){
387 l = strlen(s+i);
388 dprint("\t%s\n", s+i);
389 if(strncmp(s+i, "MAC=", 4) == 0 && l == 4+12+5){
390 bits ^= 1;
391 j = i + 4;
392 for(k = 0; k < 6; k++)
393 c->ra[k] = strtoul(s+j+3*k, 0, 16);
394 }else if(strncmp(s+i, "SN=", 3) == 0){
395 bits ^= 2;
396 c->serial = atoi(s+i+3);
397 }
398 i += l;
399 }
400 if(bits)
401 return -1;
402 return 0;
403 }
404
405 static ushort
406 pbit16(ushort i)
407 {
408 ushort j;
409 uchar *p;
410
411 p = (uchar*)&j;
412 p[1] = i;
413 p[0] = i>>8;
414 return j;
415 }
416
417 static ushort
418 gbit16(uchar i[2])
419 {
420 ushort j;
421
422 j = i[1];
423 j |= i[0]<<8;
424 return j;
425 }
426
427 static ulong
428 pbit32(ulong i)
429 {
430 ulong j;
431 uchar *p;
432
433 p = (uchar*)&j;
434 p[3] = i;
435 p[2] = i>>8;
436 p[1] = i>>16;
437 p[0] = i>>24;
438 return j;
439 }
440
441 static ulong
442 gbit32(uchar i[4])
443 {
444 ulong j;
445
446 j = i[3];
447 j |= i[2]<<8;
448 j |= i[1]<<16;
449 j |= i[0]<<24;
450 return j;
451 }
452
453 static void
454 prepcmd(ulong *cmd, int i)
455 {
456 while(i-- > 0)
457 cmd[i] = pbit32(cmd[i]);
458 }
459
460 /*
461 * the command looks like this (int 32bit integers)
462 * cmd type
463 * addr (low)
464 * addr (high)
465 * pad (used for dma testing)
466 * response (high)
467 * response (low)
468 * 40 byte = 5 int pad.
469 */
470
471 ulong
472 cmd(Ctlr *c, int type, uvlong data)
473 {
474 ulong buf[16], i;
475 Cmd *cmd;
476
477 qlock(&c->cmdl);
478 cmd = c->cmd;
479 cmd->i[1] = Noconf;
480 memset(buf, 0, sizeof buf);
481 buf[0] = type;
482 buf[1] = data;
483 buf[2] = data >> 32;
484 buf[4] = c->cprt >> 32;
485 buf[5] = c->cprt;
486 prepcmd(buf, 6);
487 coherence();
488 memmove(c->ram + Cmdoff, buf, sizeof buf);
489
490 if(waserror())
491 nexterror();
492 for(i = 0; i < 15; i++){
493 if(cmd->i[1] != Noconf){
494 poperror();
495 i = gbit32(cmd->c);
496 qunlock(&c->cmdl);
497 if(cmd->i[1] != 0)
498 dprint("[%lux]", i);
499 return i;
500 }
501 tsleep(&up->sleep, return0, 0, 1);
502 }
503 qunlock(&c->cmdl);
504 iprint("m10g: cmd timeout [%ux %ux] cmd=%d\n",
505 cmd->i[0], cmd->i[1], type);
506 error(Etimeout);
507 return ~0; /* silence! */
508 }
509
510 ulong
511 maccmd(Ctlr *c, int type, uchar *m)
512 {
513 ulong buf[16], i;
514 Cmd *cmd;
515
516 qlock(&c->cmdl);
517 cmd = c->cmd;
518 cmd->i[1] = Noconf;
519 memset(buf, 0, sizeof buf);
520 buf[0] = type;
521 buf[1] = m[0]<<24 | m[1]<<16 | m[2]<<8 | m[3];
522 buf[2] = m[4]<< 8 | m[5];
523 buf[4] = c->cprt >> 32;
524 buf[5] = c->cprt;
525 prepcmd(buf, 6);
526 coherence();
527 memmove(c->ram + Cmdoff, buf, sizeof buf);
528
529 if(waserror())
530 nexterror();
531 for(i = 0; i < 15; i++){
532 if(cmd->i[1] != Noconf){
533 poperror();
534 i = gbit32(cmd->c);
535 qunlock(&c->cmdl);
536 if(cmd->i[1] != 0)
537 dprint("[%lux]", i);
538 return i;
539 }
540 tsleep(&up->sleep, return0, 0, 1);
541 }
542 qunlock(&c->cmdl);
543 iprint("m10g: maccmd timeout [%ux %ux] cmd=%d\n",
544 cmd->i[0], cmd->i[1], type);
545 error(Etimeout);
546 return ~0; /* silence! */
547 }
548
/*
 * dma test directions; multiplied into the length word by
 * dmatestcmd().  remove this garbage after testing.
 */
enum {
	DMAread		= 0x10000,
	DMAwrite	= 0x1,
};
554
555 ulong
556 dmatestcmd(Ctlr *c, int type, uvlong addr, int len)
557 {
558 ulong buf[16], i;
559
560 memset(buf, 0, sizeof buf);
561 memset(c->cmd, Noconf, sizeof *c->cmd);
562 buf[0] = Cdmatest;
563 buf[1] = addr;
564 buf[2] = addr >> 32;
565 buf[3] = len * type;
566 buf[4] = c->cprt >> 32;
567 buf[5] = c->cprt;
568 prepcmd(buf, 6);
569 coherence();
570 memmove(c->ram + Cmdoff, buf, sizeof buf);
571
572 if(waserror())
573 nexterror();
574 for(i = 0; i < 15; i++){
575 if(c->cmd->i[1] != Noconf){
576 i = gbit32(c->cmd->c);
577 if(i == 0)
578 error(Eio);
579 poperror();
580 return i;
581 }
582 tsleep(&up->sleep, return0, 0, 5);
583 }
584 error(Etimeout);
585 return ~0; /* silence! */
586 }
587
588 ulong
589 rdmacmd(Ctlr *c, int on)
590 {
591 ulong buf[16], i;
592
593 memset(buf, 0, sizeof buf);
594 c->cmd->i[0] = 0;
595 coherence();
596 buf[0] = c->cprt >> 32;
597 buf[1] = c->cprt;
598 buf[2] = Noconf;
599 buf[3] = c->cprt >> 32;
600 buf[4] = c->cprt;
601 buf[5] = on;
602 prepcmd(buf, 6);
603 memmove(c->ram + Rdmaoff, buf, sizeof buf);
604
605 if(waserror())
606 nexterror();
607 for(i = 0; i < 20; i++){
608 if(c->cmd->i[0] == Noconf){
609 poperror();
610 return gbit32(c->cmd->c);
611 }
612 tsleep(&up->sleep, return0, 0, 1);
613 }
614 error(Etimeout);
615 iprint("m10g: rdmacmd timeout\n");
616 return ~0; /* silence! */
617 }
618
619 static int
620 loadfw(Ctlr *c, int *align)
621 {
622 ulong *f, *s, sz;
623 int i;
624
625 if((*align = whichfw(c->pcidev)) == 4*KiB){
626 f = (ulong*)fw4k;
627 sz = sizeof fw4k;
628 }else{
629 f = (ulong*)fw2k;
630 sz = sizeof fw2k;
631 }
632
633 s = (ulong*)(c->ram + Fwoffset);
634 for(i = 0; i < sz / 4; i++)
635 s[i] = f[i];
636 return sz & ~3;
637 }
638
639 static int
640 bootfw(Ctlr *c)
641 {
642 int i, sz, align;
643 ulong buf[16];
644 Cmd* cmd;
645
646 if((sz = loadfw(c, &align)) == 0)
647 return 0;
648 dprint("bootfw %d bytes ... ", sz);
649 cmd = c->cmd;
650
651 memset(buf, 0, sizeof buf);
652 c->cmd->i[0] = 0;
653 coherence();
654 buf[0] = c->cprt >> 32; /* upper dma target address */
655 buf[1] = c->cprt; /* lower */
656 buf[2] = Noconf; /* writeback */
657 buf[3] = Fwoffset + 8,
658 buf[4] = sz - 8;
659 buf[5] = 8;
660 buf[6] = 0;
661 prepcmd(buf, 7);
662 coherence();
663 memmove(c->ram + Fwsubmt, buf, sizeof buf);
664
665 for(i = 0; i < 20; i++){
666 if(cmd->i[0] == Noconf)
667 break;
668 delay(1);
669 }
670 dprint("[%lux %lux]", gbit32(cmd->c), gbit32(cmd->c+4));
671 if(i == 20){
672 print("m10g: cannot load fw\n");
673 return -1;
674 }
675 dprint("\n");
676 c->tx.segsz = align;
677 return 0;
678 }
679
680 static int
681 kickthebaby(Pcidev *p, Ctlr *c)
682 {
683 /* don't kick the baby! */
684 ulong code;
685
686 pcicfgw8(p, 0x10 + c->boot, 0x3);
687 pcicfgw32(p, 0x18 + c->boot, 0xfffffff0);
688 code = pcicfgr32(p, 0x14 + c->boot);
689
690 dprint("reboot status = %lux\n", code);
691 if(code != 0xfffffff0)
692 return -1;
693 return 0;
694 }
695
696 typedef struct {
697 uchar len[4];
698 uchar type[4];
699 char version[128];
700 uchar globals[4];
701 uchar ramsz[4];
702 uchar specs[4];
703 uchar specssz[4];
704 } Fwhdr;
705
706 enum {
707 Tmx = 0x4d582020,
708 Tpcie = 0x70636965,
709 Teth = 0x45544820,
710 Tmcp0 = 0x4d435030,
711 };
712
713 static char *
714 fwtype(ulong type)
715 {
716 switch(type){
717 case Tmx:
718 return "mx";
719 case Tpcie:
720 return "PCIe";
721 case Teth:
722 return "eth";
723 case Tmcp0:
724 return "mcp0";
725 }
726 return "*GOK*";
727 }
728
729 static int
730 chkfw(Ctlr *c)
731 {
732 ulong off, type;
733 Fwhdr *h;
734
735 off = gbit32(c->ram+0x3c);
736 dprint("firmware %lux\n", off);
737 if((off&3) || off + sizeof *h > c->ramsz){
738 print("!m10g: bad firmware %lux\n", off);
739 return -1;
740 }
741 h = (Fwhdr*)(c->ram + off);
742 type = gbit32(h->type);
743 dprint("\t" "type %s\n", fwtype(type));
744 dprint("\t" "vers %s\n", h->version);
745 dprint("\t" "ramsz %lux\n", gbit32(h->ramsz));
746 if(type != Teth){
747 print("!m10g: bad card type %s\n", fwtype(type));
748 return -1;
749 }
750
751 return bootfw(c) || rdmacmd(c, 0);
752 }
753
754 static int
755 reset(Ether *e, Ctlr *c)
756 {
757 ulong i, sz;
758
759 if(waserror()){
760 print("m10g: reset error\n");
761 nexterror();
762 return -1;
763 }
764
765 chkfw(c);
766 cmd(c, Creset, 0);
767
768 cmd(c, CSintrqsz, c->done.n * sizeof *c->done.entry);
769 cmd(c, CSintrqdma, c->done.busaddr);
770 c->irqack = (ulong*)(c->ram + cmd(c, CGirqackoff, 0));
771 /* required only if we're not doing msi? */
772 c->irqdeass = (ulong*)(c->ram + cmd(c, CGirqdeassoff, 0));
773 /* this is the driver default, why fiddle with this? */
774 c->coal = (ulong*)(c->ram + cmd(c, CGcoaloff, 0));
775 *c->coal = pbit32(25);
776
777 dprint("dma stats:\n");
778 rdmacmd(c, 1);
779 sz = c->tx.segsz;
780 i = dmatestcmd(c, DMAread, c->done.busaddr, sz);
781 print("\t" "read: %lud MB/s\n", ((i>>16)*sz*2) / (i&0xffff));
782 i = dmatestcmd(c, DMAwrite, c->done.busaddr, sz);
783 print("\t" "write: %lud MB/s\n", ((i>>16)*sz*2) / (i&0xffff));
784 i = dmatestcmd(c, DMAwrite|DMAread, c->done.busaddr, sz);
785 print("\t" "r/w: %lud MB/s\n", ((i>>16)*sz*2*2) / (i&0xffff));
786 memset(c->done.entry, 0, c->done.n * sizeof *c->done.entry);
787
788 maccmd(c, CSmac, c->ra);
789 // cmd(c, Cnopromisc, 0);
790 cmd(c, Cenablefc, 0);
791 e->maxmtu = Maxmtu;
792 cmd(c, CSmtu, e->maxmtu);
793 dprint("CSmtu %d...\n", e->maxmtu);
794
795 poperror();
796 return 0;
797 }
798
799 static void
800 ctlrfree(Ctlr *c)
801 {
802 /* free up all the Block*s, too */
803 free(c->tx.host);
804 free(c->sm.host);
805 free(c->bg.host);
806 free(c->cmd);
807 free(c->done.entry);
808 free(c->stats);
809 free(c);
810 }
811
812 static int
813 setmem(Pcidev *p, Ctlr *c)
814 {
815 ulong i;
816 uvlong raddr;
817 Done *d;
818 void *mem;
819
820 c->tx.segsz = 2048;
821 c->ramsz = 2*MiB - (2*48*KiB + 32*KiB) - 0x100;
822 if(c->ramsz > p->mem[0].size)
823 return -1;
824
825 raddr = p->mem[0].bar & ~0x0F;
826 mem = vmap(raddr, p->mem[0].size);
827 if(mem == nil){
828 print("m10g: can't map %8.8lux\n", p->mem[0].bar);
829 return -1;
830 }
831 dprint("%llux <- vmap(mem[0].size = %ux)\n", raddr, p->mem[0].size);
832 c->port = raddr;
833 c->ram = mem;
834 c->cmd = malign(sizeof *c->cmd);
835 c->cprt = PCIWADDR(c->cmd);
836
837 d = &c->done;
838 d->n = Maxslots;
839 d->m = d->n - 1;
840 i = d->n * sizeof *d->entry;
841 d->entry = malign(i);
842 memset(d->entry, 0, i);
843 d->busaddr = PCIWADDR(d->entry);
844
845 c->stats = malign(sizeof *c->stats);
846 memset(c->stats, 0, sizeof *c->stats);
847 c->statsprt = PCIWADDR(c->stats);
848
849 memmove(c->eprom, c->ram + c->ramsz - Epromsz, Epromsz-2);
850 return setpcie(p) || parseeprom(c);
851 }
852
853 static Rx*
854 whichrx(Ctlr *c, int sz)
855 {
856 if(sz <= smpool.size)
857 return &c->sm;
858 return &c->bg;
859 }
860
861 static Block*
862 balloc(Rx* rx)
863 {
864 Block *b;
865
866 ilock(rx->pool);
867 if((b = rx->pool->head) != nil){
868 rx->pool->head = b->next;
869 b->next = nil;
870 rx->pool->n--;
871 }
872 iunlock(rx->pool);
873 return b;
874 }
875
876 static void
877 smbfree(Block *b)
878 {
879 Bpool *p;
880
881 b->rp = b->wp = (uchar*)PGROUND((uintptr)b->base);
882 p = &smpool;
883 ilock(p);
884 b->next = p->head;
885 p->head = b;
886 p->n++;
887 p->cnt++;
888 iunlock(p);
889 }
890
891 static void
892 bgbfree(Block *b)
893 {
894 Bpool *p;
895
896 b->rp = b->wp = (uchar*)PGROUND((uintptr)b->base);
897 p = &bgpool;
898 ilock(p);
899 b->next = p->head;
900 p->head = b;
901 p->n++;
902 p->cnt++;
903 iunlock(p);
904 }
905
906 static void
907 replenish(Rx *rx)
908 {
909 ulong buf[16], i, idx, e;
910 Bpool *p;
911 Block *b;
912
913 p = rx->pool;
914 if(p->n < 8)
915 return;
916 memset(buf, 0, sizeof buf);
917 e = (rx->i - rx->cnt) & ~7;
918 e += rx->n;
919 while(p->n >= 8 && e){
920 idx = rx->cnt & rx->m;
921 for(i = 0; i < 8; i++){
922 b = balloc(rx);
923 buf[i*2] = pbit32((uvlong)PCIWADDR(b->wp) >> 32);
924 buf[i*2+1] = pbit32(PCIWADDR(b->wp));
925 rx->host[idx+i] = b;
926 assert(b);
927 }
928 memmove(rx->lanai + 2*idx, buf, sizeof buf);
929 coherence();
930 rx->cnt += 8;
931 e -= 8;
932 }
933 if(e && p->n > 7+1)
934 print("should panic? pool->n = %d\n", p->n);
935 }
936
937 /*
938 * future:
939 * if (c->mtrr >= 0) {
940 * c->tx.wcfifo = c->ram+0x200000;
941 * c->sm.wcfifo = c->ram+0x300000;
942 * c->bg.wcfifo = c->ram+0x340000;
943 * }
944 */
945
/*
 * smallest power of two >= j (1 for j <= 1).
 * the result saturates at 1<<30: shifting further would overflow
 * a 32-bit int, and the search could then never terminate.
 */
static int
nextpow(int j)
{
	int i;

	for(i = 0; i < 30 && j > (1 << i); i++)
		;
	return 1 << i;
}
955
956 static void*
957 emalign(int sz)
958 {
959 void *v;
960
961 v = malign(sz);
962 if(v == nil)
963 error(Enomem);
964 memset(v, 0, sz);
965 return v;
966 }
967
968 static void
969 open0(Ether *e, Ctlr *c)
970 {
971 Block *b;
972 int i, sz, entries;
973
974 entries = cmd(c, CGsendrgsz, 0) / sizeof *c->tx.lanai;
975 c->tx.lanai = (Send*)(c->ram + cmd(c, CGsendoff, 0));
976 c->tx.host = emalign(entries * sizeof *c->tx.host);
977 c->tx.bring = emalign(entries * sizeof *c->tx.bring);
978 c->tx.n = entries;
979 c->tx.m = entries-1;
980
981 entries = cmd(c, CGrxrgsz, 0)/8;
982 c->sm.pool = &smpool;
983 cmd(c, CSsmallsz, c->sm.pool->size);
984 c->sm.lanai = (ulong*)(c->ram + cmd(c, CGsmallrxoff, 0));
985 c->sm.n = entries;
986 c->sm.m = entries-1;
987 c->sm.host = emalign(entries * sizeof *c->sm.host);
988
989 c->bg.pool = &bgpool;
990 c->bg.pool->size = nextpow(2 + e->maxmtu); /* 2-byte alignment pad */
991 cmd(c, CSbigsz, c->bg.pool->size);
992 c->bg.lanai = (ulong*)(c->ram + cmd(c, CGbigrxoff, 0));
993 c->bg.n = entries;
994 c->bg.m = entries-1;
995 c->bg.host = emalign(entries * sizeof *c->bg.host);
996
997 sz = c->sm.pool->size + BY2PG;
998 for(i = 0; i < c->sm.n; i++){
999 if((b = allocb(sz)) == 0)
1000 break;
1001 b->free = smbfree;
1002 freeb(b);
1003 }
1004 sz = c->bg.pool->size + BY2PG;
1005 for(i = 0; i < c->bg.n; i++){
1006 if((b = allocb(sz)) == 0)
1007 break;
1008 b->free = bgbfree;
1009 freeb(b);
1010 }
1011
1012 cmd(c, CSstatsdma, c->statsprt);
1013 c->linkstat = ~0;
1014 c->nrdma = 15;
1015
1016 cmd(c, Cetherup, 0);
1017 }
1018
1019 static Block*
1020 nextblock(Ctlr *c)
1021 {
1022 uint i;
1023 ushort l, k;
1024 Block *b;
1025 Done *d;
1026 Rx *rx;
1027 Slot *s;
1028 Slotparts *sp;
1029
1030 d = &c->done;
1031 s = d->entry;
1032 i = d->i & d->m;
1033 sp = (Slotparts *)(s + i);
1034 l = sp->len;
1035 if(l == 0)
1036 return 0;
1037 k = sp->cksum;
1038 s[i] = 0;
1039 d->i++;
1040 l = gbit16((uchar*)&l);
1041 //dprint("nextb: i=%d l=%d\n", d->i, l);
1042 rx = whichrx(c, l);
1043 if(rx->i >= rx->cnt){
1044 iprint("m10g: overrun\n");
1045 return 0;
1046 }
1047 i = rx->i & rx->m;
1048 b = rx->host[i];
1049 rx->host[i] = 0;
1050 if(b == 0){
1051 iprint("m10g: error rx to no block. memory is hosed.\n");
1052 return 0;
1053 }
1054 rx->i++;
1055
1056 b->flag |= Bipck|Btcpck|Budpck;
1057 b->checksum = k;
1058 b->rp += 2;
1059 b->wp += 2+l;
1060 b->lim = b->wp; /* lie like a dog. */
1061 return b;
1062 }
1063
1064 static int
1065 rxcansleep(void *v)
1066 {
1067 Ctlr *c;
1068 Slot *s;
1069 Slotparts *sp;
1070 Done *d;
1071
1072 c = v;
1073 d = &c->done;
1074 s = c->done.entry;
1075 sp = (Slotparts *)(s + (d->i & d->m));
1076 if(sp->len != 0)
1077 return -1;
1078 c->irqack[0] = pbit32(3);
1079 return 0;
1080 }
1081
1082 static void
1083 m10rx(void *v)
1084 {
1085 Ether *e;
1086 Ctlr *c;
1087 Block *b;
1088
1089 e = v;
1090 c = e->ctlr;
1091 for(;;){
1092 replenish(&c->sm);
1093 replenish(&c->bg);
1094 sleep(&c->rxrendez, rxcansleep, c);
1095 while(b = nextblock(c))
1096 etheriq(e, b, 1);
1097 }
1098 }
1099
1100 static void
1101 txcleanup(Tx *tx, ulong n)
1102 {
1103 Block *b;
1104 uint j, l, m;
1105
1106 if(tx->npkt == n)
1107 return;
1108 l = 0;
1109 m = tx->m;
1110 /*
1111 * if tx->cnt == tx->i, yet tx->npkt == n-1, we just
1112 * caught ourselves and myricom card updating.
1113 */
1114 for(;; tx->cnt++){
1115 j = tx->cnt & tx->m;
1116 if(b = tx->bring[j]){
1117 tx->bring[j] = 0;
1118 tx->nbytes += BLEN(b);
1119 freeb(b);
1120 if(++tx->npkt == n)
1121 return;
1122 }
1123 if(tx->cnt == tx->i)
1124 return;
1125 if(l++ == m){
1126 iprint("tx ovrun: %lud %lud\n", n, tx->npkt);
1127 return;
1128 }
1129 }
1130 }
1131
1132 static int
1133 txcansleep(void *v)
1134 {
1135 Ctlr *c;
1136
1137 c = v;
1138 if(c->tx.cnt != c->tx.i && c->tx.npkt != gbit32(c->stats->txcnt))
1139 return -1;
1140 return 0;
1141 }
1142
1143 static void
1144 txproc(void *v)
1145 {
1146 Ether *e;
1147 Ctlr *c;
1148 Tx *tx;
1149
1150 e = v;
1151 c = e->ctlr;
1152 tx = &c->tx;
1153 for(;;){
1154 sleep(&c->txrendez, txcansleep, c);
1155 txcleanup(tx, gbit32(c->stats->txcnt));
1156 }
1157 }
1158
1159 static void
1160 submittx(Tx *tx, int n)
1161 {
1162 Send *l, *h;
1163 int i0, i, m;
1164
1165 m = tx->m;
1166 i0 = tx->i & m;
1167 l = tx->lanai;
1168 h = tx->host;
1169 for(i = n-1; i >= 0; i--)
1170 memmove(l+(i + i0 & m), h+(i + i0 & m), sizeof *h);
1171 tx->i += n;
1172 // coherence();
1173 }
1174
1175 static int
1176 nsegments(Block *b, int segsz)
1177 {
1178 uintptr bus, end, slen, len;
1179 int i;
1180
1181 bus = PCIWADDR(b->rp);
1182 i = 0;
1183 for(len = BLEN(b); len; len -= slen){
1184 end = bus + segsz & ~(segsz-1);
1185 slen = end - bus;
1186 if(slen > len)
1187 slen = len;
1188 bus += slen;
1189 i++;
1190 }
1191 return i;
1192 }
1193
1194 static void
1195 m10gtransmit(Ether *e)
1196 {
1197 ushort slen;
1198 ulong i, cnt, rdma, nseg, count, end, bus, len, segsz;
1199 uchar flags;
1200 Block *b;
1201 Ctlr *c;
1202 Send *s, *s0, *s0m8;
1203 Tx *tx;
1204
1205 c = e->ctlr;
1206 tx = &c->tx;
1207 segsz = tx->segsz;
1208
1209 qlock(tx);
1210 count = 0;
1211 s = tx->host + (tx->i & tx->m);
1212 cnt = tx->cnt;
1213 s0 = tx->host + (cnt & tx->m);
1214 s0m8 = tx->host + ((cnt - 8) & tx->m);
1215 i = tx->i;
1216 for(; s >= s0 || s < s0m8; i += nseg){
1217 if((b = qget(e->oq)) == nil)
1218 break;
1219 flags = SFfirst|SFnotso;
1220 if((len = BLEN(b)) < 1520)
1221 flags |= SFsmall;
1222 rdma = nseg = nsegments(b, segsz);
1223 bus = PCIWADDR(b->rp);
1224 for(; len; len -= slen){
1225 end = (bus + segsz) & ~(segsz-1);
1226 slen = end - bus;
1227 if(slen > len)
1228 slen = len;
1229 s->low = pbit32(bus);
1230 s->len = pbit16(slen);
1231 s->nrdma = rdma;
1232 s->flags = flags;
1233
1234 bus += slen;
1235 if(++s == tx->host + tx->n)
1236 s = tx->host;
1237 count++;
1238 flags &= ~SFfirst;
1239 rdma = 1;
1240 }
1241 tx->bring[(i + nseg - 1) & tx->m] = b;
1242 if(1 || count > 0){
1243 submittx(tx, count);
1244 count = 0;
1245 cnt = tx->cnt;
1246 s0 = tx->host + (cnt & tx->m);
1247 s0m8 = tx->host + ((cnt - 8) & tx->m);
1248 }
1249 }
1250 qunlock(tx);
1251 }
1252
1253 static void
1254 checkstats(Ether *e, Ctlr *c, Stats *s)
1255 {
1256 ulong i;
1257
1258 if(s->updated == 0)
1259 return;
1260
1261 i = gbit32(s->linkstat);
1262 if(c->linkstat != i){
1263 e->link = i;
1264 if(c->linkstat = i)
1265 dprint("m10g: link up\n");
1266 else
1267 dprint("m10g: link down\n");
1268 }
1269 i = gbit32(s->nrdma);
1270 if(i != c->nrdma){
1271 dprint("m10g: rdma timeout %ld\n", i);
1272 c->nrdma = i;
1273 }
1274 }
1275
1276 static void
1277 waitintx(Ctlr *c)
1278 {
1279 int i;
1280
1281 for(i = 0; i < 1024*1024; i++){
1282 if(c->stats->valid == 0)
1283 break;
1284 coherence();
1285 }
1286 }
1287
1288 static void
1289 m10ginterrupt(Ureg *, void *v)
1290 {
1291 Ether *e;
1292 Ctlr *c;
1293
1294 e = v;
1295 c = e->ctlr;
1296
1297 if(c->state != Runed || c->stats->valid == 0) /* not ready for us? */
1298 return;
1299
1300 if(c->stats->valid & 1)
1301 wakeup(&c->rxrendez);
1302 if(gbit32(c->stats->txcnt) != c->tx.npkt)
1303 wakeup(&c->txrendez);
1304 if(c->msi == 0)
1305 *c->irqdeass = 0;
1306 else
1307 c->stats->valid = 0;
1308 waitintx(c);
1309 checkstats(e, c, c->stats);
1310 c->irqack[1] = pbit32(3);
1311 }
1312
1313 static void
1314 m10gattach(Ether *e)
1315 {
1316 Ctlr *c;
1317 char name[12];
1318
1319 dprint("m10gattach\n");
1320
1321 qlock(e->ctlr);
1322 c = e->ctlr;
1323 if(c->state != Detached){
1324 qunlock(c);
1325 return;
1326 }
1327 if(waserror()){
1328 c->state = Detached;
1329 qunlock(c);
1330 nexterror();
1331 }
1332 reset(e, c);
1333 c->state = Attached;
1334 open0(e, c);
1335 if(c->kprocs == 0){
1336 c->kprocs++;
1337 snprint(name, sizeof name, "#l%drxproc", e->ctlrno);
1338 kproc(name, m10rx, e);
1339 snprint(name, sizeof name, "#l%dtxproc", e->ctlrno);
1340 kproc(name, txproc, e);
1341 }
1342 c->state = Runed;
1343 qunlock(c);
1344 poperror();
1345 }
1346
1347 static int
1348 m10gdetach(Ctlr *c)
1349 {
1350 dprint("m10gdetach\n");
1351 // reset(e->ctlr);
1352 vunmap(c->ram, c->pcidev->mem[0].size);
1353 ctlrfree(c);
1354 return -1;
1355 }
1356
1357 static int
1358 lstcount(Block *b)
1359 {
1360 int i;
1361
1362 i = 0;
1363 for(; b; b = b->next)
1364 i++;
1365 return i;
1366 }
1367
1368 static long
1369 m10gifstat(Ether *e, void *v, long n, ulong off)
1370 {
1371 int l;
1372 char *p;
1373 Ctlr *c;
1374 Stats s;
1375
1376 c = e->ctlr;
1377 p = malloc(READSTR+1);
1378 l = 0;
1379 /* no point in locking this because this is done via dma. */
1380 memmove(&s, c->stats, sizeof s);
1381
1382 // l +=
1383 snprint(p+l, READSTR,
1384 "txcnt = %lud\n" "linkstat = %lud\n" "dlink = %lud\n"
1385 "derror = %lud\n" "drunt = %lud\n" "doverrun = %lud\n"
1386 "dnosm = %lud\n" "dnobg = %lud\n" "nrdma = %lud\n"
1387 "txstopped = %ud\n" "down = %ud\n" "updated = %ud\n"
1388 "valid = %ud\n\n"
1389 "tx pkt = %lud\n" "tx bytes = %lld\n"
1390 "tx cnt = %ud\n" "tx n = %ud\n" "tx i = %ud\n"
1391 "sm cnt = %ud\n" "sm i = %ud\n" "sm n = %ud\n"
1392 "sm lst = %ud\n"
1393 "bg cnt = %ud\n" "bg i = %ud\n" "bg n = %ud\n"
1394 "bg lst = %ud\n"
1395 "segsz = %lud\n" "coal = %lud\n",
1396 gbit32(s.txcnt), gbit32(s.linkstat), gbit32(s.dlink),
1397 gbit32(s.derror), gbit32(s.drunt), gbit32(s.doverrun),
1398 gbit32(s.dnosm), gbit32(s.dnobg), gbit32(s.nrdma),
1399 s.txstopped, s.down, s.updated, s.valid,
1400 c->tx.npkt, c->tx.nbytes,
1401 c->tx.cnt, c->tx.n, c->tx.i,
1402 c->sm.cnt, c->sm.i, c->sm.pool->n, lstcount(c->sm.pool->head),
1403 c->bg.cnt, c->bg.i, c->bg.pool->n, lstcount(c->bg.pool->head),
1404 c->tx.segsz, gbit32((uchar*)c->coal));
1405
1406 n = readstr(off, v, n, p);
1407 free(p);
1408 return n;
1409 }
1410
1411 //static void
1412 //summary(Ether *e)
1413 //{
1414 // char *buf;
1415 // int n, i, j;
1416 //
1417 // if(e == 0)
1418 // return;
1419 // buf = malloc(n=250);
1420 // if(buf == 0)
1421 // return;
1422 //
1423 // snprint(buf, n, "oq\n");
1424 // qsummary(e->oq, buf+3, n-3-1);
1425 // iprint("%s", buf);
1426 //
1427 // if(e->f) for(i = 0; e->f[i]; i++){
1428 // j = snprint(buf, n, "f%d %d\n", i, e->f[i]->type);
1429 // qsummary(e->f[i]->in, buf+j, n-j-1);
1430 // print("%s", buf);
1431 // }
1432 //
1433 // free(buf);
1434 //}
1435
1436 static void
1437 rxring(Ctlr *c)
1438 {
1439 Done *d;
1440 Slot *s;
1441 Slotparts *sp;
1442 int i;
1443
1444 d = &c->done;
1445 s = d->entry;
1446 for(i = 0; i < d->n; i++) {
1447 sp = (Slotparts *)(s + i);
1448 if(sp->len)
1449 iprint("s[%d] = %d\n", i, sp->len);
1450 }
1451 }
1452
1453 enum {
1454 CMdebug,
1455 CMcoal,
1456 CMwakeup,
1457 CMtxwakeup,
1458 CMqsummary,
1459 CMrxring,
1460 };
1461
1462 static Cmdtab ctab[] = {
1463 CMdebug, "debug", 2,
1464 CMcoal, "coal", 2,
1465 CMwakeup, "wakeup", 1,
1466 CMtxwakeup, "txwakeup", 1,
1467 // CMqsummary, "q", 1,
1468 CMrxring, "rxring", 1,
1469 };
1470
1471 static long
1472 m10gctl(Ether *e, void *v, long n)
1473 {
1474 int i;
1475 Cmdbuf *c;
1476 Cmdtab *t;
1477
1478 dprint("m10gctl\n");
1479 if(e->ctlr == nil)
1480 error(Enonexist);
1481
1482 c = parsecmd(v, n);
1483 if(waserror()){
1484 free(c);
1485 nexterror();
1486 }
1487 t = lookupcmd(c, ctab, nelem(ctab));
1488 switch(t->index){
1489 case CMdebug:
1490 debug = (strcmp(c->f[1], "on") == 0);
1491 break;
1492 case CMcoal:
1493 i = atoi(c->f[1]);
1494 if(i < 0 || i > 1000)
1495 error(Ebadarg);
1496 *((Ctlr*)e->ctlr)->coal = pbit32(i);
1497 break;
1498 case CMwakeup:
1499 wakeup(&((Ctlr*)e->ctlr)->rxrendez); /* you're kidding, right? */
1500 break;
1501 case CMtxwakeup:
1502 wakeup(&((Ctlr*)e->ctlr)->txrendez); /* you're kidding, right? */
1503 break;
1504 // case CMqsummary:
1505 // summary(e);
1506 // break;
1507 case CMrxring:
1508 rxring(e->ctlr);
1509 break;
1510 default:
1511 error(Ebadarg);
1512 }
1513 free(c);
1514 poperror();
1515 return n;
1516 }
1517
1518 static void
1519 m10gshutdown(Ether *e)
1520 {
1521 dprint("m10gshutdown\n");
1522 m10gdetach(e->ctlr);
1523 }
1524
1525 static void
1526 m10gpromiscuous(void *v, int on)
1527 {
1528 Ether *e;
1529 int i;
1530
1531 dprint("m10gpromiscuous\n");
1532 e = v;
1533 if(on)
1534 i = Cpromisc;
1535 else
1536 i = Cnopromisc;
1537 cmd(e->ctlr, i, 0);
1538 }
1539
1540 static int mcctab[] = { CSleavemc, CSjoinmc };
1541 static char *mcntab[] = { "leave", "join" };
1542
1543 static void
1544 m10gmulticast(void *v, uchar *ea, int on)
1545 {
1546 Ether *e;
1547 int i;
1548
1549 dprint("m10gmulticast\n");
1550 e = v;
1551 if((i = maccmd(e->ctlr, mcctab[on], ea)) != 0)
1552 print("m10g: can't %s %E: %d\n", mcntab[on], ea, i);
1553 }
1554
1555 static void
1556 m10gpci(void)
1557 {
1558 Pcidev *p;
1559 Ctlr *t, *c;
1560
1561 t = 0;
1562 for(p = 0; p = pcimatch(p, 0x14c1, 0x0008); ){
1563 c = malloc(sizeof *c);
1564 if(c == nil)
1565 continue;
1566 c->pcidev = p;
1567 c->id = p->did<<16 | p->vid;
1568 c->boot = pcicap(p, PciCapVND);
1569 // kickthebaby(p, c);
1570 pcisetbme(p);
1571 if(setmem(p, c) == -1){
1572 print("m10g failed\n");
1573 free(c);
1574 /* cleanup */
1575 continue;
1576 }
1577 if(t)
1578 t->next = c;
1579 else
1580 ctlrs = c;
1581 t = c;
1582 }
1583 }
1584
1585 static int
1586 m10gpnp(Ether *e)
1587 {
1588 Ctlr *c;
1589
1590 if(ctlrs == nil)
1591 m10gpci();
1592
1593 for(c = ctlrs; c != nil; c = c->next)
1594 if(c->active)
1595 continue;
1596 else if(e->port == 0 || e->port == c->port)
1597 break;
1598 if(c == nil)
1599 return -1;
1600 c->active = 1;
1601
1602 e->ctlr = c;
1603 e->port = c->port;
1604 e->irq = c->pcidev->intl;
1605 e->tbdf = c->pcidev->tbdf;
1606 e->mbps = 10000;
1607 memmove(e->ea, c->ra, Eaddrlen);
1608
1609 e->attach = m10gattach;
1610 e->detach = m10gshutdown;
1611 e->transmit = m10gtransmit;
1612 e->interrupt = m10ginterrupt;
1613 e->ifstat = m10gifstat;
1614 e->ctl = m10gctl;
1615 // e->power = m10gpower;
1616 e->shutdown = m10gshutdown;
1617
1618 e->arg = e;
1619 e->promiscuous = m10gpromiscuous;
1620 e->multicast = m10gmulticast;
1621
1622 return 0;
1623 }
1624
1625 void
1626 etherm10glink(void)
1627 {
1628 addethercard("m10g", m10gpnp);
1629 }
Cache object: 099ff302f1596f4d053100ef0e7db2d0
|