FreeBSD/Linux Kernel Cross Reference
sys/dev/e1000/if_igb.c
1 /******************************************************************************
2
3 Copyright (c) 2001-2008, Intel Corporation
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35 #ifdef HAVE_KERNEL_OPTION_HEADERS
36 #include "opt_device_polling.h"
37 #include "opt_inet.h"
38 #endif
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/bus.h>
43 #include <sys/endian.h>
44 #include <sys/kernel.h>
45 #include <sys/kthread.h>
46 #include <sys/malloc.h>
47 #include <sys/mbuf.h>
48 #include <sys/module.h>
49 #include <sys/rman.h>
50 #include <sys/socket.h>
51 #include <sys/sockio.h>
52 #include <sys/sysctl.h>
53 #include <sys/taskqueue.h>
54 #include <sys/eventhandler.h>
55 #include <sys/pcpu.h>
56 #ifdef IGB_TIMESYNC
57 #include <sys/ioccom.h>
58 #include <sys/time.h>
59 #endif
60 #include <machine/bus.h>
61 #include <machine/resource.h>
62
63 #include <net/bpf.h>
64 #include <net/ethernet.h>
65 #include <net/if.h>
66 #include <net/if_arp.h>
67 #include <net/if_dl.h>
68 #include <net/if_media.h>
69
70 #include <net/if_types.h>
71 #include <net/if_vlan_var.h>
72
73 #include <netinet/in_systm.h>
74 #include <netinet/in.h>
75 #include <netinet/if_ether.h>
76 #include <netinet/ip.h>
77 #include <netinet/ip6.h>
78 #include <netinet/tcp.h>
79 #include <netinet/tcp_lro.h>
80 #include <netinet/udp.h>
81
82 #include <machine/in_cksum.h>
83 #include <dev/pci/pcivar.h>
84 #include <dev/pci/pcireg.h>
85
86 #include "e1000_api.h"
87 #include "e1000_82575.h"
88 #include "if_igb.h"
89
90 /*********************************************************************
91 * Set this to one to display debug statistics
92 *********************************************************************/
int igb_display_debug_stats = 0;	/* non-zero: print extra stats via sysctl handler */

/*********************************************************************
 *  Driver version: human-readable string appended to the probe message.
 *********************************************************************/
char igb_driver_version[] = "version - 1.4.1";
99
100
101 /*********************************************************************
102 * PCI Device ID Table
103 *
104 * Used by probe to select devices to load on
105 * Last field stores an index into e1000_strings
106 * Last entry must be all 0s
107 *
108 * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
109 *********************************************************************/
110
static igb_vendor_info_t igb_vendor_info_array[] =
{
	/* 82575 family */
	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	/* 82576 family */
	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry: vendor_id == 0 terminates the probe loop */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *  Indexed by the last field of igb_vendor_info_array entries.
 *********************************************************************/

static char *igb_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};
132
133 /*********************************************************************
134 * Function prototypes
135 *********************************************************************/
136 static int igb_probe(device_t);
137 static int igb_attach(device_t);
138 static int igb_detach(device_t);
139 static int igb_shutdown(device_t);
140 static int igb_suspend(device_t);
141 static int igb_resume(device_t);
142 static void igb_start(struct ifnet *);
143 static void igb_start_locked(struct tx_ring *, struct ifnet *ifp);
144 static int igb_ioctl(struct ifnet *, u_long, caddr_t);
145 static void igb_watchdog(struct adapter *);
146 static void igb_init(void *);
147 static void igb_init_locked(struct adapter *);
148 static void igb_stop(void *);
149 static void igb_media_status(struct ifnet *, struct ifmediareq *);
150 static int igb_media_change(struct ifnet *);
151 static void igb_identify_hardware(struct adapter *);
152 static int igb_allocate_pci_resources(struct adapter *);
153 static int igb_allocate_msix(struct adapter *);
154 static int igb_allocate_legacy(struct adapter *);
155 static int igb_setup_msix(struct adapter *);
156 static void igb_free_pci_resources(struct adapter *);
157 static void igb_local_timer(void *);
158 static int igb_hardware_init(struct adapter *);
159 static void igb_setup_interface(device_t, struct adapter *);
160 static int igb_allocate_queues(struct adapter *);
161 static void igb_configure_queues(struct adapter *);
162
163 static int igb_allocate_transmit_buffers(struct tx_ring *);
164 static void igb_setup_transmit_structures(struct adapter *);
165 static void igb_setup_transmit_ring(struct tx_ring *);
166 static void igb_initialize_transmit_units(struct adapter *);
167 static void igb_free_transmit_structures(struct adapter *);
168 static void igb_free_transmit_buffers(struct tx_ring *);
169
170 static int igb_allocate_receive_buffers(struct rx_ring *);
171 static int igb_setup_receive_structures(struct adapter *);
172 static int igb_setup_receive_ring(struct rx_ring *);
173 static void igb_initialize_receive_units(struct adapter *);
174 static void igb_free_receive_structures(struct adapter *);
175 static void igb_free_receive_buffers(struct rx_ring *);
176
177 static void igb_enable_intr(struct adapter *);
178 static void igb_disable_intr(struct adapter *);
179 static void igb_update_stats_counters(struct adapter *);
180 static bool igb_txeof(struct tx_ring *);
181 static bool igb_rxeof(struct rx_ring *, int);
182 #ifndef __NO_STRICT_ALIGNMENT
183 static int igb_fixup_rx(struct rx_ring *);
184 #endif
185 static void igb_rx_checksum(u32, struct mbuf *);
186 static int igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
187 static bool igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
188 static void igb_set_promisc(struct adapter *);
189 static void igb_disable_promisc(struct adapter *);
190 static void igb_set_multi(struct adapter *);
191 static void igb_print_hw_stats(struct adapter *);
192 static void igb_update_link_status(struct adapter *);
193 static int igb_get_buf(struct rx_ring *, int);
194
195 #ifdef IGB_HW_VLAN_SUPPORT
196 static void igb_register_vlan(void *, struct ifnet *, u16);
197 static void igb_unregister_vlan(void *, struct ifnet *, u16);
198 #endif
199
200 static int igb_xmit(struct tx_ring *, struct mbuf **);
201 static int igb_dma_malloc(struct adapter *, bus_size_t,
202 struct igb_dma_alloc *, int);
203 static void igb_dma_free(struct adapter *, struct igb_dma_alloc *);
204 static void igb_print_debug_info(struct adapter *);
205 static void igb_print_nvm_info(struct adapter *);
206 static int igb_is_valid_ether_addr(u8 *);
207 static int igb_sysctl_stats(SYSCTL_HANDLER_ARGS);
208 static int igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
209 /* Management and WOL Support */
210 static void igb_init_manageability(struct adapter *);
211 static void igb_release_manageability(struct adapter *);
212 static void igb_get_hw_control(struct adapter *);
213 static void igb_release_hw_control(struct adapter *);
214 static void igb_enable_wakeup(device_t);
215
216 #ifdef IGB_TIMESYNC
217 /* Precision Time sync support */
218 static int igb_tsync_init(struct adapter *);
219 static void igb_tsync_disable(struct adapter *);
220 #endif
221
222 static int igb_irq_fast(void *);
223 static void igb_add_rx_process_limit(struct adapter *, const char *,
224 const char *, int *, int);
225 static void igb_handle_rxtx(void *context, int pending);
226 static void igb_handle_tx(void *context, int pending);
227 static void igb_handle_rx(void *context, int pending);
228 static void igb_handle_link(void *context, int pending);
229
230 /* These are MSIX only irq handlers */
231 static void igb_msix_rx(void *);
232 static void igb_msix_tx(void *);
233 static void igb_msix_link(void *);
234
235 /* Adaptive Interrupt Moderation */
236 static void igb_update_aim(struct rx_ring *);
237
238 /*********************************************************************
239 * FreeBSD Device Interface Entry Points
240 *********************************************************************/
241
/* newbus device method table: maps bus callbacks onto driver entry points */
static device_method_t igb_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, igb_probe),
	DEVMETHOD(device_attach, igb_attach),
	DEVMETHOD(device_detach, igb_detach),
	DEVMETHOD(device_shutdown, igb_shutdown),
	DEVMETHOD(device_suspend, igb_suspend),
	DEVMETHOD(device_resume, igb_resume),
	{0, 0}	/* terminator */
};

static driver_t igb_driver = {
	"igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
/* Register on the PCI bus; module depends on pci and ether being loaded */
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);
261
262 /*********************************************************************
263 * Tunable default values.
264 *********************************************************************/
265
/* Descriptor defaults: ring sizes, overridable via loader tunables */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);

/*
** These parameters are used in Adaptive
** Interrupt Moderation. The value is set
** into EITR and controls the interrupt
** frequency. They can be modified but
** be careful in tuning them.
*/
static int igb_enable_aim = TRUE;
TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
static int igb_low_latency = IGB_LOW_LATENCY;
TUNABLE_INT("hw.igb.low_latency", &igb_low_latency);
static int igb_ave_latency = IGB_AVE_LATENCY;
TUNABLE_INT("hw.igb.ave_latency", &igb_ave_latency);
static int igb_bulk_latency = IGB_BULK_LATENCY;
TUNABLE_INT("hw.igb.bulk_latency", &igb_bulk_latency);

/*
** IF YOU CHANGE THESE: be sure and change IGB_MSIX_VEC in
** if_igb.h to match. These can be autoconfigured if set to
** 0, it will then be based on number of cpus.
*/
static int igb_tx_queues = 1;
static int igb_rx_queues = 1;
TUNABLE_INT("hw.igb.tx_queues", &igb_tx_queues);
TUNABLE_INT("hw.igb.rx_queues", &igb_rx_queues);

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);

/* Flow control setting - default to none */
static int igb_fc_setting = 0;
TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);

/*
 * Should the driver do LRO on the RX end
 * this can be toggled on the fly, but the
 * interface must be reset (down/up) for it
 * to take effect.
 */
static int igb_enable_lro = 1;
TUNABLE_INT("hw.igb.enable_lro", &igb_enable_lro);

extern int mp_ncpus;	/* number of CPUs, used for queue autoconfig */
316 /*********************************************************************
317 * Device identification routine
318 *
319 * igb_probe determines if the driver should be loaded on
320 * adapter based on PCI vendor/device id of the adapter.
321 *
322 * return BUS_PROBE_DEFAULT on success, positive on failure
323 *********************************************************************/
324
325 static int
326 igb_probe(device_t dev)
327 {
328 char adapter_name[60];
329 uint16_t pci_vendor_id = 0;
330 uint16_t pci_device_id = 0;
331 uint16_t pci_subvendor_id = 0;
332 uint16_t pci_subdevice_id = 0;
333 igb_vendor_info_t *ent;
334
335 INIT_DEBUGOUT("igb_probe: begin");
336
337 pci_vendor_id = pci_get_vendor(dev);
338 if (pci_vendor_id != IGB_VENDOR_ID)
339 return (ENXIO);
340
341 pci_device_id = pci_get_device(dev);
342 pci_subvendor_id = pci_get_subvendor(dev);
343 pci_subdevice_id = pci_get_subdevice(dev);
344
345 ent = igb_vendor_info_array;
346 while (ent->vendor_id != 0) {
347 if ((pci_vendor_id == ent->vendor_id) &&
348 (pci_device_id == ent->device_id) &&
349
350 ((pci_subvendor_id == ent->subvendor_id) ||
351 (ent->subvendor_id == PCI_ANY_ID)) &&
352
353 ((pci_subdevice_id == ent->subdevice_id) ||
354 (ent->subdevice_id == PCI_ANY_ID))) {
355 sprintf(adapter_name, "%s %s",
356 igb_strings[ent->index],
357 igb_driver_version);
358 device_set_desc_copy(dev, adapter_name);
359 return (BUS_PROBE_DEFAULT);
360 }
361 ent++;
362 }
363
364 return (ENXIO);
365 }
366
367 /*********************************************************************
368 * Device initialization routine
369 *
370 * The attach entry point is called when the driver is being loaded.
371 * This routine identifies the type of hardware, allocates all resources
372 * and initializes the hardware.
373 *
374 * return 0 on success, positive on failure
375 *********************************************************************/
376
static int
igb_attach(device_t dev)
{
	struct adapter	*adapter;
	int		error = 0;
	u16		eeprom_data;

	INIT_DEBUGOUT("igb_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff: per-device debug/stats handlers and tuning knobs */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_stats, "I", "Statistics");

	/*
	 * NOTE(review): the following expose the GLOBAL tunables (shared by
	 * all igb instances) under each device's sysctl tree.  Also,
	 * CTLTYPE_INT in the flags argument of SYSCTL_ADD_INT appears
	 * redundant (the macro fixes the type itself) — confirm intended.
	 */
	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
	    &igb_fc_setting, 0, "Flow Control");

	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, "enable_lro", CTLTYPE_INT|CTLFLAG_RW,
	    &igb_enable_lro, 0, "Large Receive Offload");

	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
	    &igb_enable_aim, 1, "Interrupt Moderation");

	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "low_latency", CTLTYPE_INT|CTLFLAG_RW,
	    &igb_low_latency, 1, "Low Latency");

	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "ave_latency", CTLTYPE_INT|CTLFLAG_RW,
	    &igb_ave_latency, 1, "Average Latency");

	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "bulk_latency", CTLTYPE_INT|CTLFLAG_RW,
	    &igb_bulk_latency, 1, "Bulk Latency");

	/* Watchdog/timer callout runs under the core mutex */
	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	igb_identify_hardware(adapter);

	/* Setup PCI resources */
	if (igb_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Sysctls for limiting the amount of work done in the taskqueue */
	igb_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    igb_rx_process_limit);

	/*
	 * Validate number of transmit and receive descriptors. It
	 * must not exceed hardware maximum, and must be multiple
	 * of E1000_DBA_ALIGN.  Fall back to defaults on bad tunables.
	 */
	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    IGB_DEFAULT_TXD, igb_txd);
		adapter->num_tx_desc = IGB_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = igb_txd;
	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    IGB_DEFAULT_RXD, igb_rxd);
		adapter->num_rx_desc = IGB_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = igb_rxd;

	/* Default PHY/autonegotiation configuration */
	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
	adapter->rx_buffer_len = 2048;

	/* Copper options */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

	/*
	** Allocate and Setup Queues
	*/
	if (igb_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in sleep state, call it again,
		** if it fails a second time its a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Initialize the hardware */
	if (igb_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		error = EIO;
		goto err_late;
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	** Configure Interrupts
	*/
	if (adapter->msix > 1) /* MSIX */
		error = igb_allocate_msix(adapter);
	else /* MSI or Legacy */
		error = igb_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/* Setup OS specific network interface */
	igb_setup_interface(dev, adapter);

	/* Initialize statistics */
	igb_update_stats_counters(adapter);

	/* Force a first link-state query */
	adapter->hw.mac.get_link_status = 1;
	igb_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Determine if we have to control management hardware */
	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

	/*
	 * Setup Wake-on-Lan
	 */
	/* APME bit in EEPROM is mapped to WUC.APME */
	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
	if (eeprom_data)
		adapter->wol = E1000_WUFC_MAG;

#ifdef IGB_HW_VLAN_SUPPORT
	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    igb_register_vlan, 0, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    igb_unregister_vlan, 0, EVENTHANDLER_PRI_FIRST);
#endif

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	INIT_DEBUGOUT("igb_attach: end");

	return (0);

err_late:
	/* Queues were allocated; tear them down before PCI resources */
	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);
	igb_release_hw_control(adapter);
err_pci:
	igb_free_pci_resources(adapter);
	IGB_CORE_LOCK_DESTROY(adapter);

	return (error);
}
600
601 /*********************************************************************
602 * Device removal routine
603 *
604 * The detach entry point is called when the driver is being removed.
605 * This routine stops the adapter and deallocates all the resources
606 * that were allocated for driver operation.
607 *
608 * return 0 on success, positive on failure
609 *********************************************************************/
610
static int
igb_detach(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	INIT_DEBUGOUT("igb_detach: begin");

	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}

	/* Mark in-detach first so the ioctl path becomes a no-op */
	IGB_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	igb_stop(adapter);
	IGB_CORE_UNLOCK(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	/* Give control back to firmware */
	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

	/* Arm Wake-on-LAN if it was enabled at attach time */
	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		igb_enable_wakeup(dev);
	}

#ifdef IGB_HW_VLAN_SUPPORT
	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
#endif

	/* Detach from the network stack before freeing anything it uses */
	ether_ifdetach(adapter->ifp);

	callout_drain(&adapter->timer);

	igb_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);

	IGB_CORE_LOCK_DESTROY(adapter);

	return (0);
}
665
666 /*********************************************************************
667 *
668 * Shutdown entry point
669 *
670 **********************************************************************/
671
672 static int
673 igb_shutdown(device_t dev)
674 {
675 return igb_suspend(dev);
676 }
677
678 /*
679 * Suspend/resume device methods.
680 */
static int
igb_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	IGB_CORE_LOCK(adapter);

	/* Quiesce the interface and hand control back to firmware */
	igb_stop(adapter);

	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

	/* Arm Wake-on-LAN (magic packet) if configured */
	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		igb_enable_wakeup(dev);
	}

	IGB_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}
703
static int
igb_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	IGB_CORE_LOCK(adapter);
	/* Re-initialize hardware, then re-enable manageability */
	igb_init_locked(adapter);
	igb_init_manageability(adapter);

	/* Restart transmission if the interface was up before suspend */
	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
		igb_start(ifp);

	IGB_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}
722
723
724 /*********************************************************************
725 * Transmit entry point
726 *
727 * igb_start is called by the stack to initiate a transmit.
728 * The driver will remain in this routine as long as there are
729 * packets to transmit and transmit resources are available.
730 * In case resources are not available stack is notified and
731 * the packet is requeued.
732 **********************************************************************/
733
/*
 * Drain the interface send queue onto one TX ring.
 * Caller must hold the ring's TX lock.
 */
static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct mbuf *m_head;

	IGB_TX_LOCK_ASSERT(txr);

	/* Only transmit when running, not flow-blocked, and link is up */
	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {

		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 * Encapsulation can modify our pointer, and or make it
		 * NULL on failure. In that event, we can't requeue.
		 */
		if (igb_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			/* Out of descriptors: requeue and stall the stack */
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_timer = IGB_TX_TIMEOUT;
	}
}
772
773 static void
774 igb_start(struct ifnet *ifp)
775 {
776 struct adapter *adapter = ifp->if_softc;
777 struct tx_ring *txr;
778 u32 queue = 0;
779
780 /*
781 ** This is really just here for testing
782 ** TX multiqueue, ultimately what is
783 ** needed is the flow support in the stack
784 ** and appropriate logic here to deal with
785 ** it. -jfv
786 */
787 if (adapter->num_tx_queues > 1)
788 queue = (curcpu % adapter->num_tx_queues);
789
790 txr = &adapter->tx_rings[queue];
791 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
792 IGB_TX_LOCK(txr);
793 igb_start_locked(txr, ifp);
794 IGB_TX_UNLOCK(txr);
795 }
796 }
797
798 /*********************************************************************
799 * Ioctl entry point
800 *
801 * igb_ioctl is called when the user wants to configure the
802 * interface.
803 *
804 * return 0 on success, positive on failure
805 **********************************************************************/
806
static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifaddr *ifa = (struct ifaddr *)data;
	int error = 0;

	/* Refuse all work once detach has begun */
	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting hardware takes a very long time
			 * and results in link renegotiation we only
			 * initialize the hardware only when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				IGB_CORE_LOCK(adapter);
				igb_init_locked(adapter);
				IGB_CORE_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		IGB_CORE_LOCK(adapter);
		/* 9234 is the hardware jumbo-frame ceiling used here */
		max_frame_size = 9234;
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			IGB_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		/* MTU change requires a full re-init */
		igb_init_locked(adapter);
		IGB_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		IGB_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				/* Only promisc/allmulti changed: no re-init */
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					igb_disable_promisc(adapter);
					igb_set_promisc(adapter);
				}
			} else
				igb_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				igb_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		IGB_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			IGB_CORE_LOCK(adapter);
			igb_disable_intr(adapter);
			igb_set_multi(adapter);
			igb_enable_intr(adapter);
			IGB_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		IGB_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			IGB_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		IGB_CORE_UNLOCK(adapter);
		/* FALLTHROUGH: set and get share ifmedia_ioctl() */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		/* mask holds only the capability bits being toggled */
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
#ifdef IGB_HW_VLAN_SUPPORT
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
#endif
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			igb_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

#ifdef IGB_TIMESYNC
	/*
	** IOCTL support for Precision Time (IEEE 1588) Support
	*/
	case IGB_TIMESYNC_READTS:
	    {
		u32 rx_ctl, tx_ctl;
		struct igb_tsync_read *tdata;

		tdata = (struct igb_tsync_read *) ifr->ifr_data;

		/* Optionally sample system time and the 64-bit SYSTIM clock */
		if (tdata->read_current_time) {
			getnanotime(&tdata->system_time);
			tdata->network_time = E1000_READ_REG(&adapter->hw,
			    E1000_SYSTIML);
			tdata->network_time |=
			    (u64)E1000_READ_REG(&adapter->hw,
			    E1000_SYSTIMH ) << 32;
		}

		rx_ctl = E1000_READ_REG(&adapter->hw, E1000_TSYNCRXCTL);
		tx_ctl = E1000_READ_REG(&adapter->hw, E1000_TSYNCTXCTL);

		/* Bit 0 of the ctl registers indicates a valid timestamp */
		if (rx_ctl & 0x1) {
			u32 tmp;
			unsigned char *tmp_cp;

			tdata->rx_valid = 1;
			tdata->rx_stamp = E1000_READ_REG(&adapter->hw, E1000_RXSTMPL);
			tdata->rx_stamp |= (u64)E1000_READ_REG(&adapter->hw,
			    E1000_RXSTMPH) << 32;

			/* Source ID is split across RXSATRL/RXSATRH */
			tmp = E1000_READ_REG(&adapter->hw, E1000_RXSATRL);
			tmp_cp = (unsigned char *) &tmp;
			tdata->srcid[0] = tmp_cp[0];
			tdata->srcid[1] = tmp_cp[1];
			tdata->srcid[2] = tmp_cp[2];
			tdata->srcid[3] = tmp_cp[3];
			tmp = E1000_READ_REG(&adapter->hw, E1000_RXSATRH);
			tmp_cp = (unsigned char *) &tmp;
			tdata->srcid[4] = tmp_cp[0];
			tdata->srcid[5] = tmp_cp[1];
			tdata->seqid = tmp >> 16;
			tdata->seqid = htons(tdata->seqid);
		} else
			tdata->rx_valid = 0;

		if (tx_ctl & 0x1) {
			tdata->tx_valid = 1;
			tdata->tx_stamp = E1000_READ_REG(&adapter->hw, E1000_TXSTMPL);
			tdata->tx_stamp |= (u64) E1000_READ_REG(&adapter->hw,
			    E1000_TXSTMPH) << 32;
		} else
			tdata->tx_valid = 0;

		return (0);
	    }
#endif /* IGB_TIMESYNC */

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}
1002
1003 /*********************************************************************
1004 * Watchdog timer:
1005 *
1006 * This routine is called from the local timer every second.
1007 * As long as transmit descriptors are being cleaned the value
1008 * is non-zero and we do nothing. Reaching 0 indicates a tx hang
1009 * and we then reset the device.
1010 *
1011 **********************************************************************/
1012
1013 static void
1014 igb_watchdog(struct adapter *adapter)
1015 {
1016 struct tx_ring *txr = adapter->tx_rings;
1017 bool tx_hang = FALSE;
1018
1019 IGB_CORE_LOCK_ASSERT(adapter);
1020
1021 /*
1022 ** The timer is set to 5 every time start() queues a packet.
1023 ** Then txeof keeps resetting it as long as it cleans at
1024 ** least one descriptor.
1025 ** Finally, anytime all descriptors are clean the timer is
1026 ** set to 0.
1027 **
1028 ** With TX Multiqueue we need to check every queue's timer,
1029 ** if any time out we do the reset.
1030 */
1031 for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
1032 IGB_TX_LOCK(txr);
1033 if (txr->watchdog_timer == 0 ||
1034 (--txr->watchdog_timer)) {
1035 IGB_TX_UNLOCK(txr);
1036 continue;
1037 } else {
1038 tx_hang = TRUE;
1039 IGB_TX_UNLOCK(txr);
1040 break;
1041 }
1042 }
1043 if (tx_hang == FALSE)
1044 return;
1045
1046 /* If we are in this routine because of pause frames, then
1047 * don't reset the hardware.
1048 */
1049 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
1050 E1000_STATUS_TXOFF) {
1051 txr = adapter->tx_rings; /* reset pointer */
1052 for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
1053 IGB_TX_LOCK(txr);
1054 txr->watchdog_timer = IGB_TX_TIMEOUT;
1055 IGB_TX_UNLOCK(txr);
1056 }
1057 return;
1058 }
1059
1060 if (e1000_check_for_link(&adapter->hw) == 0)
1061 device_printf(adapter->dev, "watchdog timeout -- resetting\n");
1062
1063 for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
1064 device_printf(adapter->dev, "Queue(%d) tdh = %d, tdt = %d\n",
1065 i, E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
1066 E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
1067 device_printf(adapter->dev, "Queue(%d) desc avail = %d,"
1068 " Next Desc to Clean = %d\n", i, txr->tx_avail,
1069 txr->next_to_clean);
1070 }
1071
1072 adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1073 adapter->watchdog_events++;
1074
1075 igb_init_locked(adapter);
1076 }
1077
1078 /*********************************************************************
1079 * Init entry point
1080 *
1081 * This routine is used in two ways. It is used by the stack as
1082 * init entry point in network interface structure. It is also used
1083 * by the driver as a hw/sw initialization routine to get to a
1084 * consistent state.
1085 *
 *  Note: despite the historical comment, this routine is void; on
 *  failure it logs via device_printf and aborts the initialization.
1087 **********************************************************************/
1088
1089 static void
1090 igb_init_locked(struct adapter *adapter)
1091 {
1092 struct rx_ring *rxr = adapter->rx_rings;
1093 struct tx_ring *txr = adapter->tx_rings;
1094 struct ifnet *ifp = adapter->ifp;
1095 device_t dev = adapter->dev;
1096 u32 pba = 0;
1097
1098 INIT_DEBUGOUT("igb_init: begin");
1099
1100 IGB_CORE_LOCK_ASSERT(adapter);
1101
1102 igb_stop(adapter);
1103
1104 /*
1105 * Packet Buffer Allocation (PBA)
1106 * Writing PBA sets the receive portion of the buffer
1107 * the remainder is used for the transmit buffer.
1108 */
1109 if (adapter->hw.mac.type == e1000_82575) {
1110 INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
1111 pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
1112 E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
1113 }
1114
1115 /* Get the latest mac address, User can use a LAA */
1116 bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1117 ETHER_ADDR_LEN);
1118
1119 /* Put the address into the Receive Address Array */
1120 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1121
1122 /* Initialize the hardware */
1123 if (igb_hardware_init(adapter)) {
1124 device_printf(dev, "Unable to initialize the hardware\n");
1125 return;
1126 }
1127 igb_update_link_status(adapter);
1128
1129 E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1130
1131 #ifndef IGB_HW_VLAN_SUPPORT
1132 /* Vlan's enabled but HW Filtering off */
1133 if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1134 u32 ctrl;
1135 ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1136 ctrl |= E1000_CTRL_VME;
1137 E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1138 }
1139 #endif
1140
1141 /* Set hardware offload abilities */
1142 ifp->if_hwassist = 0;
1143 if (ifp->if_capenable & IFCAP_TXCSUM)
1144 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1145 if (ifp->if_capenable & IFCAP_TSO4)
1146 ifp->if_hwassist |= CSUM_TSO;
1147
1148 /* Configure for OS presence */
1149 igb_init_manageability(adapter);
1150
1151 /* Prepare transmit descriptors and buffers */
1152 igb_setup_transmit_structures(adapter);
1153 igb_initialize_transmit_units(adapter);
1154
1155 /* Setup Multicast table */
1156 igb_set_multi(adapter);
1157
1158 /* Prepare receive descriptors and buffers */
1159 if (igb_setup_receive_structures(adapter)) {
1160 device_printf(dev, "Could not setup receive structures\n");
1161 igb_stop(adapter);
1162 return;
1163 }
1164 igb_initialize_receive_units(adapter);
1165
1166 /* Don't lose promiscuous settings */
1167 igb_set_promisc(adapter);
1168
1169 ifp->if_drv_flags |= IFF_DRV_RUNNING;
1170 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1171
1172 callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1173 e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1174
1175 if (adapter->msix > 1) /* Set up queue routing */
1176 igb_configure_queues(adapter);
1177
1178 /* Set default RX interrupt moderation */
1179 for (int i = 0; i < adapter->num_rx_queues; i++, rxr++) {
1180 E1000_WRITE_REG(&adapter->hw,
1181 E1000_EITR(rxr->msix), igb_ave_latency);
1182 rxr->eitr_setting = igb_ave_latency;
1183 }
1184
1185 /* Set TX interrupt rate & reset TX watchdog */
1186 for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
1187 E1000_WRITE_REG(&adapter->hw,
1188 E1000_EITR(txr->msix), igb_ave_latency);
1189 txr->watchdog_timer = FALSE;
1190 }
1191
1192 /* this clears any pending interrupts */
1193 E1000_READ_REG(&adapter->hw, E1000_ICR);
1194 igb_enable_intr(adapter);
1195 E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1196
1197 #ifdef IGB_TIMESYNC
1198 /* Initialize IEEE 1588 Time sync if available */
1199 if (adapter->hw.mac.type == e1000_82576)
1200 igb_tsync_init(adapter);
1201 #endif
1202
1203 /* Don't reset the phy next time init gets called */
1204 adapter->hw.phy.reset_disable = TRUE;
1205 }
1206
/*
 * Unlocked init entry point used by the stack; takes the core
 * lock and defers to igb_init_locked().
 */
static void
igb_init(void *arg)
{
	struct adapter	*sc = arg;

	IGB_CORE_LOCK(sc);
	igb_init_locked(sc);
	IGB_CORE_UNLOCK(sc);
}
1216
1217
1218 static void
1219 igb_handle_link(void *context, int pending)
1220 {
1221 struct adapter *adapter = context;
1222 struct ifnet *ifp;
1223
1224 ifp = adapter->ifp;
1225
1226 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1227 return;
1228
1229 IGB_CORE_LOCK(adapter);
1230 callout_stop(&adapter->timer);
1231 igb_update_link_status(adapter);
1232 callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1233 IGB_CORE_UNLOCK(adapter);
1234 }
1235
1236 static void
1237 igb_handle_rxtx(void *context, int pending)
1238 {
1239 struct adapter *adapter = context;
1240 struct tx_ring *txr = adapter->tx_rings;
1241 struct rx_ring *rxr = adapter->rx_rings;
1242 struct ifnet *ifp;
1243
1244 ifp = adapter->ifp;
1245
1246 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1247 if (igb_rxeof(rxr, adapter->rx_process_limit) != 0)
1248 taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1249 IGB_TX_LOCK(txr);
1250 igb_txeof(txr);
1251
1252 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1253 igb_start_locked(txr, ifp);
1254 IGB_TX_UNLOCK(txr);
1255 }
1256
1257 igb_enable_intr(adapter);
1258 }
1259
1260 static void
1261 igb_handle_rx(void *context, int pending)
1262 {
1263 struct rx_ring *rxr = context;
1264 struct adapter *adapter = rxr->adapter;
1265 struct ifnet *ifp = adapter->ifp;
1266
1267 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1268 if (igb_rxeof(rxr, adapter->rx_process_limit) != 0)
1269 /* More to clean, schedule another task */
1270 taskqueue_enqueue(adapter->tq, &rxr->rx_task);
1271
1272 }
1273
1274 static void
1275 igb_handle_tx(void *context, int pending)
1276 {
1277 struct tx_ring *txr = context;
1278 struct adapter *adapter = txr->adapter;
1279 struct ifnet *ifp = adapter->ifp;
1280
1281 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1282 IGB_TX_LOCK(txr);
1283 igb_txeof(txr);
1284 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1285 igb_start_locked(txr, ifp);
1286 IGB_TX_UNLOCK(txr);
1287 }
1288 }
1289
1290
1291 /*********************************************************************
1292 *
1293 * MSI/Legacy Deferred
1294 * Interrupt Service routine
1295 *
1296 *********************************************************************/
1297 static int
1298 igb_irq_fast(void *arg)
1299 {
1300 struct adapter *adapter = arg;
1301 uint32_t reg_icr;
1302
1303
1304 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1305
1306 /* Hot eject? */
1307 if (reg_icr == 0xffffffff)
1308 return FILTER_STRAY;
1309
1310 /* Definitely not our interrupt. */
1311 if (reg_icr == 0x0)
1312 return FILTER_STRAY;
1313
1314 if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1315 return FILTER_STRAY;
1316
1317 /*
1318 * Mask interrupts until the taskqueue is finished running. This is
1319 * cheap, just assume that it is needed. This also works around the
1320 * MSI message reordering errata on certain systems.
1321 */
1322 igb_disable_intr(adapter);
1323 taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1324
1325 /* Link status change */
1326 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1327 adapter->hw.mac.get_link_status = 1;
1328 taskqueue_enqueue(adapter->tq, &adapter->link_task);
1329 }
1330
1331 if (reg_icr & E1000_ICR_RXO)
1332 adapter->rx_overruns++;
1333 return FILTER_HANDLED;
1334 }
1335
1336
1337 /*********************************************************************
1338 *
1339 * MSIX TX Interrupt Service routine
1340 *
1341 **********************************************************************/
1342
1343 static void
1344 igb_msix_tx(void *arg)
1345 {
1346 struct tx_ring *txr = arg;
1347 struct adapter *adapter = txr->adapter;
1348 struct ifnet *ifp = adapter->ifp;
1349
1350 ++txr->tx_irq;
1351 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1352 IGB_TX_LOCK(txr);
1353 igb_txeof(txr);
1354 IGB_TX_UNLOCK(txr);
1355 taskqueue_enqueue(adapter->tq, &txr->tx_task);
1356 }
1357 /* Reenable this interrupt */
1358 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, txr->eims);
1359 return;
1360 }
1361
1362 /*********************************************************************
1363 *
1364 * MSIX RX Interrupt Service routine
1365 *
1366 **********************************************************************/
1367
1368 static void
1369 igb_msix_rx(void *arg)
1370 {
1371 struct rx_ring *rxr = arg;
1372 struct adapter *adapter = rxr->adapter;
1373 u32 more, loop = 5;
1374
1375 ++rxr->rx_irq;
1376 do {
1377 more = igb_rxeof(rxr, adapter->rx_process_limit);
1378 } while (loop-- || more != 0);
1379
1380 taskqueue_enqueue(adapter->tq, &rxr->rx_task);
1381
1382 /* Update interrupt rate */
1383 if (igb_enable_aim == TRUE)
1384 igb_update_aim(rxr);
1385
1386 /* Reenable this interrupt */
1387 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, rxr->eims);
1388 return;
1389 }
1390
1391
1392 /*
1393 ** Routine to adjust the RX EITR value based on traffic,
** it's a simple three-state model, but seems to help.
1395 **
1396 ** Note that the three EITR values are tuneable using
1397 ** sysctl in real time. The feature can be effectively
1398 ** nullified by setting them equal.
1399 */
1400 #define BULK_THRESHOLD 10000
1401 #define AVE_THRESHOLD 1600
1402
1403 static void
1404 igb_update_aim(struct rx_ring *rxr)
1405 {
1406 struct adapter *adapter = rxr->adapter;
1407 u32 olditr, newitr;
1408
1409 /* Update interrupt moderation based on traffic */
1410 olditr = rxr->eitr_setting;
1411 newitr = olditr;
1412
1413 /* Idle, don't change setting */
1414 if (rxr->bytes == 0)
1415 return;
1416
1417 if (olditr == igb_low_latency) {
1418 if (rxr->bytes > AVE_THRESHOLD)
1419 newitr = igb_ave_latency;
1420 } else if (olditr == igb_ave_latency) {
1421 if (rxr->bytes < AVE_THRESHOLD)
1422 newitr = igb_low_latency;
1423 else if (rxr->bytes > BULK_THRESHOLD)
1424 newitr = igb_bulk_latency;
1425 } else if (olditr == igb_bulk_latency) {
1426 if (rxr->bytes < BULK_THRESHOLD)
1427 newitr = igb_ave_latency;
1428 }
1429
1430 if (olditr != newitr) {
1431 /* Change interrupt rate */
1432 rxr->eitr_setting = newitr;
1433 E1000_WRITE_REG(&adapter->hw, E1000_EITR(rxr->me),
1434 newitr | (newitr << 16));
1435 }
1436
1437 rxr->bytes = 0;
1438 return;
1439 }
1440
1441
1442 /*********************************************************************
1443 *
1444 * MSIX Link Interrupt Service routine
1445 *
1446 **********************************************************************/
1447
1448 static void
1449 igb_msix_link(void *arg)
1450 {
1451 struct adapter *adapter = arg;
1452 u32 icr;
1453
1454 ++adapter->link_irq;
1455 icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1456 if (!(icr & E1000_ICR_LSC))
1457 goto spurious;
1458 adapter->hw.mac.get_link_status = 1;
1459 taskqueue_enqueue(adapter->tq, &adapter->link_task);
1460
1461 spurious:
1462 /* Rearm */
1463 E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1464 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1465 return;
1466 }
1467
1468
1469 /*********************************************************************
1470 *
1471 * Media Ioctl callback
1472 *
1473 * This routine is called whenever the user queries the status of
1474 * the interface using ifconfig.
1475 *
1476 **********************************************************************/
1477 static void
1478 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1479 {
1480 struct adapter *adapter = ifp->if_softc;
1481 u_char fiber_type = IFM_1000_SX;
1482
1483 INIT_DEBUGOUT("igb_media_status: begin");
1484
1485 IGB_CORE_LOCK(adapter);
1486 igb_update_link_status(adapter);
1487
1488 ifmr->ifm_status = IFM_AVALID;
1489 ifmr->ifm_active = IFM_ETHER;
1490
1491 if (!adapter->link_active) {
1492 IGB_CORE_UNLOCK(adapter);
1493 return;
1494 }
1495
1496 ifmr->ifm_status |= IFM_ACTIVE;
1497
1498 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1499 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1500 ifmr->ifm_active |= fiber_type | IFM_FDX;
1501 else {
1502 switch (adapter->link_speed) {
1503 case 10:
1504 ifmr->ifm_active |= IFM_10_T;
1505 break;
1506 case 100:
1507 ifmr->ifm_active |= IFM_100_TX;
1508 break;
1509 case 1000:
1510 ifmr->ifm_active |= IFM_1000_T;
1511 break;
1512 }
1513 if (adapter->link_duplex == FULL_DUPLEX)
1514 ifmr->ifm_active |= IFM_FDX;
1515 else
1516 ifmr->ifm_active |= IFM_HDX;
1517 }
1518 IGB_CORE_UNLOCK(adapter);
1519 }
1520
1521 /*********************************************************************
1522 *
 *  Media change callback
1524 *
1525 * This routine is called when the user changes speed/duplex using
1526 * media/mediopt option with ifconfig.
1527 *
1528 **********************************************************************/
1529 static int
1530 igb_media_change(struct ifnet *ifp)
1531 {
1532 struct adapter *adapter = ifp->if_softc;
1533 struct ifmedia *ifm = &adapter->media;
1534
1535 INIT_DEBUGOUT("igb_media_change: begin");
1536
1537 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1538 return (EINVAL);
1539
1540 IGB_CORE_LOCK(adapter);
1541 switch (IFM_SUBTYPE(ifm->ifm_media)) {
1542 case IFM_AUTO:
1543 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1544 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1545 break;
1546 case IFM_1000_LX:
1547 case IFM_1000_SX:
1548 case IFM_1000_T:
1549 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1550 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1551 break;
1552 case IFM_100_TX:
1553 adapter->hw.mac.autoneg = FALSE;
1554 adapter->hw.phy.autoneg_advertised = 0;
1555 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1556 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1557 else
1558 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1559 break;
1560 case IFM_10_T:
1561 adapter->hw.mac.autoneg = FALSE;
1562 adapter->hw.phy.autoneg_advertised = 0;
1563 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1564 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1565 else
1566 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1567 break;
1568 default:
1569 device_printf(adapter->dev, "Unsupported media type\n");
1570 }
1571
1572 /* As the speed/duplex settings my have changed we need to
1573 * reset the PHY.
1574 */
1575 adapter->hw.phy.reset_disable = FALSE;
1576
1577 igb_init_locked(adapter);
1578 IGB_CORE_UNLOCK(adapter);
1579
1580 return (0);
1581 }
1582
1583
1584 /*********************************************************************
1585 *
1586 * This routine maps the mbufs to Advanced TX descriptors.
1587 * used by the 82575 adapter.
1588 *
1589 **********************************************************************/
1590
1591 static int
1592 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1593 {
1594 struct adapter *adapter = txr->adapter;
1595 bus_dma_segment_t segs[IGB_MAX_SCATTER];
1596 bus_dmamap_t map;
1597 struct igb_buffer *tx_buffer, *tx_buffer_mapped;
1598 union e1000_adv_tx_desc *txd = NULL;
1599 struct mbuf *m_head;
1600 u32 olinfo_status = 0, cmd_type_len = 0;
1601 int nsegs, i, j, error, first, last = 0;
1602 u32 hdrlen = 0, offload = 0;
1603
1604 m_head = *m_headp;
1605
1606
1607 /* Set basic descriptor constants */
1608 cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
1609 cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1610 if (m_head->m_flags & M_VLANTAG)
1611 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1612
1613 /*
1614 * Force a cleanup if number of TX descriptors
1615 * available hits the threshold
1616 */
1617 if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
1618 igb_txeof(txr);
1619 /* Now do we at least have a minimal? */
1620 if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
1621 txr->no_desc_avail++;
1622 return (ENOBUFS);
1623 }
1624 }
1625
1626 /*
1627 * Map the packet for DMA.
1628 *
1629 * Capture the first descriptor index,
1630 * this descriptor will have the index
1631 * of the EOP which is the only one that
1632 * now gets a DONE bit writeback.
1633 */
1634 first = txr->next_avail_desc;
1635 tx_buffer = &txr->tx_buffers[first];
1636 tx_buffer_mapped = tx_buffer;
1637 map = tx_buffer->map;
1638
1639 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1640 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1641
1642 if (error == EFBIG) {
1643 struct mbuf *m;
1644
1645 m = m_defrag(*m_headp, M_DONTWAIT);
1646 if (m == NULL) {
1647 adapter->mbuf_alloc_failed++;
1648 m_freem(*m_headp);
1649 *m_headp = NULL;
1650 return (ENOBUFS);
1651 }
1652 *m_headp = m;
1653
1654 /* Try it again */
1655 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1656 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1657
1658 if (error == ENOMEM) {
1659 adapter->no_tx_dma_setup++;
1660 return (error);
1661 } else if (error != 0) {
1662 adapter->no_tx_dma_setup++;
1663 m_freem(*m_headp);
1664 *m_headp = NULL;
1665 return (error);
1666 }
1667 } else if (error == ENOMEM) {
1668 adapter->no_tx_dma_setup++;
1669 return (error);
1670 } else if (error != 0) {
1671 adapter->no_tx_dma_setup++;
1672 m_freem(*m_headp);
1673 *m_headp = NULL;
1674 return (error);
1675 }
1676
1677 /* Check again to be sure we have enough descriptors */
1678 if (nsegs > (txr->tx_avail - 2)) {
1679 txr->no_desc_avail++;
1680 bus_dmamap_unload(txr->txtag, map);
1681 return (ENOBUFS);
1682 }
1683 m_head = *m_headp;
1684
1685 /*
1686 * Set up the context descriptor:
1687 * used when any hardware offload is done.
1688 * This includes CSUM, VLAN, and TSO. It
1689 * will use the first descriptor.
1690 */
1691 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1692 if (igb_tso_setup(txr, m_head, &hdrlen)) {
1693 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1694 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1695 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1696 } else
1697 return (ENXIO);
1698 } else
1699 /* Do all other context descriptor setup */
1700 offload = igb_tx_ctx_setup(txr, m_head);
1701 if (offload == TRUE)
1702 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1703 #ifdef IGB_TIMESYNC
1704 if (offload == IGB_TIMESTAMP)
1705 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
1706 #endif
1707 /* Calculate payload length */
1708 olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1709 << E1000_ADVTXD_PAYLEN_SHIFT);
1710
1711 /* Set up our transmit descriptors */
1712 i = txr->next_avail_desc;
1713 for (j = 0; j < nsegs; j++) {
1714 bus_size_t seg_len;
1715 bus_addr_t seg_addr;
1716
1717 tx_buffer = &txr->tx_buffers[i];
1718 txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1719 seg_addr = segs[j].ds_addr;
1720 seg_len = segs[j].ds_len;
1721
1722 txd->read.buffer_addr = htole64(seg_addr);
1723 txd->read.cmd_type_len = htole32(
1724 adapter->txd_cmd | cmd_type_len | seg_len);
1725 txd->read.olinfo_status = htole32(olinfo_status);
1726 last = i;
1727 if (++i == adapter->num_tx_desc)
1728 i = 0;
1729 tx_buffer->m_head = NULL;
1730 tx_buffer->next_eop = -1;
1731 }
1732
1733 txr->next_avail_desc = i;
1734 txr->tx_avail -= nsegs;
1735
1736 tx_buffer->m_head = m_head;
1737 tx_buffer_mapped->map = tx_buffer->map;
1738 tx_buffer->map = map;
1739 bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1740
1741 /*
1742 * Last Descriptor of Packet
1743 * needs End Of Packet (EOP)
1744 * and Report Status (RS)
1745 */
1746 txd->read.cmd_type_len |=
1747 htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1748 /*
1749 * Keep track in the first buffer which
1750 * descriptor will be written back
1751 */
1752 tx_buffer = &txr->tx_buffers[first];
1753 tx_buffer->next_eop = last;
1754
1755 /*
1756 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1757 * that this frame is available to transmit.
1758 */
1759 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1760 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1761 E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1762 ++txr->tx_packets;
1763
1764 return (0);
1765
1766 }
1767
1768 static void
1769 igb_set_promisc(struct adapter *adapter)
1770 {
1771 struct ifnet *ifp = adapter->ifp;
1772 uint32_t reg_rctl;
1773
1774 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1775
1776 if (ifp->if_flags & IFF_PROMISC) {
1777 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1778 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1779 } else if (ifp->if_flags & IFF_ALLMULTI) {
1780 reg_rctl |= E1000_RCTL_MPE;
1781 reg_rctl &= ~E1000_RCTL_UPE;
1782 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1783 }
1784 }
1785
1786 static void
1787 igb_disable_promisc(struct adapter *adapter)
1788 {
1789 uint32_t reg_rctl;
1790
1791 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1792
1793 reg_rctl &= (~E1000_RCTL_UPE);
1794 reg_rctl &= (~E1000_RCTL_MPE);
1795 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1796 }
1797
1798
1799 /*********************************************************************
1800 * Multicast Update
1801 *
1802 * This routine is called whenever multicast address list is updated.
1803 *
1804 **********************************************************************/
1805
1806 static void
1807 igb_set_multi(struct adapter *adapter)
1808 {
1809 struct ifnet *ifp = adapter->ifp;
1810 struct ifmultiaddr *ifma;
1811 u32 reg_rctl = 0;
1812 u8 mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_ADDR_LEN];
1813
1814 int mcnt = 0;
1815
1816 IOCTL_DEBUGOUT("igb_set_multi: begin");
1817
1818 IF_ADDR_LOCK(ifp);
1819 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1820 if (ifma->ifma_addr->sa_family != AF_LINK)
1821 continue;
1822
1823 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1824 break;
1825
1826 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1827 &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1828 mcnt++;
1829 }
1830 IF_ADDR_UNLOCK(ifp);
1831
1832 if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1833 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1834 reg_rctl |= E1000_RCTL_MPE;
1835 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1836 } else
1837 e1000_update_mc_addr_list(&adapter->hw, mta,
1838 mcnt, 1, adapter->hw.mac.rar_entry_count);
1839 }
1840
1841
1842 /*********************************************************************
1843 * Timer routine
1844 *
1845 * This routine checks for link status and updates statistics.
1846 *
1847 **********************************************************************/
1848
1849 static void
1850 igb_local_timer(void *arg)
1851 {
1852 struct adapter *adapter = arg;
1853 struct ifnet *ifp = adapter->ifp;
1854
1855 IGB_CORE_LOCK_ASSERT(adapter);
1856
1857 igb_update_link_status(adapter);
1858 igb_update_stats_counters(adapter);
1859
1860 if (igb_display_debug_stats && ifp->if_drv_flags & IFF_DRV_RUNNING)
1861 igb_print_hw_stats(adapter);
1862
1863 /*
1864 * Each second we check the watchdog to
1865 * protect against hardware hangs.
1866 */
1867 igb_watchdog(adapter);
1868
1869 callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1870
1871 }
1872
/*
 * Reconcile the driver's cached link state with the hardware and
 * report any up/down transition to the network stack.  Called from
 * the local timer, the deferred link task, and init.
 */
static void
igb_update_link_status(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	struct ifnet *ifp = adapter->ifp;
	device_t dev = adapter->dev;
	struct tx_ring *txr = adapter->tx_rings;
	u32 link_check = 0;

	/* Get the cached link value or read for real */
	switch (hw->phy.media_type) {
	case e1000_media_type_copper:
		if (hw->mac.get_link_status) {
			/* Do the work to read phy */
			e1000_check_for_link(hw);
			/* shared code clears get_link_status once read */
			link_check = !hw->mac.get_link_status;
		} else
			link_check = TRUE;
		break;
	case e1000_media_type_fiber:
		/* Fiber link state comes from the STATUS register */
		e1000_check_for_link(hw);
		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
				 E1000_STATUS_LU);
		break;
	case e1000_media_type_internal_serdes:
		e1000_check_for_link(hw);
		link_check = adapter->hw.mac.serdes_has_link;
		break;
	default:
	case e1000_media_type_unknown:
		break;
	}

	/* Now we check if a transition has happened */
	if (link_check && (adapter->link_active == 0)) {
		/* Link came up: latch speed/duplex and notify the stack */
		e1000_get_speed_and_duplex(&adapter->hw, 
		    &adapter->link_speed, &adapter->link_duplex);
		if (bootverbose)
			device_printf(dev, "Link is up %d Mbps %s\n",
			    adapter->link_speed,
			    ((adapter->link_duplex == FULL_DUPLEX) ?
			    "Full Duplex" : "Half Duplex"));
		adapter->link_active = 1;
		ifp->if_baudrate = adapter->link_speed * 1000000;
		if_link_state_change(ifp, LINK_STATE_UP);
	} else if (!link_check && (adapter->link_active == 1)) {
		/* Link went down: clear cached state and notify */
		ifp->if_baudrate = adapter->link_speed = 0;
		adapter->link_duplex = 0;
		if (bootverbose)
			device_printf(dev, "Link is Down\n");
		adapter->link_active = 0;
		if_link_state_change(ifp, LINK_STATE_DOWN);
		/* Turn off watchdogs */
		for (int i = 0; i < adapter->num_tx_queues; i++, txr++)
			txr->watchdog_timer = FALSE;
	}
}
1930
1931 /*********************************************************************
1932 *
1933 * This routine disables all traffic on the adapter by issuing a
1934 * global reset on the MAC and deallocates TX/RX buffers.
1935 *
1936 **********************************************************************/
1937
1938 static void
1939 igb_stop(void *arg)
1940 {
1941 struct adapter *adapter = arg;
1942 struct ifnet *ifp = adapter->ifp;
1943
1944 IGB_CORE_LOCK_ASSERT(adapter);
1945
1946 INIT_DEBUGOUT("igb_stop: begin");
1947
1948 igb_disable_intr(adapter);
1949
1950 callout_stop(&adapter->timer);
1951
1952 /* Tell the stack that the interface is no longer active */
1953 ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1954
1955 #ifdef IGB_TIMESYNC
1956 /* Disable IEEE 1588 Time sync */
1957 if (adapter->hw.mac.type == e1000_82576)
1958 igb_tsync_disable(adapter);
1959 #endif
1960
1961 e1000_reset_hw(&adapter->hw);
1962 E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
1963 }
1964
1965
1966 /*********************************************************************
1967 *
1968 * Determine hardware revision.
1969 *
1970 **********************************************************************/
1971 static void
1972 igb_identify_hardware(struct adapter *adapter)
1973 {
1974 device_t dev = adapter->dev;
1975
1976 /* Make sure our PCI config space has the necessary stuff set */
1977 adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
1978 if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
1979 (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
1980 device_printf(dev, "Memory Access and/or Bus Master bits "
1981 "were not set!\n");
1982 adapter->hw.bus.pci_cmd_word |=
1983 (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
1984 pci_write_config(dev, PCIR_COMMAND,
1985 adapter->hw.bus.pci_cmd_word, 2);
1986 }
1987
1988 /* Save off the information about this board */
1989 adapter->hw.vendor_id = pci_get_vendor(dev);
1990 adapter->hw.device_id = pci_get_device(dev);
1991 adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
1992 adapter->hw.subsystem_vendor_id =
1993 pci_read_config(dev, PCIR_SUBVEND_0, 2);
1994 adapter->hw.subsystem_device_id =
1995 pci_read_config(dev, PCIR_SUBDEV_0, 2);
1996
1997 /* Do Shared Code Init and Setup */
1998 if (e1000_set_mac_type(&adapter->hw)) {
1999 device_printf(dev, "Setup init failure\n");
2000 return;
2001 }
2002 }
2003
2004 static int
2005 igb_allocate_pci_resources(struct adapter *adapter)
2006 {
2007 device_t dev = adapter->dev;
2008 int rid, error = 0;
2009
2010 rid = PCIR_BAR(0);
2011 adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2012 &rid, RF_ACTIVE);
2013 if (adapter->pci_mem == NULL) {
2014 device_printf(dev, "Unable to allocate bus resource: memory\n");
2015 return (ENXIO);
2016 }
2017 adapter->osdep.mem_bus_space_tag =
2018 rman_get_bustag(adapter->pci_mem);
2019 adapter->osdep.mem_bus_space_handle =
2020 rman_get_bushandle(adapter->pci_mem);
2021 adapter->hw.hw_addr = (uint8_t *)&adapter->osdep.mem_bus_space_handle;
2022
2023 /*
2024 ** Init the resource arrays
2025 */
2026 for (int i = 0; i < IGB_MSIX_VEC; i++) {
2027 adapter->rid[i] = i + 1; /* MSI/X RID starts at 1 */
2028 adapter->tag[i] = NULL;
2029 adapter->res[i] = NULL;
2030 }
2031
2032 adapter->num_tx_queues = 1; /* Defaults for Legacy or MSI */
2033 adapter->num_rx_queues = 1;
2034
2035 /* This will setup either MSI/X or MSI */
2036 adapter->msix = igb_setup_msix(adapter);
2037
2038 adapter->hw.back = &adapter->osdep;
2039
2040 return (error);
2041 }
2042
2043 /*********************************************************************
2044 *
2045 * Setup the Legacy or MSI Interrupt handler
2046 *
2047 **********************************************************************/
/*
 * Set up the legacy/MSI interrupt path: a single IRQ resource, a
 * fast filter handler (igb_irq_fast), and a taskqueue carrying the
 * deferred RX/TX and link tasks.  Returns 0 or an errno.
 */
static int
igb_allocate_legacy(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	int error;

	/* Turn off all interrupts */
	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);

	/* Legacy RID at 0 */
	if (adapter->msix == 0)
		adapter->rid[0] = 0;

	/* We allocate a single interrupt resource */
	adapter->res[0] = bus_alloc_resource_any(dev,
	    SYS_RES_IRQ, &adapter->rid[0], RF_SHAREABLE | RF_ACTIVE);
	if (adapter->res[0] == NULL) {
		device_printf(dev, "Unable to allocate bus resource: "
		    "interrupt\n");
		return (ENXIO);
	}

	/*
	 * Try allocating a fast interrupt and the associated deferred
	 * processing contexts.
	 */
	TASK_INIT(&adapter->rxtx_task, 0, igb_handle_rxtx, adapter);
	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
	adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
	    taskqueue_thread_enqueue, &adapter->tq);
	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
	    device_get_nameunit(adapter->dev));
	/*
	 * NOTE(review): on bus_setup_intr() failure the IRQ resource
	 * allocated above is not released here -- presumably the
	 * attach error path frees it; verify against the caller.
	 */
	if ((error = bus_setup_intr(dev, adapter->res[0],
	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL, adapter,
	    &adapter->tag[0])) != 0) {
		device_printf(dev, "Failed to register fast interrupt "
		    "handler: %d\n", error);
		taskqueue_free(adapter->tq);
		adapter->tq = NULL;
		return (error);
	}

	return (0);
}
2092
2093
2094 /*********************************************************************
2095 *
2096 * Setup the MSIX Interrupt handlers:
2097 *
2098 **********************************************************************/
2099 static int
2100 igb_allocate_msix(struct adapter *adapter)
2101 {
2102 device_t dev = adapter->dev;
2103 struct tx_ring *txr = adapter->tx_rings;
2104 struct rx_ring *rxr = adapter->rx_rings;
2105 int error, vector = 0;
2106
2107 /*
2108 * Setup the interrupt handlers
2109 */
2110
2111 /* TX Setup */
2112 for (int i = 0; i < adapter->num_tx_queues; i++, vector++, txr++) {
2113 adapter->res[vector] = bus_alloc_resource_any(dev,
2114 SYS_RES_IRQ, &adapter->rid[vector],
2115 RF_SHAREABLE | RF_ACTIVE);
2116 if (adapter->res[vector] == NULL) {
2117 device_printf(dev,
2118 "Unable to allocate bus resource: "
2119 "MSIX TX Interrupt\n");
2120 return (ENXIO);
2121 }
2122 error = bus_setup_intr(dev, adapter->res[vector],
2123 INTR_TYPE_NET | INTR_MPSAFE, NULL, igb_msix_tx,
2124 txr, &adapter->tag[vector]);
2125 if (error) {
2126 adapter->res[vector] = NULL;
2127 device_printf(dev, "Failed to register TX handler");
2128 return (error);
2129 }
2130 /* Make tasklet for deferred handling - one per queue */
2131 TASK_INIT(&txr->tx_task, 0, igb_handle_tx, txr);
2132 if (adapter->hw.mac.type == e1000_82575) {
2133 txr->eims = E1000_EICR_TX_QUEUE0 << i;
2134 /* MSIXBM registers start at 0 */
2135 txr->msix = adapter->rid[vector] - 1;
2136 } else {
2137 txr->eims = 1 << vector;
2138 txr->msix = vector;
2139 }
2140 }
2141
2142 /* RX Setup */
2143 for (int i = 0; i < adapter->num_rx_queues; i++, vector++, rxr++) {
2144 adapter->res[vector] = bus_alloc_resource_any(dev,
2145 SYS_RES_IRQ, &adapter->rid[vector],
2146 RF_SHAREABLE | RF_ACTIVE);
2147 if (adapter->res[vector] == NULL) {
2148 device_printf(dev,
2149 "Unable to allocate bus resource: "
2150 "MSIX RX Interrupt\n");
2151 return (ENXIO);
2152 }
2153 error = bus_setup_intr(dev, adapter->res[vector],
2154 INTR_TYPE_NET | INTR_MPSAFE, NULL, igb_msix_rx,
2155 rxr, &adapter->tag[vector]);
2156 if (error) {
2157 adapter->res[vector] = NULL;
2158 device_printf(dev, "Failed to register RX handler");
2159 return (error);
2160 }
2161 TASK_INIT(&rxr->rx_task, 0, igb_handle_rx, rxr);
2162 if (adapter->hw.mac.type == e1000_82575) {
2163 rxr->eims = E1000_EICR_RX_QUEUE0 << i;
2164 rxr->msix = adapter->rid[vector] - 1;
2165 } else {
2166 rxr->eims = 1 << vector;
2167 rxr->msix = vector;
2168 }
2169 }
2170
2171 /* And Link */
2172 adapter->res[vector] = bus_alloc_resource_any(dev,
2173 SYS_RES_IRQ, &adapter->rid[vector],
2174 RF_SHAREABLE | RF_ACTIVE);
2175 if (adapter->res[vector] == NULL) {
2176 device_printf(dev,
2177 "Unable to allocate bus resource: "
2178 "MSIX Link Interrupt\n");
2179 return (ENXIO);
2180 }
2181 if ((error = bus_setup_intr(dev, adapter->res[vector],
2182 INTR_TYPE_NET | INTR_MPSAFE, NULL, igb_msix_link,
2183 adapter, &adapter->tag[vector])) != 0) {
2184 device_printf(dev, "Failed to register Link handler");
2185 return (error);
2186 }
2187 if (adapter->hw.mac.type == e1000_82575)
2188 adapter->linkvec = adapter->rid[vector] - 1;
2189 else
2190 adapter->linkvec = vector;
2191
2192 /* Make tasklet for deferred link interrupt handling */
2193 TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2194
2195 adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2196 taskqueue_thread_enqueue, &adapter->tq);
2197 taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2198 device_get_nameunit(adapter->dev));
2199
2200 return (0);
2201 }
2202
/*
 * Program the hardware's MSI-X vector routing: map each TX/RX queue
 * and the link event to the vector chosen in igb_allocate_msix(),
 * accumulating adapter->eims_mask / link_mask as it goes.
 */
static void
igb_configure_queues(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	struct tx_ring *txr;
	struct rx_ring *rxr;

	/* Turn on MSIX */
	/*
	** 82576 uses IVARs to route MSI/X
	** interrupts, it's not very intuitive,
	** study the code carefully :)
	*/
	if (adapter->hw.mac.type == e1000_82576) {
		u32 ivar = 0;
		/* First turn on the capability */
		E1000_WRITE_REG(hw, E1000_GPIE,
		    E1000_GPIE_MSIX_MODE |
		    E1000_GPIE_EIAME |
		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
		/*
		 * RX: each 32-bit IVAR register holds four byte-wide
		 * routing entries; the masks below show RX queue i uses
		 * byte 0 and RX queue i+8 uses byte 2 of IVAR (i & 7).
		 * Bytes 1/3 hold the TX entries, written in the next loop.
		 */
		for (int i = 0; i < adapter->num_rx_queues; i++) {
			u32 index = i & 0x7; /* Each IVAR has two entries */
			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
			rxr = &adapter->rx_rings[i];
			if (i < 8) {
				/* queues 0..7: low byte */
				ivar &= 0xFFFFFF00;
				ivar |= rxr->msix | E1000_IVAR_VALID;
			} else {
				/* queues 8..15: third byte */
				ivar &= 0xFF00FFFF;
				ivar |= (rxr->msix | E1000_IVAR_VALID) << 16;
			}
			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
			adapter->eims_mask |= rxr->eims;
		}
		/* TX: queue i in byte 1, queue i+8 in byte 3 */
		for (int i = 0; i < adapter->num_tx_queues; i++) {
			u32 index = i & 0x7; /* Each IVAR has two entries */
			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
			txr = &adapter->tx_rings[i];
			if (i < 8) {
				ivar &= 0xFFFF00FF;
				ivar |= (txr->msix | E1000_IVAR_VALID) << 8;
			} else {
				ivar &= 0x00FFFFFF;
				ivar |= (txr->msix | E1000_IVAR_VALID) << 24;
			}
			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
			adapter->eims_mask |= txr->eims;
		}

		/* And for the link interrupt */
		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
		adapter->link_mask = 1 << adapter->linkvec;
		adapter->eims_mask |= adapter->link_mask;
		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
	} else
	{ /* 82575: one MSIXBM register per vector holds its EICR bitmask */
		int tmp;

		/* enable MSI-X PBA support*/
		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		/* Auto-Mask interrupts upon ICR read. */
		tmp |= E1000_CTRL_EXT_EIAME;
		tmp |= E1000_CTRL_EXT_IRCA;
		E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);

		/* TX */
		for (int i = 0; i < adapter->num_tx_queues; i++) {
			txr = &adapter->tx_rings[i];
			E1000_WRITE_REG(hw, E1000_MSIXBM(txr->msix),
			    txr->eims);
			adapter->eims_mask |= txr->eims;
		}

		/* RX */
		for (int i = 0; i < adapter->num_rx_queues; i++) {
			rxr = &adapter->rx_rings[i];
			E1000_WRITE_REG(hw, E1000_MSIXBM(rxr->msix),
			    rxr->eims);
			adapter->eims_mask |= rxr->eims;
		}

		/* Link */
		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
		    E1000_EIMS_OTHER);
		adapter->link_mask |= E1000_EIMS_OTHER;
		adapter->eims_mask |= adapter->link_mask;
	}
	return;
}
2295
2296
2297 static void
2298 igb_free_pci_resources(struct adapter *adapter)
2299 {
2300 device_t dev = adapter->dev;
2301
2302 /* Make sure the for loop below runs once */
2303 if (adapter->msix == 0)
2304 adapter->msix = 1;
2305
2306 /*
2307 * First release all the interrupt resources:
2308 * notice that since these are just kept
2309 * in an array we can do the same logic
2310 * whether its MSIX or just legacy.
2311 */
2312 for (int i = 0; i < adapter->msix; i++) {
2313 if (adapter->tag[i] != NULL) {
2314 bus_teardown_intr(dev, adapter->res[i],
2315 adapter->tag[i]);
2316 adapter->tag[i] = NULL;
2317 }
2318 if (adapter->res[i] != NULL) {
2319 bus_release_resource(dev, SYS_RES_IRQ,
2320 adapter->rid[i], adapter->res[i]);
2321 }
2322 }
2323
2324 if (adapter->msix)
2325 pci_release_msi(dev);
2326
2327 if (adapter->msix_mem != NULL)
2328 bus_release_resource(dev, SYS_RES_MEMORY,
2329 PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2330
2331 if (adapter->pci_mem != NULL)
2332 bus_release_resource(dev, SYS_RES_MEMORY,
2333 PCIR_BAR(0), adapter->pci_mem);
2334
2335 }
2336
2337 /*
2338 * Setup Either MSI/X or MSI
2339 */
2340 static int
2341 igb_setup_msix(struct adapter *adapter)
2342 {
2343 device_t dev = adapter->dev;
2344 int rid, want, queues, msgs;
2345
2346 /* First try MSI/X */
2347 rid = PCIR_BAR(IGB_MSIX_BAR);
2348 adapter->msix_mem = bus_alloc_resource_any(dev,
2349 SYS_RES_MEMORY, &rid, RF_ACTIVE);
2350 if (!adapter->msix_mem) {
2351 /* May not be enabled */
2352 device_printf(adapter->dev,
2353 "Unable to map MSIX table \n");
2354 goto msi;
2355 }
2356
2357 msgs = pci_msix_count(dev);
2358 if (msgs == 0) { /* system has msix disabled */
2359 bus_release_resource(dev, SYS_RES_MEMORY,
2360 PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2361 adapter->msix_mem = NULL;
2362 goto msi;
2363 }
2364
2365 /* Limit by the number set in header */
2366 if (msgs > IGB_MSIX_VEC)
2367 msgs = IGB_MSIX_VEC;
2368
2369 /* Figure out a reasonable auto config value */
2370 queues = (mp_ncpus > ((msgs-1)/2)) ? (msgs-1)/2 : mp_ncpus;
2371
2372 if (igb_tx_queues == 0)
2373 igb_tx_queues = queues;
2374 if (igb_rx_queues == 0)
2375 igb_rx_queues = queues;
2376 want = igb_tx_queues + igb_rx_queues + 1;
2377 if (msgs >= want)
2378 msgs = want;
2379 else {
2380 device_printf(adapter->dev,
2381 "MSIX Configuration Problem, "
2382 "%d vectors configured, but %d queues wanted!\n",
2383 msgs, want);
2384 return (ENXIO);
2385 }
2386 if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2387 device_printf(adapter->dev,
2388 "Using MSIX interrupts with %d vectors\n", msgs);
2389 adapter->num_tx_queues = igb_tx_queues;
2390 adapter->num_rx_queues = igb_rx_queues;
2391 return (msgs);
2392 }
2393 msi:
2394 msgs = pci_msi_count(dev);
2395 if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2396 device_printf(adapter->dev,"Using MSI interrupt\n");
2397 return (msgs);
2398 }
2399
2400 /*********************************************************************
2401 *
2402 * Initialize the hardware to a configuration
2403 * as specified by the adapter structure.
2404 *
2405 **********************************************************************/
2406 static int
2407 igb_hardware_init(struct adapter *adapter)
2408 {
2409 device_t dev = adapter->dev;
2410 u32 rx_buffer_size;
2411
2412 INIT_DEBUGOUT("igb_hardware_init: begin");
2413
2414 /* Issue a global reset */
2415 e1000_reset_hw(&adapter->hw);
2416
2417 /* Let the firmware know the OS is in control */
2418 igb_get_hw_control(adapter);
2419
2420 /*
2421 * These parameters control the automatic generation (Tx) and
2422 * response (Rx) to Ethernet PAUSE frames.
2423 * - High water mark should allow for at least two frames to be
2424 * received after sending an XOFF.
2425 * - Low water mark works best when it is very near the high water mark.
2426 * This allows the receiver to restart by sending XON when it has
2427 * drained a bit. Here we use an arbitary value of 1500 which will
2428 * restart after one full frame is pulled from the buffer. There
2429 * could be several smaller frames in the buffer and if so they will
2430 * not trigger the XON until their total number reduces the buffer
2431 * by 1500.
2432 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2433 */
2434 if (adapter->hw.mac.type == e1000_82576)
2435 rx_buffer_size = ((E1000_READ_REG(&adapter->hw,
2436 E1000_RXPBS) & 0xffff) << 10 );
2437 else
2438 rx_buffer_size = ((E1000_READ_REG(&adapter->hw,
2439 E1000_PBA) & 0xffff) << 10 );
2440
2441 adapter->hw.fc.high_water = rx_buffer_size -
2442 roundup2(adapter->max_frame_size, 1024);
2443 adapter->hw.fc.low_water = adapter->hw.fc.high_water - 1500;
2444
2445 adapter->hw.fc.pause_time = IGB_FC_PAUSE_TIME;
2446 adapter->hw.fc.send_xon = TRUE;
2447
2448 /* Set Flow control, use the tunable location if sane */
2449 if ((igb_fc_setting >= 0) || (igb_fc_setting < 4))
2450 adapter->hw.fc.requested_mode = igb_fc_setting;
2451 else
2452 adapter->hw.fc.requested_mode = e1000_fc_none;
2453
2454 if (e1000_init_hw(&adapter->hw) < 0) {
2455 device_printf(dev, "Hardware Initialization Failed\n");
2456 return (EIO);
2457 }
2458
2459 e1000_check_for_link(&adapter->hw);
2460
2461 return (0);
2462 }
2463
/*********************************************************************
 *
 *  Setup networking device structure and register an interface.
 *
 *  Allocates the ifnet, fills in the driver entry points, attaches
 *  the Ethernet layer, advertises capabilities, and registers the
 *  supported media types.  Ordering matters: ether_ifattach() is
 *  called before the capability flags are finalized, matching the
 *  original driver flow.
 *
 **********************************************************************/
static void
igb_setup_interface(device_t dev, struct adapter *adapter)
{
	struct ifnet   *ifp;

	INIT_DEBUGOUT("igb_setup_interface: begin");

	ifp = adapter->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL)
		panic("%s: can not if_alloc()", device_get_nameunit(dev));
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifp->if_mtu = ETHERMTU;
	ifp->if_init =  igb_init;
	ifp->if_softc = adapter;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = igb_ioctl;
	ifp->if_start = igb_start;
	/* Leave one descriptor spare so the ring never fills completely */
	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
	IFQ_SET_READY(&ifp->if_snd);

	ether_ifattach(ifp, adapter->hw.mac.addr);

	/* Reset, then build up the capability set (zeroing is redundant
	 * with the full assignment on the next line but kept as-is). */
	ifp->if_capabilities = ifp->if_capenable = 0;

	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
	ifp->if_capabilities |= IFCAP_TSO4;
	ifp->if_capenable = ifp->if_capabilities;

	/*
	 * Tell the upper layer(s) we support long frames.
	 */
	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
	ifp->if_capabilities |= IFCAP_VLAN_MTU;
	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING;
	ifp->if_capenable |= IFCAP_VLAN_MTU;

#ifdef IGB_HW_VLAN_SUPPORT
	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
	ifp->if_capenable |= IFCAP_VLAN_HWFILTER;
#endif

	/*
	 * Specify the media types supported by this adapter and register
	 * callbacks to update media and link information
	 */
	ifmedia_init(&adapter->media, IFM_IMASK,
	    igb_media_change, igb_media_status);
	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 
			    0, NULL);
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
	} else {
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
			    0, NULL);
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
			    0, NULL);
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
			    0, NULL);
		/* NOTE(review): e1000_phy_ife parts lack 1000T support */
		if (adapter->hw.phy.type != e1000_phy_ife) {
			ifmedia_add(&adapter->media,
				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
			ifmedia_add(&adapter->media,
				IFM_ETHER | IFM_1000_T, 0, NULL);
		}
	}
	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
}
2541
2542
2543 /*
2544 * Manage DMA'able memory.
2545 */
2546 static void
2547 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2548 {
2549 if (error)
2550 return;
2551 *(bus_addr_t *) arg = segs[0].ds_addr;
2552 }
2553
2554 static int
2555 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2556 struct igb_dma_alloc *dma, int mapflags)
2557 {
2558 int error;
2559
2560 error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2561 IGB_DBA_ALIGN, 0, /* alignment, bounds */
2562 BUS_SPACE_MAXADDR, /* lowaddr */
2563 BUS_SPACE_MAXADDR, /* highaddr */
2564 NULL, NULL, /* filter, filterarg */
2565 size, /* maxsize */
2566 1, /* nsegments */
2567 size, /* maxsegsize */
2568 0, /* flags */
2569 NULL, /* lockfunc */
2570 NULL, /* lockarg */
2571 &dma->dma_tag);
2572 if (error) {
2573 device_printf(adapter->dev,
2574 "%s: bus_dma_tag_create failed: %d\n",
2575 __func__, error);
2576 goto fail_0;
2577 }
2578
2579 error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2580 BUS_DMA_NOWAIT, &dma->dma_map);
2581 if (error) {
2582 device_printf(adapter->dev,
2583 "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2584 __func__, (uintmax_t)size, error);
2585 goto fail_2;
2586 }
2587
2588 dma->dma_paddr = 0;
2589 error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2590 size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2591 if (error || dma->dma_paddr == 0) {
2592 device_printf(adapter->dev,
2593 "%s: bus_dmamap_load failed: %d\n",
2594 __func__, error);
2595 goto fail_3;
2596 }
2597
2598 return (0);
2599
2600 fail_3:
2601 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2602 fail_2:
2603 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2604 bus_dma_tag_destroy(dma->dma_tag);
2605 fail_0:
2606 dma->dma_map = NULL;
2607 dma->dma_tag = NULL;
2608
2609 return (error);
2610 }
2611
2612 static void
2613 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2614 {
2615 if (dma->dma_tag == NULL)
2616 return;
2617 if (dma->dma_map != NULL) {
2618 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2619 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2620 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2621 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2622 dma->dma_map = NULL;
2623 }
2624 bus_dma_tag_destroy(dma->dma_tag);
2625 dma->dma_tag = NULL;
2626 }
2627
2628
2629 /*********************************************************************
2630 *
2631 * Allocate memory for the transmit and receive rings, and then
2632 * the descriptors associated with each, called only once at attach.
2633 *
2634 **********************************************************************/
2635 static int
2636 igb_allocate_queues(struct adapter *adapter)
2637 {
2638 device_t dev = adapter->dev;
2639 struct tx_ring *txr;
2640 struct rx_ring *rxr;
2641 int rsize, tsize, error = E1000_SUCCESS;
2642 int txconf = 0, rxconf = 0;
2643
2644 /* First allocate the TX ring struct memory */
2645 if (!(adapter->tx_rings =
2646 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2647 adapter->num_tx_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2648 device_printf(dev, "Unable to allocate TX ring memory\n");
2649 error = ENOMEM;
2650 goto fail;
2651 }
2652 txr = adapter->tx_rings;
2653
2654 /* Next allocate the RX */
2655 if (!(adapter->rx_rings =
2656 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2657 adapter->num_rx_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2658 device_printf(dev, "Unable to allocate RX ring memory\n");
2659 error = ENOMEM;
2660 goto rx_fail;
2661 }
2662 rxr = adapter->rx_rings;
2663
2664 tsize = roundup2(adapter->num_tx_desc *
2665 sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
2666 /*
2667 * Now set up the TX queues, txconf is needed to handle the
2668 * possibility that things fail midcourse and we need to
2669 * undo memory gracefully
2670 */
2671 for (int i = 0; i < adapter->num_tx_queues; i++, txconf++) {
2672 /* Set up some basics */
2673 txr = &adapter->tx_rings[i];
2674 txr->adapter = adapter;
2675 txr->me = i;
2676
2677 /* Initialize the TX lock */
2678 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2679 device_get_nameunit(dev), txr->me);
2680 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2681
2682 if (igb_dma_malloc(adapter, tsize,
2683 &txr->txdma, BUS_DMA_NOWAIT)) {
2684 device_printf(dev,
2685 "Unable to allocate TX Descriptor memory\n");
2686 error = ENOMEM;
2687 goto err_tx_desc;
2688 }
2689 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2690 bzero((void *)txr->tx_base, tsize);
2691
2692 /* Now allocate transmit buffers for the ring */
2693 if (igb_allocate_transmit_buffers(txr)) {
2694 device_printf(dev,
2695 "Critical Failure setting up transmit buffers\n");
2696 error = ENOMEM;
2697 goto err_tx_desc;
2698 }
2699
2700 }
2701
2702 /*
2703 * Next the RX queues...
2704 */
2705 rsize = roundup2(adapter->num_rx_desc *
2706 sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
2707 for (int i = 0; i < adapter->num_rx_queues; i++, rxconf++) {
2708 rxr = &adapter->rx_rings[i];
2709 rxr->adapter = adapter;
2710 rxr->me = i;
2711
2712 /* Initialize the RX lock */
2713 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2714 device_get_nameunit(dev), txr->me);
2715 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2716
2717 if (igb_dma_malloc(adapter, rsize,
2718 &rxr->rxdma, BUS_DMA_NOWAIT)) {
2719 device_printf(dev,
2720 "Unable to allocate RxDescriptor memory\n");
2721 error = ENOMEM;
2722 goto err_rx_desc;
2723 }
2724 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2725 bzero((void *)rxr->rx_base, rsize);
2726
2727 /* Allocate receive buffers for the ring*/
2728 if (igb_allocate_receive_buffers(rxr)) {
2729 device_printf(dev,
2730 "Critical Failure setting up receive buffers\n");
2731 error = ENOMEM;
2732 goto err_rx_desc;
2733 }
2734 }
2735
2736 return (0);
2737
2738 err_rx_desc:
2739 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2740 igb_dma_free(adapter, &rxr->rxdma);
2741 err_tx_desc:
2742 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2743 igb_dma_free(adapter, &txr->txdma);
2744 free(adapter->rx_rings, M_DEVBUF);
2745 rx_fail:
2746 free(adapter->tx_rings, M_DEVBUF);
2747 fail:
2748 return (error);
2749 }
2750
2751 /*********************************************************************
2752 *
2753 * Allocate memory for tx_buffer structures. The tx_buffer stores all
2754 * the information needed to transmit a packet on the wire. This is
2755 * called only once at attach, setup is done every reset.
2756 *
2757 **********************************************************************/
2758 static int
2759 igb_allocate_transmit_buffers(struct tx_ring *txr)
2760 {
2761 struct adapter *adapter = txr->adapter;
2762 device_t dev = adapter->dev;
2763 struct igb_buffer *txbuf;
2764 int error, i;
2765
2766 /*
2767 * Setup DMA descriptor areas.
2768 */
2769 if ((error = bus_dma_tag_create(NULL, /* parent */
2770 PAGE_SIZE, 0, /* alignment, bounds */
2771 BUS_SPACE_MAXADDR, /* lowaddr */
2772 BUS_SPACE_MAXADDR, /* highaddr */
2773 NULL, NULL, /* filter, filterarg */
2774 IGB_TSO_SIZE, /* maxsize */
2775 IGB_MAX_SCATTER, /* nsegments */
2776 PAGE_SIZE, /* maxsegsize */
2777 0, /* flags */
2778 NULL, /* lockfunc */
2779 NULL, /* lockfuncarg */
2780 &txr->txtag))) {
2781 device_printf(dev,"Unable to allocate TX DMA tag\n");
2782 goto fail;
2783 }
2784
2785 if (!(txr->tx_buffers =
2786 (struct igb_buffer *) malloc(sizeof(struct igb_buffer) *
2787 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2788 device_printf(dev, "Unable to allocate tx_buffer memory\n");
2789 error = ENOMEM;
2790 goto fail;
2791 }
2792
2793 /* Create the descriptor buffer dma maps */
2794 txbuf = txr->tx_buffers;
2795 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2796 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
2797 if (error != 0) {
2798 device_printf(dev, "Unable to create TX DMA map\n");
2799 goto fail;
2800 }
2801 }
2802
2803 return 0;
2804 fail:
2805 /* We free all, it handles case where we are in the middle */
2806 igb_free_transmit_structures(adapter);
2807 return (error);
2808 }
2809
2810 /*********************************************************************
2811 *
2812 * Initialize a transmit ring.
2813 *
2814 **********************************************************************/
2815 static void
2816 igb_setup_transmit_ring(struct tx_ring *txr)
2817 {
2818 struct adapter *adapter = txr->adapter;
2819 struct igb_buffer *txbuf;
2820 int i;
2821
2822 /* Clear the old ring contents */
2823 bzero((void *)txr->tx_base,
2824 (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
2825 /* Reset indices */
2826 txr->next_avail_desc = 0;
2827 txr->next_to_clean = 0;
2828
2829 /* Free any existing tx buffers. */
2830 txbuf = txr->tx_buffers;
2831 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2832 if (txbuf->m_head != NULL) {
2833 bus_dmamap_sync(txr->txtag, txbuf->map,
2834 BUS_DMASYNC_POSTWRITE);
2835 bus_dmamap_unload(txr->txtag, txbuf->map);
2836 m_freem(txbuf->m_head);
2837 txbuf->m_head = NULL;
2838 }
2839 /* clear the watch index */
2840 txbuf->next_eop = -1;
2841 }
2842
2843 /* Set number of descriptors available */
2844 txr->tx_avail = adapter->num_tx_desc;
2845
2846 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2847 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2848
2849 }
2850
2851 /*********************************************************************
2852 *
2853 * Initialize all transmit rings.
2854 *
2855 **********************************************************************/
2856 static void
2857 igb_setup_transmit_structures(struct adapter *adapter)
2858 {
2859 struct tx_ring *txr = adapter->tx_rings;
2860
2861 for (int i = 0; i < adapter->num_tx_queues; i++, txr++)
2862 igb_setup_transmit_ring(txr);
2863
2864 return;
2865 }
2866
/*********************************************************************
 *
 *  Enable transmit unit.
 *
 *  Programs each ring's base/length/head/tail registers, enables the
 *  per-queue TXDCTL, then turns on the global transmit control.
 *
 **********************************************************************/
static void
igb_initialize_transmit_units(struct adapter *adapter)
{
	struct tx_ring	*txr = adapter->tx_rings;
	u32		tctl, txdctl;

	 INIT_DEBUGOUT("igb_initialize_transmit_units: begin");

	/* Setup the Base and Length of the Tx Descriptor Rings */
	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
		u64 bus_addr = txr->txdma.dma_paddr;

		E1000_WRITE_REG(&adapter->hw, E1000_TDLEN(i),
		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
		/* High half of the ring's physical address first */
		E1000_WRITE_REG(&adapter->hw, E1000_TDBAH(i),
		    (uint32_t)(bus_addr >> 32));
		E1000_WRITE_REG(&adapter->hw, E1000_TDBAL(i),
		    (uint32_t)bus_addr);

		/* Setup the HW Tx Head and Tail descriptor pointers */
		E1000_WRITE_REG(&adapter->hw, E1000_TDT(i), 0);
		E1000_WRITE_REG(&adapter->hw, E1000_TDH(i), 0);

		HW_DEBUGOUT2("Base = %x, Length = %x\n",
		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));

		/* Setup Transmit Descriptor Base Settings
		 * (loop-invariant; harmless to repeat per queue) */
		adapter->txd_cmd = E1000_TXD_CMD_IFCS;

		/* Enable this transmit queue */
		txdctl = E1000_READ_REG(&adapter->hw, E1000_TXDCTL(i));
		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
		E1000_WRITE_REG(&adapter->hw, E1000_TXDCTL(i), txdctl);
	}

	/* Program the Transmit Control Register */
	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
	tctl &= ~E1000_TCTL_CT;
	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));

	e1000_config_collision_dist(&adapter->hw);

	/* This write will effectively turn on the transmit unit. */
	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);

}
2919
2920 /*********************************************************************
2921 *
2922 * Free all transmit rings.
2923 *
2924 **********************************************************************/
2925 static void
2926 igb_free_transmit_structures(struct adapter *adapter)
2927 {
2928 struct tx_ring *txr = adapter->tx_rings;
2929
2930 for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
2931 IGB_TX_LOCK(txr);
2932 igb_free_transmit_buffers(txr);
2933 igb_dma_free(adapter, &txr->txdma);
2934 IGB_TX_UNLOCK(txr);
2935 IGB_TX_LOCK_DESTROY(txr);
2936 }
2937 free(adapter->tx_rings, M_DEVBUF);
2938 }
2939
2940 /*********************************************************************
2941 *
2942 * Free transmit ring related data structures.
2943 *
2944 **********************************************************************/
2945 static void
2946 igb_free_transmit_buffers(struct tx_ring *txr)
2947 {
2948 struct adapter *adapter = txr->adapter;
2949 struct igb_buffer *tx_buffer;
2950 int i;
2951
2952 INIT_DEBUGOUT("free_transmit_ring: begin");
2953
2954 if (txr->tx_buffers == NULL)
2955 return;
2956
2957 tx_buffer = txr->tx_buffers;
2958 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
2959 if (tx_buffer->m_head != NULL) {
2960 bus_dmamap_sync(txr->txtag, tx_buffer->map,
2961 BUS_DMASYNC_POSTWRITE);
2962 bus_dmamap_unload(txr->txtag,
2963 tx_buffer->map);
2964 m_freem(tx_buffer->m_head);
2965 tx_buffer->m_head = NULL;
2966 if (tx_buffer->map != NULL) {
2967 bus_dmamap_destroy(txr->txtag,
2968 tx_buffer->map);
2969 tx_buffer->map = NULL;
2970 }
2971 } else if (tx_buffer->map != NULL) {
2972 bus_dmamap_unload(txr->txtag,
2973 tx_buffer->map);
2974 bus_dmamap_destroy(txr->txtag,
2975 tx_buffer->map);
2976 tx_buffer->map = NULL;
2977 }
2978 }
2979
2980 if (txr->tx_buffers != NULL) {
2981 free(txr->tx_buffers, M_DEVBUF);
2982 txr->tx_buffers = NULL;
2983 }
2984 if (txr->txtag != NULL) {
2985 bus_dma_tag_destroy(txr->txtag);
2986 txr->txtag = NULL;
2987 }
2988 return;
2989 }
2990
/**********************************************************************
 *
 *  Setup work for hardware segmentation offload (TSO) on
 *  adapters using advanced tx descriptors (82575)
 *
 *  Writes one advanced context descriptor carrying the header layout
 *  and MSS for this frame, and returns the total header length via
 *  *hdrlen for use by igb_xmit.  Returns TRUE if a descriptor was
 *  consumed, FALSE when the frame is not contiguous TCP/IPv4 (the
 *  caller then transmits without TSO).
 *
 **********************************************************************/
static boolean_t
igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
{
	struct adapter *adapter = txr->adapter;
	struct e1000_adv_tx_context_desc *TXD;
	struct igb_buffer        *tx_buffer;
	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
	u32 mss_l4len_idx = 0;
	u16 vtag = 0;
	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
	struct ether_vlan_header *eh;
	struct ip *ip;
	struct tcphdr *th;


	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) 
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	else
		ehdrlen = ETHER_HDR_LEN;

	/* Ensure we have at least the IP+TCP header in the first mbuf. */
	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
		return FALSE;

	/* Only supports IPV4 for now */
	ctxd = txr->next_avail_desc;
	tx_buffer = &txr->tx_buffers[ctxd];
	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];

	ip = (struct ip *)(mp->m_data + ehdrlen);
	if (ip->ip_p != IPPROTO_TCP)
                return FALSE;   /* 0 */
	/*
	 * Zero the IP checksum and seed the TCP checksum with the
	 * pseudo-header sum; the hardware computes the final values
	 * for every generated segment.
	 */
	ip->ip_sum = 0;
	ip_hlen = ip->ip_hl << 2;
	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
	th->th_sum = in_pseudo(ip->ip_src.s_addr,
	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
	tcp_hlen = th->th_off << 2;
	/*
	 * Calculate header length, this is used
	 * in the transmit desc in igb_xmit
	 */
	*hdrlen = ehdrlen + ip_hlen + tcp_hlen;

	/* VLAN MACLEN IPLEN */
	if (mp->m_flags & M_VLANTAG) {
		vtag = htole16(mp->m_pkthdr.ether_vtag);
		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
	}

	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
	vlan_macip_lens |= ip_hlen;
	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);

	/* ADV DTYPE TUCMD: advanced context descriptor, TCP over IPv4 */
	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);

	/* MSS L4LEN IDX */
	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
	TXD->mss_l4len_idx = htole32(mss_l4len_idx);

	TXD->seqnum_seed = htole32(0);
	/* The context slot carries no mbuf; mark it unused. */
	tx_buffer->m_head = NULL;
	tx_buffer->next_eop = -1;

	/* Advance the ring past the consumed context descriptor. */
	if (++ctxd == adapter->num_tx_desc)
		ctxd = 0;

	txr->tx_avail--;
	txr->next_avail_desc = ctxd;
	return TRUE;
}
3078
3079
3080 /*********************************************************************
3081 *
3082 * Context Descriptor setup for VLAN or CSUM
3083 *
3084 **********************************************************************/
3085
static int
igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
{
	struct adapter *adapter = txr->adapter;
	struct e1000_adv_tx_context_desc *TXD;
	struct igb_buffer *tx_buffer;
	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
	struct ether_vlan_header *eh;
	struct ip *ip = NULL;
	struct ip6_hdr *ip6;
	int ehdrlen, ip_hlen = 0;
	u16 etype;
	u8 ipproto = 0;
	bool offload = TRUE;
	u16 vtag = 0;

	/* This context descriptor consumes the next free TX descriptor. */
	int ctxd = txr->next_avail_desc;
	tx_buffer = &txr->tx_buffers[ctxd];
	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];

	/* No checksum offload requested: we are only here for VLAN tagging. */
	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
		offload = FALSE; /* Only here to handle VLANs */
	/*
	** In advanced descriptors the vlan tag must
	** be placed into the descriptor itself.
	*/
	if (mp->m_flags & M_VLANTAG) {
		vtag = htole16(mp->m_pkthdr.ether_vtag);
		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
	} else if (offload == FALSE)
		return FALSE;	/* neither VLAN nor csum: nothing to emit */
	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present,
	 * helpful for QinQ too.
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		ehdrlen = ETHER_HDR_LEN;
	}

	/* Set the ether header length */
	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;

	switch (etype) {
	case ETHERTYPE_IP:
		ip = (struct ip *)(mp->m_data + ehdrlen);
		ip_hlen = ip->ip_hl << 2;
		/* IP header must be contiguous in the first mbuf. */
		if (mp->m_len < ehdrlen + ip_hlen) {
			offload = FALSE;
			break;
		}
		ipproto = ip->ip_p;
		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
		break;
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
		ip_hlen = sizeof(struct ip6_hdr);
		if (mp->m_len < ehdrlen + ip_hlen)
			return FALSE; /* failure */
		ipproto = ip6->ip6_nxt;
		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
		break;
#ifdef IGB_TIMESYNC
	case ETHERTYPE_IEEE1588:
		/*
		 * NOTE(review): IGB_TIMESTAMP is assigned to a bool here;
		 * presumably a sentinel return code — confirm callers of
		 * this function distinguish it from TRUE/FALSE.
		 */
		offload = IGB_TIMESTAMP;
		break;
#endif
	default:
		offload = FALSE;
		break;
	}

	vlan_macip_lens |= ip_hlen;
	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;

	switch (ipproto) {
	case IPPROTO_TCP:
		if (mp->m_pkthdr.csum_flags & CSUM_TCP)
			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
		break;
	case IPPROTO_UDP:
	{
#ifdef IGB_TIMESYNC
		/*
		 * NOTE(review): 'ip' is only set in the ETHERTYPE_IP case
		 * above; for an IPv6/UDP frame this dereferences NULL —
		 * confirm before enabling IGB_TIMESYNC.
		 */
		void *hdr = (caddr_t) ip + ip_hlen;
		struct udphdr *uh = (struct udphdr *)hdr;

		if (uh->uh_dport == htons(TSYNC_PORT))
			offload = IGB_TIMESTAMP;
#endif
		if (mp->m_pkthdr.csum_flags & CSUM_UDP)
			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
		break;
	}
	default:
		offload = FALSE;
		break;
	}

	/* Now copy bits into descriptor */
	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
	TXD->seqnum_seed = htole32(0);
	TXD->mss_l4len_idx = htole32(0);

	/* Context descriptors carry no buffer of their own. */
	tx_buffer->m_head = NULL;
	tx_buffer->next_eop = -1;

	/* We've consumed the first desc, adjust counters */
	if (++ctxd == adapter->num_tx_desc)
		ctxd = 0;
	txr->next_avail_desc = ctxd;
	--txr->tx_avail;

	return (offload);
}
3206
3207
3208 /**********************************************************************
3209 *
3210 * Examine each tx_buffer in the used queue. If the hardware is done
3211 * processing the packet then free associated resources. The
3212 * tx_buffer is put back on the free queue.
3213 *
3214 * TRUE return means there's work in the ring to clean, FALSE its empty.
3215 **********************************************************************/
static bool
igb_txeof(struct tx_ring *txr)
{
	struct adapter *adapter = txr->adapter;
	int first, last, done, num_avail;
	struct igb_buffer *tx_buffer;
	struct e1000_tx_desc *tx_desc, *eop_desc;
	struct ifnet   *ifp = adapter->ifp;

	IGB_TX_LOCK_ASSERT(txr);

	/* Ring completely free: nothing to clean. */
	if (txr->tx_avail == adapter->num_tx_desc)
		return FALSE;

	num_avail = txr->tx_avail;
	first = txr->next_to_clean;
	tx_desc = &txr->tx_base[first];
	tx_buffer = &txr->tx_buffers[first];
	last = tx_buffer->next_eop;
	/*
	 * NOTE(review): if the first pending buffer's next_eop is -1
	 * (context descriptors are queued with next_eop = -1), this
	 * points eop_desc at tx_base[-1] and the DD test below reads
	 * out of bounds — confirm against igb_xmit's bookkeeping.
	 */
	eop_desc = &txr->tx_base[last];

	/*
	 * What this does is get the index of the
	 * first descriptor AFTER the EOP of the
	 * first packet, that way we can do the
	 * simple comparison on the inner while loop.
	 */
	if (++last == adapter->num_tx_desc)
		last = 0;
	done = last;

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD);

	/* Outer loop: one iteration per completed packet. */
	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
		/* We clean the range of the packet */
		while (first != done) {
			tx_desc->upper.data = 0;
			tx_desc->lower.data = 0;
			tx_desc->buffer_addr = 0;
			num_avail++;

			if (tx_buffer->m_head) {
				ifp->if_opackets++;
				bus_dmamap_sync(txr->txtag,
				    tx_buffer->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(txr->txtag,
				    tx_buffer->map);

				m_freem(tx_buffer->m_head);
				tx_buffer->m_head = NULL;
			}
			tx_buffer->next_eop = -1;

			if (++first == adapter->num_tx_desc)
				first = 0;

			tx_buffer = &txr->tx_buffers[first];
			tx_desc = &txr->tx_base[first];
		}
		/* See if we can continue to the next packet */
		last = tx_buffer->next_eop;
		if (last != -1) {
			eop_desc = &txr->tx_base[last];
			/* Get new done point */
			if (++last == adapter->num_tx_desc) last = 0;
			done = last;
		} else
			break;
	}
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	txr->next_to_clean = first;

	/*
	 * If we have enough room, clear IFF_DRV_OACTIVE to tell the stack
	 * that it is OK to send packets.
	 * If there are no pending descriptors, clear the timeout. Otherwise,
	 * if some descriptors have been freed, restart the timeout.
	 */
	if (num_avail > IGB_TX_CLEANUP_THRESHOLD) {
		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
		/* All clean, turn off the timer */
		if (num_avail == adapter->num_tx_desc) {
			txr->watchdog_timer = 0;
			txr->tx_avail = num_avail;
			return FALSE;
		}
		/* Some cleaned, reset the timer */
		else if (num_avail != txr->tx_avail)
			txr->watchdog_timer = IGB_TX_TIMEOUT;
	}
	txr->tx_avail = num_avail;
	return TRUE;
}
3313
3314
3315 /*********************************************************************
3316 *
3317 * Get a buffer from system mbuf buffer pool.
3318 *
3319 **********************************************************************/
static int
igb_get_buf(struct rx_ring *rxr, int i)
{
	struct adapter		*adapter = rxr->adapter;
	struct mbuf		*m;
	bus_dma_segment_t	segs[1];
	bus_dmamap_t		map;
	struct igb_buffer	*rx_buffer;
	int			error, nsegs;

	/* Allocate a fresh 2K cluster mbuf for this RX slot. */
	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
	if (m == NULL) {
		adapter->mbuf_cluster_failed++;
		return (ENOBUFS);
	}
	m->m_len = m->m_pkthdr.len = MCLBYTES;

	/*
	 * Shift the payload so the IP header lands on a 4-byte boundary,
	 * but only when the frame is guaranteed to still fit.
	 */
	if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
		m_adj(m, ETHER_ALIGN);

	/*
	 * Using memory from the mbuf cluster pool, invoke the
	 * bus_dma machinery to arrange the memory mapping.
	 * Load into the spare map first so failure leaves the
	 * slot's current mapping intact.
	 */
	error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
	    rxr->rx_spare_map, m, segs, &nsegs, BUS_DMA_NOWAIT);
	if (error != 0) {
		m_free(m);
		return (error);
	}

	/* If nsegs is wrong then the stack is corrupt. */
	KASSERT(nsegs == 1, ("Too many segments returned!"));

	rx_buffer = &rxr->rx_buffers[i];
	if (rx_buffer->m_head != NULL)
		bus_dmamap_unload(rxr->rxtag, rx_buffer->map);

	/* Swap the slot's (now unloaded) map with the freshly loaded spare. */
	map = rx_buffer->map;
	rx_buffer->map = rxr->rx_spare_map;
	rxr->rx_spare_map = map;
	bus_dmamap_sync(rxr->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD);
	rx_buffer->m_head = m;

	/* Hand the buffer's physical address to the hardware. */
	rxr->rx_base[i].read.pkt_addr = htole64(segs[0].ds_addr);
	return (0);
}
3367
3368
3369 /*********************************************************************
3370 *
3371 * Allocate memory for rx_buffer structures. Since we use one
3372 * rx_buffer per received packet, the maximum number of rx_buffer's
3373 * that we'll need is equal to the number of receive descriptors
3374 * that we've allocated.
3375 *
3376 **********************************************************************/
3377 static int
3378 igb_allocate_receive_buffers(struct rx_ring *rxr)
3379 {
3380 struct adapter *adapter = rxr->adapter;
3381 device_t dev = adapter->dev;
3382 struct igb_buffer *rxbuf;
3383 int i, bsize, error;
3384
3385 bsize = sizeof(struct igb_buffer) * adapter->num_rx_desc;
3386 if (!(rxr->rx_buffers =
3387 (struct igb_buffer *) malloc(bsize,
3388 M_DEVBUF, M_NOWAIT | M_ZERO))) {
3389 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3390 error = ENOMEM;
3391 goto fail;
3392 }
3393
3394 if ((error = bus_dma_tag_create(NULL, /* parent */
3395 PAGE_SIZE, 0, /* alignment, bounds */
3396 BUS_SPACE_MAXADDR, /* lowaddr */
3397 BUS_SPACE_MAXADDR, /* highaddr */
3398 NULL, NULL, /* filter, filterarg */
3399 MCLBYTES, /* maxsize */
3400 1, /* nsegments */
3401 MCLBYTES, /* maxsegsize */
3402 0, /* flags */
3403 NULL, /* lockfunc */
3404 NULL, /* lockfuncarg */
3405 &rxr->rxtag))) {
3406 device_printf(dev, "Unable to create RX Small DMA tag\n");
3407 goto fail;
3408 }
3409
3410 /* Create the spare map (used by getbuf) */
3411 error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3412 &rxr->rx_spare_map);
3413 if (error) {
3414 device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3415 __func__, error);
3416 goto fail;
3417 }
3418
3419 for (i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
3420 rxbuf = &rxr->rx_buffers[i];
3421 error = bus_dmamap_create(rxr->rxtag,
3422 BUS_DMA_NOWAIT, &rxbuf->map);
3423 if (error) {
3424 device_printf(dev, "Unable to create Small RX DMA map\n");
3425 goto fail;
3426 }
3427 }
3428
3429 return (0);
3430
3431 fail:
3432 /* Frees all, but can handle partial completion */
3433 igb_free_receive_structures(adapter);
3434 return (error);
3435 }
3436
3437 /*********************************************************************
3438 *
3439 * Initialize a receive ring and its buffers.
3440 *
3441 **********************************************************************/
static int
igb_setup_receive_ring(struct rx_ring *rxr)
{
	struct	adapter		*adapter;
	device_t		dev;
	struct igb_buffer	*rxbuf;
	struct lro_ctrl		*lro = &rxr->lro;
	int			j, rsize;

	adapter = rxr->adapter;
	dev = adapter->dev;
	rsize = roundup2(adapter->num_rx_desc *
	    sizeof(union e1000_adv_rx_desc), 4096);
	/* Clear the ring contents */
	bzero((void *)rxr->rx_base, rsize);

	/*
	** Free any mbufs still attached to the ring from a
	** previous run before repopulating it below.
	*/
	for (int i = 0; i < adapter->num_rx_desc; i++) {
		rxbuf = &rxr->rx_buffers[i];
		if (rxbuf->m_head != NULL) {
			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
			    BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
			m_freem(rxbuf->m_head);
			rxbuf->m_head = NULL;
		}
	}

	/* Populate every slot with a fresh cluster; 'j' tracks progress. */
	for (j = 0; j < adapter->num_rx_desc; j++) {
		if (igb_get_buf(rxr, j) == ENOBUFS) {
			rxr->rx_buffers[j].m_head = NULL;
			rxr->rx_base[j].read.pkt_addr = 0;
			goto fail;
		}
	}

	/* Setup our descriptor indices */
	rxr->next_to_check = 0;
	rxr->last_cleaned = 0;

	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	/* Now set up the LRO interface */
	if (igb_enable_lro) {
		int err = tcp_lro_init(lro);
		if (err) {
			device_printf(dev,"LRO Initialization failed!\n");
			goto fail;
		}
		INIT_DEBUGOUT("RX LRO Initialized\n");
		lro->ifp = adapter->ifp;
	}

	return (0);
fail:
	/*
	 * We need to clean up any buffers allocated
	 * so far, 'j' is the failing index.
	 * (If LRO init failed, j == num_rx_desc and all slots are freed.)
	 */
	for (int i = 0; i < j; i++) {
		rxbuf = &rxr->rx_buffers[i];
		if (rxbuf->m_head != NULL) {
			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
			    BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
			m_freem(rxbuf->m_head);
			rxbuf->m_head = NULL;
		}
	}
	return (ENOBUFS);
}
3518
3519 /*********************************************************************
3520 *
3521 * Initialize all receive rings.
3522 *
3523 **********************************************************************/
3524 static int
3525 igb_setup_receive_structures(struct adapter *adapter)
3526 {
3527 struct rx_ring *rxr = adapter->rx_rings;
3528 int j;
3529
3530 for (j = 0; j < adapter->num_rx_queues; j++, rxr++)
3531 if (igb_setup_receive_ring(rxr))
3532 goto fail;
3533
3534 return (0);
3535 fail:
3536 /*
3537 * Free RX buffers allocated so far, we will only handle
3538 * the rings that completed, the failing case will have
3539 * cleaned up for itself. Clean up til 'j', the failure.
3540 */
3541 for (int i = 0; i < j; i++) {
3542 rxr = &adapter->rx_rings[i];
3543 for (int n = 0; n < adapter->num_rx_desc; n++) {
3544 struct igb_buffer *rxbuf;
3545 rxbuf = &rxr->rx_buffers[n];
3546 if (rxbuf->m_head != NULL) {
3547 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3548 BUS_DMASYNC_POSTREAD);
3549 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3550 m_freem(rxbuf->m_head);
3551 rxbuf->m_head = NULL;
3552 }
3553 }
3554 }
3555
3556 return (ENOBUFS);
3557 }
3558
3559 /*********************************************************************
3560 *
3561 * Enable receive unit.
3562 *
3563 **********************************************************************/
static void
igb_initialize_receive_units(struct adapter *adapter)
{
	struct rx_ring	*rxr = adapter->rx_rings;
	struct ifnet	*ifp = adapter->ifp;
	u32		rctl, rxcsum, psize;

	INIT_DEBUGOUT("igb_initialize_receive_unit: begin");

	/*
	 * Make sure receives are disabled while setting
	 * up the descriptor ring
	 */
	rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);

	/* Setup the Base and Length of the Rx Descriptor Rings */
	for (int i = 0; i < adapter->num_rx_queues; i++, rxr++) {
		u64 bus_addr = rxr->rxdma.dma_paddr;
		u32 rxdctl, srrctl;

		/*
		 * NOTE(review): RDLEN is sized with sizeof(struct
		 * e1000_rx_desc) while the ring actually holds union
		 * e1000_adv_rx_desc — correct only if both descriptor
		 * formats are the same size; confirm.
		 */
		E1000_WRITE_REG(&adapter->hw, E1000_RDLEN(i),
		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
		E1000_WRITE_REG(&adapter->hw, E1000_RDBAH(i),
		    (uint32_t)(bus_addr >> 32));
		E1000_WRITE_REG(&adapter->hw, E1000_RDBAL(i),
		    (uint32_t)bus_addr);
		/* Use Advanced Descriptor type */
		srrctl = E1000_READ_REG(&adapter->hw, E1000_SRRCTL(i));
		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
		E1000_WRITE_REG(&adapter->hw, E1000_SRRCTL(i), srrctl);
		/* Enable this Queue */
		rxdctl = E1000_READ_REG(&adapter->hw, E1000_RXDCTL(i));
		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
		/* Clear the threshold fields, then set prefetch/host/wb. */
		rxdctl &= 0xFFF00000;
		rxdctl |= IGB_RX_PTHRESH;
		rxdctl |= IGB_RX_HTHRESH << 8;
		rxdctl |= IGB_RX_WTHRESH << 16;
		E1000_WRITE_REG(&adapter->hw, E1000_RXDCTL(i), rxdctl);
	}

	/*
	** Setup for RX MultiQueue
	*/
	if (adapter->num_rx_queues > 1) {
		u32 random[10], mrqc, shift = 0;
		union igb_reta {
			u32 dword;
			u8  bytes[4];
		} reta;

		/* Random key seeds the RSS hash. */
		arc4rand(&random, sizeof(random), 0);
		if (adapter->hw.mac.type == e1000_82575)
			shift = 6;
		/* Warning FM follows */
		/* Fill the 128-entry redirection table, 4 bytes per write. */
		for (int i = 0; i < 128; i++) {
			reta.bytes[i & 3] =
			    (i % adapter->num_rx_queues) << shift;
			if ((i & 3) == 3)
				E1000_WRITE_REG(&adapter->hw,
				    E1000_RETA(i & ~3), reta.dword);
		}
		/* Now fill in hash table */
		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
		for (int i = 0; i < 10; i++)
			E1000_WRITE_REG_ARRAY(&adapter->hw,
			    E1000_RSSRK(0), i, random[i]);

		/* Hash on IPv4/IPv6 and their TCP/UDP variants. */
		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);

		E1000_WRITE_REG(&adapter->hw, E1000_MRQC, mrqc);

		/*
		** NOTE: Receive Full-Packet Checksum Offload
		** is mutually exclusive with Multiqueue. However
		** this is not the same as TCP/IP checksums which
		** still work.
		*/
		rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM);
		rxcsum |= E1000_RXCSUM_PCSD;
		E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, rxcsum);
	} else if (ifp->if_capenable & IFCAP_RXCSUM) {
		rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM);
		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
		E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, rxcsum);
	}

	/* Setup the Receive Control Register */
	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
		   E1000_RCTL_RDMTS_HALF |
		   (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT);

	/* Make sure VLAN Filters are off */
	rctl &= ~E1000_RCTL_VFE;

	/* Do not store bad packets */
	rctl &= ~E1000_RCTL_SBP;

	/* Program buffer size; sizes above 2048 need BSEX + long-packet. */
	switch (adapter->rx_buffer_len) {
	default:
	case 2048:
		rctl |= E1000_RCTL_SZ_2048;
		break;
	case 4096:
		rctl |= E1000_RCTL_SZ_4096 |
		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
		break;
	case 8192:
		rctl |= E1000_RCTL_SZ_8192 |
		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
		break;
	case 16384:
		rctl |= E1000_RCTL_SZ_16384 |
		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
		break;
	}

	if (ifp->if_mtu > ETHERMTU) {
		/* Set maximum packet len */
		psize = adapter->max_frame_size;
		/* are we on a vlan? */
		if (adapter->ifp->if_vlantrunk != NULL)
			psize += VLAN_TAG_SIZE;
		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
		rctl |= E1000_RCTL_LPE;
	} else
		rctl &= ~E1000_RCTL_LPE;

	/* Enable Receives */
	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);

	/*
	 * Setup the HW Rx Head and Tail Descriptor Pointers
	 *   - needs to be after enable
	 */
	for (int i = 0; i < adapter->num_rx_queues; i++) {
		E1000_WRITE_REG(&adapter->hw, E1000_RDH(i), 0);
		E1000_WRITE_REG(&adapter->hw, E1000_RDT(i),
		    adapter->num_rx_desc - 1);
	}
	return;
}
3713
3714 /*********************************************************************
3715 *
3716 * Free receive rings.
3717 *
3718 **********************************************************************/
3719 static void
3720 igb_free_receive_structures(struct adapter *adapter)
3721 {
3722 struct rx_ring *rxr = adapter->rx_rings;
3723
3724 for (int i = 0; i < adapter->num_rx_queues; i++, rxr++) {
3725 struct lro_ctrl *lro = &rxr->lro;
3726 igb_free_receive_buffers(rxr);
3727 tcp_lro_free(lro);
3728 igb_dma_free(adapter, &rxr->rxdma);
3729 }
3730
3731 free(adapter->rx_rings, M_DEVBUF);
3732 }
3733
3734 /*********************************************************************
3735 *
3736 * Free receive ring data structures.
3737 *
3738 **********************************************************************/
3739 static void
3740 igb_free_receive_buffers(struct rx_ring *rxr)
3741 {
3742 struct adapter *adapter = rxr->adapter;
3743 struct igb_buffer *rx_buffer;
3744
3745 INIT_DEBUGOUT("free_receive_structures: begin");
3746
3747 if (rxr->rx_spare_map) {
3748 bus_dmamap_destroy(rxr->rxtag, rxr->rx_spare_map);
3749 rxr->rx_spare_map = NULL;
3750 }
3751
3752 /* Cleanup any existing buffers */
3753 if (rxr->rx_buffers != NULL) {
3754 rx_buffer = &rxr->rx_buffers[0];
3755 for (int i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3756 if (rx_buffer->m_head != NULL) {
3757 bus_dmamap_sync(rxr->rxtag, rx_buffer->map,
3758 BUS_DMASYNC_POSTREAD);
3759 bus_dmamap_unload(rxr->rxtag,
3760 rx_buffer->map);
3761 m_freem(rx_buffer->m_head);
3762 rx_buffer->m_head = NULL;
3763 } else if (rx_buffer->map != NULL)
3764 bus_dmamap_unload(rxr->rxtag,
3765 rx_buffer->map);
3766 if (rx_buffer->map != NULL) {
3767 bus_dmamap_destroy(rxr->rxtag,
3768 rx_buffer->map);
3769 rx_buffer->map = NULL;
3770 }
3771 }
3772 }
3773
3774 if (rxr->rx_buffers != NULL) {
3775 free(rxr->rx_buffers, M_DEVBUF);
3776 rxr->rx_buffers = NULL;
3777 }
3778
3779 if (rxr->rxtag != NULL) {
3780 bus_dma_tag_destroy(rxr->rxtag);
3781 rxr->rxtag = NULL;
3782 }
3783 }
3784 /*********************************************************************
3785 *
3786 * This routine executes in interrupt context. It replenishes
3787 * the mbufs in the descriptor and sends data which has been
3788 * dma'ed into host memory to upper layer.
3789 *
3790 * We loop at most count times if count is > 0, or until done if
3791 * count < 0.
3792 *
3793 * Return TRUE if all clean, FALSE otherwise
3794 *********************************************************************/
static bool
igb_rxeof(struct rx_ring *rxr, int count)
{
	struct adapter		*adapter = rxr->adapter;
	struct ifnet		*ifp;
	struct lro_ctrl		*lro = &rxr->lro;
	struct lro_entry	*queued;
	struct mbuf		*mp;
	uint8_t			accept_frame = 0;
	uint8_t			eop = 0;
	uint16_t		len, desc_len, prev_len_adj;
	int			i;
	u32			staterr;
	union e1000_adv_rx_desc	*cur;

	IGB_RX_LOCK(rxr);
	ifp = adapter->ifp;
	i = rxr->next_to_check;
	cur = &rxr->rx_base[i];
	staterr = cur->wb.upper.status_error;

	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
	    BUS_DMASYNC_POSTREAD);

	/* Nothing completed: bail quickly. */
	if (!(staterr & E1000_RXD_STAT_DD)) {
		IGB_RX_UNLOCK(rxr);
		return FALSE;
	}

	/* Process completed descriptors until done, budget spent,
	 * or the interface is brought down. */
	while ((staterr & E1000_RXD_STAT_DD) &&
	    (count != 0) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
		struct mbuf *m = NULL;

		mp = rxr->rx_buffers[i].m_head;
		/*
		 * Can't defer bus_dmamap_sync(9) because TBI_ACCEPT
		 * needs to access the last received byte in the mbuf.
		 */
		bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[i].map,
		    BUS_DMASYNC_POSTREAD);

		accept_frame = 1;
		prev_len_adj = 0;
		desc_len = le16toh(cur->wb.upper.length);
		if (staterr & E1000_RXD_STAT_EOP) {
			/* End of packet: budget counts packets, not descs. */
			count--;
			eop = 1;
			/*
			 * Strip the CRC; if the final fragment is shorter
			 * than the CRC, the remainder must come off the
			 * previous mbuf in the chain (prev_len_adj).
			 */
			if (desc_len < ETHER_CRC_LEN) {
				len = 0;
				prev_len_adj = ETHER_CRC_LEN - desc_len;
			} else
				len = desc_len - ETHER_CRC_LEN;
		} else {
			eop = 0;
			len = desc_len;
		}

		if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
			/* NOTE(review): pkt_len is computed but never
			 * used — dead code, candidate for removal. */
			u32 pkt_len = desc_len;

			if (rxr->fmp != NULL)
				pkt_len += rxr->fmp->m_pkthdr.len;

			accept_frame = 0;
		}

		if (accept_frame) {
			/* Refill the slot first; drop frame if we can't. */
			if (igb_get_buf(rxr, i) != 0) {
				ifp->if_iqdrops++;
				goto discard;
			}

			/* Assign correct length to the current fragment */
			mp->m_len = len;

			if (rxr->fmp == NULL) {
				mp->m_pkthdr.len = len;
				rxr->fmp = mp; /* Store the first mbuf */
				rxr->lmp = mp;
			} else {
				/* Chain mbuf's together */
				mp->m_flags &= ~M_PKTHDR;
				/*
				 * Adjust length of previous mbuf in chain if
				 * we received less than 4 bytes in the last
				 * descriptor.
				 */
				if (prev_len_adj > 0) {
					rxr->lmp->m_len -= prev_len_adj;
					rxr->fmp->m_pkthdr.len -=
					    prev_len_adj;
				}
				rxr->lmp->m_next = mp;
				rxr->lmp = rxr->lmp->m_next;
				rxr->fmp->m_pkthdr.len += len;
			}

			if (eop) {
				/* Complete frame: finalize and hand off. */
				rxr->fmp->m_pkthdr.rcvif = ifp;
				ifp->if_ipackets++;
				rxr->rx_packets++;
				rxr->bytes += rxr->fmp->m_pkthdr.len;
				rxr->rx_bytes += rxr->bytes;

				igb_rx_checksum(staterr, rxr->fmp);
#ifndef __NO_STRICT_ALIGNMENT
				if (adapter->max_frame_size >
				    (MCLBYTES - ETHER_ALIGN) &&
				    igb_fixup_rx(rxr) != 0)
					goto skip;
#endif
				if (staterr & E1000_RXD_STAT_VP) {
					rxr->fmp->m_pkthdr.ether_vtag =
					    le16toh(cur->wb.upper.vlan);
					rxr->fmp->m_flags |= M_VLANTAG;
				}
#ifndef __NO_STRICT_ALIGNMENT
skip:
#endif
				m = rxr->fmp;
				rxr->fmp = NULL;
				rxr->lmp = NULL;
			}
		} else {
			ifp->if_ierrors++;
discard:
			/* Reuse loaded DMA map and just update mbuf chain */
			mp = rxr->rx_buffers[i].m_head;
			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
			mp->m_data = mp->m_ext.ext_buf;
			mp->m_next = NULL;
			if (adapter->max_frame_size <=
			    (MCLBYTES - ETHER_ALIGN))
				m_adj(mp, ETHER_ALIGN);
			if (rxr->fmp != NULL) {
				/* Drop any partial chain being assembled. */
				m_freem(rxr->fmp);
				rxr->fmp = NULL;
				rxr->lmp = NULL;
			}
			m = NULL;
		}

		/* Zero out the receive descriptors status. */
		cur->wb.upper.status_error = 0;
		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		rxr->last_cleaned = i; /* For updating tail */

		/* Advance our pointers to the next descriptor. */
		if (++i == adapter->num_rx_desc)
			i = 0;

		if (m != NULL) {
			rxr->next_to_check = i;
			/* Use LRO if possible */
			if ((!lro->lro_cnt) || (tcp_lro_rx(lro, m, 0))) {
				/*
				 * Pass up to the stack; the RX lock is held
				 * across if_input, and next_to_check is
				 * re-read in case of re-entry.
				 */
				(*ifp->if_input)(ifp, m);
				i = rxr->next_to_check;
			}
		}
		/* Get the next descriptor */
		cur = &rxr->rx_base[i];
		staterr = cur->wb.upper.status_error;
	}
	rxr->next_to_check = i;

	/* Advance the E1000's Receive Queue #0  "Tail Pointer". */
	E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), rxr->last_cleaned);

	/*
	 * Flush any outstanding LRO work
	 */
	while (!SLIST_EMPTY(&lro->lro_active)) {
		queued = SLIST_FIRST(&lro->lro_active);
		SLIST_REMOVE_HEAD(&lro->lro_active, next);
		tcp_lro_flush(lro, queued);
	}

	IGB_RX_UNLOCK(rxr);

	/* FALSE means the ring is fully drained; TRUE means more pending. */
	if (!((staterr) & E1000_RXD_STAT_DD))
		return FALSE;

	return TRUE;
}
3983
3984 #ifndef __NO_STRICT_ALIGNMENT
/*
 * When jumbo frames are enabled we should realign the entire payload on
 * architectures with strict alignment. This is a serious design mistake of
 * the 8254x, as it nullifies the benefit of DMA operations. The 8254x only
 * allows the RX buffer size to be 2048/4096/8192/16384. What we really want
 * is 2048 - ETHER_ALIGN so its payload is aligned. On architectures without
 * strict alignment restrictions the 8254x still performs unaligned memory
 * accesses, which reduce performance too.
 * To avoid copying an entire frame to realign it, we allocate a new mbuf and
 * copy the ethernet header into the new mbuf. The new mbuf is prepended to
 * the existing mbuf chain.
 *
 * Be aware that the best performance of the 8254x is achieved only when
 * jumbo frames are not used at all on architectures with strict alignment.
 */
3999 static int
4000 igb_fixup_rx(struct rx_ring *rxr)
4001 {
4002 struct adapter *adapter = rxr->adapter;
4003 struct mbuf *m, *n;
4004 int error;
4005
4006 error = 0;
4007 m = rxr->fmp;
4008 if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4009 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4010 m->m_data += ETHER_HDR_LEN;
4011 } else {
4012 MGETHDR(n, M_DONTWAIT, MT_DATA);
4013 if (n != NULL) {
4014 bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4015 m->m_data += ETHER_HDR_LEN;
4016 m->m_len -= ETHER_HDR_LEN;
4017 n->m_len = ETHER_HDR_LEN;
4018 M_MOVE_PKTHDR(n, m);
4019 n->m_next = m;
4020 rxr->fmp = n;
4021 } else {
4022 adapter->dropped_pkts++;
4023 m_freem(rxr->fmp);
4024 rxr->fmp = NULL;
4025 error = ENOMEM;
4026 }
4027 }
4028
4029 return (error);
4030 }
4031 #endif
4032
4033 /*********************************************************************
4034 *
4035 * Verify that the hardware indicated that the checksum is valid.
4036 * Inform the stack about the status of checksum so that stack
4037 * doesn't spend time verifying the checksum.
4038 *
4039 *********************************************************************/
4040 static void
4041 igb_rx_checksum(u32 staterr, struct mbuf *mp)
4042 {
4043 u16 status = (u16)staterr;
4044 u8 errors = (u8) (staterr >> 24);
4045
4046 /* Ignore Checksum bit is set */
4047 if (status & E1000_RXD_STAT_IXSM) {
4048 mp->m_pkthdr.csum_flags = 0;
4049 return;
4050 }
4051
4052 if (status & E1000_RXD_STAT_IPCS) {
4053 /* Did it pass? */
4054 if (!(errors & E1000_RXD_ERR_IPE)) {
4055 /* IP Checksum Good */
4056 mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4057 mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4058
4059 } else
4060 mp->m_pkthdr.csum_flags = 0;
4061 }
4062
4063 if (status & E1000_RXD_STAT_TCPCS) {
4064 /* Did it pass? */
4065 if (!(errors & E1000_RXD_ERR_TCPE)) {
4066 mp->m_pkthdr.csum_flags |=
4067 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4068 mp->m_pkthdr.csum_data = htons(0xffff);
4069 }
4070 }
4071 return;
4072 }
4073
4074 #ifdef IGB_HW_VLAN_SUPPORT
4075 /*
4076 * This routine is run via an vlan
4077 * config EVENT
4078 */
static void
igb_register_vlan(void *unused, struct ifnet *ifp, u16 vtag)
{
	struct adapter	*adapter = ifp->if_softc;
	u32		ctrl, rctl, index, vfta;

	/* Shouldn't happen */
	if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)
		return;

	/* Turn on hardware VLAN tag stripping/insertion. */
	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
	ctrl |= E1000_CTRL_VME;
	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);

	/* Setup for Hardware Filter */
	rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
	rctl |= E1000_RCTL_VFE;
	rctl &= ~E1000_RCTL_CFIEN;
	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);

	/* Make entry in the hardware filter table:
	 * the VFTA is a 128 x 32-bit bitmap indexed by the tag's
	 * upper 7 bits, with the bit selected by the lower 5. */
	index = ((vtag >> 5) & 0x7F);
	vfta = E1000_READ_REG_ARRAY(&adapter->hw, E1000_VFTA, index);
	vfta |= (1 << (vtag & 0x1F));
	E1000_WRITE_REG_ARRAY(&adapter->hw, E1000_VFTA, index, vfta);

	/* Update the frame size to allow for the tag. */
	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
	    adapter->max_frame_size + VLAN_TAG_SIZE);

}
4110
4111 /*
4112 * This routine is run via an vlan
4113 * unconfig EVENT
4114 */
static void
igb_unregister_vlan(void *unused, struct ifnet *ifp, u16 vtag)
{
	struct adapter	*adapter = ifp->if_softc;
	u32		index, vfta;

	/* Shouldn't happen */
	if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)
		return;

	/* Remove entry in the hardware filter table
	 * (same VFTA index/bit layout as igb_register_vlan). */
	index = ((vtag >> 5) & 0x7F);
	vfta = E1000_READ_REG_ARRAY(&adapter->hw, E1000_VFTA, index);
	vfta &= ~(1 << (vtag & 0x1F));
	E1000_WRITE_REG_ARRAY(&adapter->hw, E1000_VFTA, index, vfta);
	/* Have all vlans unregistered? */
	if (adapter->ifp->if_vlantrunk == NULL) {
		u32 rctl;
		/* Turn off the filter table */
		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
		rctl &= ~E1000_RCTL_VFE;
		rctl |= E1000_RCTL_CFIEN;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
		/* Reset the frame size */
		E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
		    adapter->max_frame_size);
	}
}
4143 #endif /* IGB_HW_VLAN_SUPPORT */
4144
/*
 * Unmask interrupts.  In MSIX mode the per-queue EIMS bits are
 * enabled with auto-clear/auto-mask, plus link-state change via
 * the legacy IMS; otherwise the full legacy mask is used.
 */
static void
igb_enable_intr(struct adapter *adapter)
{
	/* With RSS set up what to auto clear */
	if (adapter->msix_mem) {
		E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
		    adapter->eims_mask);
		E1000_WRITE_REG(&adapter->hw, E1000_EIAM,
		    adapter->eims_mask);
		E1000_WRITE_REG(&adapter->hw, E1000_EIMS,
		    adapter->eims_mask);
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    E1000_IMS_LSC);
	} else {
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    IMS_ENABLE_MASK);
	}
	/* Force the writes to post before returning. */
	E1000_WRITE_FLUSH(&adapter->hw);

	return;
}
4166
4167 static void
4168 igb_disable_intr(struct adapter *adapter)
4169 {
4170 if (adapter->msix_mem) {
4171 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4172 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4173 }
4174 E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4175 E1000_WRITE_FLUSH(&adapter->hw);
4176 return;
4177 }
4178
4179 /*
4180 * Bit of a misnomer, what this really means is
4181 * to enable OS management of the system... aka
4182 * to disable special hardware management features
4183 */
4184 static void
4185 igb_init_manageability(struct adapter *adapter)
4186 {
4187 /* A shared code workaround */
4188 #define E1000_82542_MANC2H E1000_MANC2H
4189 if (adapter->has_manage) {
4190 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4191 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4192
4193 /* disable hardware interception of ARP */
4194 manc &= ~(E1000_MANC_ARP_EN);
4195
4196 /* enable receiving management packets to the host */
4197 manc |= E1000_MANC_EN_MNG2HOST;
4198 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4199 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4200 manc2h |= E1000_MNG2HOST_PORT_623;
4201 manc2h |= E1000_MNG2HOST_PORT_664;
4202 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4203
4204 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4205 }
4206 }
4207
4208 /*
4209 * Give control back to hardware management
4210 * controller if there is one.
4211 */
4212 static void
4213 igb_release_manageability(struct adapter *adapter)
4214 {
4215 if (adapter->has_manage) {
4216 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4217
4218 /* re-enable hardware interception of ARP */
4219 manc |= E1000_MANC_ARP_EN;
4220 manc &= ~E1000_MANC_EN_MNG2HOST;
4221
4222 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4223 }
4224 }
4225
4226 /*
4227 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
4228 * For ASF and Pass Through versions of f/w this means that
4229 * the driver is loaded.
4230 *
4231 */
4232 static void
4233 igb_get_hw_control(struct adapter *adapter)
4234 {
4235 u32 ctrl_ext;
4236
4237 /* Let firmware know the driver has taken over */
4238 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4239 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4240 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4241 }
4242
4243 /*
4244 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
4245 * For ASF and Pass Through versions of f/w this means that the
4246 * driver is no longer loaded.
4247 *
4248 */
4249 static void
4250 igb_release_hw_control(struct adapter *adapter)
4251 {
4252 u32 ctrl_ext;
4253
4254 /* Let firmware taken over control of h/w */
4255 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4256 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4257 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4258 }
4259
4260 static int
4261 igb_is_valid_ether_addr(uint8_t *addr)
4262 {
4263 char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4264
4265 if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4266 return (FALSE);
4267 }
4268
4269 return (TRUE);
4270 }
4271
4272
4273 /*
4274 * Enable PCI Wake On Lan capability
4275 */
4276 void
4277 igb_enable_wakeup(device_t dev)
4278 {
4279 u16 cap, status;
4280 u8 id;
4281
4282 /* First find the capabilities pointer*/
4283 cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4284 /* Read the PM Capabilities */
4285 id = pci_read_config(dev, cap, 1);
4286 if (id != PCIY_PMG) /* Something wrong */
4287 return;
4288 /* OK, we have the power capabilities, so
4289 now get the status register */
4290 cap += PCIR_POWER_STATUS;
4291 status = pci_read_config(dev, cap, 2);
4292 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4293 pci_write_config(dev, cap, status, 2);
4294 return;
4295 }
4296
4297
4298 /**********************************************************************
4299 *
4300 * Update the board statistics counters.
4301 *
4302 **********************************************************************/
4303 static void
4304 igb_update_stats_counters(struct adapter *adapter)
4305 {
4306 struct ifnet *ifp;
4307
4308 if(adapter->hw.phy.media_type == e1000_media_type_copper ||
4309 (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4310 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4311 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4312 }
4313 adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4314 adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4315 adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4316 adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4317
4318 adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4319 adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4320 adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4321 adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4322 adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4323 adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4324 adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4325 adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4326 adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4327 adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4328 adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4329 adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4330 adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4331 adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4332 adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4333 adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4334 adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4335 adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4336 adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4337 adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4338
4339 /* For the 64-bit byte counters the low dword must be read first. */
4340 /* Both registers clear on the read of the high dword */
4341
4342 adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCH);
4343 adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCH);
4344
4345 adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4346 adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4347 adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4348 adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4349 adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4350
4351 adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
4352 adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
4353
4354 adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4355 adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4356 adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4357 adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4358 adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4359 adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4360 adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4361 adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4362 adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4363 adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4364
4365 adapter->stats.algnerrc +=
4366 E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4367 adapter->stats.rxerrc +=
4368 E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4369 adapter->stats.tncrs +=
4370 E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4371 adapter->stats.cexterr +=
4372 E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4373 adapter->stats.tsctc +=
4374 E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4375 adapter->stats.tsctfc +=
4376 E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4377 ifp = adapter->ifp;
4378
4379 ifp->if_collisions = adapter->stats.colc;
4380
4381 /* Rx Errors */
4382 ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4383 adapter->stats.crcerrs + adapter->stats.algnerrc +
4384 adapter->stats.ruc + adapter->stats.roc +
4385 adapter->stats.mpc + adapter->stats.cexterr;
4386
4387 /* Tx Errors */
4388 ifp->if_oerrors = adapter->stats.ecol +
4389 adapter->stats.latecol + adapter->watchdog_events;
4390 }
4391
4392
4393 /**********************************************************************
4394 *
4395 * This routine is called only when igb_display_debug_stats is enabled.
4396 * This routine provides a way to take a look at important statistics
4397 * maintained by the driver and hardware.
4398 *
4399 **********************************************************************/
4400 static void
4401 igb_print_debug_info(struct adapter *adapter)
4402 {
4403 device_t dev = adapter->dev;
4404 struct rx_ring *rxr = adapter->rx_rings;
4405 struct tx_ring *txr = adapter->tx_rings;
4406 uint8_t *hw_addr = adapter->hw.hw_addr;
4407
4408 device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4409 device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4410 E1000_READ_REG(&adapter->hw, E1000_CTRL),
4411 E1000_READ_REG(&adapter->hw, E1000_RCTL));
4412
4413 #if (DEBUG_HW > 0) /* Dont output these errors normally */
4414 device_printf(dev, "IMS = 0x%x EIMS = 0x%x \n",
4415 E1000_READ_REG(&adapter->hw, E1000_IMS),
4416 E1000_READ_REG(&adapter->hw, E1000_EIMS));
4417 #endif
4418
4419 device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4420 ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),\
4421 (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff) );
4422 device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4423 adapter->hw.fc.high_water,
4424 adapter->hw.fc.low_water);
4425
4426 for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
4427 device_printf(dev, "Queue(%d) tdh = %d, tdt = %d\n", i,
4428 E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
4429 E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
4430 device_printf(dev, "no descriptors avail event = %lld\n",
4431 (long long)txr->no_desc_avail);
4432 device_printf(dev, "TX(%d) MSIX IRQ Handled = %lld\n", txr->me,
4433 (long long)txr->tx_irq);
4434 device_printf(dev, "TX(%d) Packets sent = %lld\n", txr->me,
4435 (long long)txr->tx_packets);
4436 }
4437
4438 for (int i = 0; i < adapter->num_rx_queues; i++, rxr++) {
4439 struct lro_ctrl *lro = &rxr->lro;
4440 device_printf(dev, "Queue(%d) rdh = %d, rdt = %d\n", i,
4441 E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4442 E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4443 device_printf(dev, "RX(%d) Packets received = %lld\n", rxr->me,
4444 (long long)rxr->rx_packets);
4445 device_printf(dev, "RX(%d) Byte count = %lld\n", rxr->me,
4446 (long long)rxr->rx_bytes);
4447 device_printf(dev, "RX(%d) MSIX IRQ Handled = %lld\n", rxr->me,
4448 (long long)rxr->rx_irq);
4449 device_printf(dev,"RX(%d) LRO Queued= %d\n",
4450 rxr->me, lro->lro_queued);
4451 device_printf(dev,"RX(%d) LRO Flushed= %d\n",
4452 rxr->me, lro->lro_flushed);
4453 }
4454
4455 device_printf(dev, "LINK MSIX IRQ Handled = %u\n", adapter->link_irq);
4456
4457 device_printf(dev, "Std mbuf failed = %ld\n",
4458 adapter->mbuf_alloc_failed);
4459 device_printf(dev, "Std mbuf cluster failed = %ld\n",
4460 adapter->mbuf_cluster_failed);
4461 device_printf(dev, "Driver dropped packets = %ld\n",
4462 adapter->dropped_pkts);
4463 device_printf(dev, "Driver tx dma failure in xmit = %ld\n",
4464 adapter->no_tx_dma_setup);
4465 }
4466
4467 static void
4468 igb_print_hw_stats(struct adapter *adapter)
4469 {
4470 device_t dev = adapter->dev;
4471
4472 device_printf(dev, "Excessive collisions = %lld\n",
4473 (long long)adapter->stats.ecol);
4474 #if (DEBUG_HW > 0) /* Dont output these errors normally */
4475 device_printf(dev, "Symbol errors = %lld\n",
4476 (long long)adapter->stats.symerrs);
4477 #endif
4478 device_printf(dev, "Sequence errors = %lld\n",
4479 (long long)adapter->stats.sec);
4480 device_printf(dev, "Defer count = %lld\n",
4481 (long long)adapter->stats.dc);
4482 device_printf(dev, "Missed Packets = %lld\n",
4483 (long long)adapter->stats.mpc);
4484 device_printf(dev, "Receive No Buffers = %lld\n",
4485 (long long)adapter->stats.rnbc);
4486 /* RLEC is inaccurate on some hardware, calculate our own. */
4487 device_printf(dev, "Receive Length Errors = %lld\n",
4488 ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4489 device_printf(dev, "Receive errors = %lld\n",
4490 (long long)adapter->stats.rxerrc);
4491 device_printf(dev, "Crc errors = %lld\n",
4492 (long long)adapter->stats.crcerrs);
4493 device_printf(dev, "Alignment errors = %lld\n",
4494 (long long)adapter->stats.algnerrc);
4495 /* On 82575 these are collision counts */
4496 device_printf(dev, "Collision/Carrier extension errors = %lld\n",
4497 (long long)adapter->stats.cexterr);
4498 device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4499 device_printf(dev, "watchdog timeouts = %ld\n",
4500 adapter->watchdog_events);
4501 device_printf(dev, "XON Rcvd = %lld\n",
4502 (long long)adapter->stats.xonrxc);
4503 device_printf(dev, "XON Xmtd = %lld\n",
4504 (long long)adapter->stats.xontxc);
4505 device_printf(dev, "XOFF Rcvd = %lld\n",
4506 (long long)adapter->stats.xoffrxc);
4507 device_printf(dev, "XOFF Xmtd = %lld\n",
4508 (long long)adapter->stats.xofftxc);
4509 device_printf(dev, "Good Packets Rcvd = %lld\n",
4510 (long long)adapter->stats.gprc);
4511 device_printf(dev, "Good Packets Xmtd = %lld\n",
4512 (long long)adapter->stats.gptc);
4513 device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4514 (long long)adapter->stats.tsctc);
4515 device_printf(dev, "TSO Contexts Failed = %lld\n",
4516 (long long)adapter->stats.tsctfc);
4517 }
4518
4519 /**********************************************************************
4520 *
4521 * This routine provides a way to dump out the adapter eeprom,
4522 * often a useful debug/service tool. This only dumps the first
4523 * 32 words, stuff that matters is in that extent.
4524 *
4525 **********************************************************************/
4526 static void
4527 igb_print_nvm_info(struct adapter *adapter)
4528 {
4529 u16 eeprom_data;
4530 int i, j, row = 0;
4531
4532 /* Its a bit crude, but it gets the job done */
4533 printf("\nInterface EEPROM Dump:\n");
4534 printf("Offset\n0x0000 ");
4535 for (i = 0, j = 0; i < 32; i++, j++) {
4536 if (j == 8) { /* Make the offset block */
4537 j = 0; ++row;
4538 printf("\n0x00%x0 ",row);
4539 }
4540 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
4541 printf("%04x ", eeprom_data);
4542 }
4543 printf("\n");
4544 }
4545
4546 static int
4547 igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4548 {
4549 struct adapter *adapter;
4550 int error;
4551 int result;
4552
4553 result = -1;
4554 error = sysctl_handle_int(oidp, &result, 0, req);
4555
4556 if (error || !req->newptr)
4557 return (error);
4558
4559 if (result == 1) {
4560 adapter = (struct adapter *)arg1;
4561 igb_print_debug_info(adapter);
4562 }
4563 /*
4564 * This value will cause a hex dump of the
4565 * first 32 16-bit words of the EEPROM to
4566 * the screen.
4567 */
4568 if (result == 2) {
4569 adapter = (struct adapter *)arg1;
4570 igb_print_nvm_info(adapter);
4571 }
4572
4573 return (error);
4574 }
4575
4576
4577 static int
4578 igb_sysctl_stats(SYSCTL_HANDLER_ARGS)
4579 {
4580 struct adapter *adapter;
4581 int error;
4582 int result;
4583
4584 result = -1;
4585 error = sysctl_handle_int(oidp, &result, 0, req);
4586
4587 if (error || !req->newptr)
4588 return (error);
4589
4590 if (result == 1) {
4591 adapter = (struct adapter *)arg1;
4592 igb_print_hw_stats(adapter);
4593 }
4594
4595 return (error);
4596 }
4597
4598 static void
4599 igb_add_rx_process_limit(struct adapter *adapter, const char *name,
4600 const char *description, int *limit, int value)
4601 {
4602 *limit = value;
4603 SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
4604 SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4605 OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
4606 }
4607
4608 #ifdef IGB_TIMESYNC
4609 /*
4610 * Initialize the Time Sync Feature
4611 */
4612 static int
4613 igb_tsync_init(struct adapter *adapter)
4614 {
4615 device_t dev = adapter->dev;
4616 u32 tx_ctl, rx_ctl, val;
4617
4618
4619 E1000_WRITE_REG(&adapter->hw, E1000_TIMINCA, (1<<24) |
4620 20833/PICOSECS_PER_TICK);
4621
4622 adapter->last_stamp = E1000_READ_REG(&adapter->hw, E1000_SYSTIML);
4623 adapter->last_stamp |= (u64)E1000_READ_REG(&adapter->hw,
4624 E1000_SYSTIMH) << 32ULL;
4625
4626 /* Enable the TX side */
4627 tx_ctl = E1000_READ_REG(&adapter->hw, E1000_TSYNCTXCTL);
4628 tx_ctl |= 0x10;
4629 E1000_WRITE_REG(&adapter->hw, E1000_TSYNCTXCTL, tx_ctl);
4630 E1000_WRITE_FLUSH(&adapter->hw);
4631
4632 tx_ctl = E1000_READ_REG(&adapter->hw, E1000_TSYNCTXCTL);
4633 if ((tx_ctl & 0x10) == 0) {
4634 device_printf(dev, "Failed to enable TX timestamping\n");
4635 return (ENXIO);
4636 }
4637
4638 /* Enable RX */
4639 rx_ctl = E1000_READ_REG(&adapter->hw, E1000_TSYNCRXCTL);
4640 rx_ctl |= 0x10; /* Enable the feature */
4641 rx_ctl |= 0x04; /* This value turns on Ver 1 and 2 */
4642 E1000_WRITE_REG(&adapter->hw, E1000_TSYNCRXCTL, rx_ctl);
4643
4644 /*
4645 * Ethertype Filter Queue Filter[0][15:0] = 0x88F7 (Ethertype)
4646 * Ethertype Filter Queue Filter[0][26] = 0x1 (Enable filter)
4647 * Ethertype Filter Queue Filter[0][31] = 0x1 (Enable Timestamping)
4648 */
4649 E1000_WRITE_REG(&adapter->hw, E1000_ETQF(0), 0x440088f7);
4650 E1000_WRITE_REG(&adapter->hw, E1000_TSYNCRXCFG, 0x0);
4651
4652 /*
4653 * Source Port Queue Filter Setup:
4654 * this is for UDP port filtering
4655 */
4656 E1000_WRITE_REG(&adapter->hw, E1000_SPQF(0), TSYNC_PORT);
4657 /* Protocol = UDP, enable Timestamp, and filter on source/protocol */
4658 val = (0x11 | (1 << 27) | (6 << 28));
4659 E1000_WRITE_REG(&adapter->hw, E1000_FTQF(0), val);
4660
4661 E1000_WRITE_FLUSH(&adapter->hw);
4662
4663 rx_ctl = E1000_READ_REG(&adapter->hw, E1000_TSYNCRXCTL);
4664 if ((rx_ctl & 0x10) == 0) {
4665 device_printf(dev, "Failed to enable RX timestamping\n");
4666 return (ENXIO);
4667 }
4668
4669 device_printf(dev, "IEEE 1588 Precision Time Protocol enabled\n");
4670
4671 return (0);
4672 }
4673
4674 /*
4675 * Disable the Time Sync Feature
4676 */
4677 static void
4678 igb_tsync_disable(struct adapter *adapter)
4679 {
4680 u32 tx_ctl, rx_ctl;
4681
4682 tx_ctl = E1000_READ_REG(&adapter->hw, E1000_TSYNCTXCTL);
4683 tx_ctl &= ~0x10;
4684 E1000_WRITE_REG(&adapter->hw, E1000_TSYNCTXCTL, tx_ctl);
4685 E1000_WRITE_FLUSH(&adapter->hw);
4686
4687 /* Invalidate TX Timestamp */
4688 E1000_READ_REG(&adapter->hw, E1000_TXSTMPH);
4689
4690 tx_ctl = E1000_READ_REG(&adapter->hw, E1000_TSYNCTXCTL);
4691 if (tx_ctl & 0x10)
4692 HW_DEBUGOUT("Failed to disable TX timestamping\n");
4693
4694 rx_ctl = E1000_READ_REG(&adapter->hw, E1000_TSYNCRXCTL);
4695 rx_ctl &= ~0x10;
4696
4697 E1000_WRITE_REG(&adapter->hw, E1000_TSYNCRXCTL, rx_ctl);
4698 E1000_WRITE_FLUSH(&adapter->hw);
4699
4700 /* Invalidate RX Timestamp */
4701 E1000_READ_REG(&adapter->hw, E1000_RXSATRH);
4702
4703 rx_ctl = E1000_READ_REG(&adapter->hw, E1000_TSYNCRXCTL);
4704 if (rx_ctl & 0x10)
4705 HW_DEBUGOUT("Failed to disable RX timestamping\n");
4706
4707 return;
4708 }
4709
4710 #endif /* IGB_TIMESYNC */
Cache object: c89e4e424372e17e44bf4a24903eee75
|