FreeBSD/Linux Kernel Cross Reference
sys/dev/e1000/if_igb.c
1 /******************************************************************************
2
3 Copyright (c) 2001-2009, Intel Corporation
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD: releng/8.0/sys/dev/e1000/if_igb.c 199325 2009-11-16 18:46:33Z jfv $*/
34
35
36 #ifdef HAVE_KERNEL_OPTION_HEADERS
37 #include "opt_device_polling.h"
38 #include "opt_inet.h"
39 #endif
40
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #if __FreeBSD_version >= 800000
44 #include <sys/buf_ring.h>
45 #endif
46 #include <sys/bus.h>
47 #include <sys/endian.h>
48 #include <sys/kernel.h>
49 #include <sys/kthread.h>
50 #include <sys/malloc.h>
51 #include <sys/mbuf.h>
52 #include <sys/module.h>
53 #include <sys/rman.h>
54 #include <sys/socket.h>
55 #include <sys/sockio.h>
56 #include <sys/sysctl.h>
57 #include <sys/taskqueue.h>
58 #include <sys/eventhandler.h>
59 #include <sys/pcpu.h>
60 #include <sys/smp.h>
61 #include <machine/smp.h>
62 #include <machine/bus.h>
63 #include <machine/resource.h>
64
65 #ifdef IGB_IEEE1588
66 #include <sys/ieee1588.h>
67 #endif
68
69 #include <net/bpf.h>
70 #include <net/ethernet.h>
71 #include <net/if.h>
72 #include <net/if_arp.h>
73 #include <net/if_dl.h>
74 #include <net/if_media.h>
75
76 #include <net/if_types.h>
77 #include <net/if_vlan_var.h>
78
79 #include <netinet/in_systm.h>
80 #include <netinet/in.h>
81 #include <netinet/if_ether.h>
82 #include <netinet/ip.h>
83 #include <netinet/ip6.h>
84 #include <netinet/tcp.h>
85 #include <netinet/tcp_lro.h>
86 #include <netinet/udp.h>
87
88 #include <machine/in_cksum.h>
89 #include <dev/pci/pcivar.h>
90 #include <dev/pci/pcireg.h>
91
92 #include "e1000_api.h"
93 #include "e1000_82575.h"
94 #include "if_igb.h"
95
/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	igb_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "version - 1.7.3";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
	{ 0x8086, E1000_DEV_ID_82575EB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
	    PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
	    PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_NS, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
	    PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
	    PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *  (Indexed by the last field of igb_vendor_info_array entries.)
 *********************************************************************/

static char *igb_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
/* Newbus device methods */
static int	igb_probe(device_t);
static int	igb_attach(device_t);
static int	igb_detach(device_t);
static int	igb_shutdown(device_t);
static int	igb_suspend(device_t);
static int	igb_resume(device_t);
/* Transmit entry points: legacy single-queue and (8.x+) multiqueue */
static void	igb_start(struct ifnet *);
static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#if __FreeBSD_version >= 800000
static int	igb_mq_start(struct ifnet *, struct mbuf *);
static int	igb_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	igb_qflush(struct ifnet *);
#endif
static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
static void	igb_watchdog(struct adapter *);
static void	igb_init(void *);
static void	igb_init_locked(struct adapter *);
static void	igb_stop(void *);
static void	igb_media_status(struct ifnet *, struct ifmediareq *);
static int	igb_media_change(struct ifnet *);
static void	igb_identify_hardware(struct adapter *);
static int	igb_allocate_pci_resources(struct adapter *);
static int	igb_allocate_msix(struct adapter *);
static int	igb_allocate_legacy(struct adapter *);
static int	igb_setup_msix(struct adapter *);
static void	igb_free_pci_resources(struct adapter *);
static void	igb_local_timer(void *);
static int	igb_hardware_init(struct adapter *);
static void	igb_setup_interface(device_t, struct adapter *);
static int	igb_allocate_queues(struct adapter *);
static void	igb_configure_queues(struct adapter *);

/* Transmit ring setup/teardown */
static int	igb_allocate_transmit_buffers(struct tx_ring *);
static void	igb_setup_transmit_structures(struct adapter *);
static void	igb_setup_transmit_ring(struct tx_ring *);
static void	igb_initialize_transmit_units(struct adapter *);
static void	igb_free_transmit_structures(struct adapter *);
static void	igb_free_transmit_buffers(struct tx_ring *);

/* Receive ring setup/teardown */
static int	igb_allocate_receive_buffers(struct rx_ring *);
static int	igb_setup_receive_structures(struct adapter *);
static int	igb_setup_receive_ring(struct rx_ring *);
static void	igb_initialize_receive_units(struct adapter *);
static void	igb_free_receive_structures(struct adapter *);
static void	igb_free_receive_buffers(struct rx_ring *);

static void	igb_enable_intr(struct adapter *);
static void	igb_disable_intr(struct adapter *);
static void	igb_update_stats_counters(struct adapter *);
static bool	igb_txeof(struct tx_ring *);
static bool	igb_rxeof(struct rx_ring *, int);
static void	igb_rx_checksum(u32, struct mbuf *, bool);
static int	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
static void	igb_set_promisc(struct adapter *);
static void	igb_disable_promisc(struct adapter *);
static void	igb_set_multi(struct adapter *);
static void	igb_print_hw_stats(struct adapter *);
static void	igb_update_link_status(struct adapter *);
static int	igb_get_buf(struct rx_ring *, int, u8);

/* VLAN hardware filter support */
static void	igb_register_vlan(void *, struct ifnet *, u16);
static void	igb_unregister_vlan(void *, struct ifnet *, u16);
static void	igb_setup_vlan_hw_support(struct adapter *);

static int	igb_xmit(struct tx_ring *, struct mbuf **);
static int	igb_dma_malloc(struct adapter *, bus_size_t,
		    struct igb_dma_alloc *, int);
static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static void	igb_print_debug_info(struct adapter *);
static void	igb_print_nvm_info(struct adapter *);
static int	igb_is_valid_ether_addr(u8 *);
static int	igb_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
/* Management and WOL Support */
static void	igb_init_manageability(struct adapter *);
static void	igb_release_manageability(struct adapter *);
static void	igb_get_hw_control(struct adapter *);
static void	igb_release_hw_control(struct adapter *);
static void	igb_enable_wakeup(device_t);

static int	igb_irq_fast(void *);
static void	igb_add_rx_process_limit(struct adapter *, const char *,
		    const char *, int *, int);
/* Taskqueue deferred interrupt handlers */
static void	igb_handle_rxtx(void *context, int pending);
static void	igb_handle_tx(void *context, int pending);
static void	igb_handle_rx(void *context, int pending);

/* These are MSIX only irq handlers */
static void	igb_msix_rx(void *);
static void	igb_msix_tx(void *);
static void	igb_msix_link(void *);

/* Adaptive Interrupt Moderation */
static void	igb_update_aim(struct rx_ring *);

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, igb_probe),
	DEVMETHOD(device_attach, igb_attach),
	DEVMETHOD(device_detach, igb_detach),
	DEVMETHOD(device_shutdown, igb_shutdown),
	DEVMETHOD(device_suspend, igb_suspend),
	DEVMETHOD(device_resume, igb_resume),
	{0, 0}
};

static driver_t igb_driver = {
	"igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *  (Readable via loader.conf before attach; the sysctl mirrors are
 *  added in igb_attach.)
 *********************************************************************/

/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);

/*
** These parameters are used in Adaptive
** Interrupt Moderation. The value is set
** into EITR and controls the interrupt
** frequency. A variable static scheme can
** be created by changing the assigned value
** of igb_ave_latency to the desired value,
** and then set igb_enable_aim to FALSE.
** This will result in all EITR registers
** getting set to that value statically.
*/
static int igb_enable_aim = TRUE;
TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
static int igb_low_latency = IGB_LOW_LATENCY;
TUNABLE_INT("hw.igb.low_latency", &igb_low_latency);
static int igb_ave_latency = IGB_AVE_LATENCY;
TUNABLE_INT("hw.igb.ave_latency", &igb_ave_latency);
static int igb_bulk_latency = IGB_BULK_LATENCY;
TUNABLE_INT("hw.igb.bulk_latency", &igb_bulk_latency);

/*
** This will autoconfigure based on the number
** of CPUs if set to 0. Only a matched pair of
** TX and RX rings are allowed.
*/
static int igb_num_queues = 1;
TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);

/* Flow control setting - default to FULL */
static int igb_fc_setting = e1000_fc_full;
TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);

/*
** Shadow VFTA table, this is needed because
** the real filter table gets cleared during
** a soft reset and the driver needs to be able
** to repopulate it.
**
** NOTE(review): file-scope, so this single table is shared by ALL
** igb instances in the system — verify that is intended when more
** than one adapter is present.
*/
static u32 igb_shadow_vfta[IGB_VFTA_SIZE];
321
322
323 /*********************************************************************
324 * Device identification routine
325 *
326 * igb_probe determines if the driver should be loaded on
327 * adapter based on PCI vendor/device id of the adapter.
328 *
329 * return BUS_PROBE_DEFAULT on success, positive on failure
330 *********************************************************************/
331
332 static int
333 igb_probe(device_t dev)
334 {
335 char adapter_name[60];
336 uint16_t pci_vendor_id = 0;
337 uint16_t pci_device_id = 0;
338 uint16_t pci_subvendor_id = 0;
339 uint16_t pci_subdevice_id = 0;
340 igb_vendor_info_t *ent;
341
342 INIT_DEBUGOUT("igb_probe: begin");
343
344 pci_vendor_id = pci_get_vendor(dev);
345 if (pci_vendor_id != IGB_VENDOR_ID)
346 return (ENXIO);
347
348 pci_device_id = pci_get_device(dev);
349 pci_subvendor_id = pci_get_subvendor(dev);
350 pci_subdevice_id = pci_get_subdevice(dev);
351
352 ent = igb_vendor_info_array;
353 while (ent->vendor_id != 0) {
354 if ((pci_vendor_id == ent->vendor_id) &&
355 (pci_device_id == ent->device_id) &&
356
357 ((pci_subvendor_id == ent->subvendor_id) ||
358 (ent->subvendor_id == PCI_ANY_ID)) &&
359
360 ((pci_subdevice_id == ent->subdevice_id) ||
361 (ent->subdevice_id == PCI_ANY_ID))) {
362 sprintf(adapter_name, "%s %s",
363 igb_strings[ent->index],
364 igb_driver_version);
365 device_set_desc_copy(dev, adapter_name);
366 return (BUS_PROBE_DEFAULT);
367 }
368 ent++;
369 }
370
371 return (ENXIO);
372 }
373
374 /*********************************************************************
375 * Device initialization routine
376 *
377 * The attach entry point is called when the driver is being loaded.
378 * This routine identifies the type of hardware, allocates all resources
379 * and initializes the hardware.
380 *
381 * return 0 on success, positive on failure
382 *********************************************************************/
383
/*
 * igb_attach - newbus attach method.
 *
 * Brings the adapter from power-on to a usable (but down) state:
 * sysctls, PCI resources, shared-code init, descriptor validation,
 * queue allocation, hardware reset + NVM/MAC read, hardware init,
 * interrupt allocation (MSIX or legacy), ifnet setup, WOL and VLAN
 * event registration.  The ordering of the hardware steps matters:
 * reset must precede the NVM/MAC reads, which must precede
 * igb_hardware_init.
 *
 * Returns 0 on success, a positive errno on failure; the error
 * paths unwind only what was set up before the failure.
 */
static int
igb_attach(device_t dev)
{
	struct adapter	*adapter;
	int		error = 0;
	u16		eeprom_data;

	INIT_DEBUGOUT("igb_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_stats, "I", "Statistics");

	/*
	 * The following sysctls expose the file-scope tunables, so
	 * changing them affects every igb instance, not just this one.
	 */
	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
	    &igb_fc_setting, 0, "Flow Control");

	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
	    &igb_enable_aim, 1, "Interrupt Moderation");

	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "low_latency", CTLTYPE_INT|CTLFLAG_RW,
	    &igb_low_latency, 1, "Low Latency");

	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "ave_latency", CTLTYPE_INT|CTLFLAG_RW,
	    &igb_ave_latency, 1, "Average Latency");

	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "bulk_latency", CTLTYPE_INT|CTLFLAG_RW,
	    &igb_bulk_latency, 1, "Bulk Latency");

	/* Timer callout runs under the core mutex. */
	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	igb_identify_hardware(adapter);

	/* Setup PCI resources */
	if (igb_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Sysctls for limiting the amount of work done in the taskqueue */
	igb_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    igb_rx_process_limit);

	/*
	 * Validate number of transmit and receive descriptors. It
	 * must not exceed hardware maximum, and must be multiple
	 * of E1000_DBA_ALIGN.  Bad tunable values fall back to the
	 * defaults with a console warning rather than failing attach.
	 */
	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    IGB_DEFAULT_TXD, igb_txd);
		adapter->num_tx_desc = IGB_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = igb_txd;
	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    IGB_DEFAULT_RXD, igb_rxd);
		adapter->num_rx_desc = IGB_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = igb_rxd;

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

	/*
	** Allocate and Setup Queues
	*/
	if (igb_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/*
	** Start from a known state, this is
	** important in reading the nvm and
	** mac from that.
	*/
	e1000_reset_hw(&adapter->hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in sleep state, call it again,
		** if it fails a second time its a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/*
	** Copy the permanent MAC address out of the EEPROM
	*/
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}
	/* Check its sanity */
	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/* Now Initialize the hardware */
	if (igb_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		error = EIO;
		goto err_late;
	}

	/*
	** Configure Interrupts
	*/
	if (adapter->msix > 1) /* MSIX */
		error = igb_allocate_msix(adapter);
	else /* MSI or Legacy */
		error = igb_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/* Setup OS specific network interface */
	igb_setup_interface(dev, adapter);

#ifdef IGB_IEEE1588
	/*
	** Setup the timer: IEEE 1588 support
	*/
	adapter->cycles.read = igb_read_clock;
	adapter->cycles.mask = (u64)-1;
	adapter->cycles.mult = 1;
	adapter->cycles.shift = IGB_TSYNC_SHIFT;
	E1000_WRITE_REG(&adapter->hw, E1000_TIMINCA, (1<<24) |
	    IGB_TSYNC_CYCLE_TIME * IGB_TSYNC_SHIFT);
	E1000_WRITE_REG(&adapter->hw, E1000_SYSTIML, 0x00000000);
	E1000_WRITE_REG(&adapter->hw, E1000_SYSTIMH, 0xFF800000);

	// JFV - this is not complete yet
#endif

	/* Initialize statistics */
	igb_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	igb_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Determine if we have to control management hardware */
	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

	/*
	 * Setup Wake-on-Lan
	 */
	/* APME bit in EEPROM is mapped to WUC.APME */
	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
	if (eeprom_data)
		adapter->wol = E1000_WUFC_MAG;

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	INIT_DEBUGOUT("igb_attach: end");

	return (0);

err_late:
	/* Queues were allocated; tear them down along with hw control. */
	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);
	igb_release_hw_control(adapter);
err_pci:
	igb_free_pci_resources(adapter);
	IGB_CORE_LOCK_DESTROY(adapter);

	return (error);
}
624
625 /*********************************************************************
626 * Device removal routine
627 *
628 * The detach entry point is called when the driver is being removed.
629 * This routine stops the adapter and deallocates all the resources
630 * that were allocated for driver operation.
631 *
632 * return 0 on success, positive on failure
633 *********************************************************************/
634
/*
 * igb_detach - newbus detach method.
 *
 * Stops the adapter and releases everything igb_attach acquired, in
 * roughly reverse order: stop + PHY reset, firmware handoff, WOL arm,
 * VLAN event deregistration, ifnet detach, timer drain, PCI
 * resources, ifnet free, ring structures, core lock.
 *
 * Returns 0 on success, EBUSY if VLANs are still configured on the
 * interface.
 */
static int
igb_detach(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	INIT_DEBUGOUT("igb_detach: begin");

	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}

	/*
	 * in_detach gates igb_ioctl so no new configuration is
	 * accepted while teardown proceeds.
	 */
	IGB_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	igb_stop(adapter);
	IGB_CORE_UNLOCK(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	/* Give control back to firmware */
	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

	/* Arm wake-up if WOL was configured, same as suspend. */
	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		igb_enable_wakeup(dev);
	}

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);

	/* callout_drain: wait for any in-flight local timer to finish. */
	callout_drain(&adapter->timer);

	igb_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);

	IGB_CORE_LOCK_DESTROY(adapter);

	return (0);
}
687
688 /*********************************************************************
689 *
690 * Shutdown entry point
691 *
692 **********************************************************************/
693
694 static int
695 igb_shutdown(device_t dev)
696 {
697 return igb_suspend(dev);
698 }
699
700 /*
701 * Suspend/resume device methods.
702 */
703 static int
704 igb_suspend(device_t dev)
705 {
706 struct adapter *adapter = device_get_softc(dev);
707
708 IGB_CORE_LOCK(adapter);
709
710 igb_stop(adapter);
711
712 igb_release_manageability(adapter);
713 igb_release_hw_control(adapter);
714
715 if (adapter->wol) {
716 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
717 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
718 igb_enable_wakeup(dev);
719 }
720
721 IGB_CORE_UNLOCK(adapter);
722
723 return bus_generic_suspend(dev);
724 }
725
726 static int
727 igb_resume(device_t dev)
728 {
729 struct adapter *adapter = device_get_softc(dev);
730 struct ifnet *ifp = adapter->ifp;
731
732 IGB_CORE_LOCK(adapter);
733 igb_init_locked(adapter);
734 igb_init_manageability(adapter);
735
736 if ((ifp->if_flags & IFF_UP) &&
737 (ifp->if_drv_flags & IFF_DRV_RUNNING))
738 igb_start(ifp);
739
740 IGB_CORE_UNLOCK(adapter);
741
742 return bus_generic_resume(dev);
743 }
744
745
746 /*********************************************************************
747 * Transmit entry point
748 *
749 * igb_start is called by the stack to initiate a transmit.
750 * The driver will remain in this routine as long as there are
751 * packets to transmit and transmit resources are available.
752 * In case resources are not available stack is notified and
753 * the packet is requeued.
754 **********************************************************************/
755
756 static void
757 igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
758 {
759 struct adapter *adapter = ifp->if_softc;
760 struct mbuf *m_head;
761
762 IGB_TX_LOCK_ASSERT(txr);
763
764 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
765 IFF_DRV_RUNNING)
766 return;
767 if (!adapter->link_active)
768 return;
769
770 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
771
772 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
773 if (m_head == NULL)
774 break;
775 /*
776 * Encapsulation can modify our pointer, and or make it
777 * NULL on failure. In that event, we can't requeue.
778 */
779 if (igb_xmit(txr, &m_head)) {
780 if (m_head == NULL)
781 break;
782 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
783 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
784 break;
785 }
786
787 /* Send a copy of the frame to the BPF listener */
788 ETHER_BPF_MTAP(ifp, m_head);
789
790 /* Set timeout in case hardware has problems transmitting. */
791 txr->watchdog_timer = IGB_TX_TIMEOUT;
792 }
793 }
794
795 /*
796 * Legacy TX driver routine, called from the
797 * stack, always uses tx[0], and spins for it.
798 * Should not be used with multiqueue tx
799 */
800 static void
801 igb_start(struct ifnet *ifp)
802 {
803 struct adapter *adapter = ifp->if_softc;
804 struct tx_ring *txr = adapter->tx_rings;
805
806 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
807 IGB_TX_LOCK(txr);
808 igb_start_locked(txr, ifp);
809 IGB_TX_UNLOCK(txr);
810 }
811 return;
812 }
813
814 #if __FreeBSD_version >= 800000
815 /*
816 ** Multiqueue Transmit driver
817 **
818 */
819 static int
820 igb_mq_start(struct ifnet *ifp, struct mbuf *m)
821 {
822 struct adapter *adapter = ifp->if_softc;
823 struct tx_ring *txr;
824 int i = 0, err = 0;
825
826 /* Which queue to use */
827 if ((m->m_flags & M_FLOWID) != 0)
828 i = m->m_pkthdr.flowid % adapter->num_queues;
829 txr = &adapter->tx_rings[i];
830
831 if (IGB_TX_TRYLOCK(txr)) {
832 err = igb_mq_start_locked(ifp, txr, m);
833 IGB_TX_UNLOCK(txr);
834 } else
835 err = drbr_enqueue(ifp, txr->br, m);
836
837 return (err);
838 }
839
/*
 * Transmit on a ring with its TX lock held.  Handles three cases:
 * (1) interface down -> stash the mbuf; (2) direct-dispatch when the
 * buf_ring is empty; (3) enqueue then drain the buf_ring.  A NULL
 * mbuf means we were called from the TX tasklet purely to drain.
 * Returns 0 or an errno from igb_xmit / drbr_enqueue.
 */
static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter  *adapter = txr->adapter;
	struct mbuf     *next;
	int             err = 0;

	/* Interface not running: just stage the mbuf for later. */
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	if (m == NULL) /* Called by tasklet */
		goto process;

	/* If nothing queued go right to xmit */
	if (drbr_empty(ifp, txr->br)) {
		if ((err = igb_xmit(txr, &m)) != 0) {
			/* igb_xmit NULLs the mbuf if it consumed/freed it */
			if (m != NULL)
				err = drbr_enqueue(ifp, txr->br, m);
			return (err);
		} else {
			/* Success, update stats */
			drbr_stats_update(ifp, m->m_pkthdr.len, m->m_flags);
			/* Send a copy of the frame to the BPF listener */
			ETHER_BPF_MTAP(ifp, m);
			/* Set the watchdog */
			txr->watchdog_timer = IGB_TX_TIMEOUT;
		}

	} else if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
		return (err);

process:
	if (drbr_empty(ifp, txr->br))
		return (err);

	/* Process the queue until empty, interface stop, or xmit failure */
	while (TRUE) {
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
		next = drbr_dequeue(ifp, txr->br);
		if (next == NULL)
			break;
		if ((err = igb_xmit(txr, &next)) != 0) {
			/* Requeue unless igb_xmit already freed the mbuf */
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
			break;
		}
		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
		ETHER_BPF_MTAP(ifp, next);
		/* Set the watchdog */
		txr->watchdog_timer = IGB_TX_TIMEOUT;
	}

	/* Descriptors getting scarce: tell the stack to back off. */
	if (txr->tx_avail <= IGB_TX_OP_THRESHOLD)
		ifp->if_drv_flags |= IFF_DRV_OACTIVE;

	return (err);
}
900
901 /*
902 ** Flush all ring buffers
903 */
904 static void
905 igb_qflush(struct ifnet *ifp)
906 {
907 struct adapter *adapter = ifp->if_softc;
908 struct tx_ring *txr = adapter->tx_rings;
909 struct mbuf *m;
910
911 for (int i = 0; i < adapter->num_queues; i++, txr++) {
912 IGB_TX_LOCK(txr);
913 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
914 m_freem(m);
915 IGB_TX_UNLOCK(txr);
916 }
917 if_qflush(ifp);
918 }
919 #endif /* __FreeBSD_version >= 800000 */
920
/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface (addresses, MTU, flags, multicast membership, media,
 *  capabilities).  Changes that affect the hardware re-run
 *  igb_init_locked() under the core lock.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
	struct ifaddr *ifa = (struct ifaddr *)data;
#endif
	int error = 0;

	/* Ignore ioctls racing with device detach */
	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting hardware takes a very long time
			 * and results in link renegotiation we only
			 * initialize the hardware only when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				IGB_CORE_LOCK(adapter);
				igb_init_locked(adapter);
				IGB_CORE_UNLOCK(adapter);
			}
			if (!(ifp->if_flags & IFF_NOARP))
				arp_ifinit(ifp, ifa);
		} else
#endif
			/* Non-INET addresses go to the generic handler */
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		IGB_CORE_LOCK(adapter);
		/* 9234 is the largest jumbo frame the hardware accepts */
		max_frame_size = 9234;
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			IGB_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		/* Re-init so RX buffer sizing matches the new MTU */
		igb_init_locked(adapter);
		IGB_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		IGB_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				/*
				 * Only PROMISC/ALLMULTI transitions need
				 * action while running; avoid a full reinit.
				 */
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					igb_disable_promisc(adapter);
					igb_set_promisc(adapter);
				}
			} else
				igb_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				igb_stop(adapter);
		/* Remember flags to detect the next transition */
		adapter->if_flags = ifp->if_flags;
		IGB_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			IGB_CORE_LOCK(adapter);
			/* Quiesce interrupts while rewriting the MTA */
			igb_disable_intr(adapter);
			igb_set_multi(adapter);
			igb_enable_intr(adapter);
			IGB_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		IGB_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			IGB_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		IGB_CORE_UNLOCK(adapter);
		/* FALLTHROUGH -- set shares the ifmedia_ioctl() path */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		/* mask holds only the capability bits that changed */
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_LRO) {
			ifp->if_capenable ^= IFCAP_LRO;
			reinit = 1;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			igb_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

#ifdef IGB_IEEE1588
	/*
	** IOCTL support for Precision Time (IEEE 1588) Support
	*/
	case SIOCSHWTSTAMP:
		error = igb_hwtstamp_ioctl(adapter, ifp);
		break;
#endif

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}
1078
1079 /*********************************************************************
1080 * Watchdog timer:
1081 *
1082 * This routine is called from the local timer every second.
1083 * As long as transmit descriptors are being cleaned the value
1084 * is non-zero and we do nothing. Reaching 0 indicates a tx hang
1085 * and we then reset the device.
1086 *
1087 **********************************************************************/
1088
1089 static void
1090 igb_watchdog(struct adapter *adapter)
1091 {
1092 struct tx_ring *txr = adapter->tx_rings;
1093 bool tx_hang = FALSE;
1094
1095 IGB_CORE_LOCK_ASSERT(adapter);
1096
1097 /*
1098 ** The timer is set to 5 every time start() queues a packet.
1099 ** Then txeof keeps resetting it as long as it cleans at
1100 ** least one descriptor.
1101 ** Finally, anytime all descriptors are clean the timer is
1102 ** set to 0.
1103 **
1104 ** With TX Multiqueue we need to check every queue's timer,
1105 ** if any time out we do the reset.
1106 */
1107 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1108 IGB_TX_LOCK(txr);
1109 if (txr->watchdog_timer == 0 ||
1110 (--txr->watchdog_timer)) {
1111 IGB_TX_UNLOCK(txr);
1112 continue;
1113 } else {
1114 tx_hang = TRUE;
1115 IGB_TX_UNLOCK(txr);
1116 break;
1117 }
1118 }
1119 if (tx_hang == FALSE)
1120 return;
1121
1122 /* If we are in this routine because of pause frames, then
1123 * don't reset the hardware.
1124 */
1125 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
1126 E1000_STATUS_TXOFF) {
1127 txr = adapter->tx_rings; /* reset pointer */
1128 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1129 IGB_TX_LOCK(txr);
1130 txr->watchdog_timer = IGB_TX_TIMEOUT;
1131 IGB_TX_UNLOCK(txr);
1132 }
1133 return;
1134 }
1135
1136 if (e1000_check_for_link(&adapter->hw) == 0)
1137 device_printf(adapter->dev, "watchdog timeout -- resetting\n");
1138
1139 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1140 device_printf(adapter->dev, "Queue(%d) tdh = %d, tdt = %d\n",
1141 i, E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
1142 E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
1143 device_printf(adapter->dev, "Queue(%d) desc avail = %d,"
1144 " Next Desc to Clean = %d\n", i, txr->tx_avail,
1145 txr->next_to_clean);
1146 }
1147
1148 adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1149 adapter->watchdog_events++;
1150
1151 igb_init_locked(adapter);
1152 }
1153
1154 /*********************************************************************
1155 * Init entry point
1156 *
1157 * This routine is used in two ways. It is used by the stack as
1158 * init entry point in network interface structure. It is also used
1159 * by the driver as a hw/sw initialization routine to get to a
1160 * consistent state.
1161 *
1162 * return 0 on success, positive on failure
1163 **********************************************************************/
1164
1165 static void
1166 igb_init_locked(struct adapter *adapter)
1167 {
1168 struct rx_ring *rxr = adapter->rx_rings;
1169 struct tx_ring *txr = adapter->tx_rings;
1170 struct ifnet *ifp = adapter->ifp;
1171 device_t dev = adapter->dev;
1172 u32 pba = 0;
1173
1174 INIT_DEBUGOUT("igb_init: begin");
1175
1176 IGB_CORE_LOCK_ASSERT(adapter);
1177
1178 igb_stop(adapter);
1179
1180 /*
1181 * Packet Buffer Allocation (PBA)
1182 * Writing PBA sets the receive portion of the buffer
1183 * the remainder is used for the transmit buffer.
1184 */
1185 if (adapter->hw.mac.type == e1000_82575) {
1186 INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
1187 pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
1188 E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
1189 }
1190
1191 /* Get the latest mac address, User can use a LAA */
1192 bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1193 ETHER_ADDR_LEN);
1194
1195 /* Put the address into the Receive Address Array */
1196 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1197
1198 /* Initialize the hardware */
1199 if (igb_hardware_init(adapter)) {
1200 device_printf(dev, "Unable to initialize the hardware\n");
1201 return;
1202 }
1203 igb_update_link_status(adapter);
1204
1205 E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1206
1207 /* Set hardware offload abilities */
1208 ifp->if_hwassist = 0;
1209 if (ifp->if_capenable & IFCAP_TXCSUM) {
1210 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1211 #if __FreeBSD_version >= 800000
1212 if (adapter->hw.mac.type == e1000_82576)
1213 ifp->if_hwassist |= CSUM_SCTP;
1214 #endif
1215 }
1216
1217 if (ifp->if_capenable & IFCAP_TSO4)
1218 ifp->if_hwassist |= CSUM_TSO;
1219
1220 /* Configure for OS presence */
1221 igb_init_manageability(adapter);
1222
1223 /* Prepare transmit descriptors and buffers */
1224 igb_setup_transmit_structures(adapter);
1225 igb_initialize_transmit_units(adapter);
1226
1227 /* Setup Multicast table */
1228 igb_set_multi(adapter);
1229
1230 /*
1231 ** Figure out the desired mbuf pool
1232 ** for doing jumbo/packetsplit
1233 */
1234 if (ifp->if_mtu > ETHERMTU)
1235 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1236 else
1237 adapter->rx_mbuf_sz = MCLBYTES;
1238
1239 /* Prepare receive descriptors and buffers */
1240 if (igb_setup_receive_structures(adapter)) {
1241 device_printf(dev, "Could not setup receive structures\n");
1242 igb_stop(adapter);
1243 return;
1244 }
1245 igb_initialize_receive_units(adapter);
1246
1247 /* Don't lose promiscuous settings */
1248 igb_set_promisc(adapter);
1249
1250 ifp->if_drv_flags |= IFF_DRV_RUNNING;
1251 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1252
1253 callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1254 e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1255
1256 if (adapter->msix > 1) /* Set up queue routing */
1257 igb_configure_queues(adapter);
1258
1259 /* Set up VLAN tag offload and filter */
1260 igb_setup_vlan_hw_support(adapter);
1261
1262 /* Set default RX interrupt moderation */
1263 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1264 E1000_WRITE_REG(&adapter->hw,
1265 E1000_EITR(rxr->msix), igb_ave_latency);
1266 rxr->eitr_setting = igb_ave_latency;
1267 }
1268
1269 /* Set TX interrupt rate & reset TX watchdog */
1270 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1271 E1000_WRITE_REG(&adapter->hw,
1272 E1000_EITR(txr->msix), igb_ave_latency);
1273 txr->watchdog_timer = FALSE;
1274 }
1275
1276 {
1277 /* this clears any pending interrupts */
1278 E1000_READ_REG(&adapter->hw, E1000_ICR);
1279 igb_enable_intr(adapter);
1280 E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1281 }
1282
1283 /* Don't reset the phy next time init gets called */
1284 adapter->hw.phy.reset_disable = TRUE;
1285 }
1286
/*
 * if_init handler: acquire the core lock and run the real
 * initialization in igb_init_locked().
 */
static void
igb_init(void *arg)
{
	struct adapter *sc = (struct adapter *)arg;

	IGB_CORE_LOCK(sc);
	igb_init_locked(sc);
	IGB_CORE_UNLOCK(sc);
}
1296
1297
/*
 * Deferred RX/TX handler for legacy/MSI mode, run from the adapter
 * taskqueue.  Drains receives (re-queuing itself while igb_rxeof()
 * reports more work), cleans the TX ring, restarts transmission if
 * packets are queued, then unmasks interrupts.
 */
static void
igb_handle_rxtx(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	struct ifnet	*ifp;

	ifp = adapter->ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		/* More RX work left: reschedule ourselves */
		if (igb_rxeof(rxr, adapter->rx_process_limit))
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
		IGB_TX_LOCK(txr);
		igb_txeof(txr);

		/* Kick the transmit path if anything is queued */
#if __FreeBSD_version >= 800000
		if (!drbr_empty(ifp, txr->br))
			igb_mq_start_locked(ifp, txr, NULL);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			igb_start_locked(txr, ifp);
#endif
		IGB_TX_UNLOCK(txr);
	}

	/* Re-enable interrupts masked by igb_irq_fast() */
	igb_enable_intr(adapter);
}
1326
1327 static void
1328 igb_handle_rx(void *context, int pending)
1329 {
1330 struct rx_ring *rxr = context;
1331 struct adapter *adapter = rxr->adapter;
1332 struct ifnet *ifp = adapter->ifp;
1333
1334 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1335 if (igb_rxeof(rxr, adapter->rx_process_limit) != 0)
1336 /* More to clean, schedule another task */
1337 taskqueue_enqueue(adapter->tq, &rxr->rx_task);
1338
1339 }
1340
/*
 * Deferred TX handler (MSI-X mode), run from the adapter taskqueue.
 * Cleans the TX ring and restarts transmission if packets are queued.
 */
static void
igb_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		IGB_TX_LOCK(txr);
		igb_txeof(txr);
		/* Kick the transmit path if anything is queued */
#if __FreeBSD_version >= 800000
		if (!drbr_empty(ifp, txr->br))
			igb_mq_start_locked(ifp, txr, NULL);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			igb_start_locked(txr, ifp);
#endif
		IGB_TX_UNLOCK(txr);
	}
}
1361
1362
1363 /*********************************************************************
1364 *
1365 * MSI/Legacy Deferred
1366 * Interrupt Service routine
1367 *
1368 *********************************************************************/
1369 static int
1370 igb_irq_fast(void *arg)
1371 {
1372 struct adapter *adapter = arg;
1373 uint32_t reg_icr;
1374
1375
1376 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1377
1378 /* Hot eject? */
1379 if (reg_icr == 0xffffffff)
1380 return FILTER_STRAY;
1381
1382 /* Definitely not our interrupt. */
1383 if (reg_icr == 0x0)
1384 return FILTER_STRAY;
1385
1386 if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1387 return FILTER_STRAY;
1388
1389 /*
1390 * Mask interrupts until the taskqueue is finished running. This is
1391 * cheap, just assume that it is needed. This also works around the
1392 * MSI message reordering errata on certain systems.
1393 */
1394 igb_disable_intr(adapter);
1395 taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1396
1397 /* Link status change */
1398 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1399 adapter->hw.mac.get_link_status = 1;
1400 igb_update_link_status(adapter);
1401 }
1402
1403 if (reg_icr & E1000_ICR_RXO)
1404 adapter->rx_overruns++;
1405 return FILTER_HANDLED;
1406 }
1407
1408
1409 /*********************************************************************
1410 *
1411 * MSIX TX Interrupt Service routine
1412 *
1413 **********************************************************************/
1414 static void
1415 igb_msix_tx(void *arg)
1416 {
1417 struct tx_ring *txr = arg;
1418 struct adapter *adapter = txr->adapter;
1419 u32 loop = IGB_MAX_LOOP;
1420 bool more;
1421
1422 ++txr->tx_irq;
1423 IGB_TX_LOCK(txr);
1424
1425 do {
1426 more = igb_txeof(txr);
1427 } while (loop-- && more);
1428
1429 IGB_TX_UNLOCK(txr);
1430
1431 /* Schedule a clean task */
1432 taskqueue_enqueue(adapter->tq, &txr->tx_task);
1433
1434 /* Reenable this interrupt */
1435 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, txr->eims);
1436 return;
1437 }
1438
1439 /*********************************************************************
1440 *
1441 * MSIX RX Interrupt Service routine
1442 *
1443 **********************************************************************/
1444
1445 static void
1446 igb_msix_rx(void *arg)
1447 {
1448 struct rx_ring *rxr = arg;
1449 struct adapter *adapter = rxr->adapter;
1450 u32 loop = IGB_MAX_LOOP;
1451 bool more;
1452
1453 ++rxr->rx_irq;
1454 do {
1455 more = igb_rxeof(rxr, adapter->rx_process_limit);
1456 } while (loop-- && more);
1457
1458 /* Update interrupt rate */
1459 if (igb_enable_aim == TRUE)
1460 igb_update_aim(rxr);
1461
1462 /* Schedule another clean */
1463 taskqueue_enqueue(adapter->tq, &rxr->rx_task);
1464
1465 /* Reenable this interrupt */
1466 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, rxr->eims);
1467 return;
1468 }
1469
1470
1471 /*********************************************************************
1472 *
1473 * MSIX Link Interrupt Service routine
1474 *
1475 **********************************************************************/
1476
1477 static void
1478 igb_msix_link(void *arg)
1479 {
1480 struct adapter *adapter = arg;
1481 u32 icr;
1482
1483 ++adapter->link_irq;
1484 icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1485 if (!(icr & E1000_ICR_LSC))
1486 goto spurious;
1487 adapter->hw.mac.get_link_status = 1;
1488 igb_update_link_status(adapter);
1489
1490 spurious:
1491 /* Rearm */
1492 E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1493 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1494 return;
1495 }
1496
1497
1498 /*
1499 ** Routine to adjust the RX EITR value based on traffic,
1500 ** its a simple three state model, but seems to help.
1501 **
1502 ** Note that the three EITR values are tuneable using
1503 ** sysctl in real time. The feature can be effectively
1504 ** nullified by setting them equal.
1505 */
1506 #define BULK_THRESHOLD 10000
1507 #define AVE_THRESHOLD 1600
1508
1509 static void
1510 igb_update_aim(struct rx_ring *rxr)
1511 {
1512 struct adapter *adapter = rxr->adapter;
1513 u32 olditr, newitr;
1514
1515 /* Update interrupt moderation based on traffic */
1516 olditr = rxr->eitr_setting;
1517 newitr = olditr;
1518
1519 /* Idle, don't change setting */
1520 if (rxr->bytes == 0)
1521 return;
1522
1523 if (olditr == igb_low_latency) {
1524 if (rxr->bytes > AVE_THRESHOLD)
1525 newitr = igb_ave_latency;
1526 } else if (olditr == igb_ave_latency) {
1527 if (rxr->bytes < AVE_THRESHOLD)
1528 newitr = igb_low_latency;
1529 else if (rxr->bytes > BULK_THRESHOLD)
1530 newitr = igb_bulk_latency;
1531 } else if (olditr == igb_bulk_latency) {
1532 if (rxr->bytes < BULK_THRESHOLD)
1533 newitr = igb_ave_latency;
1534 }
1535
1536 if (olditr != newitr) {
1537 /* Change interrupt rate */
1538 rxr->eitr_setting = newitr;
1539 if (adapter->hw.mac.type == e1000_82575)
1540 newitr |= newitr << 16;
1541 else
1542 newitr |= 0x8000000;
1543 E1000_WRITE_REG(&adapter->hw, E1000_EITR(rxr->me), newitr);
1544 }
1545
1546 rxr->bytes = 0;
1547 return;
1548 }
1549
1550
1551 /*********************************************************************
1552 *
1553 * Media Ioctl callback
1554 *
1555 * This routine is called whenever the user queries the status of
1556 * the interface using ifconfig.
1557 *
1558 **********************************************************************/
1559 static void
1560 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1561 {
1562 struct adapter *adapter = ifp->if_softc;
1563 u_char fiber_type = IFM_1000_SX;
1564
1565 INIT_DEBUGOUT("igb_media_status: begin");
1566
1567 IGB_CORE_LOCK(adapter);
1568 igb_update_link_status(adapter);
1569
1570 ifmr->ifm_status = IFM_AVALID;
1571 ifmr->ifm_active = IFM_ETHER;
1572
1573 if (!adapter->link_active) {
1574 IGB_CORE_UNLOCK(adapter);
1575 return;
1576 }
1577
1578 ifmr->ifm_status |= IFM_ACTIVE;
1579
1580 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1581 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1582 ifmr->ifm_active |= fiber_type | IFM_FDX;
1583 else {
1584 switch (adapter->link_speed) {
1585 case 10:
1586 ifmr->ifm_active |= IFM_10_T;
1587 break;
1588 case 100:
1589 ifmr->ifm_active |= IFM_100_TX;
1590 break;
1591 case 1000:
1592 ifmr->ifm_active |= IFM_1000_T;
1593 break;
1594 }
1595 if (adapter->link_duplex == FULL_DUPLEX)
1596 ifmr->ifm_active |= IFM_FDX;
1597 else
1598 ifmr->ifm_active |= IFM_HDX;
1599 }
1600 IGB_CORE_UNLOCK(adapter);
1601 }
1602
/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  media/mediaopt option with ifconfig.  Programs autonegotiation or
 *  forced speed/duplex into the MAC/PHY and reinitializes.
 *
 **********************************************************************/
static int
igb_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("igb_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	IGB_CORE_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		/* Advertise everything, let autoneg decide */
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		/* Gigabit still autonegotiates, full-duplex only */
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		/* Forced 100 Mb: duplex taken from the media word */
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		/* Forced 10 Mb: duplex taken from the media word */
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		/*
		 * NOTE(review): unsupported media only logs a message;
		 * execution still falls through to the PHY reset and
		 * reinit below -- confirm this is intentional.
		 */
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/* As the speed/duplex settings my have changed we need to
	 * reset the PHY.
	 */
	adapter->hw.phy.reset_disable = FALSE;

	igb_init_locked(adapter);
	IGB_CORE_UNLOCK(adapter);

	return (0);
}
1664
1665
/*********************************************************************
 *
 *  This routine maps the mbufs to Advanced TX descriptors.
 *  used by the 82575 adapter.
 *
 *  Caller holds the TX lock.  On success the frame is DMA-mapped,
 *  descriptors are filled, and TDT is advanced; on failure the mbuf
 *  may be freed and *m_headp set to NULL (caller must not reuse it).
 *
 *  Returns 0 on success, errno on failure.
 *
 **********************************************************************/

static int
igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
	struct adapter		*adapter = txr->adapter;
	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
	bus_dmamap_t		map;
	struct igb_tx_buffer	*tx_buffer, *tx_buffer_mapped;
	union e1000_adv_tx_desc	*txd = NULL;
	struct mbuf		*m_head;
	u32			olinfo_status = 0, cmd_type_len = 0;
	int			nsegs, i, j, error, first, last = 0;
	u32			hdrlen = 0;

	m_head = *m_headp;


	/* Set basic descriptor constants */
	cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
	if (m_head->m_flags & M_VLANTAG)
		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;

	/*
	 * Force a cleanup if number of TX descriptors
	 * available hits the threshold
	 */
	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
		igb_txeof(txr);
		/* Now do we at least have a minimal? */
		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
			txr->no_desc_avail++;
			return (ENOBUFS);
		}
	}

	/*
	 * Map the packet for DMA.
	 *
	 * Capture the first descriptor index,
	 * this descriptor will have the index
	 * of the EOP which is the only one that
	 * now gets a DONE bit writeback.
	 */
	first = txr->next_avail_desc;
	tx_buffer = &txr->tx_buffers[first];
	tx_buffer_mapped = tx_buffer;
	map = tx_buffer->map;

	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	if (error == EFBIG) {
		/* Too many segments: defragment the chain and retry once */
		struct mbuf *m;

		m = m_defrag(*m_headp, M_DONTWAIT);
		if (m == NULL) {
			adapter->mbuf_defrag_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		/* Try it again */
		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

		/* ENOMEM is transient: keep the mbuf for a later retry */
		if (error == ENOMEM) {
			adapter->no_tx_dma_setup++;
			return (error);
		} else if (error != 0) {
			adapter->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error == ENOMEM) {
		/* Transient: mbuf retained, caller may retry */
		adapter->no_tx_dma_setup++;
		return (error);
	} else if (error != 0) {
		adapter->no_tx_dma_setup++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}

	/* Check again to be sure we have enough descriptors */
	if (nsegs > (txr->tx_avail - 2)) {
		txr->no_desc_avail++;
		bus_dmamap_unload(txr->txtag, map);
		return (ENOBUFS);
	}
	m_head = *m_headp;

	/*
	 * Set up the context descriptor:
	 * used when any hardware offload is done.
	 * This includes CSUM, VLAN, and TSO. It
	 * will use the first descriptor.
	 */
	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
		if (igb_tso_setup(txr, m_head, &hdrlen)) {
			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
		} else
			return (ENXIO);
	} else if (igb_tx_ctx_setup(txr, m_head))
		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;

#ifdef IGB_IEEE1588
	/* This is changing soon to an mtag detection */
	if (we detect this mbuf has a TSTAMP mtag)
		cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
#endif
	/* Calculate payload length */
	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
	    << E1000_ADVTXD_PAYLEN_SHIFT);

	/* Set up our transmit descriptors, one per DMA segment */
	i = txr->next_avail_desc;
	for (j = 0; j < nsegs; j++) {
		bus_size_t seg_len;
		bus_addr_t seg_addr;

		tx_buffer = &txr->tx_buffers[i];
		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
		seg_addr = segs[j].ds_addr;
		seg_len  = segs[j].ds_len;

		txd->read.buffer_addr = htole64(seg_addr);
		txd->read.cmd_type_len = htole32(
		    adapter->txd_cmd | cmd_type_len | seg_len);
		txd->read.olinfo_status = htole32(olinfo_status);
		last = i;
		if (++i == adapter->num_tx_desc)
			i = 0;
		tx_buffer->m_head = NULL;
		tx_buffer->next_eop = -1;
	}

	txr->next_avail_desc = i;
	txr->tx_avail -= nsegs;

	/* The mbuf is owned by the last buffer; swap the maps so the
	 * loaded map travels with it */
	tx_buffer->m_head = m_head;
	tx_buffer_mapped->map = tx_buffer->map;
	tx_buffer->map = map;
	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);

	/*
	 * Last Descriptor of Packet
	 * needs End Of Packet (EOP)
	 * and Report Status (RS)
	 */
	txd->read.cmd_type_len |=
	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
	/*
	 * Keep track in the first buffer which
	 * descriptor will be written back
	 */
	tx_buffer = &txr->tx_buffers[first];
	tx_buffer->next_eop = last;

	/*
	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
	 * that this frame is available to transmit.
	 */
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
	++txr->tx_packets;

	return (0);

}
1848
1849 static void
1850 igb_set_promisc(struct adapter *adapter)
1851 {
1852 struct ifnet *ifp = adapter->ifp;
1853 uint32_t reg_rctl;
1854
1855 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1856
1857 if (ifp->if_flags & IFF_PROMISC) {
1858 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1859 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1860 } else if (ifp->if_flags & IFF_ALLMULTI) {
1861 reg_rctl |= E1000_RCTL_MPE;
1862 reg_rctl &= ~E1000_RCTL_UPE;
1863 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1864 }
1865 }
1866
1867 static void
1868 igb_disable_promisc(struct adapter *adapter)
1869 {
1870 uint32_t reg_rctl;
1871
1872 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1873
1874 reg_rctl &= (~E1000_RCTL_UPE);
1875 reg_rctl &= (~E1000_RCTL_MPE);
1876 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1877 }
1878
1879
/*********************************************************************
 *  Multicast Update
 *
 *  This routine is called whenever multicast address list is updated.
 *  Copies up to MAX_NUM_MULTICAST_ADDRESSES link-layer addresses into
 *  a flat array and programs the hardware filter; if the list is
 *  full it falls back to multicast-promiscuous mode instead.
 *
 **********************************************************************/

static void
igb_set_multi(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	struct ifmultiaddr *ifma;
	u32 reg_rctl = 0;
	u8  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_ADDR_LEN];

	int mcnt = 0;

	IOCTL_DEBUGOUT("igb_set_multi: begin");

	/* Walk the multicast list under the maddr lock */
	if_maddr_rlock(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;

		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
			break;

		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
		mcnt++;
	}
	if_maddr_runlock(ifp);

	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
		/* Table full: accept all multicast instead */
		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
		reg_rctl |= E1000_RCTL_MPE;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
	} else
		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
}
1920
1921
/*********************************************************************
 *  Timer routine
 *
 *  This routine checks for link status and updates statistics.
 *  Runs once per second via callout with the core lock held, also
 *  drives the TX watchdog and reschedules itself.
 *
 **********************************************************************/

static void
igb_local_timer(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp = adapter->ifp;

	IGB_CORE_LOCK_ASSERT(adapter);

	igb_update_link_status(adapter);
	igb_update_stats_counters(adapter);

	/* Optional debug dump, gated by the sysctl tunable */
	if (igb_display_debug_stats && ifp->if_drv_flags & IFF_DRV_RUNNING)
		igb_print_hw_stats(adapter);

	/*
	 * Each second we check the watchdog to 
	 * protect against hardware hangs.
	 */
	igb_watchdog(adapter);

	/* Trigger an RX interrupt on all queues */
	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->rx_mask);

	/* Rearm the one-second callout */
	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);

}
1955
/*
 * Refresh cached link state from the hardware and, on an up/down
 * transition, update ifnet baudrate/link state and notify the stack.
 * On link loss, all TX watchdog timers are cleared so a dead link is
 * not reported as a TX hang.
 */
static void
igb_update_link_status(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	struct ifnet *ifp = adapter->ifp;
	device_t dev = adapter->dev;
	struct tx_ring *txr = adapter->tx_rings;
	u32 link_check = 0;

	/* Get the cached link value or read for real */
	switch (hw->phy.media_type) {
	case e1000_media_type_copper:
		if (hw->mac.get_link_status) {
			/* Do the work to read phy */
			e1000_check_for_link(hw);
			/* get_link_status is cleared once link is read */
			link_check = !hw->mac.get_link_status;
		} else
			link_check = TRUE;
		break;
	case e1000_media_type_fiber:
		e1000_check_for_link(hw);
		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
				 E1000_STATUS_LU);
		break;
	case e1000_media_type_internal_serdes:
		e1000_check_for_link(hw);
		link_check = adapter->hw.mac.serdes_has_link;
		break;
	default:
	case e1000_media_type_unknown:
		break;
	}

	/* Now we check if a transition has happened */
	if (link_check && (adapter->link_active == 0)) {
		/* Link came up: cache speed/duplex, tell the stack */
		e1000_get_speed_and_duplex(&adapter->hw, 
		    &adapter->link_speed, &adapter->link_duplex);
		if (bootverbose)
			device_printf(dev, "Link is up %d Mbps %s\n",
			    adapter->link_speed,
			    ((adapter->link_duplex == FULL_DUPLEX) ?
			    "Full Duplex" : "Half Duplex"));
		adapter->link_active = 1;
		ifp->if_baudrate = adapter->link_speed * 1000000;
		if_link_state_change(ifp, LINK_STATE_UP);
	} else if (!link_check && (adapter->link_active == 1)) {
		/* Link went down */
		ifp->if_baudrate = adapter->link_speed = 0;
		adapter->link_duplex = 0;
		if (bootverbose)
			device_printf(dev, "Link is Down\n");
		adapter->link_active = 0;
		if_link_state_change(ifp, LINK_STATE_DOWN);
		/* Turn off watchdogs */
		for (int i = 0; i < adapter->num_queues; i++, txr++)
			txr->watchdog_timer = FALSE;
	}
}
2013
/*********************************************************************
 *
 *  This routine disables all traffic on the adapter by issuing a
 *  global reset on the MAC and deallocates TX/RX buffers.
 *  Caller must hold the core lock.
 *
 **********************************************************************/

static void
igb_stop(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp = adapter->ifp;

	IGB_CORE_LOCK_ASSERT(adapter);

	INIT_DEBUGOUT("igb_stop: begin");

	igb_disable_intr(adapter);

	/* Stop the one-second local timer */
	callout_stop(&adapter->timer);

	/* Tell the stack that the interface is no longer active */
	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	e1000_reset_hw(&adapter->hw);
	/* Clear wake-up control after the reset */
	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
}
2041
2042
/*********************************************************************
 *
 *  Determine hardware revision.
 *
 *  Verifies PCI command-register bits needed for operation, caches
 *  PCI identification into the shared-code hw struct, and lets the
 *  shared code derive the MAC type from the device ID.
 *
 **********************************************************************/
static void
igb_identify_hardware(struct adapter *adapter)
{
	device_t dev = adapter->dev;

	/* Make sure our PCI config space has the necessary stuff set */
	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
		device_printf(dev, "Memory Access and/or Bus Master bits "
		    "were not set!\n");
		/* Force-enable bus mastering and memory access */
		adapter->hw.bus.pci_cmd_word |=
		    (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
		pci_write_config(dev, PCIR_COMMAND,
		    adapter->hw.bus.pci_cmd_word, 2);
	}

	/* Save off the information about this board */
	adapter->hw.vendor_id = pci_get_vendor(dev);
	adapter->hw.device_id = pci_get_device(dev);
	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
	adapter->hw.subsystem_vendor_id =
	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
	adapter->hw.subsystem_device_id =
	    pci_read_config(dev, PCIR_SUBDEV_0, 2);

	/* Do Shared Code Init and Setup */
	if (e1000_set_mac_type(&adapter->hw)) {
		device_printf(dev, "Setup init failure\n");
		return;
	}
}
2080
/*
 * Map BAR0 register space and set up MSI-X/MSI interrupt vectors.
 * Returns 0 on success, ENXIO if the memory BAR cannot be mapped.
 */
static int
igb_allocate_pci_resources(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	int		rid;

	/* BAR0 holds the device register space */
	rid = PCIR_BAR(0);
	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &rid, RF_ACTIVE);
	if (adapter->pci_mem == NULL) {
		device_printf(dev, "Unable to allocate bus resource: memory\n");
		return (ENXIO);
	}
	adapter->osdep.mem_bus_space_tag =
	    rman_get_bustag(adapter->pci_mem);
	adapter->osdep.mem_bus_space_handle =
	    rman_get_bushandle(adapter->pci_mem);
	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;

	adapter->num_queues = 1; /* Defaults for Legacy or MSI */

	/* This will setup either MSI/X or MSI */
	adapter->msix = igb_setup_msix(adapter);
	adapter->hw.back = &adapter->osdep;

	return (0);
}
2108
/*********************************************************************
 *
 *  Setup the Legacy or MSI Interrupt handler
 *
 *  Allocates a single shared IRQ, creates the deferred-processing
 *  taskqueue, and registers igb_irq_fast as a filter handler.
 *  Returns 0 on success, ENXIO if the IRQ cannot be allocated, or
 *  the bus_setup_intr() error code.
 *
 **********************************************************************/
static int
igb_allocate_legacy(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	int error, rid = 0;

	/* Turn off all interrupts while we rewire the handler */
	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);

	/* MSI RID is 1 (rid 0 is the legacy INTx line) */
	if (adapter->msix == 1)
		rid = 1;

	/* We allocate a single interrupt resource */
	adapter->res = bus_alloc_resource_any(dev,
	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
	if (adapter->res == NULL) {
		device_printf(dev, "Unable to allocate bus resource: "
		    "interrupt\n");
		return (ENXIO);
	}

	/*
	 * Try allocating a fast interrupt and the associated deferred
	 * processing contexts.  The taskqueue threads are started
	 * before the handler is registered; on bus_setup_intr()
	 * failure the queue is freed here, while adapter->res is
	 * released later by igb_free_pci_resources() in the attach
	 * error path (it releases any non-NULL res).
	 */
	TASK_INIT(&adapter->rxtx_task, 0, igb_handle_rxtx, adapter);
	adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
	    taskqueue_thread_enqueue, &adapter->tq);
	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
	    device_get_nameunit(adapter->dev));
	if ((error = bus_setup_intr(dev, adapter->res,
	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
	    adapter, &adapter->tag)) != 0) {
		device_printf(dev, "Failed to register fast interrupt "
		    "handler: %d\n", error);
		taskqueue_free(adapter->tq);
		adapter->tq = NULL;
		return (error);
	}

	return (0);
}
2157
2158
2159 /*********************************************************************
2160 *
2161 * Setup the MSIX Interrupt handlers:
2162 *
2163 **********************************************************************/
2164 static int
2165 igb_allocate_msix(struct adapter *adapter)
2166 {
2167 device_t dev = adapter->dev;
2168 struct tx_ring *txr = adapter->tx_rings;
2169 struct rx_ring *rxr = adapter->rx_rings;
2170 int error, rid, vector = 0;
2171
2172 /*
2173 * Setup the interrupt handlers
2174 */
2175
2176 /* TX Setup */
2177 for (int i = 0; i < adapter->num_queues; i++, vector++, txr++) {
2178 rid = vector +1;
2179 txr->res = bus_alloc_resource_any(dev,
2180 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2181 if (txr->res == NULL) {
2182 device_printf(dev,
2183 "Unable to allocate bus resource: "
2184 "MSIX TX Interrupt\n");
2185 return (ENXIO);
2186 }
2187 error = bus_setup_intr(dev, txr->res,
2188 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2189 igb_msix_tx, txr, &txr->tag);
2190 if (error) {
2191 txr->res = NULL;
2192 device_printf(dev, "Failed to register TX handler");
2193 return (error);
2194 }
2195 /* Make tasklet for deferred handling - one per queue */
2196 TASK_INIT(&txr->tx_task, 0, igb_handle_tx, txr);
2197 txr->msix = vector;
2198 if (adapter->hw.mac.type == e1000_82575)
2199 txr->eims = E1000_EICR_TX_QUEUE0 << i;
2200 else
2201 txr->eims = 1 << vector;
2202 /*
2203 ** Bind the msix vector, and thus the
2204 ** ring to the corresponding cpu.
2205 */
2206 if (adapter->num_queues > 1)
2207 bus_bind_intr(dev, txr->res, i);
2208 }
2209
2210 /* RX Setup */
2211 for (int i = 0; i < adapter->num_queues; i++, vector++, rxr++) {
2212 rid = vector +1;
2213 rxr->res = bus_alloc_resource_any(dev,
2214 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2215 if (rxr->res == NULL) {
2216 device_printf(dev,
2217 "Unable to allocate bus resource: "
2218 "MSIX RX Interrupt\n");
2219 return (ENXIO);
2220 }
2221 error = bus_setup_intr(dev, rxr->res,
2222 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2223 igb_msix_rx, rxr, &rxr->tag);
2224 if (error) {
2225 rxr->res = NULL;
2226 device_printf(dev, "Failed to register RX handler");
2227 return (error);
2228 }
2229 /* Make tasklet for deferred handling - one per queue */
2230 TASK_INIT(&rxr->rx_task, 0, igb_handle_rx, rxr);
2231 rxr->msix = vector;
2232 if (adapter->hw.mac.type == e1000_82575)
2233 rxr->eims = E1000_EICR_RX_QUEUE0 << i;
2234 else
2235 rxr->eims = 1 << vector;
2236 /* Get a mask for local timer */
2237 adapter->rx_mask |= rxr->eims;
2238 /*
2239 ** Bind the msix vector, and thus the
2240 ** ring to the corresponding cpu.
2241 ** Notice that this makes an RX/TX pair
2242 ** bound to each CPU, limited by the MSIX
2243 ** vectors.
2244 */
2245 if (adapter->num_queues > 1)
2246 bus_bind_intr(dev, rxr->res, i);
2247 }
2248
2249 /* And Link */
2250 rid = vector +1;
2251 adapter->res = bus_alloc_resource_any(dev,
2252 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2253 if (adapter->res == NULL) {
2254 device_printf(dev,
2255 "Unable to allocate bus resource: "
2256 "MSIX Link Interrupt\n");
2257 return (ENXIO);
2258 }
2259 if ((error = bus_setup_intr(dev, adapter->res,
2260 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2261 igb_msix_link, adapter, &adapter->tag)) != 0) {
2262 device_printf(dev, "Failed to register Link handler");
2263 return (error);
2264 }
2265 adapter->linkvec = vector;
2266 adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2267 taskqueue_thread_enqueue, &adapter->tq);
2268 taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2269 device_get_nameunit(adapter->dev));
2270
2271 return (0);
2272 }
2273
2274
/*
 * Program per-vector interrupt routing for MSI-X.
 * 82576 routes through IVAR registers; 82575 through MSIXBM
 * bitmap registers.  Also accumulates adapter->eims_mask and
 * adapter->link_mask for later enable/disable.
 */
static void
igb_configure_queues(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	struct tx_ring *txr;
	struct rx_ring *rxr;

	/* Turn on MSIX */
	/*
	** 82576 uses IVARs to route MSI/X
	** interrupts, its not very intuitive,
	** study the code carefully :)
	*/
	if (adapter->hw.mac.type == e1000_82576) {
		u32 ivar = 0;
		/* First turn on the capability */
		E1000_WRITE_REG(hw, E1000_GPIE,
		    E1000_GPIE_MSIX_MODE |
		    E1000_GPIE_EIAME |
		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
		/*
		 * RX: a read-modify-write per queue.  Each 32-bit IVAR
		 * register serves two queue indices (i and i+8): queue
		 * i's RX entry occupies the low byte for i < 8 and
		 * bits 16-23 otherwise.
		 */
		for (int i = 0; i < adapter->num_queues; i++) {
			u32 index = i & 0x7; /* Each IVAR has two entries */
			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
			rxr = &adapter->rx_rings[i];
			if (i < 8) {
				ivar &= 0xFFFFFF00;
				ivar |= rxr->msix | E1000_IVAR_VALID;
			} else {
				ivar &= 0xFF00FFFF;
				ivar |= (rxr->msix | E1000_IVAR_VALID) << 16;
			}
			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
			adapter->eims_mask |= rxr->eims;
		}
		/* TX: same layout, bits 8-15 (i < 8) or 24-31 (i >= 8) */
		for (int i = 0; i < adapter->num_queues; i++) {
			u32 index = i & 0x7; /* Each IVAR has two entries */
			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
			txr = &adapter->tx_rings[i];
			if (i < 8) {
				ivar &= 0xFFFF00FF;
				ivar |= (txr->msix | E1000_IVAR_VALID) << 8;
			} else {
				ivar &= 0x00FFFFFF;
				ivar |= (txr->msix | E1000_IVAR_VALID) << 24;
			}
			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
			adapter->eims_mask |= txr->eims;
		}

		/* And for the link interrupt */
		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
		adapter->link_mask = 1 << adapter->linkvec;
		adapter->eims_mask |= adapter->link_mask;
		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
	} else
	{ /* 82575 */
		int tmp;

		/* enable MSI-X PBA support*/
		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		/* Auto-Mask interrupts upon ICR read. */
		tmp |= E1000_CTRL_EXT_EIAME;
		tmp |= E1000_CTRL_EXT_IRCA;
		E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);

		/* TX: one MSIXBM bitmap register per vector */
		for (int i = 0; i < adapter->num_queues; i++) {
			txr = &adapter->tx_rings[i];
			E1000_WRITE_REG(hw, E1000_MSIXBM(txr->msix),
			    txr->eims);
			adapter->eims_mask |= txr->eims;
		}

		/* RX */
		for (int i = 0; i < adapter->num_queues; i++) {
			rxr = &adapter->rx_rings[i];
			E1000_WRITE_REG(hw, E1000_MSIXBM(rxr->msix),
			    rxr->eims);
			adapter->eims_mask |= rxr->eims;
		}

		/* Link */
		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
		    E1000_EIMS_OTHER);
		adapter->link_mask |= E1000_EIMS_OTHER;
		adapter->eims_mask |= adapter->link_mask;
	}
	return;
}
2367
2368
/*
 * Tear down all interrupt and memory bus resources.  Safe to call
 * from a partially-failed attach: each release is guarded by a
 * NULL check, and the interrupt loops are skipped entirely when
 * adapter->res was never allocated.
 */
static void
igb_free_pci_resources(struct adapter *adapter)
{
	struct tx_ring *txr = adapter->tx_rings;
	struct rx_ring *rxr = adapter->rx_rings;
	device_t dev = adapter->dev;
	int rid;

	/*
	** There is a slight possibility of a failure mode
	** in attach that will result in entering this function
	** before interrupt resources have been initialized, and
	** in that case we do not want to execute the loops below
	** We can detect this reliably by the state of the adapter
	** res pointer.
	*/
	if (adapter->res == NULL)
		goto mem;

	/*
	 * First release all the TX/RX interrupt resources:
	 * (rid is msix + 1 because SYS_RES_IRQ rids are 1-based)
	 */
	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		rid = txr->msix + 1;
		if (txr->tag != NULL) {
			bus_teardown_intr(dev, txr->res, txr->tag);
			txr->tag = NULL;
		}
		if (txr->res != NULL)
			bus_release_resource(dev, SYS_RES_IRQ, rid, txr->res);
	}

	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
		rid = rxr->msix + 1;
		if (rxr->tag != NULL) {
			bus_teardown_intr(dev, rxr->res, rxr->tag);
			rxr->tag = NULL;
		}
		if (rxr->res != NULL)
			bus_release_resource(dev, SYS_RES_IRQ, rid, rxr->res);
	}

	/* Clean the Legacy or Link interrupt last */
	if (adapter->linkvec) /* we are doing MSIX */
		rid = adapter->linkvec + 1;
	else
		(adapter->msix != 0) ? (rid = 1):(rid = 0);

	if (adapter->tag != NULL) {
		bus_teardown_intr(dev, adapter->res, adapter->tag);
		adapter->tag = NULL;
	}
	if (adapter->res != NULL)
		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);

mem:
	/* Give back MSI/MSI-X vectors, then the MSI-X table BAR,
	 * then the register BAR */
	if (adapter->msix)
		pci_release_msi(dev);

	if (adapter->msix_mem != NULL)
		bus_release_resource(dev, SYS_RES_MEMORY,
		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);

	if (adapter->pci_mem != NULL)
		bus_release_resource(dev, SYS_RES_MEMORY,
		    PCIR_BAR(0), adapter->pci_mem);

}
2437
2438 /*
2439 * Setup Either MSI/X or MSI
2440 */
2441 static int
2442 igb_setup_msix(struct adapter *adapter)
2443 {
2444 device_t dev = adapter->dev;
2445 int rid, want, queues, msgs;
2446
2447 /* First try MSI/X */
2448 rid = PCIR_BAR(IGB_MSIX_BAR);
2449 adapter->msix_mem = bus_alloc_resource_any(dev,
2450 SYS_RES_MEMORY, &rid, RF_ACTIVE);
2451 if (!adapter->msix_mem) {
2452 /* May not be enabled */
2453 device_printf(adapter->dev,
2454 "Unable to map MSIX table \n");
2455 goto msi;
2456 }
2457
2458 msgs = pci_msix_count(dev);
2459 if (msgs == 0) { /* system has msix disabled */
2460 bus_release_resource(dev, SYS_RES_MEMORY,
2461 PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2462 adapter->msix_mem = NULL;
2463 goto msi;
2464 }
2465
2466 /* Figure out a reasonable auto config value */
2467 queues = (mp_ncpus > ((msgs-1)/2)) ? (msgs-1)/2 : mp_ncpus;
2468
2469 if (igb_num_queues == 0)
2470 igb_num_queues = queues;
2471 /*
2472 ** Two vectors (RX/TX pair) per queue
2473 ** plus an additional for Link interrupt
2474 */
2475 want = (igb_num_queues * 2) + 1;
2476 if (msgs >= want)
2477 msgs = want;
2478 else {
2479 device_printf(adapter->dev,
2480 "MSIX Configuration Problem, "
2481 "%d vectors configured, but %d queues wanted!\n",
2482 msgs, want);
2483 return (ENXIO);
2484 }
2485 if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2486 device_printf(adapter->dev,
2487 "Using MSIX interrupts with %d vectors\n", msgs);
2488 adapter->num_queues = igb_num_queues;
2489 return (msgs);
2490 }
2491 msi:
2492 msgs = pci_msi_count(dev);
2493 if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2494 device_printf(adapter->dev,"Using MSI interrupt\n");
2495 return (msgs);
2496 }
2497
2498 /*********************************************************************
2499 *
2500 * Initialize the hardware to a configuration
2501 * as specified by the adapter structure.
2502 *
2503 **********************************************************************/
2504 static int
2505 igb_hardware_init(struct adapter *adapter)
2506 {
2507 device_t dev = adapter->dev;
2508 u32 rx_buffer_size;
2509
2510 INIT_DEBUGOUT("igb_hardware_init: begin");
2511
2512 /* Issue a global reset */
2513 e1000_reset_hw(&adapter->hw);
2514
2515 /* Let the firmware know the OS is in control */
2516 igb_get_hw_control(adapter);
2517
2518 /*
2519 * These parameters control the automatic generation (Tx) and
2520 * response (Rx) to Ethernet PAUSE frames.
2521 * - High water mark should allow for at least two frames to be
2522 * received after sending an XOFF.
2523 * - Low water mark works best when it is very near the high water mark.
2524 * This allows the receiver to restart by sending XON when it has
2525 * drained a bit. Here we use an arbitary value of 1500 which will
2526 * restart after one full frame is pulled from the buffer. There
2527 * could be several smaller frames in the buffer and if so they will
2528 * not trigger the XON until their total number reduces the buffer
2529 * by 1500.
2530 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2531 */
2532 if (adapter->hw.mac.type == e1000_82576)
2533 rx_buffer_size = ((E1000_READ_REG(&adapter->hw,
2534 E1000_RXPBS) & 0xffff) << 10 );
2535 else
2536 rx_buffer_size = ((E1000_READ_REG(&adapter->hw,
2537 E1000_PBA) & 0xffff) << 10 );
2538
2539 adapter->hw.fc.high_water = rx_buffer_size -
2540 roundup2(adapter->max_frame_size, 1024);
2541 adapter->hw.fc.low_water = adapter->hw.fc.high_water - 1500;
2542
2543 adapter->hw.fc.pause_time = IGB_FC_PAUSE_TIME;
2544 adapter->hw.fc.send_xon = TRUE;
2545
2546 /* Set Flow control, use the tunable location if sane */
2547 if ((igb_fc_setting >= 0) || (igb_fc_setting < 4))
2548 adapter->hw.fc.requested_mode = igb_fc_setting;
2549 else
2550 adapter->hw.fc.requested_mode = e1000_fc_none;
2551
2552 if (e1000_init_hw(&adapter->hw) < 0) {
2553 device_printf(dev, "Hardware Initialization Failed\n");
2554 return (EIO);
2555 }
2556
2557 e1000_check_for_link(&adapter->hw);
2558
2559 return (0);
2560 }
2561
/*********************************************************************
 *
 *  Setup networking device structure and register an interface.
 *
 **********************************************************************/
static void
igb_setup_interface(device_t dev, struct adapter *adapter)
{
	struct ifnet *ifp;

	INIT_DEBUGOUT("igb_setup_interface: begin");

	ifp = adapter->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL)
		panic("%s: can not if_alloc()", device_get_nameunit(dev));
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifp->if_mtu = ETHERMTU;
	ifp->if_init = igb_init;
	ifp->if_softc = adapter;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = igb_ioctl;
	ifp->if_start = igb_start;
#if __FreeBSD_version >= 800000
	/* Multiqueue transmit entry points (8.x and later) */
	ifp->if_transmit = igb_mq_start;
	ifp->if_qflush = igb_qflush;
#endif
	/* Size the software send queue to the descriptor ring */
	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
	IFQ_SET_READY(&ifp->if_snd);

	ether_ifattach(ifp, adapter->hw.mac.addr);

	/* NOTE(review): this zeroing is redundant — both fields are
	 * unconditionally overwritten just below */
	ifp->if_capabilities = ifp->if_capenable = 0;

	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_MTU;
	ifp->if_capabilities |= IFCAP_TSO4;
	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
	ifp->if_capenable = ifp->if_capabilities;

	/*
	 * Tell the upper layer(s) we support long frames.
	 */
	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;

	/*
	 * Specify the media types supported by this adapter and register
	 * callbacks to update media and link information
	 */
	ifmedia_init(&adapter->media, IFM_IMASK,
	    igb_media_change, igb_media_status);
	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
		/* Fiber/SerDes: 1000SX only */
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
		    0, NULL);
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
	} else {
		/* Copper: 10/100 always; 1000T unless the PHY can't */
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
		    0, NULL);
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
		    0, NULL);
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
		    0, NULL);
		if (adapter->hw.phy.type != e1000_phy_ife) {
			ifmedia_add(&adapter->media,
			    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
			ifmedia_add(&adapter->media,
			    IFM_ETHER | IFM_1000_T, 0, NULL);
		}
	}
	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
}
2637
2638
2639 /*
2640 * Manage DMA'able memory.
2641 */
2642 static void
2643 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2644 {
2645 if (error)
2646 return;
2647 *(bus_addr_t *) arg = segs[0].ds_addr;
2648 }
2649
2650 static int
2651 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2652 struct igb_dma_alloc *dma, int mapflags)
2653 {
2654 int error;
2655
2656 error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2657 1, 0, /* alignment, bounds */
2658 BUS_SPACE_MAXADDR, /* lowaddr */
2659 BUS_SPACE_MAXADDR, /* highaddr */
2660 NULL, NULL, /* filter, filterarg */
2661 size, /* maxsize */
2662 1, /* nsegments */
2663 size, /* maxsegsize */
2664 0, /* flags */
2665 NULL, /* lockfunc */
2666 NULL, /* lockarg */
2667 &dma->dma_tag);
2668 if (error) {
2669 device_printf(adapter->dev,
2670 "%s: bus_dma_tag_create failed: %d\n",
2671 __func__, error);
2672 goto fail_0;
2673 }
2674
2675 error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2676 BUS_DMA_NOWAIT, &dma->dma_map);
2677 if (error) {
2678 device_printf(adapter->dev,
2679 "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2680 __func__, (uintmax_t)size, error);
2681 goto fail_2;
2682 }
2683
2684 dma->dma_paddr = 0;
2685 error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2686 size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2687 if (error || dma->dma_paddr == 0) {
2688 device_printf(adapter->dev,
2689 "%s: bus_dmamap_load failed: %d\n",
2690 __func__, error);
2691 goto fail_3;
2692 }
2693
2694 return (0);
2695
2696 fail_3:
2697 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2698 fail_2:
2699 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2700 bus_dma_tag_destroy(dma->dma_tag);
2701 fail_0:
2702 dma->dma_map = NULL;
2703 dma->dma_tag = NULL;
2704
2705 return (error);
2706 }
2707
2708 static void
2709 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2710 {
2711 if (dma->dma_tag == NULL)
2712 return;
2713 if (dma->dma_map != NULL) {
2714 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2715 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2716 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2717 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2718 dma->dma_map = NULL;
2719 }
2720 bus_dma_tag_destroy(dma->dma_tag);
2721 dma->dma_tag = NULL;
2722 }
2723
2724
2725 /*********************************************************************
2726 *
2727 * Allocate memory for the transmit and receive rings, and then
2728 * the descriptors associated with each, called only once at attach.
2729 *
2730 **********************************************************************/
2731 static int
2732 igb_allocate_queues(struct adapter *adapter)
2733 {
2734 device_t dev = adapter->dev;
2735 struct tx_ring *txr;
2736 struct rx_ring *rxr;
2737 int rsize, tsize, error = E1000_SUCCESS;
2738 int txconf = 0, rxconf = 0;
2739
2740 /* First allocate the TX ring struct memory */
2741 if (!(adapter->tx_rings =
2742 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2743 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2744 device_printf(dev, "Unable to allocate TX ring memory\n");
2745 error = ENOMEM;
2746 goto fail;
2747 }
2748 txr = adapter->tx_rings;
2749
2750 /* Next allocate the RX */
2751 if (!(adapter->rx_rings =
2752 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2753 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2754 device_printf(dev, "Unable to allocate RX ring memory\n");
2755 error = ENOMEM;
2756 goto rx_fail;
2757 }
2758 rxr = adapter->rx_rings;
2759
2760 tsize = roundup2(adapter->num_tx_desc *
2761 sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
2762 /*
2763 * Now set up the TX queues, txconf is needed to handle the
2764 * possibility that things fail midcourse and we need to
2765 * undo memory gracefully
2766 */
2767 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2768 /* Set up some basics */
2769 txr = &adapter->tx_rings[i];
2770 txr->adapter = adapter;
2771 txr->me = i;
2772
2773 /* Initialize the TX lock */
2774 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2775 device_get_nameunit(dev), txr->me);
2776 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2777
2778 if (igb_dma_malloc(adapter, tsize,
2779 &txr->txdma, BUS_DMA_NOWAIT)) {
2780 device_printf(dev,
2781 "Unable to allocate TX Descriptor memory\n");
2782 error = ENOMEM;
2783 goto err_tx_desc;
2784 }
2785 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2786 bzero((void *)txr->tx_base, tsize);
2787
2788 /* Now allocate transmit buffers for the ring */
2789 if (igb_allocate_transmit_buffers(txr)) {
2790 device_printf(dev,
2791 "Critical Failure setting up transmit buffers\n");
2792 error = ENOMEM;
2793 goto err_tx_desc;
2794 }
2795 #if __FreeBSD_version >= 800000
2796 /* Allocate a buf ring */
2797 txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
2798 M_WAITOK, &txr->tx_mtx);
2799 #endif
2800 }
2801
2802 /*
2803 * Next the RX queues...
2804 */
2805 rsize = roundup2(adapter->num_rx_desc *
2806 sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
2807 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2808 rxr = &adapter->rx_rings[i];
2809 rxr->adapter = adapter;
2810 rxr->me = i;
2811
2812 /* Initialize the RX lock */
2813 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2814 device_get_nameunit(dev), txr->me);
2815 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2816
2817 if (igb_dma_malloc(adapter, rsize,
2818 &rxr->rxdma, BUS_DMA_NOWAIT)) {
2819 device_printf(dev,
2820 "Unable to allocate RxDescriptor memory\n");
2821 error = ENOMEM;
2822 goto err_rx_desc;
2823 }
2824 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2825 bzero((void *)rxr->rx_base, rsize);
2826
2827 /* Allocate receive buffers for the ring*/
2828 if (igb_allocate_receive_buffers(rxr)) {
2829 device_printf(dev,
2830 "Critical Failure setting up receive buffers\n");
2831 error = ENOMEM;
2832 goto err_rx_desc;
2833 }
2834 }
2835
2836 return (0);
2837
2838 err_rx_desc:
2839 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2840 igb_dma_free(adapter, &rxr->rxdma);
2841 err_tx_desc:
2842 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2843 igb_dma_free(adapter, &txr->txdma);
2844 free(adapter->rx_rings, M_DEVBUF);
2845 rx_fail:
2846 free(adapter->tx_rings, M_DEVBUF);
2847 fail:
2848 return (error);
2849 }
2850
/*********************************************************************
 *
 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
 *  the information needed to transmit a packet on the wire. This is
 *  called only once at attach, setup is done every reset.
 *
 *  Returns 0 or an errno; on failure all TX structures are freed.
 *
 **********************************************************************/
static int
igb_allocate_transmit_buffers(struct tx_ring *txr)
{
	struct adapter *adapter = txr->adapter;
	device_t dev = adapter->dev;
	struct igb_tx_buffer *txbuf;
	int error, i;

	/*
	 * Setup DMA descriptor areas.
	 * NOTE(review): parent tag is NULL here, while igb_dma_malloc()
	 * uses bus_get_dma_tag(adapter->dev) — confirm this is intended.
	 */
	if ((error = bus_dma_tag_create(NULL,		/* parent */
			       1, 0,			/* alignment, bounds */
			       BUS_SPACE_MAXADDR,	/* lowaddr */
			       BUS_SPACE_MAXADDR,	/* highaddr */
			       NULL, NULL,		/* filter, filterarg */
			       IGB_TSO_SIZE,		/* maxsize */
			       IGB_MAX_SCATTER,		/* nsegments */
			       PAGE_SIZE,		/* maxsegsize */
			       0,			/* flags */
			       NULL,			/* lockfunc */
			       NULL,			/* lockfuncarg */
			       &txr->txtag))) {
		device_printf(dev,"Unable to allocate TX DMA tag\n");
		goto fail;
	}

	if (!(txr->tx_buffers =
	    (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate tx_buffer memory\n");
		error = ENOMEM;
		goto fail;
	}

	/* Create the descriptor buffer dma maps */
	txbuf = txr->tx_buffers;
	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
		if (error != 0) {
			device_printf(dev, "Unable to create TX DMA map\n");
			goto fail;
		}
	}

	return 0;
fail:
	/* We free all, it handles case where we are in the middle */
	igb_free_transmit_structures(adapter);
	return (error);
}
2909
2910 /*********************************************************************
2911 *
2912 * Initialize a transmit ring.
2913 *
2914 **********************************************************************/
2915 static void
2916 igb_setup_transmit_ring(struct tx_ring *txr)
2917 {
2918 struct adapter *adapter = txr->adapter;
2919 struct igb_tx_buffer *txbuf;
2920 int i;
2921
2922 /* Clear the old descriptor contents */
2923 bzero((void *)txr->tx_base,
2924 (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
2925 /* Reset indices */
2926 txr->next_avail_desc = 0;
2927 txr->next_to_clean = 0;
2928
2929 /* Free any existing tx buffers. */
2930 txbuf = txr->tx_buffers;
2931 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2932 if (txbuf->m_head != NULL) {
2933 bus_dmamap_sync(txr->txtag, txbuf->map,
2934 BUS_DMASYNC_POSTWRITE);
2935 bus_dmamap_unload(txr->txtag, txbuf->map);
2936 m_freem(txbuf->m_head);
2937 txbuf->m_head = NULL;
2938 }
2939 /* clear the watch index */
2940 txbuf->next_eop = -1;
2941 }
2942
2943 /* Set number of descriptors available */
2944 txr->tx_avail = adapter->num_tx_desc;
2945
2946 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2947 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2948
2949 }
2950
2951 /*********************************************************************
2952 *
2953 * Initialize all transmit rings.
2954 *
2955 **********************************************************************/
2956 static void
2957 igb_setup_transmit_structures(struct adapter *adapter)
2958 {
2959 struct tx_ring *txr = adapter->tx_rings;
2960
2961 for (int i = 0; i < adapter->num_queues; i++, txr++)
2962 igb_setup_transmit_ring(txr);
2963
2964 return;
2965 }
2966
2967 /*********************************************************************
2968 *
2969 * Enable transmit unit.
2970 *
2971 **********************************************************************/
2972 static void
2973 igb_initialize_transmit_units(struct adapter *adapter)
2974 {
2975 struct tx_ring *txr = adapter->tx_rings;
2976 u32 tctl, txdctl;
2977
2978 INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
2979
2980 /* Setup the Base and Length of the Tx Descriptor Rings */
2981 for (int i = 0; i < adapter->num_queues; i++, txr++) {
2982 u64 bus_addr = txr->txdma.dma_paddr;
2983
2984 E1000_WRITE_REG(&adapter->hw, E1000_TDLEN(i),
2985 adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
2986 E1000_WRITE_REG(&adapter->hw, E1000_TDBAH(i),
2987 (uint32_t)(bus_addr >> 32));
2988 E1000_WRITE_REG(&adapter->hw, E1000_TDBAL(i),
2989 (uint32_t)bus_addr);
2990
2991 /* Setup the HW Tx Head and Tail descriptor pointers */
2992 E1000_WRITE_REG(&adapter->hw, E1000_TDT(i), 0);
2993 E1000_WRITE_REG(&adapter->hw, E1000_TDH(i), 0);
2994
2995 HW_DEBUGOUT2("Base = %x, Length = %x\n",
2996 E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
2997 E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
2998
2999 /* Setup Transmit Descriptor Base Settings */
3000 adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3001
3002 txdctl = E1000_READ_REG(&adapter->hw, E1000_TXDCTL(i));
3003 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3004 E1000_WRITE_REG(&adapter->hw, E1000_TXDCTL(i), txdctl);
3005 }
3006
3007 /* Program the Transmit Control Register */
3008 tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3009 tctl &= ~E1000_TCTL_CT;
3010 tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3011 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3012
3013 e1000_config_collision_dist(&adapter->hw);
3014
3015 /* This write will effectively turn on the transmit unit. */
3016 E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3017
3018 }
3019
3020 /*********************************************************************
3021 *
3022 * Free all transmit rings.
3023 *
3024 **********************************************************************/
3025 static void
3026 igb_free_transmit_structures(struct adapter *adapter)
3027 {
3028 struct tx_ring *txr = adapter->tx_rings;
3029
3030 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3031 IGB_TX_LOCK(txr);
3032 igb_free_transmit_buffers(txr);
3033 igb_dma_free(adapter, &txr->txdma);
3034 IGB_TX_UNLOCK(txr);
3035 IGB_TX_LOCK_DESTROY(txr);
3036 }
3037 free(adapter->tx_rings, M_DEVBUF);
3038 }
3039
/*********************************************************************
 *
 *  Free transmit ring related data structures.
 *
 *  Releases, per buffer: any attached mbuf (with its loaded DMA
 *  map) and the map itself; then the buf ring, the tx_buffer
 *  array, and the TX DMA tag.  Safe to call on a partially
 *  constructed ring (every teardown is NULL-guarded).
 *
 **********************************************************************/
static void
igb_free_transmit_buffers(struct tx_ring *txr)
{
	struct adapter *adapter = txr->adapter;
	struct igb_tx_buffer *tx_buffer;
	int i;

	INIT_DEBUGOUT("free_transmit_ring: begin");

	if (txr->tx_buffers == NULL)
		return;

	tx_buffer = txr->tx_buffers;
	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
		if (tx_buffer->m_head != NULL) {
			/* Buffer holds a packet: sync, unload, free the
			 * mbuf, then destroy its map */
			bus_dmamap_sync(txr->txtag, tx_buffer->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag,
			    tx_buffer->map);
			m_freem(tx_buffer->m_head);
			tx_buffer->m_head = NULL;
			if (tx_buffer->map != NULL) {
				bus_dmamap_destroy(txr->txtag,
				    tx_buffer->map);
				tx_buffer->map = NULL;
			}
		} else if (tx_buffer->map != NULL) {
			/* No packet, but a map exists: unload (harmless
			 * if never loaded) and destroy it */
			bus_dmamap_unload(txr->txtag,
			    tx_buffer->map);
			bus_dmamap_destroy(txr->txtag,
			    tx_buffer->map);
			tx_buffer->map = NULL;
		}
	}
#if __FreeBSD_version >= 800000
	if (txr->br != NULL)
		buf_ring_free(txr->br, M_DEVBUF);
#endif
	if (txr->tx_buffers != NULL) {
		free(txr->tx_buffers, M_DEVBUF);
		txr->tx_buffers = NULL;
	}
	if (txr->txtag != NULL) {
		bus_dma_tag_destroy(txr->txtag);
		txr->txtag = NULL;
	}
	return;
}
3093
3094 /**********************************************************************
3095 *
3096 * Setup work for hardware segmentation offload (TSO) on
3097 * adapters using advanced tx descriptors (82575)
3098 *
3099 **********************************************************************/
/*
 * igb_tso_setup - program one advanced TX context descriptor for TSO.
 *
 * Describes the MAC/IP/TCP header layout and MSS to the hardware so it
 * can segment the frame.  IPv4/TCP only (the IPv6 case is not handled
 * here).
 *
 * txr:    transmit ring on which the context descriptor is placed
 * mp:     outgoing packet; headers must be contiguous in the first mbuf
 * hdrlen: out parameter, set to the total header length (eth+ip+tcp)
 *
 * Returns TRUE and consumes one ring descriptor on success; FALSE when
 * the frame is not IPv4/TCP or the headers are not contiguous (in which
 * case no descriptor is consumed).
 */
static boolean_t
igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
{
	struct adapter *adapter = txr->adapter;
	struct e1000_adv_tx_context_desc *TXD;
	struct igb_tx_buffer *tx_buffer;
	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
	u32 mss_l4len_idx = 0;
	u16 vtag = 0;
	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
	struct ether_vlan_header *eh;
	struct ip *ip;
	struct tcphdr *th;

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	else
		ehdrlen = ETHER_HDR_LEN;

	/* Ensure we have at least the IP+TCP header in the first mbuf. */
	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
		return FALSE;

	/* Only supports IPV4 for now */
	ctxd = txr->next_avail_desc;
	tx_buffer = &txr->tx_buffers[ctxd];
	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];

	ip = (struct ip *)(mp->m_data + ehdrlen);
	if (ip->ip_p != IPPROTO_TCP)
		return FALSE;	/* 0 */
	/* Hardware recomputes the IP checksum for each segment. */
	ip->ip_sum = 0;
	ip_hlen = ip->ip_hl << 2;
	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
	/* Seed the pseudo-header checksum (length omitted, as TSO requires). */
	th->th_sum = in_pseudo(ip->ip_src.s_addr,
	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
	tcp_hlen = th->th_off << 2;
	/*
	 * Calculate header length, this is used
	 * in the transmit desc in igb_xmit
	 */
	*hdrlen = ehdrlen + ip_hlen + tcp_hlen;

	/* VLAN MACLEN IPLEN */
	if (mp->m_flags & M_VLANTAG) {
		vtag = htole16(mp->m_pkthdr.ether_vtag);
		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
	}

	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
	vlan_macip_lens |= ip_hlen;
	/* NOTE(review): fields are OR-ed into the descriptor; this assumes
	 * the ring slot was zeroed beforehand — confirm against ring init. */
	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);

	/* ADV DTYPE TUCMD */
	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);

	/* MSS L4LEN IDX */
	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
	TXD->mss_l4len_idx = htole32(mss_l4len_idx);

	TXD->seqnum_seed = htole32(0);
	/* Context descriptors carry no buffer: nothing to free at txeof. */
	tx_buffer->m_head = NULL;
	tx_buffer->next_eop = -1;

	/* Consume the slot, wrapping at the end of the ring. */
	if (++ctxd == adapter->num_tx_desc)
		ctxd = 0;

	txr->tx_avail--;
	txr->next_avail_desc = ctxd;
	return TRUE;
}
3181
3182
3183 /*********************************************************************
3184 *
3185 * Context Descriptor setup for VLAN or CSUM
3186 *
3187 **********************************************************************/
3188
/*
 * igb_tx_ctx_setup - program a TX context descriptor for checksum
 * offload and/or VLAN tag insertion.
 *
 * With advanced descriptors the VLAN tag must travel in a context
 * descriptor, so this runs for VLAN-tagged frames even when no
 * checksum offload was requested.
 *
 * Returns TRUE when the hardware should perform the requested offload,
 * FALSE otherwise.  Note: once past the initial VLAN check a descriptor
 * is consumed regardless of the return value.
 */
static bool
igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
{
	struct adapter *adapter = txr->adapter;
	struct e1000_adv_tx_context_desc *TXD;
	struct igb_tx_buffer *tx_buffer;
	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
	struct ether_vlan_header *eh;
	struct ip *ip = NULL;
	struct ip6_hdr *ip6;
	int ehdrlen, ctxd, ip_hlen = 0;
	u16 etype, vtag = 0;
	u8 ipproto = 0;
	bool offload = TRUE;

	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
		offload = FALSE;

	ctxd = txr->next_avail_desc;
	tx_buffer = &txr->tx_buffers[ctxd];
	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];

	/*
	** In advanced descriptors the vlan tag must
	** be placed into the context descriptor, thus
	** we need to be here just for that setup.
	*/
	if (mp->m_flags & M_VLANTAG) {
		vtag = htole16(mp->m_pkthdr.ether_vtag);
		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
	} else if (offload == FALSE)
		/* No VLAN and no checksum work: nothing to do, no
		 * descriptor consumed. */
		return FALSE;

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present,
	 * helpful for QinQ too.
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		ehdrlen = ETHER_HDR_LEN;
	}

	/* Set the ether header length */
	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;

	/* Pick the L3 type; bail out of offload when the header is not
	 * contiguous in the first mbuf. */
	switch (etype) {
	case ETHERTYPE_IP:
		ip = (struct ip *)(mp->m_data + ehdrlen);
		ip_hlen = ip->ip_hl << 2;
		if (mp->m_len < ehdrlen + ip_hlen) {
			offload = FALSE;
			break;
		}
		ipproto = ip->ip_p;
		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
		break;
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
		ip_hlen = sizeof(struct ip6_hdr);
		if (mp->m_len < ehdrlen + ip_hlen)
			return (FALSE);
		ipproto = ip6->ip6_nxt;
		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
		break;
	default:
		offload = FALSE;
		break;
	}

	vlan_macip_lens |= ip_hlen;
	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;

	/* L4 protocol: only set the offload type bit when the stack
	 * actually asked for that checksum. */
	switch (ipproto) {
	case IPPROTO_TCP:
		if (mp->m_pkthdr.csum_flags & CSUM_TCP)
			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
		break;
	case IPPROTO_UDP:
		if (mp->m_pkthdr.csum_flags & CSUM_UDP)
			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
		break;
#if __FreeBSD_version >= 800000
	case IPPROTO_SCTP:
		if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
		break;
#endif
	default:
		offload = FALSE;
		break;
	}

	/* Now copy bits into descriptor */
	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
	TXD->seqnum_seed = htole32(0);
	TXD->mss_l4len_idx = htole32(0);

	/* Context descriptors carry no buffer: nothing to free at txeof. */
	tx_buffer->m_head = NULL;
	tx_buffer->next_eop = -1;

	/* We've consumed the first desc, adjust counters */
	if (++ctxd == adapter->num_tx_desc)
		ctxd = 0;
	txr->next_avail_desc = ctxd;
	--txr->tx_avail;

	return (offload);
}
3303
3304
3305 /**********************************************************************
3306 *
3307 * Examine each tx_buffer in the used queue. If the hardware is done
3308 * processing the packet then free associated resources. The
3309 * tx_buffer is put back on the free queue.
3310 *
3311 * TRUE return means there's work in the ring to clean, FALSE its empty.
3312 **********************************************************************/
/*
 * igb_txeof - reclaim completed transmit descriptors.
 *
 * Examine each tx_buffer in the used queue. If the hardware is done
 * processing the packet then free associated resources. The
 * tx_buffer is put back on the free queue.
 *
 * Walks packet-by-packet: the outer loop tests the DD bit on each
 * packet's EOP descriptor, the inner loop frees every descriptor of
 * that packet.  Also manages the ring watchdog and IFF_DRV_OACTIVE.
 *
 * Caller must hold the ring's TX lock.
 *
 * TRUE return means there's work in the ring to clean, FALSE its empty.
 */
static bool
igb_txeof(struct tx_ring *txr)
{
	struct adapter *adapter = txr->adapter;
	int first, last, done, num_avail;
	u32 cleaned = 0;
	struct igb_tx_buffer *tx_buffer;
	struct e1000_tx_desc *tx_desc, *eop_desc;
	struct ifnet *ifp = adapter->ifp;

	IGB_TX_LOCK_ASSERT(txr);

	/* Whole ring free: nothing to reclaim. */
	if (txr->tx_avail == adapter->num_tx_desc)
		return FALSE;

	num_avail = txr->tx_avail;
	first = txr->next_to_clean;
	tx_desc = &txr->tx_base[first];
	tx_buffer = &txr->tx_buffers[first];
	last = tx_buffer->next_eop;
	eop_desc = &txr->tx_base[last];

	/*
	 * What this does is get the index of the
	 * first descriptor AFTER the EOP of the
	 * first packet, that way we can do the
	 * simple comparison on the inner while loop.
	 */
	if (++last == adapter->num_tx_desc)
		last = 0;
	done = last;

	/* Make the hardware's writebacks visible before reading status. */
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD);

	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
		/* We clean the range of the packet */
		while (first != done) {
			tx_desc->upper.data = 0;
			tx_desc->lower.data = 0;
			tx_desc->buffer_addr = 0;
			++num_avail; ++cleaned;

			/* Only the slot holding the mbuf owns a DMA load. */
			if (tx_buffer->m_head) {
				ifp->if_opackets++;
				bus_dmamap_sync(txr->txtag,
				    tx_buffer->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(txr->txtag,
				    tx_buffer->map);

				m_freem(tx_buffer->m_head);
				tx_buffer->m_head = NULL;
			}
			tx_buffer->next_eop = -1;

			if (++first == adapter->num_tx_desc)
				first = 0;

			tx_buffer = &txr->tx_buffers[first];
			tx_desc = &txr->tx_base[first];
		}
		/* See if we can continue to the next packet */
		last = tx_buffer->next_eop;
		if (last != -1) {
			eop_desc = &txr->tx_base[last];
			/* Get new done point */
			if (++last == adapter->num_tx_desc) last = 0;
			done = last;
		} else
			/* next_eop == -1: no further packet queued. */
			break;
	}
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	txr->next_to_clean = first;

	/*
	 * If we have enough room, clear IFF_DRV_OACTIVE to tell the stack
	 * that it is OK to send packets.
	 * If there are no pending descriptors, clear the timeout. Otherwise,
	 * if some descriptors have been freed, restart the timeout.
	 */
	if (num_avail > IGB_TX_CLEANUP_THRESHOLD) {
		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
		/* All clean, turn off the timer */
		if (num_avail == adapter->num_tx_desc) {
			txr->watchdog_timer = 0;
			txr->tx_avail = num_avail;
			return FALSE;
		}
	}

	/* Some cleaned, reset the timer */
	if (cleaned)
		txr->watchdog_timer = IGB_TX_TIMEOUT;
	txr->tx_avail = num_avail;
	return TRUE;
}
3412
3413
3414 /*********************************************************************
3415 *
3416 * Setup descriptor buffer(s) from system mbuf buffer pools.
3417 * i - designates the ring index
3418 * clean - tells the function whether to update
3419 * the header, the packet buffer, or both.
3420 *
3421 **********************************************************************/
3422 static int
3423 igb_get_buf(struct rx_ring *rxr, int i, u8 clean)
3424 {
3425 struct adapter *adapter = rxr->adapter;
3426 struct mbuf *mh, *mp;
3427 bus_dma_segment_t seg[2];
3428 bus_dmamap_t map;
3429 struct igb_rx_buffer *rx_buffer;
3430 int error, nsegs;
3431 int merr = 0;
3432
3433
3434 rx_buffer = &rxr->rx_buffers[i];
3435
3436 /* First get our header and payload mbuf */
3437 if (clean & IGB_CLEAN_HEADER) {
3438 mh = m_gethdr(M_DONTWAIT, MT_DATA);
3439 if (mh == NULL)
3440 goto remap;
3441 } else /* reuse */
3442 mh = rxr->rx_buffers[i].m_head;
3443
3444 mh->m_len = MHLEN;
3445 mh->m_flags |= M_PKTHDR;
3446
3447 if (clean & IGB_CLEAN_PAYLOAD) {
3448 mp = m_getjcl(M_DONTWAIT, MT_DATA,
3449 M_PKTHDR, adapter->rx_mbuf_sz);
3450 if (mp == NULL)
3451 goto remap;
3452 mp->m_len = adapter->rx_mbuf_sz;
3453 mp->m_flags &= ~M_PKTHDR;
3454 } else { /* reusing */
3455 mp = rxr->rx_buffers[i].m_pack;
3456 mp->m_len = adapter->rx_mbuf_sz;
3457 mp->m_flags &= ~M_PKTHDR;
3458 }
3459 /*
3460 ** Need to create a chain for the following
3461 ** dmamap call at this point.
3462 */
3463 mh->m_next = mp;
3464 mh->m_pkthdr.len = mh->m_len + mp->m_len;
3465
3466 /* Get the memory mapping */
3467 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3468 rxr->rx_spare_map, mh, seg, &nsegs, BUS_DMA_NOWAIT);
3469 if (error != 0) {
3470 printf("GET BUF: dmamap load failure - %d\n", error);
3471 m_free(mh);
3472 return (error);
3473 }
3474
3475 /* Unload old mapping and update buffer struct */
3476 if (rx_buffer->m_head != NULL)
3477 bus_dmamap_unload(rxr->rxtag, rx_buffer->map);
3478 map = rx_buffer->map;
3479 rx_buffer->map = rxr->rx_spare_map;
3480 rxr->rx_spare_map = map;
3481 rx_buffer->m_head = mh;
3482 rx_buffer->m_pack = mp;
3483 bus_dmamap_sync(rxr->rxtag,
3484 rx_buffer->map, BUS_DMASYNC_PREREAD);
3485
3486 /* Update descriptor */
3487 rxr->rx_base[i].read.hdr_addr = htole64(seg[0].ds_addr);
3488 rxr->rx_base[i].read.pkt_addr = htole64(seg[1].ds_addr);
3489
3490 return (0);
3491
3492 /*
3493 ** If we get here, we have an mbuf resource
3494 ** issue, so we discard the incoming packet
3495 ** and attempt to reuse existing mbufs next
3496 ** pass thru the ring, but to do so we must
3497 ** fix up the descriptor which had the address
3498 ** clobbered with writeback info.
3499 */
3500 remap:
3501 adapter->mbuf_header_failed++;
3502 merr = ENOBUFS;
3503 /* Is there a reusable buffer? */
3504 mh = rxr->rx_buffers[i].m_head;
3505 if (mh == NULL) /* Nope, init error */
3506 return (merr);
3507 mp = rxr->rx_buffers[i].m_pack;
3508 if (mp == NULL) /* Nope, init error */
3509 return (merr);
3510 /* Get our old mapping */
3511 rx_buffer = &rxr->rx_buffers[i];
3512 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3513 rx_buffer->map, mh, seg, &nsegs, BUS_DMA_NOWAIT);
3514 if (error != 0) {
3515 /* We really have a problem */
3516 m_free(mh);
3517 return (error);
3518 }
3519 /* Now fix the descriptor as needed */
3520 rxr->rx_base[i].read.hdr_addr = htole64(seg[0].ds_addr);
3521 rxr->rx_base[i].read.pkt_addr = htole64(seg[1].ds_addr);
3522 return (merr);
3523 }
3524
3525
3526 /*********************************************************************
3527 *
3528 * Allocate memory for rx_buffer structures. Since we use one
3529 * rx_buffer per received packet, the maximum number of rx_buffer's
3530 * that we'll need is equal to the number of receive descriptors
3531 * that we've allocated.
3532 *
3533 **********************************************************************/
3534 static int
3535 igb_allocate_receive_buffers(struct rx_ring *rxr)
3536 {
3537 struct adapter *adapter = rxr->adapter;
3538 device_t dev = adapter->dev;
3539 struct igb_rx_buffer *rxbuf;
3540 int i, bsize, error;
3541
3542 bsize = sizeof(struct igb_rx_buffer) * adapter->num_rx_desc;
3543 if (!(rxr->rx_buffers =
3544 (struct igb_rx_buffer *) malloc(bsize,
3545 M_DEVBUF, M_NOWAIT | M_ZERO))) {
3546 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3547 error = ENOMEM;
3548 goto fail;
3549 }
3550
3551 /*
3552 ** The tag is made to accomodate the largest buffer size
3553 ** with packet split (hence the two segments, even though
3554 ** it may not always use this.
3555 */
3556 if ((error = bus_dma_tag_create(NULL, /* parent */
3557 1, 0, /* alignment, bounds */
3558 BUS_SPACE_MAXADDR, /* lowaddr */
3559 BUS_SPACE_MAXADDR, /* highaddr */
3560 NULL, NULL, /* filter, filterarg */
3561 MJUM16BYTES, /* maxsize */
3562 2, /* nsegments */
3563 MJUMPAGESIZE, /* maxsegsize */
3564 0, /* flags */
3565 NULL, /* lockfunc */
3566 NULL, /* lockfuncarg */
3567 &rxr->rxtag))) {
3568 device_printf(dev, "Unable to create RX DMA tag\n");
3569 goto fail;
3570 }
3571
3572 /* Create the spare map (used by getbuf) */
3573 error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3574 &rxr->rx_spare_map);
3575 if (error) {
3576 device_printf(dev,
3577 "%s: bus_dmamap_create header spare failed: %d\n",
3578 __func__, error);
3579 goto fail;
3580 }
3581
3582 for (i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
3583 rxbuf = &rxr->rx_buffers[i];
3584 error = bus_dmamap_create(rxr->rxtag,
3585 BUS_DMA_NOWAIT, &rxbuf->map);
3586 if (error) {
3587 device_printf(dev, "Unable to create RX DMA maps\n");
3588 goto fail;
3589 }
3590 }
3591
3592 return (0);
3593
3594 fail:
3595 /* Frees all, but can handle partial completion */
3596 igb_free_receive_structures(adapter);
3597 return (error);
3598 }
3599
3600 /*********************************************************************
3601 *
3602 * Initialize a receive ring and its buffers.
3603 *
3604 **********************************************************************/
/*
 * igb_setup_receive_ring - (re)initialize one RX ring and its buffers.
 *
 * Zeroes the descriptor area, frees any mbufs still attached to the
 * ring, replenishes every slot via igb_get_buf, resets the ring
 * indices, and optionally enables LRO (header split is only turned on
 * together with LRO).
 *
 * Returns 0 on success, ENOBUFS on mbuf shortage or LRO init failure
 * (after freeing whatever buffers were already attached).
 */
static int
igb_setup_receive_ring(struct rx_ring *rxr)
{
	struct adapter		*adapter;
	struct ifnet		*ifp;
	device_t		dev;
	struct igb_rx_buffer	*rxbuf;
	struct lro_ctrl		*lro = &rxr->lro;
	int			j, rsize;

	adapter = rxr->adapter;
	dev = adapter->dev;
	ifp = adapter->ifp;
	rxr->lro_enabled = FALSE;
	rxr->hdr_split = FALSE;

	/* Clear the ring contents */
	rsize = roundup2(adapter->num_rx_desc *
	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
	bzero((void *)rxr->rx_base, rsize);

	/*
	** Free current RX buffer structures and their mbufs
	*/
	for (int i = 0; i < adapter->num_rx_desc; i++) {
		rxbuf = &rxr->rx_buffers[i];
		bus_dmamap_sync(rxr->rxtag, rxbuf->map,
		    BUS_DMASYNC_POSTREAD);
		bus_dmamap_unload(rxr->rxtag, rxbuf->map);
		if (rxbuf->m_head) {
			/* Link header and payload so one m_freem
			 * releases both. */
			rxbuf->m_head->m_next = rxbuf->m_pack;
			m_freem(rxbuf->m_head);
		}
		rxbuf->m_head = NULL;
		rxbuf->m_pack = NULL;
	}

	/* Next replenish the ring */
	for (j = 0; j < adapter->num_rx_desc; j++) {
		if (igb_get_buf(rxr, j, IGB_CLEAN_BOTH) == ENOBUFS) {
			rxr->rx_buffers[j].m_head = NULL;
			rxr->rx_buffers[j].m_pack = NULL;
			rxr->rx_base[j].read.hdr_addr = 0;
			rxr->rx_base[j].read.pkt_addr = 0;
			goto fail;
		}
	}

	/* Setup our descriptor indices */
	rxr->next_to_check = 0;
	rxr->last_cleaned = 0;

	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	/*
	** Now set up the LRO interface, we
	** also only do head split when LRO
	** is enabled, since so often they
	** are undesireable in similar setups.
	*/
	if (ifp->if_capenable & IFCAP_LRO) {
		int err = tcp_lro_init(lro);
		if (err) {
			device_printf(dev,"LRO Initialization failed!\n");
			goto fail;
		}
		INIT_DEBUGOUT("RX LRO Initialized\n");
		rxr->lro_enabled = TRUE;
		rxr->hdr_split = TRUE;
		lro->ifp = adapter->ifp;
	}

	return (0);
fail:
	/*
	 * We need to clean up any buffers allocated
	 * so far, 'j' is the failing index.
	 * (On the LRO failure path j == num_rx_desc, so everything
	 * replenished above is released.)
	 */
	for (int i = 0; i < j; i++) {
		rxbuf = &rxr->rx_buffers[i];
		if (rxbuf->m_head != NULL) {
			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
			  BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
			m_freem(rxbuf->m_head);
			rxbuf->m_head = NULL;
		}
	}
	return (ENOBUFS);
}
3696
3697 /*********************************************************************
3698 *
3699 * Initialize all receive rings.
3700 *
3701 **********************************************************************/
3702 static int
3703 igb_setup_receive_structures(struct adapter *adapter)
3704 {
3705 struct rx_ring *rxr = adapter->rx_rings;
3706 int i, j;
3707
3708 for (i = 0; i < adapter->num_queues; i++, rxr++)
3709 if (igb_setup_receive_ring(rxr))
3710 goto fail;
3711
3712 return (0);
3713 fail:
3714 /*
3715 * Free RX buffers allocated so far, we will only handle
3716 * the rings that completed, the failing case will have
3717 * cleaned up for itself. The value of 'i' will be the
3718 * failed ring so we must pre-decrement it.
3719 */
3720 rxr = adapter->rx_rings;
3721 for (--i; i > 0; i--, rxr++) {
3722 for (j = 0; j < adapter->num_rx_desc; j++) {
3723 struct igb_rx_buffer *rxbuf;
3724 rxbuf = &rxr->rx_buffers[j];
3725 if (rxbuf->m_head != NULL) {
3726 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3727 BUS_DMASYNC_POSTREAD);
3728 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3729 m_freem(rxbuf->m_head);
3730 rxbuf->m_head = NULL;
3731 }
3732 }
3733 }
3734
3735 return (ENOBUFS);
3736 }
3737
3738 /*********************************************************************
3739 *
3740 * Enable receive unit.
3741 *
3742 **********************************************************************/
/*
 * igb_initialize_receive_units - program the hardware receive path.
 *
 * With receives disabled: configures header split / buffer sizing
 * (SRRCTL), jumbo-frame limits, per-queue ring base/length and RXDCTL
 * thresholds, RSS redirection and hash keys for multiqueue, checksum
 * offload (RXCSUM), and finally the receive control register and the
 * head/tail pointers for each ring.
 */
static void
igb_initialize_receive_units(struct adapter *adapter)
{
	struct rx_ring	*rxr = adapter->rx_rings;
	struct ifnet	*ifp = adapter->ifp;
	u32		rctl, rxcsum, psize, srrctl = 0;

	INIT_DEBUGOUT("igb_initialize_receive_unit: begin");

	/*
	 * Make sure receives are disabled while setting
	 * up the descriptor ring
	 */
	rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);

	/*
	** Set up for header split
	** (hdr_split was decided per-ring in igb_setup_receive_ring;
	** the first ring's setting is applied to all queues here.)
	*/
	if (rxr->hdr_split) {
		/* Use a standard mbuf for the header */
		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
	} else
		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;

	/*
	** Set up for jumbo frames
	*/
	if (ifp->if_mtu > ETHERMTU) {
		rctl |= E1000_RCTL_LPE;
		srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;

		/* Set maximum packet len */
		psize = adapter->max_frame_size;
		/* are we on a vlan? */
		if (adapter->ifp->if_vlantrunk != NULL)
			psize += VLAN_TAG_SIZE;
		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
	} else {
		rctl &= ~E1000_RCTL_LPE;
		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
		rctl |= E1000_RCTL_SZ_2048;
	}

	/* Setup the Base and Length of the Rx Descriptor Rings */
	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
		u64 bus_addr = rxr->rxdma.dma_paddr;
		u32 rxdctl;

		E1000_WRITE_REG(&adapter->hw, E1000_RDLEN(i),
		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
		E1000_WRITE_REG(&adapter->hw, E1000_RDBAH(i),
		    (uint32_t)(bus_addr >> 32));
		E1000_WRITE_REG(&adapter->hw, E1000_RDBAL(i),
		    (uint32_t)bus_addr);
		E1000_WRITE_REG(&adapter->hw, E1000_SRRCTL(i), srrctl);
		/* Enable this Queue */
		rxdctl = E1000_READ_REG(&adapter->hw, E1000_RXDCTL(i));
		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
		rxdctl &= 0xFFF00000;
		rxdctl |= IGB_RX_PTHRESH;
		rxdctl |= IGB_RX_HTHRESH << 8;
		rxdctl |= IGB_RX_WTHRESH << 16;
		E1000_WRITE_REG(&adapter->hw, E1000_RXDCTL(i), rxdctl);
	}

	/*
	** Setup for RX MultiQueue
	*/
	rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM);
	if (adapter->num_queues >1) {
		u32 random[10], mrqc, shift = 0;
		union igb_reta {
			u32 dword;
			u8  bytes[4];
		} reta;

		/* Random key for the RSS hash */
		arc4rand(&random, sizeof(random), 0);
		/* 82575 uses a shifted queue index in the RETA bytes. */
		if (adapter->hw.mac.type == e1000_82575)
			shift = 6;
		/* Warning FM follows */
		for (int i = 0; i < 128; i++) {
			reta.bytes[i & 3] =
			    (i % adapter->num_queues) << shift;
			/* Flush each completed 4-byte RETA word. */
			if ((i & 3) == 3)
				E1000_WRITE_REG(&adapter->hw,
				    E1000_RETA(i >> 2), reta.dword);
		}
		/* Now fill in hash table */
		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
		for (int i = 0; i < 10; i++)
			E1000_WRITE_REG_ARRAY(&adapter->hw,
			    E1000_RSSRK(0), i, random[i]);

		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
		mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP |
		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
		mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);

		E1000_WRITE_REG(&adapter->hw, E1000_MRQC, mrqc);

		/*
		** NOTE: Receive Full-Packet Checksum Offload
		** is mutually exclusive with Multiqueue. However
		** this is not the same as TCP/IP checksums which
		** still work.
		*/
		rxcsum |= E1000_RXCSUM_PCSD;
#if __FreeBSD_version >= 800000
		/* For SCTP Offload */
		if ((adapter->hw.mac.type == e1000_82576)
		    && (ifp->if_capenable & IFCAP_RXCSUM))
			rxcsum |= E1000_RXCSUM_CRCOFL;
#endif
	} else {
		/* Non RSS setup */
		if (ifp->if_capenable & IFCAP_RXCSUM) {
			rxcsum |= E1000_RXCSUM_IPPCSE;
#if __FreeBSD_version >= 800000
			if (adapter->hw.mac.type == e1000_82576)
				rxcsum |= E1000_RXCSUM_CRCOFL;
#endif
		} else
			rxcsum &= ~E1000_RXCSUM_TUOFL;
	}
	E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, rxcsum);

	/* Setup the Receive Control Register */
	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
		   E1000_RCTL_RDMTS_HALF |
		   (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT);

	/* Make sure VLAN Filters are off */
	rctl &= ~E1000_RCTL_VFE;
	/* Don't store bad packets */
	rctl &= ~E1000_RCTL_SBP;

	/* Enable Receives */
	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);

	/*
	 * Setup the HW Rx Head and Tail Descriptor Pointers
	 *   - needs to be after enable
	 */
	for (int i = 0; i < adapter->num_queues; i++) {
		E1000_WRITE_REG(&adapter->hw, E1000_RDH(i), 0);
		E1000_WRITE_REG(&adapter->hw, E1000_RDT(i),
		     adapter->num_rx_desc - 1);
	}
	return;
}
3901
3902 /*********************************************************************
3903 *
3904 * Free receive rings.
3905 *
3906 **********************************************************************/
3907 static void
3908 igb_free_receive_structures(struct adapter *adapter)
3909 {
3910 struct rx_ring *rxr = adapter->rx_rings;
3911
3912 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3913 struct lro_ctrl *lro = &rxr->lro;
3914 igb_free_receive_buffers(rxr);
3915 tcp_lro_free(lro);
3916 igb_dma_free(adapter, &rxr->rxdma);
3917 }
3918
3919 free(adapter->rx_rings, M_DEVBUF);
3920 }
3921
3922 /*********************************************************************
3923 *
3924 * Free receive ring data structures.
3925 *
3926 **********************************************************************/
3927 static void
3928 igb_free_receive_buffers(struct rx_ring *rxr)
3929 {
3930 struct adapter *adapter = rxr->adapter;
3931 struct igb_rx_buffer *rx_buffer;
3932
3933 INIT_DEBUGOUT("free_receive_structures: begin");
3934
3935 if (rxr->rx_spare_map) {
3936 bus_dmamap_destroy(rxr->rxtag, rxr->rx_spare_map);
3937 rxr->rx_spare_map = NULL;
3938 }
3939
3940 /* Cleanup any existing buffers */
3941 if (rxr->rx_buffers != NULL) {
3942 rx_buffer = &rxr->rx_buffers[0];
3943 for (int i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3944 if (rx_buffer->m_head != NULL) {
3945 bus_dmamap_sync(rxr->rxtag, rx_buffer->map,
3946 BUS_DMASYNC_POSTREAD);
3947 bus_dmamap_unload(rxr->rxtag,
3948 rx_buffer->map);
3949 m_freem(rx_buffer->m_head);
3950 rx_buffer->m_head = NULL;
3951 } else if (rx_buffer->map != NULL)
3952 bus_dmamap_unload(rxr->rxtag,
3953 rx_buffer->map);
3954 if (rx_buffer->map != NULL) {
3955 bus_dmamap_destroy(rxr->rxtag,
3956 rx_buffer->map);
3957 rx_buffer->map = NULL;
3958 }
3959 }
3960 }
3961
3962 if (rxr->rx_buffers != NULL) {
3963 free(rxr->rx_buffers, M_DEVBUF);
3964 rxr->rx_buffers = NULL;
3965 }
3966
3967 if (rxr->rxtag != NULL) {
3968 bus_dma_tag_destroy(rxr->rxtag);
3969 rxr->rxtag = NULL;
3970 }
3971 }
3972 /*********************************************************************
3973 *
3974 * This routine executes in interrupt context. It replenishes
3975 * the mbufs in the descriptor and sends data which has been
3976 * dma'ed into host memory to upper layer.
3977 *
3978 * We loop at most count times if count is > 0, or until done if
3979 * count < 0.
3980 *
3981 * Return TRUE if more to clean, FALSE otherwise
3982 *********************************************************************/
3983 static bool
3984 igb_rxeof(struct rx_ring *rxr, int count)
3985 {
3986 struct adapter *adapter = rxr->adapter;
3987 struct ifnet *ifp;
3988 struct lro_ctrl *lro = &rxr->lro;
3989 struct lro_entry *queued;
3990 int i;
3991 u32 staterr;
3992 union e1000_adv_rx_desc *cur;
3993
3994
3995 IGB_RX_LOCK(rxr);
3996 ifp = adapter->ifp;
3997 i = rxr->next_to_check;
3998 cur = &rxr->rx_base[i];
3999 staterr = cur->wb.upper.status_error;
4000
4001 if (!(staterr & E1000_RXD_STAT_DD)) {
4002 IGB_RX_UNLOCK(rxr);
4003 return FALSE;
4004 }
4005
4006 /* Sync the ring */
4007 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4008 BUS_DMASYNC_POSTREAD);
4009
4010 /* Main clean loop */
4011 while ((staterr & E1000_RXD_STAT_DD) &&
4012 (count != 0) &&
4013 (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
4014 struct mbuf *sendmp, *mh, *mp;
4015 u16 hlen, plen, hdr, ptype, len_adj, vtag;
4016 u8 dopayload, accept_frame, eop;
4017
4018 accept_frame = 1;
4019 hlen = plen = len_adj = vtag = 0;
4020 sendmp = mh = mp = NULL;
4021 ptype = (u16)(cur->wb.lower.lo_dword.data >> 4);
4022
4023 /* Sync the buffers */
4024 bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[i].map,
4025 BUS_DMASYNC_POSTREAD);
4026
4027 /*
4028 ** The way the hardware is configured to
4029 ** split, it will ONLY use the header buffer
4030 ** when header split is enabled, otherwise we
4031 ** get normal behavior, ie, both header and
4032 ** payload are DMA'd into the payload buffer.
4033 **
4034 ** The fmp test is to catch the case where a
4035 ** packet spans multiple descriptors, in that
4036 ** case only the first header is valid.
4037 */
4038 if ((rxr->hdr_split) && (rxr->fmp == NULL)){
4039 hdr = le16toh(cur->
4040 wb.lower.lo_dword.hs_rss.hdr_info);
4041 hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4042 E1000_RXDADV_HDRBUFLEN_SHIFT;
4043 if (hlen > IGB_HDR_BUF)
4044 hlen = IGB_HDR_BUF;
4045 plen = le16toh(cur->wb.upper.length);
4046 /* Handle the header mbuf */
4047 mh = rxr->rx_buffers[i].m_head;
4048 mh->m_len = hlen;
4049 dopayload = IGB_CLEAN_HEADER;
4050 /*
4051 ** Get the payload length, this
4052 ** could be zero if its a small
4053 ** packet.
4054 */
4055 if (plen) {
4056 mp = rxr->rx_buffers[i].m_pack;
4057 mp->m_len = plen;
4058 mp->m_next = NULL;
4059 mp->m_flags &= ~M_PKTHDR;
4060 mh->m_next = mp;
4061 mh->m_flags |= M_PKTHDR;
4062 dopayload = IGB_CLEAN_BOTH;
4063 rxr->rx_split_packets++;
4064 } else { /* small packets */
4065 mh->m_flags &= ~M_PKTHDR;
4066 mh->m_next = NULL;
4067 }
4068 } else {
4069 /*
4070 ** Either no header split, or a
4071 ** secondary piece of a fragmented
4072 ** split packet.
4073 */
4074 mh = rxr->rx_buffers[i].m_pack;
4075 mh->m_flags |= M_PKTHDR;
4076 mh->m_len = le16toh(cur->wb.upper.length);
4077 dopayload = IGB_CLEAN_PAYLOAD;
4078 }
4079
4080 if (staterr & E1000_RXD_STAT_EOP) {
4081 count--;
4082 eop = 1;
4083 /*
4084 ** Strip CRC and account for frag
4085 */
4086 if (mp) {
4087 if (mp->m_len < ETHER_CRC_LEN) {
4088 /* a frag, how much is left? */
4089 len_adj = ETHER_CRC_LEN - mp->m_len;
4090 mp->m_len = 0;
4091 } else
4092 mp->m_len -= ETHER_CRC_LEN;
4093 } else { /* not split */
4094 if (mh->m_len < ETHER_CRC_LEN) {
4095 len_adj = ETHER_CRC_LEN - mh->m_len;
4096 mh->m_len = 0;
4097 } else
4098 mh->m_len -= ETHER_CRC_LEN;
4099 }
4100 } else
4101 eop = 0;
4102
4103 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK)
4104 accept_frame = 0;
4105 #ifdef IGB_IEEE1588
4106 This linux code needs to be converted to work here
4107 -----------------------------------------------------
4108 if (unlikely(staterr & E1000_RXD_STAT_TS)) {
4109 u64 regval;
4110 u64 ns;
4111 // Create an mtag and set it up
4112 struct skb_shared_hwtstamps *shhwtstamps =
4113 skb_hwtstamps(skb);
4114
4115 rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID),
4116 "igb: no RX time stamp available for time stamped packet");
4117 regval = rd32(E1000_RXSTMPL);
4118 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
4119 // Do time conversion from the register
4120 ns = timecounter_cyc2time(&adapter->clock, regval);
4121 clocksync_update(&adapter->sync, ns);
4122 memset(shhwtstamps, 0, sizeof(*shhwtstamps));
4123 shhwtstamps->hwtstamp = ns_to_ktime(ns);
4124 shhwtstamps->syststamp =
4125 clocksync_hw2sys(&adapter->sync, ns);
4126 }
4127 #endif
4128 if (accept_frame) {
4129 /*
4130 ** get_buf will overwrite the writeback
4131 ** descriptor so save the VLAN tag now.
4132 */
4133 vtag = le16toh(cur->wb.upper.vlan);
4134 if (igb_get_buf(rxr, i, dopayload) != 0) {
4135 ifp->if_iqdrops++;
4136 goto discard;
4137 }
4138 /* Initial frame - setup */
4139 if (rxr->fmp == NULL) {
4140 mh->m_flags |= M_PKTHDR;
4141 mh->m_pkthdr.len = mh->m_len;
4142 rxr->fmp = mh; /* Store the first mbuf */
4143 rxr->lmp = mh;
4144 if (mp) { /* Add payload if split */
4145 mh->m_pkthdr.len += mp->m_len;
4146 rxr->lmp = mh->m_next;
4147 }
4148 } else {
4149 /* Chain mbuf's together */
4150 mh->m_flags &= ~M_PKTHDR;
4151 rxr->lmp->m_next = mh;
4152 rxr->lmp = rxr->lmp->m_next;
4153 rxr->fmp->m_pkthdr.len += mh->m_len;
4154 /* Adjust for CRC frag */
4155 if (len_adj) {
4156 rxr->lmp->m_len -= len_adj;
4157 rxr->fmp->m_pkthdr.len -= len_adj;
4158 }
4159 }
4160
4161 if (eop) {
4162 bool sctp = ((ptype & 0x40) != 0);
4163 rxr->fmp->m_pkthdr.rcvif = ifp;
4164 ifp->if_ipackets++;
4165 rxr->rx_packets++;
4166 /* capture data for AIM */
4167 rxr->bytes += rxr->fmp->m_pkthdr.len;
4168 rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4169
4170 igb_rx_checksum(staterr, rxr->fmp, sctp);
4171 if (staterr & E1000_RXD_STAT_VP) {
4172 rxr->fmp->m_pkthdr.ether_vtag = vtag;
4173 rxr->fmp->m_flags |= M_VLANTAG;
4174 }
4175 #if __FreeBSD_version >= 800000
4176 rxr->fmp->m_pkthdr.flowid = curcpu;
4177 rxr->fmp->m_flags |= M_FLOWID;
4178 #endif
4179 sendmp = rxr->fmp;
4180 rxr->fmp = NULL;
4181 rxr->lmp = NULL;
4182 }
4183 } else {
4184 ifp->if_ierrors++;
4185 discard:
4186 /* Reuse loaded DMA map and just update mbuf chain */
4187 if (hlen) {
4188 mh = rxr->rx_buffers[i].m_head;
4189 mh->m_len = MHLEN;
4190 mh->m_next = NULL;
4191 }
4192 mp = rxr->rx_buffers[i].m_pack;
4193 mp->m_len = mp->m_pkthdr.len = adapter->rx_mbuf_sz;
4194 mp->m_data = mp->m_ext.ext_buf;
4195 mp->m_next = NULL;
4196 if (adapter->max_frame_size <=
4197 (MCLBYTES - ETHER_ALIGN))
4198 m_adj(mp, ETHER_ALIGN);
4199 if (rxr->fmp != NULL) {
4200 /* handles the whole chain */
4201 m_freem(rxr->fmp);
4202 rxr->fmp = NULL;
4203 rxr->lmp = NULL;
4204 }
4205 sendmp = NULL;
4206 }
4207
4208 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4209 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4210
4211 rxr->last_cleaned = i; /* For updating tail */
4212
4213 /* Advance our pointers to the next descriptor. */
4214 if (++i == adapter->num_rx_desc)
4215 i = 0;
4216
4217 /*
4218 ** Note that we hold the RX lock thru
4219 ** the following call so this ring's
4220 ** next_to_check is not gonna change.
4221 */
4222 if (sendmp != NULL) {
4223 /*
4224 ** Send to the stack if:
4225 ** - LRO not enabled, or
4226 ** - no LRO resources, or
4227 ** - lro enqueue fails
4228 */
4229 if ((!rxr->lro_enabled) ||
4230 ((!lro->lro_cnt) || (tcp_lro_rx(lro, sendmp, 0))))
4231 (*ifp->if_input)(ifp, sendmp);
4232 }
4233
4234 /* Get the next descriptor */
4235 cur = &rxr->rx_base[i];
4236 staterr = cur->wb.upper.status_error;
4237 }
4238 rxr->next_to_check = i;
4239
4240 /* Advance the E1000's Receive Queue #0 "Tail Pointer". */
4241 E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), rxr->last_cleaned);
4242
4243 /*
4244 * Flush any outstanding LRO work
4245 */
4246 while (!SLIST_EMPTY(&lro->lro_active)) {
4247 queued = SLIST_FIRST(&lro->lro_active);
4248 SLIST_REMOVE_HEAD(&lro->lro_active, next);
4249 tcp_lro_flush(lro, queued);
4250 }
4251
4252 IGB_RX_UNLOCK(rxr);
4253
4254 /*
4255 ** We still have cleaning to do?
4256 ** Schedule another interrupt if so.
4257 */
4258 if (staterr & E1000_RXD_STAT_DD) {
4259 E1000_WRITE_REG(&adapter->hw, E1000_EICS, rxr->eims);
4260 return TRUE;
4261 }
4262
4263 return FALSE;
4264 }
4265
4266
4267 /*********************************************************************
4268 *
4269 * Verify that the hardware indicated that the checksum is valid.
4270 * Inform the stack about the status of checksum so that stack
4271 * doesn't spend time verifying the checksum.
4272 *
4273 *********************************************************************/
4274 static void
4275 igb_rx_checksum(u32 staterr, struct mbuf *mp, bool sctp)
4276 {
4277 u16 status = (u16)staterr;
4278 u8 errors = (u8) (staterr >> 24);
4279
4280 /* Ignore Checksum bit is set */
4281 if (status & E1000_RXD_STAT_IXSM) {
4282 mp->m_pkthdr.csum_flags = 0;
4283 return;
4284 }
4285
4286 if (status & E1000_RXD_STAT_IPCS) {
4287 /* Did it pass? */
4288 if (!(errors & E1000_RXD_ERR_IPE)) {
4289 /* IP Checksum Good */
4290 mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4291 mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4292 } else
4293 mp->m_pkthdr.csum_flags = 0;
4294 }
4295
4296 if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4297 u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4298 #if __FreeBSD_version >= 800000
4299 if (sctp) /* reassign */
4300 type = CSUM_SCTP_VALID;
4301 #endif
4302 /* Did it pass? */
4303 if (!(errors & E1000_RXD_ERR_TCPE)) {
4304 mp->m_pkthdr.csum_flags |= type;
4305 if (!sctp)
4306 mp->m_pkthdr.csum_data = htons(0xffff);
4307 }
4308 }
4309 return;
4310 }
4311
4312 /*
4313 * This routine is run via an vlan
4314 * config EVENT
4315 */
4316 static void
4317 igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4318 {
4319 struct adapter *adapter = ifp->if_softc;
4320 u32 index, bit;
4321
4322 if (ifp->if_softc != arg) /* Not our event */
4323 return;
4324
4325 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
4326 return;
4327
4328 index = (vtag >> 5) & 0x7F;
4329 bit = vtag & 0x1F;
4330 igb_shadow_vfta[index] |= (1 << bit);
4331 ++adapter->num_vlans;
4332 /* Re-init to load the changes */
4333 igb_init(adapter);
4334 }
4335
4336 /*
4337 * This routine is run via an vlan
4338 * unconfig EVENT
4339 */
4340 static void
4341 igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4342 {
4343 struct adapter *adapter = ifp->if_softc;
4344 u32 index, bit;
4345
4346 if (ifp->if_softc != arg)
4347 return;
4348
4349 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
4350 return;
4351
4352 index = (vtag >> 5) & 0x7F;
4353 bit = vtag & 0x1F;
4354 igb_shadow_vfta[index] &= ~(1 << bit);
4355 --adapter->num_vlans;
4356 /* Re-init to load the changes */
4357 igb_init(adapter);
4358 }
4359
4360 static void
4361 igb_setup_vlan_hw_support(struct adapter *adapter)
4362 {
4363 struct e1000_hw *hw = &adapter->hw;
4364 u32 reg;
4365
4366 /*
4367 ** We get here thru init_locked, meaning
4368 ** a soft reset, this has already cleared
4369 ** the VFTA and other state, so if there
4370 ** have been no vlan's registered do nothing.
4371 */
4372 if (adapter->num_vlans == 0)
4373 return;
4374
4375 /*
4376 ** A soft reset zero's out the VFTA, so
4377 ** we need to repopulate it now.
4378 */
4379 for (int i = 0; i < IGB_VFTA_SIZE; i++)
4380 if (igb_shadow_vfta[i] != 0)
4381 E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4382 i, igb_shadow_vfta[i]);
4383
4384 reg = E1000_READ_REG(hw, E1000_CTRL);
4385 reg |= E1000_CTRL_VME;
4386 E1000_WRITE_REG(hw, E1000_CTRL, reg);
4387
4388 /* Enable the Filter Table */
4389 reg = E1000_READ_REG(hw, E1000_RCTL);
4390 reg &= ~E1000_RCTL_CFIEN;
4391 reg |= E1000_RCTL_VFE;
4392 E1000_WRITE_REG(hw, E1000_RCTL, reg);
4393
4394 /* Update the frame size */
4395 E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4396 adapter->max_frame_size + VLAN_TAG_SIZE);
4397 }
4398
4399 static void
4400 igb_enable_intr(struct adapter *adapter)
4401 {
4402 /* With RSS set up what to auto clear */
4403 if (adapter->msix_mem) {
4404 E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
4405 adapter->eims_mask);
4406 E1000_WRITE_REG(&adapter->hw, E1000_EIAM,
4407 adapter->eims_mask);
4408 E1000_WRITE_REG(&adapter->hw, E1000_EIMS,
4409 adapter->eims_mask);
4410 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4411 E1000_IMS_LSC);
4412 } else {
4413 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4414 IMS_ENABLE_MASK);
4415 }
4416 E1000_WRITE_FLUSH(&adapter->hw);
4417
4418 return;
4419 }
4420
4421 static void
4422 igb_disable_intr(struct adapter *adapter)
4423 {
4424 if (adapter->msix_mem) {
4425 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4426 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4427 }
4428 E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4429 E1000_WRITE_FLUSH(&adapter->hw);
4430 return;
4431 }
4432
4433 /*
4434 * Bit of a misnomer, what this really means is
4435 * to enable OS management of the system... aka
4436 * to disable special hardware management features
4437 */
4438 static void
4439 igb_init_manageability(struct adapter *adapter)
4440 {
4441 if (adapter->has_manage) {
4442 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4443 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4444
4445 /* disable hardware interception of ARP */
4446 manc &= ~(E1000_MANC_ARP_EN);
4447
4448 /* enable receiving management packets to the host */
4449 manc |= E1000_MANC_EN_MNG2HOST;
4450 manc2h |= 1 << 5; /* Mng Port 623 */
4451 manc2h |= 1 << 6; /* Mng Port 664 */
4452 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4453 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4454 }
4455 }
4456
4457 /*
4458 * Give control back to hardware management
4459 * controller if there is one.
4460 */
4461 static void
4462 igb_release_manageability(struct adapter *adapter)
4463 {
4464 if (adapter->has_manage) {
4465 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4466
4467 /* re-enable hardware interception of ARP */
4468 manc |= E1000_MANC_ARP_EN;
4469 manc &= ~E1000_MANC_EN_MNG2HOST;
4470
4471 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4472 }
4473 }
4474
4475 /*
4476 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
4477 * For ASF and Pass Through versions of f/w this means that
4478 * the driver is loaded.
4479 *
4480 */
4481 static void
4482 igb_get_hw_control(struct adapter *adapter)
4483 {
4484 u32 ctrl_ext;
4485
4486 /* Let firmware know the driver has taken over */
4487 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4488 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4489 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4490 }
4491
4492 /*
4493 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
4494 * For ASF and Pass Through versions of f/w this means that the
4495 * driver is no longer loaded.
4496 *
4497 */
4498 static void
4499 igb_release_hw_control(struct adapter *adapter)
4500 {
4501 u32 ctrl_ext;
4502
4503 /* Let firmware taken over control of h/w */
4504 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4505 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4506 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4507 }
4508
4509 static int
4510 igb_is_valid_ether_addr(uint8_t *addr)
4511 {
4512 char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4513
4514 if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4515 return (FALSE);
4516 }
4517
4518 return (TRUE);
4519 }
4520
4521
4522 /*
4523 * Enable PCI Wake On Lan capability
4524 */
4525 void
4526 igb_enable_wakeup(device_t dev)
4527 {
4528 u16 cap, status;
4529 u8 id;
4530
4531 /* First find the capabilities pointer*/
4532 cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4533 /* Read the PM Capabilities */
4534 id = pci_read_config(dev, cap, 1);
4535 if (id != PCIY_PMG) /* Something wrong */
4536 return;
4537 /* OK, we have the power capabilities, so
4538 now get the status register */
4539 cap += PCIR_POWER_STATUS;
4540 status = pci_read_config(dev, cap, 2);
4541 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4542 pci_write_config(dev, cap, status, 2);
4543 return;
4544 }
4545
4546
4547 /**********************************************************************
4548 *
4549 * Update the board statistics counters.
4550 *
4551 **********************************************************************/
static void
igb_update_stats_counters(struct adapter *adapter)
{
	struct ifnet *ifp;

	/* Accumulate the hardware statistics registers into the soft copy */

	/* Symbol/sequence counters: copper media, or link up */
	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
	}
	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);

	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);

	/* For the 64-bit byte counters the low dword must be read first. */
	/* Both registers clear on the read of the high dword */
	/*
	 * NOTE(review): only the high dwords are read here (GORCH/GOTCH,
	 * and TORH/TOTH below), so the low 32 bits appear to be
	 * discarded -- confirm this is intended before relying on the
	 * byte counts.
	 */
	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCH);
	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCH);

	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);

	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);

	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);

	adapter->stats.algnerrc += 
		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
	adapter->stats.rxerrc += 
		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
	adapter->stats.tncrs += 
		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
	adapter->stats.cexterr += 
		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
	adapter->stats.tsctc += 
		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
	adapter->stats.tsctfc += 
		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
	ifp = adapter->ifp;

	/* Publish the aggregated counters to the interface */
	ifp->if_collisions = adapter->stats.colc;

	/* Rx Errors */
	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
	    adapter->stats.crcerrs + adapter->stats.algnerrc +
	    adapter->stats.ruc + adapter->stats.roc +
	    adapter->stats.mpc + adapter->stats.cexterr;

	/* Tx Errors */
	ifp->if_oerrors = adapter->stats.ecol +
	    adapter->stats.latecol + adapter->watchdog_events;
}
4640
4641
4642 /**********************************************************************
4643 *
4644 * This routine is called only when igb_display_debug_stats is enabled.
4645 * This routine provides a way to take a look at important statistics
4646 * maintained by the driver and hardware.
4647 *
4648 **********************************************************************/
static void
igb_print_debug_info(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	struct rx_ring *rxr = adapter->rx_rings;
	struct tx_ring *txr = adapter->tx_rings;
	uint8_t *hw_addr = adapter->hw.hw_addr;

	/* Global adapter state */
	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
	    E1000_READ_REG(&adapter->hw, E1000_CTRL),
	    E1000_READ_REG(&adapter->hw, E1000_RCTL));

#if (DEBUG_HW > 0)  /* Dont output these errors normally */
	device_printf(dev, "IMS = 0x%x EIMS = 0x%x \n",
	    E1000_READ_REG(&adapter->hw, E1000_IMS),
	    E1000_READ_REG(&adapter->hw, E1000_EIMS));
#endif

	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
	    ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),\
	    (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff) );
	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
	    adapter->hw.fc.high_water,
	    adapter->hw.fc.low_water);

	/* Per-queue transmit state */
	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		device_printf(dev, "Queue(%d) tdh = %d, tdt = %d\n", i,
		    E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
		    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
		device_printf(dev, "TX(%d) no descriptors avail event = %lld\n",
		    txr->me, (long long)txr->no_desc_avail);
		device_printf(dev, "TX(%d) MSIX IRQ Handled = %lld\n", txr->me,
		    (long long)txr->tx_irq);
		device_printf(dev, "TX(%d) Packets sent = %lld\n", txr->me,
		    (long long)txr->tx_packets);
	}

	/* Per-queue receive state, including LRO activity */
	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
		struct lro_ctrl *lro = &rxr->lro;
		device_printf(dev, "Queue(%d) rdh = %d, rdt = %d\n", i,
		    E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
		    E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
		device_printf(dev, "RX(%d) Packets received = %lld\n", rxr->me,
		    (long long)rxr->rx_packets);
		device_printf(dev, "RX(%d) Split Packets = %lld\n", rxr->me,
		    (long long)rxr->rx_split_packets);
		device_printf(dev, "RX(%d) Byte count = %lld\n", rxr->me,
		    (long long)rxr->rx_bytes);
		device_printf(dev, "RX(%d) MSIX IRQ Handled = %lld\n", rxr->me,
		    (long long)rxr->rx_irq);
		device_printf(dev,"RX(%d) LRO Queued= %d\n",
		    rxr->me, lro->lro_queued);
		device_printf(dev,"RX(%d) LRO Flushed= %d\n",
		    rxr->me, lro->lro_flushed);
	}

	device_printf(dev, "LINK MSIX IRQ Handled = %u\n", adapter->link_irq);

	/* Driver-level soft error counters */
	device_printf(dev, "Mbuf defrag failed = %ld\n",
	    adapter->mbuf_defrag_failed);
	device_printf(dev, "Std mbuf header failed = %ld\n",
	    adapter->mbuf_header_failed);
	device_printf(dev, "Std mbuf packet failed = %ld\n",
	    adapter->mbuf_packet_failed);
	device_printf(dev, "Driver dropped packets = %ld\n",
	    adapter->dropped_pkts);
	device_printf(dev, "Driver tx dma failure in xmit = %ld\n",
	    adapter->no_tx_dma_setup);
}
4719
/* Dump the accumulated MAC statistics counters to the console */
static void
igb_print_hw_stats(struct adapter *adapter)
{
	device_t dev = adapter->dev;

	device_printf(dev, "Excessive collisions = %lld\n",
	    (long long)adapter->stats.ecol);
#if (DEBUG_HW > 0)  /* Dont output these errors normally */
	device_printf(dev, "Symbol errors = %lld\n",
	    (long long)adapter->stats.symerrs);
#endif
	device_printf(dev, "Sequence errors = %lld\n",
	    (long long)adapter->stats.sec);
	device_printf(dev, "Defer count = %lld\n",
	    (long long)adapter->stats.dc);
	device_printf(dev, "Missed Packets = %lld\n",
	    (long long)adapter->stats.mpc);
	device_printf(dev, "Receive No Buffers = %lld\n",
	    (long long)adapter->stats.rnbc);
	/* RLEC is inaccurate on some hardware, calculate our own. */
	device_printf(dev, "Receive Length Errors = %lld\n",
	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
	device_printf(dev, "Receive errors = %lld\n",
	    (long long)adapter->stats.rxerrc);
	device_printf(dev, "Crc errors = %lld\n",
	    (long long)adapter->stats.crcerrs);
	device_printf(dev, "Alignment errors = %lld\n",
	    (long long)adapter->stats.algnerrc);
	/* On 82575 these are collision counts */
	device_printf(dev, "Collision/Carrier extension errors = %lld\n",
	    (long long)adapter->stats.cexterr);
	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
	device_printf(dev, "watchdog timeouts = %ld\n",
	    adapter->watchdog_events);
	/* Flow control frame counters */
	device_printf(dev, "XON Rcvd = %lld\n",
	    (long long)adapter->stats.xonrxc);
	device_printf(dev, "XON Xmtd = %lld\n",
	    (long long)adapter->stats.xontxc);
	device_printf(dev, "XOFF Rcvd = %lld\n",
	    (long long)adapter->stats.xoffrxc);
	device_printf(dev, "XOFF Xmtd = %lld\n",
	    (long long)adapter->stats.xofftxc);
	device_printf(dev, "Good Packets Rcvd = %lld\n",
	    (long long)adapter->stats.gprc);
	device_printf(dev, "Good Packets Xmtd = %lld\n",
	    (long long)adapter->stats.gptc);
	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
	    (long long)adapter->stats.tsctc);
	device_printf(dev, "TSO Contexts Failed = %lld\n",
	    (long long)adapter->stats.tsctfc);
}
4771
4772 /**********************************************************************
4773 *
4774 * This routine provides a way to dump out the adapter eeprom,
4775 * often a useful debug/service tool. This only dumps the first
4776 * 32 words, stuff that matters is in that extent.
4777 *
4778 **********************************************************************/
4779 static void
4780 igb_print_nvm_info(struct adapter *adapter)
4781 {
4782 u16 eeprom_data;
4783 int i, j, row = 0;
4784
4785 /* Its a bit crude, but it gets the job done */
4786 printf("\nInterface EEPROM Dump:\n");
4787 printf("Offset\n0x0000 ");
4788 for (i = 0, j = 0; i < 32; i++, j++) {
4789 if (j == 8) { /* Make the offset block */
4790 j = 0; ++row;
4791 printf("\n0x00%x0 ",row);
4792 }
4793 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
4794 printf("%04x ", eeprom_data);
4795 }
4796 printf("\n");
4797 }
4798
4799 static int
4800 igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4801 {
4802 struct adapter *adapter;
4803 int error;
4804 int result;
4805
4806 result = -1;
4807 error = sysctl_handle_int(oidp, &result, 0, req);
4808
4809 if (error || !req->newptr)
4810 return (error);
4811
4812 if (result == 1) {
4813 adapter = (struct adapter *)arg1;
4814 igb_print_debug_info(adapter);
4815 }
4816 /*
4817 * This value will cause a hex dump of the
4818 * first 32 16-bit words of the EEPROM to
4819 * the screen.
4820 */
4821 if (result == 2) {
4822 adapter = (struct adapter *)arg1;
4823 igb_print_nvm_info(adapter);
4824 }
4825
4826 return (error);
4827 }
4828
4829
4830 static int
4831 igb_sysctl_stats(SYSCTL_HANDLER_ARGS)
4832 {
4833 struct adapter *adapter;
4834 int error;
4835 int result;
4836
4837 result = -1;
4838 error = sysctl_handle_int(oidp, &result, 0, req);
4839
4840 if (error || !req->newptr)
4841 return (error);
4842
4843 if (result == 1) {
4844 adapter = (struct adapter *)arg1;
4845 igb_print_hw_stats(adapter);
4846 }
4847
4848 return (error);
4849 }
4850
/* Seed *limit with value and expose it as a read/write sysctl node */
static void
igb_add_rx_process_limit(struct adapter *adapter, const char *name,
	const char *description, int *limit, int value)
{
	*limit = value;
	/*
	 * NOTE(review): SYSCTL_ADD_INT already implies CTLTYPE_INT;
	 * passing it again in the flags looks redundant -- confirm
	 * against sysctl(9) before cleaning up.
	 */
	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
}
4860
4861 #ifdef IGB_IEEE1588
4862 /*
4863 ** igb_hwtstamp_ioctl - control hardware time stamping
4864 **
4865 ** Outgoing time stamping can be enabled and disabled. Play nice and
4866 ** disable it when requested, although it shouldn't case any overhead
4867 ** when no packet needs it. At most one packet in the queue may be
4868 ** marked for time stamping, otherwise it would be impossible to tell
4869 ** for sure to which packet the hardware time stamp belongs.
4870 **
4871 ** Incoming time stamping has to be configured via the hardware
4872 ** filters. Not all combinations are supported, in particular event
4873 ** type has to be specified. Matching the kind of event packet is
4874 ** not supported, with the exception of "all V2 events regardless of
4875 ** level 2 or 4".
4876 **
4877 */
4878 static int
4879 igb_hwtstamp_ioctl(struct adapter *adapter, struct ifreq *ifr)
4880 {
4881 struct e1000_hw *hw = &adapter->hw;
4882 struct hwtstamp_ctrl *config;
4883 u32 tsync_tx_ctl_bit = E1000_TSYNCTXCTL_ENABLED;
4884 u32 tsync_rx_ctl_bit = E1000_TSYNCRXCTL_ENABLED;
4885 u32 tsync_rx_ctl_type = 0;
4886 u32 tsync_rx_cfg = 0;
4887 int is_l4 = 0;
4888 int is_l2 = 0;
4889 u16 port = 319; /* PTP */
4890 u32 regval;
4891
4892 config = (struct hwtstamp_ctrl *) ifr->ifr_data;
4893
4894 /* reserved for future extensions */
4895 if (config->flags)
4896 return (EINVAL);
4897
4898 switch (config->tx_type) {
4899 case HWTSTAMP_TX_OFF:
4900 tsync_tx_ctl_bit = 0;
4901 break;
4902 case HWTSTAMP_TX_ON:
4903 tsync_tx_ctl_bit = E1000_TSYNCTXCTL_ENABLED;
4904 break;
4905 default:
4906 return (ERANGE);
4907 }
4908
4909 switch (config->rx_filter) {
4910 case HWTSTAMP_FILTER_NONE:
4911 tsync_rx_ctl_bit = 0;
4912 break;
4913 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
4914 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
4915 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
4916 case HWTSTAMP_FILTER_ALL:
4917 /*
4918 * register TSYNCRXCFG must be set, therefore it is not
4919 * possible to time stamp both Sync and Delay_Req messages
4920 * => fall back to time stamping all packets
4921 */
4922 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_ALL;
4923 config->rx_filter = HWTSTAMP_FILTER_ALL;
4924 break;
4925 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
4926 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L4_V1;
4927 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
4928 is_l4 = 1;
4929 break;
4930 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
4931 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L4_V1;
4932 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
4933 is_l4 = 1;
4934 break;
4935 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
4936 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
4937 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
4938 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
4939 is_l2 = 1;
4940 is_l4 = 1;
4941 config->rx_filter = HWTSTAMP_FILTER_SOME;
4942 break;
4943 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
4944 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
4945 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
4946 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
4947 is_l2 = 1;
4948 is_l4 = 1;
4949 config->rx_filter = HWTSTAMP_FILTER_SOME;
4950 break;
4951 case HWTSTAMP_FILTER_PTP_V2_EVENT:
4952 case HWTSTAMP_FILTER_PTP_V2_SYNC:
4953 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
4954 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_EVENT_V2;
4955 config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
4956 is_l2 = 1;
4957 break;
4958 default:
4959 return -ERANGE;
4960 }
4961
4962 /* enable/disable TX */
4963 regval = E1000_READ_REG(hw, E1000_TSYNCTXCTL);
4964 regval = (regval & ~E1000_TSYNCTXCTL_ENABLED) | tsync_tx_ctl_bit;
4965 E1000_WRITE_REG(hw, E1000_TSYNCTXCTL, regval);
4966
4967 /* enable/disable RX, define which PTP packets are time stamped */
4968 regval = E1000_READ_REG(hw, E1000_TSYNCRXCTL);
4969 regval = (regval & ~E1000_TSYNCRXCTL_ENABLED) | tsync_rx_ctl_bit;
4970 regval = (regval & ~0xE) | tsync_rx_ctl_type;
4971 E1000_WRITE_REG(hw, E1000_TSYNCRXCTL, regval);
4972 E1000_WRITE_REG(hw, E1000_TSYNCRXCFG, tsync_rx_cfg);
4973
4974 /*
4975 * Ethertype Filter Queue Filter[0][15:0] = 0x88F7
4976 * (Ethertype to filter on)
4977 * Ethertype Filter Queue Filter[0][26] = 0x1 (Enable filter)
4978 * Ethertype Filter Queue Filter[0][30] = 0x1 (Enable Timestamping)
4979 */
4980 E1000_WRITE_REG(hw, E1000_ETQF0, is_l2 ? 0x440088f7 : 0);
4981
4982 /* L4 Queue Filter[0]: only filter by source and destination port */
4983 E1000_WRITE_REG(hw, E1000_SPQF0, htons(port));
4984 E1000_WRITE_REG(hw, E1000_IMIREXT(0), is_l4 ?
4985 ((1<<12) | (1<<19) /* bypass size and control flags */) : 0);
4986 E1000_WRITE_REG(hw, E1000_IMIR(0), is_l4 ?
4987 (htons(port)
4988 | (0<<16) /* immediate interrupt disabled */
4989 | 0 /* (1<<17) bit cleared: do not bypass
4990 destination port check */)
4991 : 0);
4992 E1000_WRITE_REG(hw, E1000_FTQF0, is_l4 ?
4993 (0x11 /* UDP */
4994 | (1<<15) /* VF not compared */
4995 | (1<<27) /* Enable Timestamping */
4996 | (7<<28) /* only source port filter enabled,
4997 source/target address and protocol
4998 masked */)
4999 : ((1<<15) | (15<<28) /* all mask bits set = filter not
5000 enabled */));
5001
5002 wrfl();
5003
5004 adapter->hwtstamp_ctrl = config;
5005
5006 /* clear TX/RX time stamp registers, just to be sure */
5007 regval = E1000_READ_REG(hw, E1000_TXSTMPH);
5008 regval = E1000_READ_REG(hw, E1000_RXSTMPH);
5009
5010 return (error);
5011 }
5012
5013 /*
5014 ** igb_read_clock - read raw cycle counter (to be used by time counter)
5015 */
static cycle_t igb_read_clock(const struct cyclecounter *tc)
{
	/*
	 * NOTE(review): this Linux-derived code refers to
	 * "struct igb_adapter" while the rest of this driver uses
	 * "struct adapter" -- it will not compile as-is; confirm the
	 * type (and the "cycles" member) before enabling IGB_IEEE1588.
	 */
	struct igb_adapter *adapter =
	    container_of(tc, struct igb_adapter, cycles);
	struct e1000_hw *hw = &adapter->hw;
	u64 stamp;

	/* Assemble the 64-bit SYSTIM value, low dword read first */
	stamp = E1000_READ_REG(hw, E1000_SYSTIML);
	stamp |= (u64)E1000_READ_REG(hw, E1000_SYSTIMH) << 32ULL;

	return (stamp);
}
5028
5029 #endif /* IGB_IEEE1588 */
Cache object: 52215326180db0a3d26fbccb084ac3c0
|