FreeBSD/Linux Kernel Cross Reference
sys/dev/e1000/if_igb.c
/******************************************************************************

  Copyright (c) 2001-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/


#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_altq.h"
#endif

#include "if_igb.h"

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "2.5.3-k";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices to attach to
 *  Last field stores an index into igb_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_COPPER, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_FIBER_SERDES, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575GB_QUAD_COPPER, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS_SERDES, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_FIBER, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES_QUAD, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER_ET2, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_VF, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_FIBER, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SERDES, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SGMII, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER_DUAL, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_QUAD_FIBER, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SERDES, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SGMII, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SFP, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_BACKPLANE, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_COPPER, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_FIBER, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SERDES, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SGMII, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_VF, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_IT, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_OEM1, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_FLASHLESS, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES_FLASHLESS, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_FIBER, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SGMII, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I211_COPPER, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_1GBPS, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_SGMII, 0, 0, 0},
	/* required last entry */
	{0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	igb_probe(device_t);
static int	igb_attach(device_t);
static int	igb_detach(device_t);
static int	igb_shutdown(device_t);
static int	igb_suspend(device_t);
static int	igb_resume(device_t);
#ifndef IGB_LEGACY_TX
static int	igb_mq_start(struct ifnet *, struct mbuf *);
static int	igb_mq_start_locked(struct ifnet *, struct tx_ring *);
static void	igb_qflush(struct ifnet *);
static void	igb_deferred_mq_start(void *, int);
#else
static void	igb_start(struct ifnet *);
static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#endif
static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
static void	igb_init(void *);
static void	igb_init_locked(struct adapter *);
static void	igb_stop(void *);
static void	igb_media_status(struct ifnet *, struct ifmediareq *);
static int	igb_media_change(struct ifnet *);
static void	igb_identify_hardware(struct adapter *);
static int	igb_allocate_pci_resources(struct adapter *);
static int	igb_allocate_msix(struct adapter *);
static int	igb_allocate_legacy(struct adapter *);
static int	igb_setup_msix(struct adapter *);
static void	igb_free_pci_resources(struct adapter *);
static void	igb_local_timer(void *);
static void	igb_reset(struct adapter *);
static int	igb_setup_interface(device_t, struct adapter *);
static int	igb_allocate_queues(struct adapter *);
static void	igb_configure_queues(struct adapter *);

static int	igb_allocate_transmit_buffers(struct tx_ring *);
static void	igb_setup_transmit_structures(struct adapter *);
static void	igb_setup_transmit_ring(struct tx_ring *);
static void	igb_initialize_transmit_units(struct adapter *);
static void	igb_free_transmit_structures(struct adapter *);
static void	igb_free_transmit_buffers(struct tx_ring *);

static int	igb_allocate_receive_buffers(struct rx_ring *);
static int	igb_setup_receive_structures(struct adapter *);
static int	igb_setup_receive_ring(struct rx_ring *);
static void	igb_initialize_receive_units(struct adapter *);
static void	igb_free_receive_structures(struct adapter *);
static void	igb_free_receive_buffers(struct rx_ring *);
static void	igb_free_receive_ring(struct rx_ring *);

static void	igb_enable_intr(struct adapter *);
static void	igb_disable_intr(struct adapter *);
static void	igb_update_stats_counters(struct adapter *);
static bool	igb_txeof(struct tx_ring *);

static __inline	void igb_rx_discard(struct rx_ring *, int);
static __inline void igb_rx_input(struct rx_ring *,
		    struct ifnet *, struct mbuf *, u32);

static bool	igb_rxeof(struct igb_queue *, int, int *);
static void	igb_rx_checksum(u32, struct mbuf *, u32);
static int	igb_tx_ctx_setup(struct tx_ring *,
		    struct mbuf *, u32 *, u32 *);
static int	igb_tso_setup(struct tx_ring *,
		    struct mbuf *, u32 *, u32 *);
static void	igb_set_promisc(struct adapter *);
static void	igb_disable_promisc(struct adapter *);
static void	igb_set_multi(struct adapter *);
static void	igb_update_link_status(struct adapter *);
static void	igb_refresh_mbufs(struct rx_ring *, int);

static void	igb_register_vlan(void *, struct ifnet *, u16);
static void	igb_unregister_vlan(void *, struct ifnet *, u16);
static void	igb_setup_vlan_hw_support(struct adapter *);

static int	igb_xmit(struct tx_ring *, struct mbuf **);
static int	igb_dma_malloc(struct adapter *, bus_size_t,
		    struct igb_dma_alloc *, int);
static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static int	igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	igb_print_nvm_info(struct adapter *);
static int	igb_is_valid_ether_addr(u8 *);
static void	igb_add_hw_stats(struct adapter *);

static void	igb_vf_init_stats(struct adapter *);
static void	igb_update_vf_stats_counters(struct adapter *);

/* Management and WOL Support */
static void	igb_init_manageability(struct adapter *);
static void	igb_release_manageability(struct adapter *);
static void	igb_get_hw_control(struct adapter *);
static void	igb_release_hw_control(struct adapter *);
static void	igb_enable_wakeup(device_t);
static void	igb_led_func(void *, int);

static int	igb_irq_fast(void *);
static void	igb_msix_que(void *);
static void	igb_msix_link(void *);
static void	igb_handle_que(void *context, int pending);
static void	igb_handle_link(void *context, int pending);
static void	igb_handle_link_locked(struct adapter *);

static void	igb_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_eee(SYSCTL_HANDLER_ARGS);

#ifdef DEVICE_POLLING
static poll_handler_t igb_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, igb_probe),
	DEVMETHOD(device_attach, igb_attach),
	DEVMETHOD(device_detach, igb_detach),
	DEVMETHOD(device_shutdown, igb_shutdown),
	DEVMETHOD(device_suspend, igb_suspend),
	DEVMETHOD(device_resume, igb_resume),
	DEVMETHOD_END
};

static driver_t igb_driver = {
	"igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);
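
/*
 * Usage note (editor's sketch): built as a module this driver is
 * if_igb.ko, so it can be loaded at runtime with `kldload if_igb` or
 * at boot by adding
 *
 *     if_igb_load="YES"
 *
 * to /boot/loader.conf. This is the standard FreeBSD mechanism, not
 * anything specific to this file.
 */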

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");

/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);
SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
    "Number of transmit descriptors per queue");

/*
** AIM: Adaptive Interrupt Moderation.
** The interrupt rate is varied over time
** based on the traffic seen by that
** interrupt vector.
*/
static int igb_enable_aim = TRUE;
TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0,
    "Enable adaptive interrupt moderation");

/*
 * MSIX should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int igb_enable_msix = 1;
TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
    "Enable MSI-X interrupts");

/*
** Tunable interrupt rate
*/
static int igb_max_interrupt_rate = 8000;
TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");

#ifndef IGB_LEGACY_TX
/*
** Tunable number of buffers in the buf-ring (drbr_xxx)
*/
static int igb_buf_ring_size = IGB_BR_SIZE;
TUNABLE_INT("hw.igb.buf_ring_size", &igb_buf_ring_size);
SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
    &igb_buf_ring_size, 0, "Size of the bufring");
#endif

/*
** Header split causes the packet header to
** be dma'd to a separate mbuf from the payload.
** This can have memory alignment benefits, and
** small packets often fit entirely in the header
** mbuf and so use no cluster at all. How much it
** helps is very workload dependent.
*/
static int igb_header_split = FALSE;
TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
    "Enable receive mbuf header split");

/*
** This will autoconfigure based on the
** number of CPUs and max supported
** MSIX messages if left at 0.
*/
static int igb_num_queues = 0;
TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
    "Number of queues to configure, 0 indicates autoconfigure");

/*
** Global variable to store last used CPU when binding queues
** to CPUs in igb_allocate_msix. Starts at CPU_FIRST and increments when a
** queue is bound to a CPU.
*/
static int igb_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &igb_rx_process_limit, 0,
    "Maximum number of received packets to process at a time, -1 means unlimited");

/* How many packets txeof tries to clean at a time */
static int igb_tx_process_limit = -1;
TUNABLE_INT("hw.igb.tx_process_limit", &igb_tx_process_limit);
SYSCTL_INT(_hw_igb, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN,
    &igb_tx_process_limit, 0,
    "Maximum number of sent packets to process at a time, -1 means unlimited");

#ifdef DEV_NETMAP	/* see ixgbe.c for details */
#include <dev/netmap/if_igb_netmap.h>
#endif /* DEV_NETMAP */
/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines whether the driver should be loaded on an
 *  adapter, based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
igb_probe(device_t dev)
{
	char		adapter_name[256];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	igb_vendor_info_t *ent;

	INIT_DEBUGOUT("igb_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != IGB_INTEL_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = igb_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == 0)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == 0))) {
			sprintf(adapter_name, "%s, Version - %s",
				igb_strings[ent->index],
				igb_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}
	return (ENXIO);
}
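
/*
 * Worked example (editor's sketch): an I350 copper port reports PCI
 * vendor 0x8086 (Intel) plus the device ID E1000_DEV_ID_I350_COPPER.
 * Its table entry carries 0 for subvendor/subdevice, which the loop
 * above treats as wildcards, so the probe matches regardless of the
 * board's subsystem IDs and the device is described as
 * "Intel(R) PRO/1000 Network Connection, Version - 2.5.3-k".
 */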

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
	struct adapter	*adapter;
	int		error = 0;
	u16		eeprom_data;

	INIT_DEBUGOUT("igb_attach: begin");

	if (resource_disabled("igb", device_get_unit(dev))) {
		device_printf(dev, "Disabled by device hint\n");
		return (ENXIO);
	}

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTLs */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_nvm_info, "I", "NVM Information");

	igb_set_sysctl_value(adapter, "enable_aim",
	    "Interrupt Moderation", &adapter->enable_aim,
	    igb_enable_aim);

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, igb_set_flowcntl, "I", "Flow Control");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	igb_identify_hardware(adapter);

	/* Setup PCI resources */
	if (igb_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Sysctls for limiting the amount of work done in the taskqueues */
	igb_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process",
	    &adapter->rx_process_limit, igb_rx_process_limit);

	igb_set_sysctl_value(adapter, "tx_processing_limit",
	    "max number of tx packets to process",
	    &adapter->tx_process_limit, igb_tx_process_limit);

	/*
	 * Validate number of transmit and receive descriptors. It
	 * must not exceed hardware maximum, and the ring size in
	 * bytes must be a multiple of IGB_DBA_ALIGN.
	 */
	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    IGB_DEFAULT_TXD, igb_txd);
		adapter->num_tx_desc = IGB_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = igb_txd;
	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    IGB_DEFAULT_RXD, igb_rxd);
		adapter->num_rx_desc = IGB_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = igb_rxd;
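
	/*
	 * Worked example (editor's sketch): an e1000_tx_desc is 16 bytes,
	 * so with a 128-byte IGB_DBA_ALIGN the descriptor count must be a
	 * multiple of 8 (8 * 16 == 128). hw.igb.txd=500 would make a
	 * 8000-byte ring that is not 128-byte aligned and falls back to
	 * IGB_DEFAULT_TXD, while power-of-two counts such as 1024 or 2048
	 * always pass the alignment test.
	 */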

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard Ethernet-sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

	/*
	** Allocate and Setup Queues
	*/
	if (igb_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate the appropriate stats memory */
	if (adapter->vf_ifp) {
		adapter->stats = (struct e1000_vf_stats *)malloc(
		    sizeof(struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
		igb_vf_init_stats(adapter);
	} else
		adapter->stats = (struct e1000_hw_stats *)malloc(
		    sizeof(struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (adapter->stats == NULL) {
		device_printf(dev, "Can not allocate stats memory\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Some adapter-specific advanced features */
	if (adapter->hw.mac.type >= e1000_i350) {
		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
		    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
		    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
		    OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
		    adapter, 0, igb_sysctl_eee, "I",
		    "Disable Energy Efficient Ethernet");
		if (adapter->hw.phy.media_type == e1000_media_type_copper) {
			if (adapter->hw.mac.type == e1000_i354)
				e1000_set_eee_i354(&adapter->hw, TRUE, TRUE);
			else
				e1000_set_eee_i350(&adapter->hw, TRUE, TRUE);
		}
	}
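
	/*
	 * Example (editor's sketch): the two per-device sysctls created
	 * above hang off the device's sysctl tree, so on the first igb
	 * interface they can be exercised at runtime with e.g.
	 *
	 *     # sysctl dev.igb.0.eee_disabled=1
	 *     # sysctl dev.igb.0.dmac=1000
	 *
	 * The dmac value shown is illustrative only; see igb_sysctl_dmac
	 * for the values it actually accepts.
	 */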

	/*
	** Start from a known state; this is
	** important for reading the NVM and
	** MAC address from it.
	*/
	e1000_reset_hw(&adapter->hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (((adapter->hw.mac.type != e1000_i210) &&
	    (adapter->hw.mac.type != e1000_i211)) &&
	    (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again.
		** If it fails a second time, it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/*
	** Copy the permanent MAC address out of the EEPROM
	*/
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}
	/* Check its sanity */
	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/* Setup OS specific network interface */
	if (igb_setup_interface(dev, adapter) != 0)
		goto err_late;

	/* Now get a good starting state */
	igb_reset(adapter);

	/* Initialize statistics */
	igb_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	igb_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Determine if we have to control management hardware */
	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

	/*
	 * Setup Wake-on-Lan
	 */
	/* APME bit in EEPROM is mapped to WUC.APME */
	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
	if (eeprom_data)
		adapter->wol = E1000_WUFC_MAG;

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	igb_add_hw_stats(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;

	adapter->led_dev = led_create(igb_led_func, adapter,
	    device_get_nameunit(dev));

	/*
	** Configure Interrupts
	*/
	if ((adapter->msix > 1) && (igb_enable_msix))
		error = igb_allocate_msix(adapter);
	else /* MSI or Legacy */
		error = igb_allocate_legacy(adapter);
	if (error)
		goto err_late;

#ifdef DEV_NETMAP
	igb_netmap_attach(adapter);
#endif /* DEV_NETMAP */
	INIT_DEBUGOUT("igb_attach: end");

	return (0);

err_late:
	if (igb_detach(dev) == 0) /* igb_detach() already did the cleanup */
		return(error);
	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);
	igb_release_hw_control(adapter);
err_pci:
	igb_free_pci_resources(adapter);
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
	free(adapter->mta, M_DEVBUF);
	IGB_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("igb_detach: begin");

	/* Make sure VLANs are not using the driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}

	ether_ifdetach(adapter->ifp);

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	IGB_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	igb_stop(adapter);
	IGB_CORE_UNLOCK(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	/* Give control back to firmware */
	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
	netmap_detach(adapter->ifp);
#endif /* DEV_NETMAP */
	igb_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);
	if (adapter->mta != NULL)
		free(adapter->mta, M_DEVBUF);

	IGB_CORE_LOCK_DESTROY(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
	return igb_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	IGB_CORE_LOCK(adapter);

	igb_stop(adapter);

	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);
	igb_enable_wakeup(dev);

	IGB_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct tx_ring	*txr = adapter->tx_rings;
	struct ifnet *ifp = adapter->ifp;

	IGB_CORE_LOCK(adapter);
	igb_init_locked(adapter);
	igb_init_manageability(adapter);

	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			IGB_TX_LOCK(txr);
#ifndef IGB_LEGACY_TX
			/* Process the stack queue only if not depleted */
			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
			    !drbr_empty(ifp, txr->br))
				igb_mq_start_locked(ifp, txr);
#else
			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
				igb_start_locked(txr, ifp);
#endif
			IGB_TX_UNLOCK(txr);
		}
	}
	IGB_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


#ifdef IGB_LEGACY_TX

/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified
 *  and the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	IGB_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
		igb_txeof(txr);

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		if (txr->tx_avail <= IGB_MAX_SCATTER) {
			txr->queue_status |= IGB_QUEUE_DEPLETED;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 * Encapsulation can modify our pointer, and/or make it
		 * NULL on failure. In that event, we can't requeue.
		 */
		if (igb_xmit(txr, &m_head)) {
			if (m_head != NULL)
				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			if (txr->tx_avail <= IGB_MAX_SCATTER)
				txr->queue_status |= IGB_QUEUE_DEPLETED;
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set watchdog on */
		txr->watchdog_time = ticks;
		txr->queue_status |= IGB_QUEUE_WORKING;
	}
}

/*
 * Legacy TX driver routine, called from the
 * stack. It always uses tx_rings[0] and spins
 * for its lock; it should not be used with
 * multiqueue TX.
 */
static void
igb_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		IGB_TX_LOCK(txr);
		igb_start_locked(txr, ifp);
		IGB_TX_UNLOCK(txr);
	}
	return;
}

#else /* ~IGB_LEGACY_TX */

/*
** Multiqueue Transmit Entry:
**  quick turnaround to the stack
**
*/
static int
igb_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter		*adapter = ifp->if_softc;
	struct igb_queue	*que;
	struct tx_ring		*txr;
	int			i, err = 0;

	/* Which queue to use */
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
		i = m->m_pkthdr.flowid % adapter->num_queues;
	else
		i = curcpu % adapter->num_queues;
	txr = &adapter->tx_rings[i];
	que = &adapter->queues[i];

	err = drbr_enqueue(ifp, txr->br, m);
	if (err)
		return (err);
	if (IGB_TX_TRYLOCK(txr)) {
		igb_mq_start_locked(ifp, txr);
		IGB_TX_UNLOCK(txr);
	} else
		taskqueue_enqueue(que->tq, &txr->txq_task);

	return (0);
}
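
/*
 * Worked example (editor's sketch): with adapter->num_queues == 4, a
 * flow whose mbufs carry flowid 710 is always enqueued on ring
 * 710 % 4 == 2, so packets of one flow stay on one ring and keep their
 * ordering. Mbufs without a flow hash fall back to curcpu % 4 instead.
 */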

static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter  *adapter = txr->adapter;
	struct mbuf     *next;
	int             err = 0, enq = 0;

	IGB_TX_LOCK_ASSERT(txr);

	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
	    adapter->link_active == 0)
		return (ENETDOWN);

	/* Process the queue */
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = igb_xmit(txr, &next)) != 0) {
			if (next == NULL) {
				/* It was freed, move forward */
				drbr_advance(ifp, txr->br);
			} else {
				/*
				 * Still have one left, it may not be
				 * the same since the transmit function
				 * may have changed it.
				 */
				drbr_putback(ifp, txr->br, next);
			}
			break;
		}
		drbr_advance(ifp, txr->br);
		enq++;
		ifp->if_obytes += next->m_pkthdr.len;
		if (next->m_flags & M_MCAST)
			ifp->if_omcasts++;
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
	}
	if (enq > 0) {
		/* Set the watchdog */
		txr->queue_status |= IGB_QUEUE_WORKING;
		txr->watchdog_time = ticks;
	}
	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
		igb_txeof(txr);
	if (txr->tx_avail <= IGB_MAX_SCATTER)
		txr->queue_status |= IGB_QUEUE_DEPLETED;
	return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
 */
static void
igb_deferred_mq_start(void *arg, int pending)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	struct ifnet *ifp = adapter->ifp;

	IGB_TX_LOCK(txr);
	if (!drbr_empty(ifp, txr->br))
		igb_mq_start_locked(ifp, txr);
	IGB_TX_UNLOCK(txr);
}

/*
** Flush all ring buffers
*/
static void
igb_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		IGB_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		IGB_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#endif /* ~IGB_LEGACY_TX */

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	struct ifaddr	*ifa = (struct ifaddr *)data;
#endif
	bool		avoid_reset = FALSE;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
				igb_init(adapter);
#ifdef INET
			if (!(ifp->if_flags & IFF_NOARP))
				arp_ifinit(ifp, ifa);
#endif
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		IGB_CORE_LOCK(adapter);
		max_frame_size = 9234;
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			IGB_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
			igb_init_locked(adapter);
		IGB_CORE_UNLOCK(adapter);
		break;
	    }
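	/*
	 * Worked example (editor's sketch): with ETHER_HDR_LEN == 14 and
	 * ETHER_CRC_LEN == 4, the 9234-byte frame ceiling above allows an
	 * MTU of up to 9234 - 14 - 4 == 9216, so `ifconfig igb0 mtu 9000`
	 * succeeds while `ifconfig igb0 mtu 9300` returns EINVAL.
	 */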
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		IGB_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					igb_disable_promisc(adapter);
					igb_set_promisc(adapter);
				}
			} else
				igb_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				igb_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		IGB_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			IGB_CORE_LOCK(adapter);
			igb_disable_intr(adapter);
			igb_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				igb_enable_intr(adapter);
			IGB_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		IGB_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			IGB_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		IGB_CORE_UNLOCK(adapter);
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(igb_poll, ifp);
				if (error)
					return (error);
				IGB_CORE_LOCK(adapter);
				igb_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				IGB_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				IGB_CORE_LOCK(adapter);
				igb_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				IGB_CORE_UNLOCK(adapter);
			}
		}
#endif
#if __FreeBSD_version >= 1000000
		/* HW cannot turn these on/off separately */
		if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
			ifp->if_capenable ^= IFCAP_RXCSUM;
			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
			reinit = 1;
		}
		if (mask & IFCAP_TXCSUM) {
			ifp->if_capenable ^= IFCAP_TXCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TXCSUM_IPV6) {
			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
			reinit = 1;
		}
#else
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
#endif
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_TSO6) {
			ifp->if_capenable ^= IFCAP_TSO6;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTSO) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
			reinit = 1;
		}
		if (mask & IFCAP_LRO) {
			ifp->if_capenable ^= IFCAP_LRO;
			reinit = 1;
		}
		if (mask & IFCAP_WOL) {
			if (mask & IFCAP_WOL_MAGIC)
				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
			if (mask & IFCAP_WOL_MCAST)
				ifp->if_capenable ^= IFCAP_WOL_MCAST;
			if (mask & IFCAP_WOL_UCAST)
				ifp->if_capenable ^= IFCAP_WOL_UCAST;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			igb_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as the
 *  init entry point in the network interface structure. It is also
 *  used by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("igb_init: begin");

	IGB_CORE_LOCK_ASSERT(adapter);

	igb_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	igb_reset(adapter);
	igb_update_link_status(adapter);

	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM) {
#if __FreeBSD_version >= 1000000
		ifp->if_hwassist |= (CSUM_IP_TCP | CSUM_IP_UDP);
		if (adapter->hw.mac.type != e1000_82575)
			ifp->if_hwassist |= CSUM_IP_SCTP;
#else
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
		if (adapter->hw.mac.type != e1000_82575)
			ifp->if_hwassist |= CSUM_SCTP;
#endif
#endif
	}

#if __FreeBSD_version >= 1000000
	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) {
		ifp->if_hwassist |= (CSUM_IP6_TCP | CSUM_IP6_UDP);
		if (adapter->hw.mac.type != e1000_82575)
			ifp->if_hwassist |= CSUM_IP6_SCTP;
	}
#endif
	if (ifp->if_capenable & IFCAP_TSO)
		ifp->if_hwassist |= CSUM_TSO;

	/* Clear bad data from Rx FIFOs */
	e1000_rx_fifo_flush_82575(&adapter->hw);

	/* Configure for OS presence */
	igb_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	igb_setup_transmit_structures(adapter);
	igb_initialize_transmit_units(adapter);

	/* Setup Multicast table */
	igb_set_multi(adapter);

	/*
	** Figure out the desired mbuf pool
	** for doing jumbo/packetsplit
	*/
	if (adapter->max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
#ifndef CONTIGMALLOC_WORKS
	else
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
#else
	else if (adapter->max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;
#endif
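
	/*
	 * Worked example (editor's sketch): at the default 1500-byte MTU,
	 * max_frame_size is 1518, so 2KB MCLBYTES clusters are used; a
	 * 4000-byte MTU selects page-sized MJUMPAGESIZE clusters, and a
	 * 9000-byte MTU selects 9KB MJUM9BYTES clusters on builds where
	 * CONTIGMALLOC_WORKS is defined.
	 */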

	/* Prepare receive descriptors and buffers */
	if (igb_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		return;
	}
	igb_initialize_receive_units(adapter);

	/* Enable VLAN support */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		igb_setup_vlan_hw_support(adapter);

	/* Don't lose promiscuous settings */
	igb_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	if (adapter->msix > 1) /* Set up queue routing */
		igb_configure_queues(adapter);

	/* this clears any pending interrupts */
	E1000_READ_REG(&adapter->hw, E1000_ICR);
#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		igb_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
	{
		igb_enable_intr(adapter);
		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
	}

	/* Set Energy Efficient Ethernet */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		if (adapter->hw.mac.type == e1000_i354)
			e1000_set_eee_i354(&adapter->hw, TRUE, TRUE);
		else
			e1000_set_eee_i350(&adapter->hw, TRUE, TRUE);
	}
}

static void
igb_init(void *arg)
{
	struct adapter *adapter = arg;

	IGB_CORE_LOCK(adapter);
	igb_init_locked(adapter);
	IGB_CORE_UNLOCK(adapter);
}


static void
igb_handle_que(void *context, int pending)
{
	struct igb_queue *que = context;
	struct adapter *adapter = que->adapter;
	struct tx_ring *txr = que->txr;
	struct ifnet	*ifp = adapter->ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		bool	more;

		more = igb_rxeof(que, adapter->rx_process_limit, NULL);

		IGB_TX_LOCK(txr);
		igb_txeof(txr);
#ifndef IGB_LEGACY_TX
		/* Process the stack queue only if not depleted */
		if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
		    !drbr_empty(ifp, txr->br))
			igb_mq_start_locked(ifp, txr);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			igb_start_locked(txr, ifp);
#endif
		IGB_TX_UNLOCK(txr);
		/* Do we need another? */
		if (more) {
			taskqueue_enqueue(que->tq, &que->que_task);
			return;
		}
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		return;
#endif
	/* Reenable this interrupt */
	if (que->eims)
		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
	else
		igb_enable_intr(adapter);
}

/* Deal with link in a sleepable context */
static void
igb_handle_link(void *context, int pending)
{
	struct adapter *adapter = context;

	IGB_CORE_LOCK(adapter);
	igb_handle_link_locked(adapter);
	IGB_CORE_UNLOCK(adapter);
}

static void
igb_handle_link_locked(struct adapter *adapter)
{
	struct tx_ring	*txr = adapter->tx_rings;
	struct ifnet *ifp = adapter->ifp;

	IGB_CORE_LOCK_ASSERT(adapter);
	adapter->hw.mac.get_link_status = 1;
	igb_update_link_status(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			IGB_TX_LOCK(txr);
#ifndef IGB_LEGACY_TX
			/* Process the stack queue only if not depleted */
			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
			    !drbr_empty(ifp, txr->br))
				igb_mq_start_locked(ifp, txr);
#else
			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
				igb_start_locked(txr, ifp);
#endif
			IGB_TX_UNLOCK(txr);
		}
	}
}

/*********************************************************************
 *
 *  MSI/Legacy Deferred
 *  Interrupt Service routine
 *
 *********************************************************************/
static int
igb_irq_fast(void *arg)
{
	struct adapter		*adapter = arg;
	struct igb_queue	*que = adapter->queues;
	u32			reg_icr;


	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject? */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt. */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	/*
	 * Mask interrupts until the taskqueue is finished running. This is
	 * cheap, just assume that it is needed. This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	igb_disable_intr(adapter);
	taskqueue_enqueue(que->tq, &que->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
		taskqueue_enqueue(que->tq, &adapter->link_task);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

#ifdef DEVICE_POLLING
#if __FreeBSD_version >= 800000
#define POLL_RETURN_COUNT(a) (a)
static int
#else
#define POLL_RETURN_COUNT(a)
static void
#endif
igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter		*adapter = ifp->if_softc;
	struct igb_queue	*que;
	struct tx_ring		*txr;
	u32			reg_icr, rx_done = 0;
	u32			loop = IGB_MAX_LOOP;
	bool			more;

	IGB_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		IGB_CORE_UNLOCK(adapter);
		return POLL_RETURN_COUNT(rx_done);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		/* Link status change */
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
			igb_handle_link_locked(adapter);

		if (reg_icr & E1000_ICR_RXO)
			adapter->rx_overruns++;
	}
	IGB_CORE_UNLOCK(adapter);

	for (int i = 0; i < adapter->num_queues; i++) {
		que = &adapter->queues[i];
		txr = que->txr;

		igb_rxeof(que, count, &rx_done);

		IGB_TX_LOCK(txr);
		do {
			more = igb_txeof(txr);
		} while (loop-- && more);
#ifndef IGB_LEGACY_TX
		if (!drbr_empty(ifp, txr->br))
			igb_mq_start_locked(ifp, txr);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			igb_start_locked(txr, ifp);
#endif
		IGB_TX_UNLOCK(txr);
	}

	return POLL_RETURN_COUNT(rx_done);
}
#endif /* DEVICE_POLLING */
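
/*
 * Usage note (editor's sketch): igb_poll is only compiled in when the
 * kernel is built with `options DEVICE_POLLING`. Polling is then toggled
 * per interface at runtime, e.g.
 *
 *     # ifconfig igb0 polling
 *     # ifconfig igb0 -polling
 *
 * which flips IFCAP_POLLING through the SIOCSIFCAP path in igb_ioctl
 * above.
 */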

/*********************************************************************
 *
 *  MSIX Que Interrupt Service routine
 *
 **********************************************************************/
static void
igb_msix_que(void *arg)
{
	struct igb_queue *que = arg;
	struct adapter *adapter = que->adapter;
	struct ifnet   *ifp = adapter->ifp;
	struct tx_ring *txr = que->txr;
	struct rx_ring *rxr = que->rxr;
	u32		newitr = 0;
	bool		more_rx;

	/* Ignore spurious interrupts */
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		return;

	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
	++que->irqs;

	IGB_TX_LOCK(txr);
	igb_txeof(txr);
#ifndef IGB_LEGACY_TX
	/* Process the stack queue only if not depleted */
	if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
	    !drbr_empty(ifp, txr->br))
		igb_mq_start_locked(ifp, txr);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		igb_start_locked(txr, ifp);
#endif
	IGB_TX_UNLOCK(txr);

	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);

	if (adapter->enable_aim == FALSE)
		goto no_calc;
	/*
	** Do Adaptive Interrupt Moderation:
	**  - Write out last calculated setting
	**  - Calculate based on average size over
	**    the last interval.
	*/
	if (que->eitr_setting)
		E1000_WRITE_REG(&adapter->hw,
		    E1000_EITR(que->msix), que->eitr_setting);

	que->eitr_setting = 0;

	/* Idle, do nothing */
	if ((txr->bytes == 0) && (rxr->bytes == 0))
		goto no_calc;

	/* Use half the default if sub-gigabit */
	if (adapter->link_speed != 1000)
		newitr = IGB_DEFAULT_ITR / 2;
	else {
		if ((txr->bytes) && (txr->packets))
			newitr = txr->bytes/txr->packets;
		if ((rxr->bytes) && (rxr->packets))
			newitr = max(newitr,
			    (rxr->bytes / rxr->packets));
		newitr += 24; /* account for hardware frame, crc */
		/* set an upper boundary */
		newitr = min(newitr, 3000);
		/* Be nice to the mid range */
		if ((newitr > 300) && (newitr < 1200))
			newitr = (newitr / 3);
		else
			newitr = (newitr / 2);
	}
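	/*
	 * Worked example (editor's sketch): at 1Gb with an average frame
	 * of 1500 bytes, newitr becomes 1500 + 24 = 1524, stays under the
	 * 3000 cap, falls outside the (300,1200) mid range, and is halved
	 * to 762 before the low bits are masked off below. Small-packet
	 * workloads produce smaller values and thus a higher interrupt
	 * rate.
	 */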
	newitr &= 0x7FFC;  /* Mask invalid bits */
	if (adapter->hw.mac.type == e1000_82575)
		newitr |= newitr << 16;
	else
		newitr |= E1000_EITR_CNT_IGNR;

	/* save for next interrupt */
	que->eitr_setting = newitr;

	/* Reset state */
	txr->bytes = 0;
	txr->packets = 0;
	rxr->bytes = 0;
	rxr->packets = 0;

no_calc:
	/* Schedule a clean task if needed */
	if (more_rx)
		taskqueue_enqueue(que->tq, &que->que_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
	return;
}


/*********************************************************************
 *
 *  MSIX Link Interrupt Service routine
 *
 **********************************************************************/

static void
igb_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		icr;

	++adapter->link_irq;
	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
	if (!(icr & E1000_ICR_LSC))
		goto spurious;
	igb_handle_link(adapter, 0);

spurious:
	/* Rearm */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
	return;
}


/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;

	INIT_DEBUGOUT("igb_media_status: begin");

	IGB_CORE_LOCK(adapter);
	igb_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		IGB_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	switch (adapter->link_speed) {
	case 10:
		ifmr->ifm_active |= IFM_10_T;
		break;
	case 100:
		/*
		** Support for 100Mb SFP - these are fiber modules,
		** but the media type appears as serdes.
		*/
		if (adapter->hw.phy.media_type ==
		    e1000_media_type_internal_serdes)
			ifmr->ifm_active |= IFM_100_FX;
		else
			ifmr->ifm_active |= IFM_100_TX;
		break;
	case 1000:
		ifmr->ifm_active |= IFM_1000_T;
		break;
	case 2500:
		ifmr->ifm_active |= IFM_2500_SX;
		break;
	}

	if (adapter->link_duplex == FULL_DUPLEX)
		ifmr->ifm_active |= IFM_FDX;
	else
		ifmr->ifm_active |= IFM_HDX;

	IGB_CORE_UNLOCK(adapter);
}
1747
1748 /*********************************************************************
1749 *
1750 * Media Ioctl callback
1751 *
1752 * This routine is called when the user changes speed/duplex using
1753 * media/mediopt option with ifconfig.
1754 *
1755 **********************************************************************/
1756 static int
1757 igb_media_change(struct ifnet *ifp)
1758 {
1759 struct adapter *adapter = ifp->if_softc;
1760 struct ifmedia *ifm = &adapter->media;
1761
1762 INIT_DEBUGOUT("igb_media_change: begin");
1763
1764 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1765 return (EINVAL);
1766
1767 IGB_CORE_LOCK(adapter);
1768 switch (IFM_SUBTYPE(ifm->ifm_media)) {
1769 case IFM_AUTO:
1770 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1771 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1772 break;
1773 case IFM_1000_LX:
1774 case IFM_1000_SX:
1775 case IFM_1000_T:
1776 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1777 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1778 break;
1779 case IFM_100_TX:
1780 adapter->hw.mac.autoneg = FALSE;
1781 adapter->hw.phy.autoneg_advertised = 0;
1782 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1783 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1784 else
1785 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1786 break;
1787 case IFM_10_T:
1788 adapter->hw.mac.autoneg = FALSE;
1789 adapter->hw.phy.autoneg_advertised = 0;
1790 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1791 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1792 else
1793 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1794 break;
1795 default:
1796 device_printf(adapter->dev, "Unsupported media type\n");
1797 }
1798
1799 igb_init_locked(adapter);
1800 IGB_CORE_UNLOCK(adapter);
1801
1802 return (0);
1803 }
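
/*
 * [Annotation, not driver code] The switch above is driven by ifconfig
 * media requests, for example:
 *
 *	ifconfig igb0 media 100baseTX mediaopt full-duplex
 *	ifconfig igb0 media autoselect
 *
 * Forced modes clear phy.autoneg_advertised and program
 * mac.forced_speed_duplex instead; autoselect restores autonegotiation
 * with the default advertisement mask.
 */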
1804
1805
1806 /*********************************************************************
1807 *
1808 * This routine maps the mbufs to Advanced TX descriptors.
1809 *
1810 **********************************************************************/
1811 static int
1812 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1813 {
1814 struct adapter *adapter = txr->adapter;
1815 u32 olinfo_status = 0, cmd_type_len;
1816 int i, j, error, nsegs;
1817 int first;
1818 bool remap = TRUE;
1819 struct mbuf *m_head;
1820 bus_dma_segment_t segs[IGB_MAX_SCATTER];
1821 bus_dmamap_t map;
1822 struct igb_tx_buf *txbuf;
1823 union e1000_adv_tx_desc *txd = NULL;
1824
1825 m_head = *m_headp;
1826
1827 /* Basic descriptor defines */
1828 cmd_type_len = (E1000_ADVTXD_DTYP_DATA |
1829 E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT);
1830
1831 if (m_head->m_flags & M_VLANTAG)
1832 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1833
1834 /*
1835 * Important to capture the first descriptor
1836 * used because it will contain the index of
1837 * the one we tell the hardware to report back
1838 */
1839 first = txr->next_avail_desc;
1840 txbuf = &txr->tx_buffers[first];
1841 map = txbuf->map;
1842
1843 /*
1844 * Map the packet for DMA.
1845 */
1846 retry:
1847 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1848 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1849
1850 if (__predict_false(error)) {
1851 struct mbuf *m;
1852
1853 switch (error) {
1854 case EFBIG:
1855 			/* Try it again - only one retry */
1856 if (remap == TRUE) {
1857 remap = FALSE;
1858 m = m_collapse(*m_headp, M_NOWAIT,
1859 IGB_MAX_SCATTER);
1860 if (m == NULL) {
1861 adapter->mbuf_defrag_failed++;
1862 m_freem(*m_headp);
1863 *m_headp = NULL;
1864 return (ENOBUFS);
1865 }
1866 *m_headp = m;
1867 goto retry;
1868 } else
1869 return (error);
1870 default:
1871 txr->no_tx_dma_setup++;
1872 m_freem(*m_headp);
1873 *m_headp = NULL;
1874 return (error);
1875 }
1876 }
1877
1878 /* Make certain there are enough descriptors */
1879 if (txr->tx_avail < (nsegs + 2)) {
1880 txr->no_desc_avail++;
1881 bus_dmamap_unload(txr->txtag, map);
1882 return (ENOBUFS);
1883 }
1884 m_head = *m_headp;
1885
1886 /*
1887 	** Set up the appropriate offload context;
1888 	** this will consume the first descriptor
1889 */
1890 error = igb_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1891 if (__predict_false(error)) {
1892 m_freem(*m_headp);
1893 *m_headp = NULL;
1894 return (error);
1895 }
1896
1897 /* 82575 needs the queue index added */
1898 if (adapter->hw.mac.type == e1000_82575)
1899 olinfo_status |= txr->me << 4;
1900
1901 i = txr->next_avail_desc;
1902 for (j = 0; j < nsegs; j++) {
1903 bus_size_t seglen;
1904 bus_addr_t segaddr;
1905
1906 txbuf = &txr->tx_buffers[i];
1907 txd = &txr->tx_base[i];
1908 seglen = segs[j].ds_len;
1909 segaddr = htole64(segs[j].ds_addr);
1910
1911 txd->read.buffer_addr = segaddr;
1912 txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS |
1913 cmd_type_len | seglen);
1914 txd->read.olinfo_status = htole32(olinfo_status);
1915
1916 if (++i == txr->num_desc)
1917 i = 0;
1918 }
1919
1920 txd->read.cmd_type_len |=
1921 htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1922 txr->tx_avail -= nsegs;
1923 txr->next_avail_desc = i;
1924
1925 txbuf->m_head = m_head;
1926 /*
1927 ** Here we swap the map so the last descriptor,
1928 	** which gets the completion interrupt, has the
1929 ** real map, and the first descriptor gets the
1930 ** unused map from this descriptor.
1931 */
1932 txr->tx_buffers[first].map = txbuf->map;
1933 txbuf->map = map;
1934 bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1935
1936 /* Set the EOP descriptor that will be marked done */
1937 txbuf = &txr->tx_buffers[first];
1938 txbuf->eop = txd;
1939
1940 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1941 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1942 /*
1943 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
1944 * hardware that this frame is available to transmit.
1945 */
1946 ++txr->total_packets;
1947 E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1948
1949 return (0);
1950 }
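
/*
 * [Annotation, not driver code] A minimal sketch of the circular
 * descriptor-index arithmetic used by igb_xmit() above; ring_advance()
 * is a hypothetical helper named only for illustration.
 */
static inline int
ring_advance(int i, int num_desc)
{
	/* Mirrors the "if (++i == txr->num_desc) i = 0;" pattern above */
	return ((i + 1 == num_desc) ? 0 : i + 1);
}

/*********************************************************************
 *
 *  Configure unicast/multicast promiscuous receive filtering.
 *
 **********************************************************************/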
1951 static void
1952 igb_set_promisc(struct adapter *adapter)
1953 {
1954 struct ifnet *ifp = adapter->ifp;
1955 struct e1000_hw *hw = &adapter->hw;
1956 u32 reg;
1957
1958 if (adapter->vf_ifp) {
1959 e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1960 return;
1961 }
1962
1963 reg = E1000_READ_REG(hw, E1000_RCTL);
1964 if (ifp->if_flags & IFF_PROMISC) {
1965 reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1966 E1000_WRITE_REG(hw, E1000_RCTL, reg);
1967 } else if (ifp->if_flags & IFF_ALLMULTI) {
1968 reg |= E1000_RCTL_MPE;
1969 reg &= ~E1000_RCTL_UPE;
1970 E1000_WRITE_REG(hw, E1000_RCTL, reg);
1971 }
1972 }
1973
1974 static void
1975 igb_disable_promisc(struct adapter *adapter)
1976 {
1977 struct e1000_hw *hw = &adapter->hw;
1978 struct ifnet *ifp = adapter->ifp;
1979 u32 reg;
1980 int mcnt = 0;
1981
1982 if (adapter->vf_ifp) {
1983 e1000_promisc_set_vf(hw, e1000_promisc_disabled);
1984 return;
1985 }
1986 reg = E1000_READ_REG(hw, E1000_RCTL);
1987 reg &= (~E1000_RCTL_UPE);
1988 if (ifp->if_flags & IFF_ALLMULTI)
1989 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
1990 else {
1991 struct ifmultiaddr *ifma;
1992 #if __FreeBSD_version < 800000
1993 IF_ADDR_LOCK(ifp);
1994 #else
1995 if_maddr_rlock(ifp);
1996 #endif
1997 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1998 if (ifma->ifma_addr->sa_family != AF_LINK)
1999 continue;
2000 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2001 break;
2002 mcnt++;
2003 }
2004 #if __FreeBSD_version < 800000
2005 IF_ADDR_UNLOCK(ifp);
2006 #else
2007 if_maddr_runlock(ifp);
2008 #endif
2009 }
2010 	/* Don't disable multicast promisc if at the MAX group limit */
2011 if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2012 reg &= (~E1000_RCTL_MPE);
2013 E1000_WRITE_REG(hw, E1000_RCTL, reg);
2014 }
2015
2016
2017 /*********************************************************************
2018 * Multicast Update
2019 *
2020  * This routine is called whenever the multicast address list is updated.
2021 *
2022 **********************************************************************/
2023
2024 static void
2025 igb_set_multi(struct adapter *adapter)
2026 {
2027 struct ifnet *ifp = adapter->ifp;
2028 struct ifmultiaddr *ifma;
2029 u32 reg_rctl = 0;
2030 u8 *mta;
2031
2032 int mcnt = 0;
2033
2034 IOCTL_DEBUGOUT("igb_set_multi: begin");
2035
2036 mta = adapter->mta;
2037 bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2038 MAX_NUM_MULTICAST_ADDRESSES);
2039
2040 #if __FreeBSD_version < 800000
2041 IF_ADDR_LOCK(ifp);
2042 #else
2043 if_maddr_rlock(ifp);
2044 #endif
2045 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2046 if (ifma->ifma_addr->sa_family != AF_LINK)
2047 continue;
2048
2049 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2050 break;
2051
2052 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2053 &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2054 mcnt++;
2055 }
2056 #if __FreeBSD_version < 800000
2057 IF_ADDR_UNLOCK(ifp);
2058 #else
2059 if_maddr_runlock(ifp);
2060 #endif
2061
2062 if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2063 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2064 reg_rctl |= E1000_RCTL_MPE;
2065 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2066 } else
2067 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2068 }
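
/*
 * [Annotation, not driver code] The mta scratch table filled above is a
 * flat array of 6-byte link-level addresses; entry i begins at
 * &mta[i * ETH_ADDR_LEN]. A hypothetical accessor for illustration:
 */
static inline u8 *
mta_entry(u8 *mta, int i)
{
	return (&mta[i * ETH_ADDR_LEN]);	/* ETH_ADDR_LEN is 6 */
}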
2069
2070
2071 /*********************************************************************
2072 * Timer routine:
2073 * This routine checks for link status,
2074 * updates statistics, and does the watchdog.
2075 *
2076 **********************************************************************/
2077
2078 static void
2079 igb_local_timer(void *arg)
2080 {
2081 struct adapter *adapter = arg;
2082 device_t dev = adapter->dev;
2083 struct ifnet *ifp = adapter->ifp;
2084 struct tx_ring *txr = adapter->tx_rings;
2085 struct igb_queue *que = adapter->queues;
2086 int hung = 0, busy = 0;
2087
2088
2089 IGB_CORE_LOCK_ASSERT(adapter);
2090
2091 igb_update_link_status(adapter);
2092 igb_update_stats_counters(adapter);
2093
2094 /*
2095 ** Check the TX queues status
2096 ** - central locked handling of OACTIVE
2097 ** - watchdog only if all queues show hung
2098 */
2099 for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2100 if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2101 (adapter->pause_frames == 0))
2102 ++hung;
2103 if (txr->queue_status & IGB_QUEUE_DEPLETED)
2104 ++busy;
2105 if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2106 taskqueue_enqueue(que->tq, &que->que_task);
2107 }
2108 if (hung == adapter->num_queues)
2109 goto timeout;
2110 if (busy == adapter->num_queues)
2111 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2112 else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2113 (busy < adapter->num_queues))
2114 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2115
2116 adapter->pause_frames = 0;
2117 callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2118 #ifndef DEVICE_POLLING
2119 /* Schedule all queue interrupts - deadlock protection */
2120 E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2121 #endif
2122 return;
2123
2124 timeout:
2125 device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2126 device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2127 E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2128 E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2129 device_printf(dev,"TX(%d) desc avail = %d,"
2130 "Next TX to Clean = %d\n",
2131 txr->me, txr->tx_avail, txr->next_to_clean);
2132 adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2133 adapter->watchdog_events++;
2134 igb_init_locked(adapter);
2135 }
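
/*
 * [Annotation, not driver code] Note that the watchdog above only fires
 * when every queue reports IGB_QUEUE_HUNG and no pause frames arrived in
 * the interval; queues that are merely depleted toggle IFF_DRV_OACTIVE
 * centrally here rather than resetting the adapter.
 */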
2136
2137 static void
2138 igb_update_link_status(struct adapter *adapter)
2139 {
2140 struct e1000_hw *hw = &adapter->hw;
2141 struct e1000_fc_info *fc = &hw->fc;
2142 struct ifnet *ifp = adapter->ifp;
2143 device_t dev = adapter->dev;
2144 struct tx_ring *txr = adapter->tx_rings;
2145 u32 link_check, thstat, ctrl;
2146 char *flowctl = NULL;
2147
2148 link_check = thstat = ctrl = 0;
2149
2150 /* Get the cached link value or read for real */
2151 switch (hw->phy.media_type) {
2152 case e1000_media_type_copper:
2153 if (hw->mac.get_link_status) {
2154 /* Do the work to read phy */
2155 e1000_check_for_link(hw);
2156 link_check = !hw->mac.get_link_status;
2157 } else
2158 link_check = TRUE;
2159 break;
2160 case e1000_media_type_fiber:
2161 e1000_check_for_link(hw);
2162 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2163 E1000_STATUS_LU);
2164 break;
2165 case e1000_media_type_internal_serdes:
2166 e1000_check_for_link(hw);
2167 link_check = adapter->hw.mac.serdes_has_link;
2168 break;
2169 /* VF device is type_unknown */
2170 case e1000_media_type_unknown:
2171 e1000_check_for_link(hw);
2172 link_check = !hw->mac.get_link_status;
2173 /* Fall thru */
2174 default:
2175 break;
2176 }
2177
2178 /* Check for thermal downshift or shutdown */
2179 if (hw->mac.type == e1000_i350) {
2180 thstat = E1000_READ_REG(hw, E1000_THSTAT);
2181 ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2182 }
2183
2184 /* Get the flow control for display */
2185 switch (fc->current_mode) {
2186 case e1000_fc_rx_pause:
2187 flowctl = "RX";
2188 break;
2189 case e1000_fc_tx_pause:
2190 flowctl = "TX";
2191 break;
2192 case e1000_fc_full:
2193 flowctl = "Full";
2194 break;
2195 case e1000_fc_none:
2196 default:
2197 flowctl = "None";
2198 break;
2199 }
2200
2201 /* Now we check if a transition has happened */
2202 if (link_check && (adapter->link_active == 0)) {
2203 e1000_get_speed_and_duplex(&adapter->hw,
2204 &adapter->link_speed, &adapter->link_duplex);
2205 if (bootverbose)
2206 device_printf(dev, "Link is up %d Mbps %s,"
2207 " Flow Control: %s\n",
2208 adapter->link_speed,
2209 ((adapter->link_duplex == FULL_DUPLEX) ?
2210 "Full Duplex" : "Half Duplex"), flowctl);
2211 adapter->link_active = 1;
2212 ifp->if_baudrate = adapter->link_speed * 1000000;
2213 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2214 (thstat & E1000_THSTAT_LINK_THROTTLE))
2215 device_printf(dev, "Link: thermal downshift\n");
2216 /* Delay Link Up for Phy update */
2217 if (((hw->mac.type == e1000_i210) ||
2218 (hw->mac.type == e1000_i211)) &&
2219 (hw->phy.id == I210_I_PHY_ID))
2220 msec_delay(I210_LINK_DELAY);
2221 /* Reset if the media type changed. */
2222 if (hw->dev_spec._82575.media_changed) {
2223 hw->dev_spec._82575.media_changed = false;
2224 adapter->flags |= IGB_MEDIA_RESET;
2225 igb_reset(adapter);
2226 }
2227 /* This can sleep */
2228 if_link_state_change(ifp, LINK_STATE_UP);
2229 } else if (!link_check && (adapter->link_active == 1)) {
2230 ifp->if_baudrate = adapter->link_speed = 0;
2231 adapter->link_duplex = 0;
2232 if (bootverbose)
2233 device_printf(dev, "Link is Down\n");
2234 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2235 (thstat & E1000_THSTAT_PWR_DOWN))
2236 device_printf(dev, "Link: thermal shutdown\n");
2237 adapter->link_active = 0;
2238 /* This can sleep */
2239 if_link_state_change(ifp, LINK_STATE_DOWN);
2240 /* Reset queue state */
2241 for (int i = 0; i < adapter->num_queues; i++, txr++)
2242 txr->queue_status = IGB_QUEUE_IDLE;
2243 }
2244 }
2245
2246 /*********************************************************************
2247 *
2248 * This routine disables all traffic on the adapter by issuing a
2249 * global reset on the MAC and deallocates TX/RX buffers.
2250 *
2251 **********************************************************************/
2252
2253 static void
2254 igb_stop(void *arg)
2255 {
2256 struct adapter *adapter = arg;
2257 struct ifnet *ifp = adapter->ifp;
2258 struct tx_ring *txr = adapter->tx_rings;
2259
2260 IGB_CORE_LOCK_ASSERT(adapter);
2261
2262 INIT_DEBUGOUT("igb_stop: begin");
2263
2264 igb_disable_intr(adapter);
2265
2266 callout_stop(&adapter->timer);
2267
2268 /* Tell the stack that the interface is no longer active */
2269 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2270 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2271
2272 /* Disarm watchdog timer. */
2273 for (int i = 0; i < adapter->num_queues; i++, txr++) {
2274 IGB_TX_LOCK(txr);
2275 txr->queue_status = IGB_QUEUE_IDLE;
2276 IGB_TX_UNLOCK(txr);
2277 }
2278
2279 e1000_reset_hw(&adapter->hw);
2280 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, 0);
2281
2282 e1000_led_off(&adapter->hw);
2283 e1000_cleanup_led(&adapter->hw);
2284 }
2285
2286
2287 /*********************************************************************
2288 *
2289 * Determine hardware revision.
2290 *
2291 **********************************************************************/
2292 static void
2293 igb_identify_hardware(struct adapter *adapter)
2294 {
2295 device_t dev = adapter->dev;
2296
2297 	/* Make sure bus mastering is enabled in PCI config space */
2298 pci_enable_busmaster(dev);
2299 adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2300
2301 /* Save off the information about this board */
2302 adapter->hw.vendor_id = pci_get_vendor(dev);
2303 adapter->hw.device_id = pci_get_device(dev);
2304 adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2305 adapter->hw.subsystem_vendor_id =
2306 pci_read_config(dev, PCIR_SUBVEND_0, 2);
2307 adapter->hw.subsystem_device_id =
2308 pci_read_config(dev, PCIR_SUBDEV_0, 2);
2309
2310 /* Set MAC type early for PCI setup */
2311 e1000_set_mac_type(&adapter->hw);
2312
2313 /* Are we a VF device? */
2314 if ((adapter->hw.mac.type == e1000_vfadapt) ||
2315 (adapter->hw.mac.type == e1000_vfadapt_i350))
2316 adapter->vf_ifp = 1;
2317 else
2318 adapter->vf_ifp = 0;
2319 }
2320
2321 static int
2322 igb_allocate_pci_resources(struct adapter *adapter)
2323 {
2324 device_t dev = adapter->dev;
2325 int rid;
2326
2327 rid = PCIR_BAR(0);
2328 adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2329 &rid, RF_ACTIVE);
2330 if (adapter->pci_mem == NULL) {
2331 device_printf(dev, "Unable to allocate bus resource: memory\n");
2332 return (ENXIO);
2333 }
2334 adapter->osdep.mem_bus_space_tag =
2335 rman_get_bustag(adapter->pci_mem);
2336 adapter->osdep.mem_bus_space_handle =
2337 rman_get_bushandle(adapter->pci_mem);
2338 adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2339
2340 adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2341
2342 /* This will setup either MSI/X or MSI */
2343 adapter->msix = igb_setup_msix(adapter);
2344 adapter->hw.back = &adapter->osdep;
2345
2346 return (0);
2347 }
2348
2349 /*********************************************************************
2350 *
2351 * Setup the Legacy or MSI Interrupt handler
2352 *
2353 **********************************************************************/
2354 static int
2355 igb_allocate_legacy(struct adapter *adapter)
2356 {
2357 device_t dev = adapter->dev;
2358 struct igb_queue *que = adapter->queues;
2359 #ifndef IGB_LEGACY_TX
2360 struct tx_ring *txr = adapter->tx_rings;
2361 #endif
2362 int error, rid = 0;
2363
2364 /* Turn off all interrupts */
2365 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2366
2367 /* MSI RID is 1 */
2368 if (adapter->msix == 1)
2369 rid = 1;
2370
2371 /* We allocate a single interrupt resource */
2372 adapter->res = bus_alloc_resource_any(dev,
2373 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2374 if (adapter->res == NULL) {
2375 device_printf(dev, "Unable to allocate bus resource: "
2376 "interrupt\n");
2377 return (ENXIO);
2378 }
2379
2380 #ifndef IGB_LEGACY_TX
2381 TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2382 #endif
2383
2384 /*
2385 * Try allocating a fast interrupt and the associated deferred
2386 * processing contexts.
2387 */
2388 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2389 /* Make tasklet for deferred link handling */
2390 TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2391 que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2392 taskqueue_thread_enqueue, &que->tq);
2393 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2394 device_get_nameunit(adapter->dev));
2395 if ((error = bus_setup_intr(dev, adapter->res,
2396 INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2397 adapter, &adapter->tag)) != 0) {
2398 device_printf(dev, "Failed to register fast interrupt "
2399 "handler: %d\n", error);
2400 taskqueue_free(que->tq);
2401 que->tq = NULL;
2402 return (error);
2403 }
2404
2405 return (0);
2406 }
2407
2408
2409 /*********************************************************************
2410 *
2411 * Setup the MSIX Queue Interrupt handlers:
2412 *
2413 **********************************************************************/
2414 static int
2415 igb_allocate_msix(struct adapter *adapter)
2416 {
2417 device_t dev = adapter->dev;
2418 struct igb_queue *que = adapter->queues;
2419 int error, rid, vector = 0;
2420
2421 /* Be sure to start with all interrupts disabled */
2422 E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2423 E1000_WRITE_FLUSH(&adapter->hw);
2424
2425 for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2426 		rid = vector + 1;
2427 que->res = bus_alloc_resource_any(dev,
2428 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2429 if (que->res == NULL) {
2430 device_printf(dev,
2431 "Unable to allocate bus resource: "
2432 "MSIX Queue Interrupt\n");
2433 return (ENXIO);
2434 }
2435 error = bus_setup_intr(dev, que->res,
2436 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2437 igb_msix_que, que, &que->tag);
2438 if (error) {
2439 que->res = NULL;
2440 device_printf(dev, "Failed to register Queue handler");
2441 return (error);
2442 }
2443 #if __FreeBSD_version >= 800504
2444 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2445 #endif
2446 que->msix = vector;
2447 if (adapter->hw.mac.type == e1000_82575)
2448 que->eims = E1000_EICR_TX_QUEUE0 << i;
2449 else
2450 que->eims = 1 << vector;
2451 /*
2452 ** Bind the msix vector, and thus the
2453 ** rings to the corresponding cpu.
2454 */
2455 if (adapter->num_queues > 1) {
2456 if (igb_last_bind_cpu < 0)
2457 igb_last_bind_cpu = CPU_FIRST();
2458 bus_bind_intr(dev, que->res, igb_last_bind_cpu);
2459 device_printf(dev,
2460 "Bound queue %d to cpu %d\n",
2461 			    i, igb_last_bind_cpu);
2462 igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2463 }
2464 #ifndef IGB_LEGACY_TX
2465 TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2466 que->txr);
2467 #endif
2468 /* Make tasklet for deferred handling */
2469 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2470 que->tq = taskqueue_create("igb_que", M_NOWAIT,
2471 taskqueue_thread_enqueue, &que->tq);
2472 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2473 device_get_nameunit(adapter->dev));
2474 }
2475
2476 /* And Link */
2477 rid = vector + 1;
2478 adapter->res = bus_alloc_resource_any(dev,
2479 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2480 if (adapter->res == NULL) {
2481 device_printf(dev,
2482 "Unable to allocate bus resource: "
2483 "MSIX Link Interrupt\n");
2484 return (ENXIO);
2485 }
2486 if ((error = bus_setup_intr(dev, adapter->res,
2487 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2488 igb_msix_link, adapter, &adapter->tag)) != 0) {
2489 device_printf(dev, "Failed to register Link handler");
2490 return (error);
2491 }
2492 #if __FreeBSD_version >= 800504
2493 bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2494 #endif
2495 adapter->linkvec = vector;
2496
2497 return (0);
2498 }
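
/*
 * [Annotation, not driver code] FreeBSD MSI/MSI-X interrupt resource IDs
 * are 1-based (rid 0 is the legacy INTx line), which is why each queue
 * vector above allocates SYS_RES_IRQ with rid = vector + 1 and the link
 * vector takes the rid after the last queue vector.
 */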
2499
2500
2501 static void
2502 igb_configure_queues(struct adapter *adapter)
2503 {
2504 struct e1000_hw *hw = &adapter->hw;
2505 struct igb_queue *que;
2506 u32 tmp, ivar = 0, newitr = 0;
2507
2508 /* First turn on RSS capability */
2509 if (adapter->hw.mac.type != e1000_82575)
2510 E1000_WRITE_REG(hw, E1000_GPIE,
2511 E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2512 E1000_GPIE_PBA | E1000_GPIE_NSICR);
2513
2514 /* Turn on MSIX */
2515 switch (adapter->hw.mac.type) {
2516 case e1000_82580:
2517 case e1000_i350:
2518 case e1000_i354:
2519 case e1000_i210:
2520 case e1000_i211:
2521 case e1000_vfadapt:
2522 case e1000_vfadapt_i350:
2523 /* RX entries */
2524 for (int i = 0; i < adapter->num_queues; i++) {
2525 u32 index = i >> 1;
2526 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2527 que = &adapter->queues[i];
2528 if (i & 1) {
2529 ivar &= 0xFF00FFFF;
2530 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2531 } else {
2532 ivar &= 0xFFFFFF00;
2533 ivar |= que->msix | E1000_IVAR_VALID;
2534 }
2535 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2536 }
2537 /* TX entries */
2538 for (int i = 0; i < adapter->num_queues; i++) {
2539 u32 index = i >> 1;
2540 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2541 que = &adapter->queues[i];
2542 if (i & 1) {
2543 ivar &= 0x00FFFFFF;
2544 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2545 } else {
2546 ivar &= 0xFFFF00FF;
2547 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2548 }
2549 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2550 adapter->que_mask |= que->eims;
2551 }
2552
2553 /* And for the link interrupt */
2554 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2555 adapter->link_mask = 1 << adapter->linkvec;
2556 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2557 break;
2558 case e1000_82576:
2559 /* RX entries */
2560 for (int i = 0; i < adapter->num_queues; i++) {
2561 u32 index = i & 0x7; /* Each IVAR has two entries */
2562 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2563 que = &adapter->queues[i];
2564 if (i < 8) {
2565 ivar &= 0xFFFFFF00;
2566 ivar |= que->msix | E1000_IVAR_VALID;
2567 } else {
2568 ivar &= 0xFF00FFFF;
2569 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2570 }
2571 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2572 adapter->que_mask |= que->eims;
2573 }
2574 /* TX entries */
2575 for (int i = 0; i < adapter->num_queues; i++) {
2576 u32 index = i & 0x7; /* Each IVAR has two entries */
2577 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2578 que = &adapter->queues[i];
2579 if (i < 8) {
2580 ivar &= 0xFFFF00FF;
2581 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2582 } else {
2583 ivar &= 0x00FFFFFF;
2584 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2585 }
2586 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2587 adapter->que_mask |= que->eims;
2588 }
2589
2590 /* And for the link interrupt */
2591 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2592 adapter->link_mask = 1 << adapter->linkvec;
2593 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2594 break;
2595
2596 case e1000_82575:
2597 		/* enable MSI-X support */
2598 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2599 tmp |= E1000_CTRL_EXT_PBA_CLR;
2600 /* Auto-Mask interrupts upon ICR read. */
2601 tmp |= E1000_CTRL_EXT_EIAME;
2602 tmp |= E1000_CTRL_EXT_IRCA;
2603 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2604
2605 /* Queues */
2606 for (int i = 0; i < adapter->num_queues; i++) {
2607 que = &adapter->queues[i];
2608 tmp = E1000_EICR_RX_QUEUE0 << i;
2609 tmp |= E1000_EICR_TX_QUEUE0 << i;
2610 que->eims = tmp;
2611 E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2612 i, que->eims);
2613 adapter->que_mask |= que->eims;
2614 }
2615
2616 /* Link */
2617 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2618 E1000_EIMS_OTHER);
2619 adapter->link_mask |= E1000_EIMS_OTHER;
2620 default:
2621 break;
2622 }
2623
2624 /* Set the starting interrupt rate */
2625 if (igb_max_interrupt_rate > 0)
2626 newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2627
2628 if (hw->mac.type == e1000_82575)
2629 newitr |= newitr << 16;
2630 else
2631 newitr |= E1000_EITR_CNT_IGNR;
2632
2633 for (int i = 0; i < adapter->num_queues; i++) {
2634 que = &adapter->queues[i];
2635 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2636 }
2637
2638 return;
2639 }
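
/*
 * [Annotation, not driver code] A minimal sketch of the IVAR byte-lane
 * packing performed above. Each 32-bit IVAR register holds four 8-bit
 * entries, each an MSI-X vector number OR'd with E1000_IVAR_VALID (0x80);
 * ivar_set_lane() is a hypothetical helper for illustration.
 */
static inline u32
ivar_set_lane(u32 ivar, int lane, u8 vector)
{
	ivar &= ~(0xFFU << (lane * 8));	/* clear the chosen 8-bit entry */
	ivar |= (u32)(vector | E1000_IVAR_VALID) << (lane * 8);
	return (ivar);
}

/*
 * Worked example for the EITR write: assuming the igb_max_interrupt_rate
 * tunable holds its usual default of 8000, newitr = (4000000 / 8000) &
 * 0x7FFC = 500 (0x1F4), whose two low-order bits are already clear.
 */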
2640
2641
2642 static void
2643 igb_free_pci_resources(struct adapter *adapter)
2644 {
2645 struct igb_queue *que = adapter->queues;
2646 device_t dev = adapter->dev;
2647 int rid;
2648
2649 /*
2650 ** There is a slight possibility of a failure mode
2651 ** in attach that will result in entering this function
2652 ** before interrupt resources have been initialized, and
2653 	** in that case we do not want to execute the loops below.
2654 	** We can detect this reliably by the state of the adapter
2655 ** res pointer.
2656 */
2657 if (adapter->res == NULL)
2658 goto mem;
2659
2660 /*
2661 * First release all the interrupt resources:
2662 */
2663 for (int i = 0; i < adapter->num_queues; i++, que++) {
2664 rid = que->msix + 1;
2665 if (que->tag != NULL) {
2666 bus_teardown_intr(dev, que->res, que->tag);
2667 que->tag = NULL;
2668 }
2669 if (que->res != NULL)
2670 bus_release_resource(dev,
2671 SYS_RES_IRQ, rid, que->res);
2672 }
2673
2674 /* Clean the Legacy or Link interrupt last */
2675 if (adapter->linkvec) /* we are doing MSIX */
2676 rid = adapter->linkvec + 1;
2677 else
2678 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2679
2680 que = adapter->queues;
2681 if (adapter->tag != NULL) {
2682 taskqueue_drain(que->tq, &adapter->link_task);
2683 bus_teardown_intr(dev, adapter->res, adapter->tag);
2684 adapter->tag = NULL;
2685 }
2686 if (adapter->res != NULL)
2687 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2688
2689 for (int i = 0; i < adapter->num_queues; i++, que++) {
2690 if (que->tq != NULL) {
2691 #ifndef IGB_LEGACY_TX
2692 taskqueue_drain(que->tq, &que->txr->txq_task);
2693 #endif
2694 taskqueue_drain(que->tq, &que->que_task);
2695 taskqueue_free(que->tq);
2696 }
2697 }
2698 mem:
2699 if (adapter->msix)
2700 pci_release_msi(dev);
2701
2702 if (adapter->msix_mem != NULL)
2703 bus_release_resource(dev, SYS_RES_MEMORY,
2704 adapter->memrid, adapter->msix_mem);
2705
2706 if (adapter->pci_mem != NULL)
2707 bus_release_resource(dev, SYS_RES_MEMORY,
2708 PCIR_BAR(0), adapter->pci_mem);
2709
2710 }
2711
2712 /*
2713  * Set up either MSI-X or MSI
2714 */
2715 static int
2716 igb_setup_msix(struct adapter *adapter)
2717 {
2718 device_t dev = adapter->dev;
2719 int bar, want, queues, msgs, maxqueues;
2720
2721 /* tuneable override */
2722 if (igb_enable_msix == 0)
2723 goto msi;
2724
2725 /* First try MSI/X */
2726 msgs = pci_msix_count(dev);
2727 if (msgs == 0)
2728 goto msi;
2729 /*
2730 	** Some newer devices, as with ixgbe, may
2731 	** use a different BAR, so we need to keep
2732 	** track of which one is used.
2733 */
2734 adapter->memrid = PCIR_BAR(IGB_MSIX_BAR);
2735 bar = pci_read_config(dev, adapter->memrid, 4);
2736 if (bar == 0) /* use next bar */
2737 adapter->memrid += 4;
2738 adapter->msix_mem = bus_alloc_resource_any(dev,
2739 SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2740 if (adapter->msix_mem == NULL) {
2741 /* May not be enabled */
2742 device_printf(adapter->dev,
2743 "Unable to map MSIX table \n");
2744 goto msi;
2745 }
2746
2747 /* Figure out a reasonable auto config value */
2748 queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2749
2750 /* Manual override */
2751 if (igb_num_queues != 0)
2752 queues = igb_num_queues;
2753
2754 /* Sanity check based on HW */
2755 switch (adapter->hw.mac.type) {
2756 case e1000_82575:
2757 maxqueues = 4;
2758 break;
2759 case e1000_82576:
2760 case e1000_82580:
2761 case e1000_i350:
2762 case e1000_i354:
2763 maxqueues = 8;
2764 break;
2765 case e1000_i210:
2766 maxqueues = 4;
2767 break;
2768 case e1000_i211:
2769 maxqueues = 2;
2770 break;
2771 default: /* VF interfaces */
2772 maxqueues = 1;
2773 break;
2774 }
2775 if (queues > maxqueues)
2776 queues = maxqueues;
2777
2778 /* Manual override */
2779 if (igb_num_queues != 0)
2780 queues = igb_num_queues;
2781
2782 /*
2783 ** One vector (RX/TX pair) per queue
2784 	** plus an additional one for the link interrupt
2785 */
2786 want = queues + 1;
2787 if (msgs >= want)
2788 msgs = want;
2789 else {
2790 device_printf(adapter->dev,
2791 "MSIX Configuration Problem, "
2792 "%d vectors configured, but %d queues wanted!\n",
2793 msgs, want);
2794 goto msi;
2795 }
2796 if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
2797 device_printf(adapter->dev,
2798 "Using MSIX interrupts with %d vectors\n", msgs);
2799 adapter->num_queues = queues;
2800 return (msgs);
2801 }
2802 /*
2803 ** If MSIX alloc failed or provided us with
2804 ** less than needed, free and fall through to MSI
2805 */
2806 pci_release_msi(dev);
2807
2808 msi:
2809 if (adapter->msix_mem != NULL) {
2810 bus_release_resource(dev, SYS_RES_MEMORY,
2811 adapter->memrid, adapter->msix_mem);
2812 adapter->msix_mem = NULL;
2813 }
2814 msgs = 1;
2815 if (pci_alloc_msi(dev, &msgs) == 0) {
2816 device_printf(adapter->dev," Using an MSI interrupt\n");
2817 return (msgs);
2818 }
2819 device_printf(adapter->dev," Using a Legacy interrupt\n");
2820 return (0);
2821 }
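
/*
 * [Annotation, not driver code] Worked example of the vector accounting
 * above: on a 4-core machine with an 82576 (maxqueues = 8) and, say, 10
 * MSI-X messages available, queues = min(4, 10 - 1) = 4 and want =
 * 4 + 1 = 5, i.e. one RX/TX vector per queue plus the link vector.
 */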
2822
2823 /*********************************************************************
2824 *
2825 * Initialize the DMA Coalescing feature
2826 *
2827 **********************************************************************/
2828 static void
2829 igb_init_dmac(struct adapter *adapter, u32 pba)
2830 {
2831 device_t dev = adapter->dev;
2832 struct e1000_hw *hw = &adapter->hw;
2833 u32 dmac, reg = ~E1000_DMACR_DMAC_EN;
2834 u16 hwm;
2835
2836 if (hw->mac.type == e1000_i211)
2837 return;
2838
2839 if (hw->mac.type > e1000_82580) {
2840
2841 if (adapter->dmac == 0) { /* Disabling it */
2842 E1000_WRITE_REG(hw, E1000_DMACR, reg);
2843 return;
2844 } else
2845 device_printf(dev, "DMA Coalescing enabled\n");
2846
2847 /* Set starting threshold */
2848 E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
2849
2850 hwm = 64 * pba - adapter->max_frame_size / 16;
2851 if (hwm < 64 * (pba - 6))
2852 hwm = 64 * (pba - 6);
2853 reg = E1000_READ_REG(hw, E1000_FCRTC);
2854 reg &= ~E1000_FCRTC_RTH_COAL_MASK;
2855 reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
2856 & E1000_FCRTC_RTH_COAL_MASK);
2857 E1000_WRITE_REG(hw, E1000_FCRTC, reg);
2858
2859
2860 dmac = pba - adapter->max_frame_size / 512;
2861 if (dmac < pba - 10)
2862 dmac = pba - 10;
2863 reg = E1000_READ_REG(hw, E1000_DMACR);
2864 reg &= ~E1000_DMACR_DMACTHR_MASK;
2865 reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT)
2866 & E1000_DMACR_DMACTHR_MASK);
2867
2868 		/* Transition to L0s or L1 if available. */
2869 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2870
2871 		/* Check whether this is a 2.5Gb backplane connection
2872 		 * before configuring the watchdog timer: the msec value
2873 		 * in adapter->dmac is expressed in 12.8usec intervals
2874 		 * on a 2.5Gb connection and in 32usec intervals for a
2875 		 * non-2.5Gb connection.
2876 		 */
2877 if (hw->mac.type == e1000_i354) {
2878 int status = E1000_READ_REG(hw, E1000_STATUS);
2879 if ((status & E1000_STATUS_2P5_SKU) &&
2880 (!(status & E1000_STATUS_2P5_SKU_OVER)))
2881 reg |= ((adapter->dmac * 5) >> 6);
2882 else
2883 reg |= (adapter->dmac >> 5);
2884 } else {
2885 reg |= (adapter->dmac >> 5);
2886 }
2887
2888 E1000_WRITE_REG(hw, E1000_DMACR, reg);
2889
2890 E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2891
2892 /* Set the interval before transition */
2893 reg = E1000_READ_REG(hw, E1000_DMCTLX);
2894 if (hw->mac.type == e1000_i350)
2895 reg |= IGB_DMCTLX_DCFLUSH_DIS;
2896 /*
2897 		** on a 2.5Gb connection the TTLX unit is 0.4 usec, so
2898 		** 10 (0xA) units are needed; the delay is still 4 usec
2899 */
2900 if (hw->mac.type == e1000_i354) {
2901 int status = E1000_READ_REG(hw, E1000_STATUS);
2902 if ((status & E1000_STATUS_2P5_SKU) &&
2903 (!(status & E1000_STATUS_2P5_SKU_OVER)))
2904 reg |= 0xA;
2905 else
2906 reg |= 0x4;
2907 } else {
2908 reg |= 0x4;
2909 }
2910
2911 E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
2912
2913 /* free space in tx packet buffer to wake from DMA coal */
2914 E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE -
2915 (2 * adapter->max_frame_size)) >> 6);
2916
2917 /* make low power state decision controlled by DMA coal */
2918 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2919 reg &= ~E1000_PCIEMISC_LX_DECISION;
2920 E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
2921
2922 } else if (hw->mac.type == e1000_82580) {
2923 u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2924 E1000_WRITE_REG(hw, E1000_PCIEMISC,
2925 reg & ~E1000_PCIEMISC_LX_DECISION);
2926 E1000_WRITE_REG(hw, E1000_DMACR, 0);
2927 }
2928 }
2929
2930
2931 /*********************************************************************
2932 *
2933  * Set up a fresh starting state
2934 *
2935 **********************************************************************/
2936 static void
2937 igb_reset(struct adapter *adapter)
2938 {
2939 device_t dev = adapter->dev;
2940 struct e1000_hw *hw = &adapter->hw;
2941 struct e1000_fc_info *fc = &hw->fc;
2942 struct ifnet *ifp = adapter->ifp;
2943 u32 pba = 0;
2944 u16 hwm;
2945
2946 INIT_DEBUGOUT("igb_reset: begin");
2947
2948 /* Let the firmware know the OS is in control */
2949 igb_get_hw_control(adapter);
2950
2951 /*
2952 * Packet Buffer Allocation (PBA)
2953 	 * Writing PBA sets the receive portion of the buffer;
2954 	 * the remainder is used for the transmit buffer.
2955 */
2956 switch (hw->mac.type) {
2957 case e1000_82575:
2958 pba = E1000_PBA_32K;
2959 break;
2960 case e1000_82576:
2961 case e1000_vfadapt:
2962 pba = E1000_READ_REG(hw, E1000_RXPBS);
2963 pba &= E1000_RXPBS_SIZE_MASK_82576;
2964 break;
2965 case e1000_82580:
2966 case e1000_i350:
2967 case e1000_i354:
2968 case e1000_vfadapt_i350:
2969 pba = E1000_READ_REG(hw, E1000_RXPBS);
2970 pba = e1000_rxpbs_adjust_82580(pba);
2971 break;
2972 case e1000_i210:
2973 case e1000_i211:
2974 pba = E1000_PBA_34K;
2975 default:
2976 break;
2977 }
2978
2979 /* Special needs in case of Jumbo frames */
2980 if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2981 u32 tx_space, min_tx, min_rx;
2982 pba = E1000_READ_REG(hw, E1000_PBA);
2983 tx_space = pba >> 16;
2984 pba &= 0xffff;
2985 min_tx = (adapter->max_frame_size +
2986 sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2987 min_tx = roundup2(min_tx, 1024);
2988 min_tx >>= 10;
2989 min_rx = adapter->max_frame_size;
2990 min_rx = roundup2(min_rx, 1024);
2991 min_rx >>= 10;
2992 if (tx_space < min_tx &&
2993 ((min_tx - tx_space) < pba)) {
2994 pba = pba - (min_tx - tx_space);
2995 /*
2996 * if short on rx space, rx wins
2997 * and must trump tx adjustment
2998 */
2999 if (pba < min_rx)
3000 pba = min_rx;
3001 }
3002 E1000_WRITE_REG(hw, E1000_PBA, pba);
3003 }
3004
3005 	INIT_DEBUGOUT1("igb_init: pba=%dK", pba);
3006
3007 /*
3008 * These parameters control the automatic generation (Tx) and
3009 * response (Rx) to Ethernet PAUSE frames.
3010 * - High water mark should allow for at least two frames to be
3011 * received after sending an XOFF.
3012 * - Low water mark works best when it is very near the high water mark.
3013 * This allows the receiver to restart by sending XON when it has
3014 * drained a bit.
3015 */
3016 hwm = min(((pba << 10) * 9 / 10),
3017 ((pba << 10) - 2 * adapter->max_frame_size));
3018
3019 if (hw->mac.type < e1000_82576) {
3020 fc->high_water = hwm & 0xFFF8; /* 8-byte granularity */
3021 fc->low_water = fc->high_water - 8;
3022 } else {
3023 fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */
3024 fc->low_water = fc->high_water - 16;
3025 }
3026
3027 fc->pause_time = IGB_FC_PAUSE_TIME;
3028 fc->send_xon = TRUE;
3029 if (adapter->fc)
3030 fc->requested_mode = adapter->fc;
3031 else
3032 fc->requested_mode = e1000_fc_default;
3033
3034 /* Issue a global reset */
3035 e1000_reset_hw(hw);
3036 E1000_WRITE_REG(hw, E1000_WUFC, 0);
3037
3038 /* Reset for AutoMediaDetect */
3039 if (adapter->flags & IGB_MEDIA_RESET) {
3040 e1000_setup_init_funcs(hw, TRUE);
3041 e1000_get_bus_info(hw);
3042 adapter->flags &= ~IGB_MEDIA_RESET;
3043 }
3044
3045 if (e1000_init_hw(hw) < 0)
3046 device_printf(dev, "Hardware Initialization Failed\n");
3047
3048 /* Setup DMA Coalescing */
3049 igb_init_dmac(adapter, pba);
3050
3051 E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3052 e1000_get_phy_info(hw);
3053 e1000_check_for_link(hw);
3054 return;
3055 }
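
/*
 * [Annotation, not driver code] Worked example of the watermark math
 * above, assuming an 82575 (pba = 32, i.e. a 32KB RX buffer) and a
 * 1522-byte max frame: pba << 10 = 32768, so hwm =
 * min(32768 * 9 / 10, 32768 - 2 * 1522) = min(29491, 29724) = 29491;
 * masked to the 8-byte granularity used below 82576 this gives a high
 * water mark of 29488 and a low water mark of 29480.
 */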
3056
3057 /*********************************************************************
3058 *
3059 * Setup networking device structure and register an interface.
3060 *
3061 **********************************************************************/
3062 static int
3063 igb_setup_interface(device_t dev, struct adapter *adapter)
3064 {
3065 struct ifnet *ifp;
3066
3067 INIT_DEBUGOUT("igb_setup_interface: begin");
3068
3069 ifp = adapter->ifp = if_alloc(IFT_ETHER);
3070 if (ifp == NULL) {
3071 device_printf(dev, "can not allocate ifnet structure\n");
3072 return (-1);
3073 }
3074 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3075 ifp->if_init = igb_init;
3076 ifp->if_softc = adapter;
3077 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3078 ifp->if_ioctl = igb_ioctl;
3079
3080 /* TSO parameters */
3081 ifp->if_hw_tsomax = IP_MAXPACKET;
3082 ifp->if_hw_tsomaxsegcount = IGB_MAX_SCATTER;
3083 ifp->if_hw_tsomaxsegsize = IGB_TSO_SEG_SIZE;
3084
3085 #ifndef IGB_LEGACY_TX
3086 ifp->if_transmit = igb_mq_start;
3087 ifp->if_qflush = igb_qflush;
3088 #else
3089 ifp->if_start = igb_start;
3090 IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3091 ifp->if_snd.ifq_drv_maxlen = 0;
3092 IFQ_SET_READY(&ifp->if_snd);
3093 #endif
3094
3095 ether_ifattach(ifp, adapter->hw.mac.addr);
3096
3097 ifp->if_capabilities = ifp->if_capenable = 0;
3098
3099 ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3100 #if __FreeBSD_version >= 1000000
3101 ifp->if_capabilities |= IFCAP_HWCSUM_IPV6;
3102 #endif
3103 ifp->if_capabilities |= IFCAP_TSO;
3104 ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3105 ifp->if_capenable = ifp->if_capabilities;
3106
3107 	/* Advertise LRO capability, but don't enable it by default */
3108 ifp->if_capabilities |= IFCAP_LRO;
3109
3110 #ifdef DEVICE_POLLING
3111 ifp->if_capabilities |= IFCAP_POLLING;
3112 #endif
3113
3114 /*
3115 * Tell the upper layer(s) we
3116 * support full VLAN capability.
3117 */
3118 ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3119 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3120 | IFCAP_VLAN_HWTSO
3121 | IFCAP_VLAN_MTU;
3122 ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3123 | IFCAP_VLAN_HWTSO
3124 | IFCAP_VLAN_MTU;
3125
3126 /*
3127 * Enable only WOL MAGIC by default if WOL is enabled in EEPROM.
3128 */
3129 ifp->if_capabilities |= IFCAP_WOL;
3130 if (adapter->wol)
3131 ifp->if_capenable |= IFCAP_WOL_MAGIC;
3132
3133 /*
3134 	** Don't turn this on by default: if vlans are
3135 	** created on another pseudo device (e.g. lagg),
3136 	** then vlan events are not passed through, breaking
3137 	** operation, but with HW FILTER off it works. If
3138 ** using vlans directly on the igb driver you can
3139 ** enable this and get full hardware tag filtering.
3140 */
3141 ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3142
3143 /*
3144 * Specify the media types supported by this adapter and register
3145 * callbacks to update media and link information
3146 */
3147 ifmedia_init(&adapter->media, IFM_IMASK,
3148 igb_media_change, igb_media_status);
3149 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3150 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3151 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
3152 0, NULL);
3153 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3154 } else {
3155 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3156 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3157 0, NULL);
3158 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3159 0, NULL);
3160 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3161 0, NULL);
3162 if (adapter->hw.phy.type != e1000_phy_ife) {
3163 ifmedia_add(&adapter->media,
3164 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3165 ifmedia_add(&adapter->media,
3166 IFM_ETHER | IFM_1000_T, 0, NULL);
3167 }
3168 }
3169 ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3170 ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3171 return (0);
3172 }
3173
3174
3175 /*
3176 * Manage DMA'able memory.
3177 */
3178 static void
3179 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3180 {
3181 if (error)
3182 return;
3183 *(bus_addr_t *) arg = segs[0].ds_addr;
3184 }
3185
3186 static int
3187 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3188 struct igb_dma_alloc *dma, int mapflags)
3189 {
3190 int error;
3191
3192 error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3193 IGB_DBA_ALIGN, 0, /* alignment, bounds */
3194 BUS_SPACE_MAXADDR, /* lowaddr */
3195 BUS_SPACE_MAXADDR, /* highaddr */
3196 NULL, NULL, /* filter, filterarg */
3197 size, /* maxsize */
3198 1, /* nsegments */
3199 size, /* maxsegsize */
3200 0, /* flags */
3201 NULL, /* lockfunc */
3202 NULL, /* lockarg */
3203 &dma->dma_tag);
3204 if (error) {
3205 device_printf(adapter->dev,
3206 "%s: bus_dma_tag_create failed: %d\n",
3207 __func__, error);
3208 goto fail_0;
3209 }
3210
3211 error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3212 BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3213 if (error) {
3214 device_printf(adapter->dev,
3215 "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3216 __func__, (uintmax_t)size, error);
3217 goto fail_2;
3218 }
3219
3220 dma->dma_paddr = 0;
3221 error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3222 size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3223 if (error || dma->dma_paddr == 0) {
3224 device_printf(adapter->dev,
3225 "%s: bus_dmamap_load failed: %d\n",
3226 __func__, error);
3227 goto fail_3;
3228 }
3229
3230 return (0);
3231
3232 fail_3:
3233 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3234 fail_2:
3235 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3236 bus_dma_tag_destroy(dma->dma_tag);
3237 fail_0:
3238 dma->dma_tag = NULL;
3239
3240 return (error);
3241 }
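
/*
 * [Annotation, not driver code] igb_dmamap_cb() above is the standard
 * busdma idiom for recovering the bus address of a contiguous mapping:
 * with BUS_DMA_NOWAIT the callback runs synchronously from
 * bus_dmamap_load() and stores segs[0].ds_addr through the opaque
 * argument. A hypothetical usage sketch (example_alloc is illustrative):
 */
static int
example_alloc(struct adapter *adapter, struct igb_dma_alloc *dma)
{
	int error;

	error = igb_dma_malloc(adapter, 4096, dma, BUS_DMA_NOWAIT);
	if (error != 0)
		return (error);
	/* dma->dma_vaddr is the KVA, dma->dma_paddr the bus address */
	bzero(dma->dma_vaddr, 4096);
	igb_dma_free(adapter, dma);
	return (0);
}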
3242
3243 static void
3244 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3245 {
3246 if (dma->dma_tag == NULL)
3247 return;
3248 if (dma->dma_paddr != 0) {
3249 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3250 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3251 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3252 dma->dma_paddr = 0;
3253 }
3254 if (dma->dma_vaddr != NULL) {
3255 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3256 dma->dma_vaddr = NULL;
3257 }
3258 bus_dma_tag_destroy(dma->dma_tag);
3259 dma->dma_tag = NULL;
3260 }
3261
3262
3263 /*********************************************************************
3264 *
3265 * Allocate memory for the transmit and receive rings, and then
3266 * the descriptors associated with each, called only once at attach.
3267 *
3268 **********************************************************************/
3269 static int
3270 igb_allocate_queues(struct adapter *adapter)
3271 {
3272 device_t dev = adapter->dev;
3273 struct igb_queue *que = NULL;
3274 struct tx_ring *txr = NULL;
3275 struct rx_ring *rxr = NULL;
3276 int rsize, tsize, error = E1000_SUCCESS;
3277 int txconf = 0, rxconf = 0;
3278
3279 /* First allocate the top level queue structs */
3280 if (!(adapter->queues =
3281 (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3282 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3283 device_printf(dev, "Unable to allocate queue memory\n");
3284 error = ENOMEM;
3285 goto fail;
3286 }
3287
3288 /* Next allocate the TX ring struct memory */
3289 if (!(adapter->tx_rings =
3290 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3291 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3292 device_printf(dev, "Unable to allocate TX ring memory\n");
3293 error = ENOMEM;
3294 goto tx_fail;
3295 }
3296
3297 /* Now allocate the RX */
3298 if (!(adapter->rx_rings =
3299 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3300 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3301 device_printf(dev, "Unable to allocate RX ring memory\n");
3302 error = ENOMEM;
3303 goto rx_fail;
3304 }
3305
3306 tsize = roundup2(adapter->num_tx_desc *
3307 sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3308 /*
3309 	 * Now set up the TX queues; txconf is needed to handle the
3310 	 * possibility that things fail midcourse and we need to
3311 	 * unwind the allocations gracefully
3312 */
3313 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3314 /* Set up some basics */
3315 txr = &adapter->tx_rings[i];
3316 txr->adapter = adapter;
3317 txr->me = i;
3318 txr->num_desc = adapter->num_tx_desc;
3319
3320 /* Initialize the TX lock */
3321 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3322 device_get_nameunit(dev), txr->me);
3323 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3324
3325 if (igb_dma_malloc(adapter, tsize,
3326 &txr->txdma, BUS_DMA_NOWAIT)) {
3327 device_printf(dev,
3328 "Unable to allocate TX Descriptor memory\n");
3329 error = ENOMEM;
3330 goto err_tx_desc;
3331 }
3332 txr->tx_base = (union e1000_adv_tx_desc *)txr->txdma.dma_vaddr;
3333 bzero((void *)txr->tx_base, tsize);
3334
3335 /* Now allocate transmit buffers for the ring */
3336 if (igb_allocate_transmit_buffers(txr)) {
3337 device_printf(dev,
3338 "Critical Failure setting up transmit buffers\n");
3339 error = ENOMEM;
3340 goto err_tx_desc;
3341 }
3342 #ifndef IGB_LEGACY_TX
3343 /* Allocate a buf ring */
3344 txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
3345 M_WAITOK, &txr->tx_mtx);
3346 #endif
3347 }
3348
3349 /*
3350 * Next the RX queues...
3351 */
3352 rsize = roundup2(adapter->num_rx_desc *
3353 sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3354 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3355 rxr = &adapter->rx_rings[i];
3356 rxr->adapter = adapter;
3357 rxr->me = i;
3358
3359 /* Initialize the RX lock */
3360 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3361 		    device_get_nameunit(dev), rxr->me);
3362 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3363
3364 if (igb_dma_malloc(adapter, rsize,
3365 &rxr->rxdma, BUS_DMA_NOWAIT)) {
3366 device_printf(dev,
3367 "Unable to allocate RxDescriptor memory\n");
3368 error = ENOMEM;
3369 goto err_rx_desc;
3370 }
3371 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3372 bzero((void *)rxr->rx_base, rsize);
3373
3374 /* Allocate receive buffers for the ring*/
3375 if (igb_allocate_receive_buffers(rxr)) {
3376 device_printf(dev,
3377 "Critical Failure setting up receive buffers\n");
3378 error = ENOMEM;
3379 goto err_rx_desc;
3380 }
3381 }
3382
3383 /*
3384 ** Finally set up the queue holding structs
3385 */
3386 for (int i = 0; i < adapter->num_queues; i++) {
3387 que = &adapter->queues[i];
3388 que->adapter = adapter;
3389 que->txr = &adapter->tx_rings[i];
3390 que->rxr = &adapter->rx_rings[i];
3391 }
3392
3393 return (0);
3394
3395 err_rx_desc:
3396 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3397 igb_dma_free(adapter, &rxr->rxdma);
3398 err_tx_desc:
3399 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3400 igb_dma_free(adapter, &txr->txdma);
3401 free(adapter->rx_rings, M_DEVBUF);
3402 rx_fail:
3403 #ifndef IGB_LEGACY_TX
3404 buf_ring_free(txr->br, M_DEVBUF);
3405 #endif
3406 free(adapter->tx_rings, M_DEVBUF);
3407 tx_fail:
3408 free(adapter->queues, M_DEVBUF);
3409 fail:
3410 return (error);
3411 }
3412
3413 /*********************************************************************
3414 *
3415 * Allocate memory for tx_buffer structures. The tx_buffer stores all
3416 * the information needed to transmit a packet on the wire. This is
3417 * called only once at attach, setup is done every reset.
3418 *
3419 **********************************************************************/
3420 static int
3421 igb_allocate_transmit_buffers(struct tx_ring *txr)
3422 {
3423 struct adapter *adapter = txr->adapter;
3424 device_t dev = adapter->dev;
3425 struct igb_tx_buf *txbuf;
3426 int error, i;
3427
3428 /*
3429 * Setup DMA descriptor areas.
3430 */
3431 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3432 1, 0, /* alignment, bounds */
3433 BUS_SPACE_MAXADDR, /* lowaddr */
3434 BUS_SPACE_MAXADDR, /* highaddr */
3435 NULL, NULL, /* filter, filterarg */
3436 IGB_TSO_SIZE, /* maxsize */
3437 IGB_MAX_SCATTER, /* nsegments */
3438 PAGE_SIZE, /* maxsegsize */
3439 0, /* flags */
3440 NULL, /* lockfunc */
3441 NULL, /* lockfuncarg */
3442 &txr->txtag))) {
3443 device_printf(dev,"Unable to allocate TX DMA tag\n");
3444 goto fail;
3445 }
3446
3447 if (!(txr->tx_buffers =
3448 (struct igb_tx_buf *) malloc(sizeof(struct igb_tx_buf) *
3449 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3450 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3451 error = ENOMEM;
3452 goto fail;
3453 }
3454
3455 /* Create the descriptor buffer dma maps */
3456 txbuf = txr->tx_buffers;
3457 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3458 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3459 if (error != 0) {
3460 device_printf(dev, "Unable to create TX DMA map\n");
3461 goto fail;
3462 }
3463 }
3464
3465 return 0;
3466 fail:
3467 	/* We free all; this handles the case where we failed partway through */
3468 igb_free_transmit_structures(adapter);
3469 return (error);
3470 }
3471
3472 /*********************************************************************
3473 *
3474 * Initialize a transmit ring.
3475 *
3476 **********************************************************************/
3477 static void
3478 igb_setup_transmit_ring(struct tx_ring *txr)
3479 {
3480 struct adapter *adapter = txr->adapter;
3481 struct igb_tx_buf *txbuf;
3482 int i;
3483 #ifdef DEV_NETMAP
3484 struct netmap_adapter *na = NA(adapter->ifp);
3485 struct netmap_slot *slot;
3486 #endif /* DEV_NETMAP */
3487
3488 /* Clear the old descriptor contents */
3489 IGB_TX_LOCK(txr);
3490 #ifdef DEV_NETMAP
3491 slot = netmap_reset(na, NR_TX, txr->me, 0);
3492 #endif /* DEV_NETMAP */
3493 bzero((void *)txr->tx_base,
3494 (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3495 /* Reset indices */
3496 txr->next_avail_desc = 0;
3497 txr->next_to_clean = 0;
3498
3499 /* Free any existing tx buffers. */
3500 txbuf = txr->tx_buffers;
3501 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3502 if (txbuf->m_head != NULL) {
3503 bus_dmamap_sync(txr->txtag, txbuf->map,
3504 BUS_DMASYNC_POSTWRITE);
3505 bus_dmamap_unload(txr->txtag, txbuf->map);
3506 m_freem(txbuf->m_head);
3507 txbuf->m_head = NULL;
3508 }
3509 #ifdef DEV_NETMAP
3510 if (slot) {
3511 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3512 /* no need to set the address */
3513 netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
3514 }
3515 #endif /* DEV_NETMAP */
3516 /* clear the watch index */
3517 txbuf->eop = NULL;
3518 }
3519
3520 /* Set number of descriptors available */
3521 txr->tx_avail = adapter->num_tx_desc;
3522
3523 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3524 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3525 IGB_TX_UNLOCK(txr);
3526 }
3527
3528 /*********************************************************************
3529 *
3530 * Initialize all transmit rings.
3531 *
3532 **********************************************************************/
3533 static void
3534 igb_setup_transmit_structures(struct adapter *adapter)
3535 {
3536 struct tx_ring *txr = adapter->tx_rings;
3537
3538 for (int i = 0; i < adapter->num_queues; i++, txr++)
3539 igb_setup_transmit_ring(txr);
3540
3541 return;
3542 }
3543
3544 /*********************************************************************
3545 *
3546 * Enable transmit unit.
3547 *
3548 **********************************************************************/
3549 static void
3550 igb_initialize_transmit_units(struct adapter *adapter)
3551 {
3552 struct tx_ring *txr = adapter->tx_rings;
3553 struct e1000_hw *hw = &adapter->hw;
3554 u32 tctl, txdctl;
3555
3556 INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3557 tctl = txdctl = 0;
3558
3559 /* Setup the Tx Descriptor Rings */
3560 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3561 u64 bus_addr = txr->txdma.dma_paddr;
3562
3563 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3564 adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3565 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3566 (uint32_t)(bus_addr >> 32));
3567 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3568 (uint32_t)bus_addr);
3569
3570 /* Setup the HW Tx Head and Tail descriptor pointers */
3571 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3572 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3573
3574 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3575 E1000_READ_REG(hw, E1000_TDBAL(i)),
3576 E1000_READ_REG(hw, E1000_TDLEN(i)));
3577
3578 txr->queue_status = IGB_QUEUE_IDLE;
3579
3580 txdctl |= IGB_TX_PTHRESH;
3581 txdctl |= IGB_TX_HTHRESH << 8;
3582 txdctl |= IGB_TX_WTHRESH << 16;
3583 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3584 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3585 }
3586
3587 if (adapter->vf_ifp)
3588 return;
3589
3590 e1000_config_collision_dist(hw);
3591
3592 /* Program the Transmit Control Register */
3593 tctl = E1000_READ_REG(hw, E1000_TCTL);
3594 tctl &= ~E1000_TCTL_CT;
3595 tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3596 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3597
3598 /* This write will effectively turn on the transmit unit. */
3599 E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3600 }
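
/*
 * [Annotation, not driver code] The TXDCTL value assembled above packs
 * the prefetch, host, and write-back thresholds into one register:
 * PTHRESH in the low byte, HTHRESH shifted left by 8, WTHRESH shifted
 * left by 16, plus the queue-enable bit (E1000_TXDCTL_QUEUE_ENABLE).
 */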
3601
3602 /*********************************************************************
3603 *
3604 * Free all transmit rings.
3605 *
3606 **********************************************************************/
3607 static void
3608 igb_free_transmit_structures(struct adapter *adapter)
3609 {
3610 struct tx_ring *txr = adapter->tx_rings;
3611
3612 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3613 IGB_TX_LOCK(txr);
3614 igb_free_transmit_buffers(txr);
3615 igb_dma_free(adapter, &txr->txdma);
3616 IGB_TX_UNLOCK(txr);
3617 IGB_TX_LOCK_DESTROY(txr);
3618 }
3619 free(adapter->tx_rings, M_DEVBUF);
3620 }
3621
3622 /*********************************************************************
3623 *
3624 * Free transmit ring related data structures.
3625 *
3626 **********************************************************************/
3627 static void
3628 igb_free_transmit_buffers(struct tx_ring *txr)
3629 {
3630 struct adapter *adapter = txr->adapter;
3631 struct igb_tx_buf *tx_buffer;
3632 int i;
3633
3634 INIT_DEBUGOUT("free_transmit_ring: begin");
3635
3636 if (txr->tx_buffers == NULL)
3637 return;
3638
3639 tx_buffer = txr->tx_buffers;
3640 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3641 if (tx_buffer->m_head != NULL) {
3642 bus_dmamap_sync(txr->txtag, tx_buffer->map,
3643 BUS_DMASYNC_POSTWRITE);
3644 bus_dmamap_unload(txr->txtag,
3645 tx_buffer->map);
3646 m_freem(tx_buffer->m_head);
3647 tx_buffer->m_head = NULL;
3648 if (tx_buffer->map != NULL) {
3649 bus_dmamap_destroy(txr->txtag,
3650 tx_buffer->map);
3651 tx_buffer->map = NULL;
3652 }
3653 } else if (tx_buffer->map != NULL) {
3654 bus_dmamap_unload(txr->txtag,
3655 tx_buffer->map);
3656 bus_dmamap_destroy(txr->txtag,
3657 tx_buffer->map);
3658 tx_buffer->map = NULL;
3659 }
3660 }
3661 #ifndef IGB_LEGACY_TX
3662 if (txr->br != NULL)
3663 buf_ring_free(txr->br, M_DEVBUF);
3664 #endif
3665 if (txr->tx_buffers != NULL) {
3666 free(txr->tx_buffers, M_DEVBUF);
3667 txr->tx_buffers = NULL;
3668 }
3669 if (txr->txtag != NULL) {
3670 bus_dma_tag_destroy(txr->txtag);
3671 txr->txtag = NULL;
3672 }
3673 return;
3674 }
3675
3676 /**********************************************************************
3677 *
3678 * Setup work for hardware segmentation offload (TSO) on
3679 * adapters using advanced tx descriptors
3680 *
3681 **********************************************************************/
3682 static int
3683 igb_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3684 u32 *cmd_type_len, u32 *olinfo_status)
3685 {
3686 struct adapter *adapter = txr->adapter;
3687 struct e1000_adv_tx_context_desc *TXD;
3688 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3689 u32 mss_l4len_idx = 0, paylen;
3690 u16 vtag = 0, eh_type;
3691 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3692 struct ether_vlan_header *eh;
3693 #ifdef INET6
3694 struct ip6_hdr *ip6;
3695 #endif
3696 #ifdef INET
3697 struct ip *ip;
3698 #endif
3699 struct tcphdr *th;
3700
3701
3702 /*
3703 * Determine where frame payload starts.
3704 * Jump over vlan headers if already present
3705 */
3706 eh = mtod(mp, struct ether_vlan_header *);
3707 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3708 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3709 eh_type = eh->evl_proto;
3710 } else {
3711 ehdrlen = ETHER_HDR_LEN;
3712 eh_type = eh->evl_encap_proto;
3713 }
3714
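	/*
	 * For TSO the L4 checksum seeded below is only the pseudo-header
	 * sum with a zero length; the hardware completes the full TCP
	 * checksum for every segment it generates.
	 */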
3715 switch (ntohs(eh_type)) {
3716 #ifdef INET6
3717 case ETHERTYPE_IPV6:
3718 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3719 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
3720 if (ip6->ip6_nxt != IPPROTO_TCP)
3721 return (ENXIO);
3722 ip_hlen = sizeof(struct ip6_hdr);
3724 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3725 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3726 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3727 break;
3728 #endif
3729 #ifdef INET
3730 case ETHERTYPE_IP:
3731 ip = (struct ip *)(mp->m_data + ehdrlen);
3732 if (ip->ip_p != IPPROTO_TCP)
3733 return (ENXIO);
3734 ip->ip_sum = 0;
3735 ip_hlen = ip->ip_hl << 2;
3736 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3737 th->th_sum = in_pseudo(ip->ip_src.s_addr,
3738 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3739 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3740 /* Tell transmit desc to also do IPv4 checksum. */
3741 *olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3742 break;
3743 #endif
3744 default:
3745 device_printf(adapter->dev,
3746 		    "CSUM_TSO but no supported IP version (0x%04x)\n",
3747 ntohs(eh_type));
3748 return (ENXIO);
3749 }
3750
3751 ctxd = txr->next_avail_desc;
3752 TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3753
3754 tcp_hlen = th->th_off << 2;
3755
3756 /* This is used in the transmit desc in encap */
3757 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
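	/*
	 * Illustrative example: a 9014-byte TSO chain with a 14-byte
	 * ethernet header, 20-byte IPv4 header and 20-byte TCP header
	 * yields paylen = 8960, which the hardware slices into
	 * tso_segsz-sized segments.
	 */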
3758
3759 /* VLAN MACLEN IPLEN */
3760 if (mp->m_flags & M_VLANTAG) {
3761 vtag = htole16(mp->m_pkthdr.ether_vtag);
3762 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3763 }
3764
3765 vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3766 vlan_macip_lens |= ip_hlen;
3767 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3768
3769 /* ADV DTYPE TUCMD */
3770 type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3771 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3772 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3773
3774 /* MSS L4LEN IDX */
3775 mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3776 mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3777 /* 82575 needs the queue index added */
3778 if (adapter->hw.mac.type == e1000_82575)
3779 mss_l4len_idx |= txr->me << 4;
3780 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3781
3782 TXD->seqnum_seed = htole32(0);
3783
3784 if (++ctxd == txr->num_desc)
3785 ctxd = 0;
3786
3787 txr->tx_avail--;
3788 txr->next_avail_desc = ctxd;
3789 *cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3790 *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3791 *olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
3792 ++txr->tso_tx;
3793 return (0);
3794 }
3795
3796 /*********************************************************************
3797 *
3798 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
3799 *
3800 **********************************************************************/
3801
3802 static int
3803 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3804 u32 *cmd_type_len, u32 *olinfo_status)
3805 {
3806 struct e1000_adv_tx_context_desc *TXD;
3807 struct adapter *adapter = txr->adapter;
3808 struct ether_vlan_header *eh;
3809 struct ip *ip;
3810 struct ip6_hdr *ip6;
3811 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
3812 int ehdrlen, ip_hlen = 0;
3813 u16 etype;
3814 u8 ipproto = 0;
3815 int ctxd = txr->next_avail_desc;
3816 u16 vtag = 0;
3817
3818 /* First check if TSO is to be used */
3819 if (mp->m_pkthdr.csum_flags & CSUM_TSO)
3820 return (igb_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3821
3822 /* Indicate the whole packet as payload when not doing TSO */
3823 *olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT;
3824
3825 /* Now ready a context descriptor */
3826 TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3827
3828 /*
3829 ** In advanced descriptors the vlan tag must
3830 ** be placed into the context descriptor. Hence
3831 ** we need to make one even if not doing offloads.
3832 */
3833 if (mp->m_flags & M_VLANTAG) {
3834 vtag = htole16(mp->m_pkthdr.ether_vtag);
3835 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3836 } else if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0) {
3837 return (0);
3838 }
3839
3840 /*
3841 * Determine where frame payload starts.
3842 * Jump over vlan headers if already present,
3843 * helpful for QinQ too.
3844 */
3845 eh = mtod(mp, struct ether_vlan_header *);
3846 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3847 etype = ntohs(eh->evl_proto);
3848 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3849 } else {
3850 etype = ntohs(eh->evl_encap_proto);
3851 ehdrlen = ETHER_HDR_LEN;
3852 }
3853
3854 /* Set the ether header length */
3855 vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3856
3857 switch (etype) {
3858 case ETHERTYPE_IP:
3859 ip = (struct ip *)(mp->m_data + ehdrlen);
3860 ip_hlen = ip->ip_hl << 2;
3861 ipproto = ip->ip_p;
3862 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3863 break;
3864 case ETHERTYPE_IPV6:
3865 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3866 ip_hlen = sizeof(struct ip6_hdr);
3867 /* XXX-BZ this will go badly in case of ext hdrs. */
3868 ipproto = ip6->ip6_nxt;
3869 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3870 break;
3871 default:
3872 break;
3873 }
3874
3875 vlan_macip_lens |= ip_hlen;
3876 type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3877
3878 switch (ipproto) {
3879 case IPPROTO_TCP:
3880 #if __FreeBSD_version >= 1000000
3881 if (mp->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) {
3882 #else
3883 if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3884 #endif
3885 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3886 *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3887 }
3888 break;
3889 case IPPROTO_UDP:
3890 #if __FreeBSD_version >= 1000000
3891 if (mp->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP)) {
3892 #else
3893 if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3894 #endif
3895 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3896 *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3897 }
3898 break;
3899
3900 #if __FreeBSD_version >= 800000
3901 case IPPROTO_SCTP:
3902 #if __FreeBSD_version >= 1000000
3903 if (mp->m_pkthdr.csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP)) {
3904 #else
3905 if (mp->m_pkthdr.csum_flags & CSUM_SCTP) {
3906 #endif
3907 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3908 *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3909 }
3910 break;
3911 #endif
3912 default:
3913 break;
3914 }
3915
3916 /* 82575 needs the queue index added */
3917 if (adapter->hw.mac.type == e1000_82575)
3918 mss_l4len_idx = txr->me << 4;
3919
3920 /* Now copy bits into descriptor */
3921 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3922 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3923 TXD->seqnum_seed = htole32(0);
3924 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3925
3926 /* We've consumed the first desc, adjust counters */
3927 if (++ctxd == txr->num_desc)
3928 ctxd = 0;
3929 txr->next_avail_desc = ctxd;
3930 --txr->tx_avail;
3931
3932 return (0);
3933 }
3934
3935 /**********************************************************************
3936 *
3937 * Examine each tx_buffer in the used queue. If the hardware is done
3938 * processing the packet then free associated resources. The
3939 * tx_buffer is put back on the free queue.
3940 *
3941 * A TRUE return means there's work in the ring to clean; FALSE means it's empty.
3942 **********************************************************************/
3943 static bool
3944 igb_txeof(struct tx_ring *txr)
3945 {
3946 struct adapter *adapter = txr->adapter;
3947 struct ifnet *ifp = adapter->ifp;
3948 u32 work, processed = 0;
3949 int limit = adapter->tx_process_limit;
3950 struct igb_tx_buf *buf;
3951 union e1000_adv_tx_desc *txd;
3952
3953 mtx_assert(&txr->tx_mtx, MA_OWNED);
3954
3955 #ifdef DEV_NETMAP
3956 if (netmap_tx_irq(ifp, txr->me))
3957 return (FALSE);
3958 #endif /* DEV_NETMAP */
3959
3960 if (txr->tx_avail == txr->num_desc) {
3961 txr->queue_status = IGB_QUEUE_IDLE;
3962 return FALSE;
3963 }
3964
3965 /* Get work starting point */
3966 work = txr->next_to_clean;
3967 buf = &txr->tx_buffers[work];
3968 txd = &txr->tx_base[work];
3969 work -= txr->num_desc; /* The distance to ring end */
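	/*
	 * e.g. with num_desc = 1024 and next_to_clean = 100, work is now
	 * -924; each increment below steps it toward zero, and work == 0
	 * is the cue to wrap buf/txd back to the start of the ring.
	 */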
3970 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3971 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3972 do {
3973 union e1000_adv_tx_desc *eop = buf->eop;
3974 if (eop == NULL) /* No work */
3975 break;
3976
3977 if ((eop->wb.status & E1000_TXD_STAT_DD) == 0)
3978 break; /* I/O not complete */
3979
3980 if (buf->m_head) {
3981 txr->bytes +=
3982 buf->m_head->m_pkthdr.len;
3983 bus_dmamap_sync(txr->txtag,
3984 buf->map,
3985 BUS_DMASYNC_POSTWRITE);
3986 bus_dmamap_unload(txr->txtag,
3987 buf->map);
3988 m_freem(buf->m_head);
3989 buf->m_head = NULL;
3990 }
3991 buf->eop = NULL;
3992 ++txr->tx_avail;
3993
3994 /* We clean the range if multi segment */
3995 while (txd != eop) {
3996 ++txd;
3997 ++buf;
3998 ++work;
3999 /* wrap the ring? */
4000 if (__predict_false(!work)) {
4001 work -= txr->num_desc;
4002 buf = txr->tx_buffers;
4003 txd = txr->tx_base;
4004 }
4005 if (buf->m_head) {
4006 txr->bytes +=
4007 buf->m_head->m_pkthdr.len;
4008 bus_dmamap_sync(txr->txtag,
4009 buf->map,
4010 BUS_DMASYNC_POSTWRITE);
4011 bus_dmamap_unload(txr->txtag,
4012 buf->map);
4013 m_freem(buf->m_head);
4014 buf->m_head = NULL;
4015 }
4016 ++txr->tx_avail;
4017 buf->eop = NULL;
4018
4019 }
4020 ++txr->packets;
4021 ++processed;
4022 ++ifp->if_opackets;
4023 txr->watchdog_time = ticks;
4024
4025 /* Try the next packet */
4026 ++txd;
4027 ++buf;
4028 ++work;
4029 /* reset with a wrap */
4030 if (__predict_false(!work)) {
4031 work -= txr->num_desc;
4032 buf = txr->tx_buffers;
4033 txd = txr->tx_base;
4034 }
4035 prefetch(txd);
4036 } while (__predict_true(--limit));
4037
4038 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4039 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4040
4041 work += txr->num_desc;
4042 txr->next_to_clean = work;
4043
4044 /*
4045 	** Watchdog calculation: we know there's work
4046 	** outstanding, or the first return above would
4047 	** have been taken; so if nothing was processed
4048 	** for too long, the queue is hung.
4049 */
4050 if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
4051 txr->queue_status |= IGB_QUEUE_HUNG;
4052
4053 if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
4054 txr->queue_status &= ~IGB_QUEUE_DEPLETED;
4055
4056 if (txr->tx_avail == txr->num_desc) {
4057 txr->queue_status = IGB_QUEUE_IDLE;
4058 return (FALSE);
4059 }
4060
4061 return (TRUE);
4062 }
4063
4064 /*********************************************************************
4065 *
4066 * Refresh mbuf buffers for RX descriptor rings
4067 * - now keeps its own state so discards due to resource
4068 * exhaustion are unnecessary; if an mbuf cannot be obtained
4069 * it just returns, keeping its placeholder, so it can simply
4070 * be called again to retry.
4071 *
4072 **********************************************************************/
4073 static void
4074 igb_refresh_mbufs(struct rx_ring *rxr, int limit)
4075 {
4076 struct adapter *adapter = rxr->adapter;
4077 bus_dma_segment_t hseg[1];
4078 bus_dma_segment_t pseg[1];
4079 struct igb_rx_buf *rxbuf;
4080 struct mbuf *mh, *mp;
4081 int i, j, nsegs, error;
4082 bool refreshed = FALSE;
4083
4084 i = j = rxr->next_to_refresh;
4085 /*
4086 ** Get one descriptor beyond
4087 ** our work mark to control
4088 ** the loop.
4089 */
4090 if (++j == adapter->num_rx_desc)
4091 j = 0;
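	/*
	 * j always leads i by one slot, so the loop stops one descriptor
	 * short of 'limit', and i only advances once a slot has been
	 * successfully refreshed.
	 */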
4092
4093 while (j != limit) {
4094 rxbuf = &rxr->rx_buffers[i];
4095 /* No hdr mbuf used with header split off */
4096 if (rxr->hdr_split == FALSE)
4097 goto no_split;
4098 if (rxbuf->m_head == NULL) {
4099 mh = m_gethdr(M_NOWAIT, MT_DATA);
4100 if (mh == NULL)
4101 goto update;
4102 } else
4103 mh = rxbuf->m_head;
4104
4105 		mh->m_pkthdr.len = mh->m_len = MHLEN;
4107 mh->m_flags |= M_PKTHDR;
4108 /* Get the memory mapping */
4109 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4110 rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4111 if (error != 0) {
4112 printf("Refresh mbufs: hdr dmamap load"
4113 " failure - %d\n", error);
4114 m_free(mh);
4115 rxbuf->m_head = NULL;
4116 goto update;
4117 }
4118 rxbuf->m_head = mh;
4119 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4120 BUS_DMASYNC_PREREAD);
4121 rxr->rx_base[i].read.hdr_addr =
4122 htole64(hseg[0].ds_addr);
4123 no_split:
4124 if (rxbuf->m_pack == NULL) {
4125 mp = m_getjcl(M_NOWAIT, MT_DATA,
4126 M_PKTHDR, adapter->rx_mbuf_sz);
4127 if (mp == NULL)
4128 goto update;
4129 } else
4130 mp = rxbuf->m_pack;
4131
4132 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4133 /* Get the memory mapping */
4134 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4135 rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4136 if (error != 0) {
4137 printf("Refresh mbufs: payload dmamap load"
4138 " failure - %d\n", error);
4139 m_free(mp);
4140 rxbuf->m_pack = NULL;
4141 goto update;
4142 }
4143 rxbuf->m_pack = mp;
4144 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4145 BUS_DMASYNC_PREREAD);
4146 rxr->rx_base[i].read.pkt_addr =
4147 htole64(pseg[0].ds_addr);
4148 refreshed = TRUE; /* I feel wefreshed :) */
4149
4150 i = j; /* our next is precalculated */
4151 rxr->next_to_refresh = i;
4152 if (++j == adapter->num_rx_desc)
4153 j = 0;
4154 }
4155 update:
4156 if (refreshed) /* update tail */
4157 E1000_WRITE_REG(&adapter->hw,
4158 E1000_RDT(rxr->me), rxr->next_to_refresh);
4159 return;
4160 }
4161
4162
4163 /*********************************************************************
4164 *
4165 * Allocate memory for rx_buffer structures. Since we use one
4166 * rx_buffer per received packet, the maximum number of rx_buffer's
4167 * that we'll need is equal to the number of receive descriptors
4168 * that we've allocated.
4169 *
4170 **********************************************************************/
4171 static int
4172 igb_allocate_receive_buffers(struct rx_ring *rxr)
4173 {
4174 struct adapter *adapter = rxr->adapter;
4175 device_t dev = adapter->dev;
4176 struct igb_rx_buf *rxbuf;
4177 int i, bsize, error;
4178
4179 bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4180 if (!(rxr->rx_buffers =
4181 (struct igb_rx_buf *) malloc(bsize,
4182 M_DEVBUF, M_NOWAIT | M_ZERO))) {
4183 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4184 error = ENOMEM;
4185 goto fail;
4186 }
4187
4188 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4189 1, 0, /* alignment, bounds */
4190 BUS_SPACE_MAXADDR, /* lowaddr */
4191 BUS_SPACE_MAXADDR, /* highaddr */
4192 NULL, NULL, /* filter, filterarg */
4193 MSIZE, /* maxsize */
4194 1, /* nsegments */
4195 MSIZE, /* maxsegsize */
4196 0, /* flags */
4197 NULL, /* lockfunc */
4198 NULL, /* lockfuncarg */
4199 &rxr->htag))) {
4200 device_printf(dev, "Unable to create RX DMA tag\n");
4201 goto fail;
4202 }
4203
4204 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4205 1, 0, /* alignment, bounds */
4206 BUS_SPACE_MAXADDR, /* lowaddr */
4207 BUS_SPACE_MAXADDR, /* highaddr */
4208 NULL, NULL, /* filter, filterarg */
4209 MJUM9BYTES, /* maxsize */
4210 1, /* nsegments */
4211 MJUM9BYTES, /* maxsegsize */
4212 0, /* flags */
4213 NULL, /* lockfunc */
4214 NULL, /* lockfuncarg */
4215 &rxr->ptag))) {
4216 device_printf(dev, "Unable to create RX payload DMA tag\n");
4217 goto fail;
4218 }
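	/*
	 * Two tags are used: a small MSIZE tag for split-off headers and
	 * a jumbo-capable MJUM9BYTES tag for payload clusters; below,
	 * each descriptor gets one DMA map from each tag.
	 */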
4219
4220 for (i = 0; i < adapter->num_rx_desc; i++) {
4221 rxbuf = &rxr->rx_buffers[i];
4222 error = bus_dmamap_create(rxr->htag, 0, &rxbuf->hmap);
4223 if (error) {
4224 device_printf(dev,
4225 "Unable to create RX head DMA maps\n");
4226 goto fail;
4227 }
4228 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
4229 if (error) {
4230 device_printf(dev,
4231 "Unable to create RX packet DMA maps\n");
4232 goto fail;
4233 }
4234 }
4235
4236 return (0);
4237
4238 fail:
4239 /* Frees all, but can handle partial completion */
4240 igb_free_receive_structures(adapter);
4241 return (error);
4242 }
4243
4244
4245 static void
4246 igb_free_receive_ring(struct rx_ring *rxr)
4247 {
4248 struct adapter *adapter = rxr->adapter;
4249 struct igb_rx_buf *rxbuf;
4250
4251
4252 for (int i = 0; i < adapter->num_rx_desc; i++) {
4253 rxbuf = &rxr->rx_buffers[i];
4254 if (rxbuf->m_head != NULL) {
4255 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4256 BUS_DMASYNC_POSTREAD);
4257 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4258 rxbuf->m_head->m_flags |= M_PKTHDR;
4259 m_freem(rxbuf->m_head);
4260 }
4261 if (rxbuf->m_pack != NULL) {
4262 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4263 BUS_DMASYNC_POSTREAD);
4264 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4265 rxbuf->m_pack->m_flags |= M_PKTHDR;
4266 m_freem(rxbuf->m_pack);
4267 }
4268 rxbuf->m_head = NULL;
4269 rxbuf->m_pack = NULL;
4270 }
4271 }
4272
4273
4274 /*********************************************************************
4275 *
4276 * Initialize a receive ring and its buffers.
4277 *
4278 **********************************************************************/
4279 static int
4280 igb_setup_receive_ring(struct rx_ring *rxr)
4281 {
4282 struct adapter *adapter;
4283 struct ifnet *ifp;
4284 device_t dev;
4285 struct igb_rx_buf *rxbuf;
4286 bus_dma_segment_t pseg[1], hseg[1];
4287 struct lro_ctrl *lro = &rxr->lro;
4288 int rsize, nsegs, error = 0;
4289 #ifdef DEV_NETMAP
4290 struct netmap_adapter *na = NA(rxr->adapter->ifp);
4291 struct netmap_slot *slot;
4292 #endif /* DEV_NETMAP */
4293
4294 adapter = rxr->adapter;
4295 dev = adapter->dev;
4296 ifp = adapter->ifp;
4297
4298 /* Clear the ring contents */
4299 IGB_RX_LOCK(rxr);
4300 #ifdef DEV_NETMAP
4301 slot = netmap_reset(na, NR_RX, rxr->me, 0);
4302 #endif /* DEV_NETMAP */
4303 rsize = roundup2(adapter->num_rx_desc *
4304 sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4305 bzero((void *)rxr->rx_base, rsize);
4306
4307 /*
4308 ** Free current RX buffer structures and their mbufs
4309 */
4310 igb_free_receive_ring(rxr);
4311
4312 /* Configure for header split? */
4313 if (igb_header_split)
4314 rxr->hdr_split = TRUE;
4315
4316 /* Now replenish the ring mbufs */
4317 for (int j = 0; j < adapter->num_rx_desc; ++j) {
4318 struct mbuf *mh, *mp;
4319
4320 rxbuf = &rxr->rx_buffers[j];
4321 #ifdef DEV_NETMAP
4322 if (slot) {
4323 /* slot sj is mapped to the i-th NIC-ring entry */
4324 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4325 uint64_t paddr;
4326 void *addr;
4327
4328 addr = PNMB(na, slot + sj, &paddr);
4329 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
4330 /* Update descriptor */
4331 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4332 continue;
4333 }
4334 #endif /* DEV_NETMAP */
4335 if (rxr->hdr_split == FALSE)
4336 goto skip_head;
4337
4338 /* First the header */
4339 rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
4340 if (rxbuf->m_head == NULL) {
4341 error = ENOBUFS;
4342 goto fail;
4343 }
4344 m_adj(rxbuf->m_head, ETHER_ALIGN);
4345 mh = rxbuf->m_head;
4346 mh->m_len = mh->m_pkthdr.len = MHLEN;
4347 mh->m_flags |= M_PKTHDR;
4348 /* Get the memory mapping */
4349 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4350 rxbuf->hmap, rxbuf->m_head, hseg,
4351 &nsegs, BUS_DMA_NOWAIT);
4352 if (error != 0) /* Nothing elegant to do here */
4353 goto fail;
4354 bus_dmamap_sync(rxr->htag,
4355 rxbuf->hmap, BUS_DMASYNC_PREREAD);
4356 /* Update descriptor */
4357 rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4358
4359 skip_head:
4360 /* Now the payload cluster */
4361 rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
4362 M_PKTHDR, adapter->rx_mbuf_sz);
4363 if (rxbuf->m_pack == NULL) {
4364 error = ENOBUFS;
4365 goto fail;
4366 }
4367 mp = rxbuf->m_pack;
4368 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4369 /* Get the memory mapping */
4370 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4371 rxbuf->pmap, mp, pseg,
4372 &nsegs, BUS_DMA_NOWAIT);
4373 if (error != 0)
4374 goto fail;
4375 bus_dmamap_sync(rxr->ptag,
4376 rxbuf->pmap, BUS_DMASYNC_PREREAD);
4377 /* Update descriptor */
4378 rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4379 }
4380
4381 /* Setup our descriptor indices */
4382 rxr->next_to_check = 0;
4383 rxr->next_to_refresh = adapter->num_rx_desc - 1;
4384 rxr->lro_enabled = FALSE;
4385 rxr->rx_split_packets = 0;
4386 rxr->rx_bytes = 0;
4387
4388 rxr->fmp = NULL;
4389 rxr->lmp = NULL;
4390
4391 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4392 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4393
4394 /*
4395 	** Now set up the LRO interface; we
4396 	** also only do header split when LRO
4397 	** is enabled, since header split is
4398 	** so often undesirable without it.
4399 */
4400 if (ifp->if_capenable & IFCAP_LRO) {
4401 error = tcp_lro_init(lro);
4402 if (error) {
4403 device_printf(dev, "LRO Initialization failed!\n");
4404 goto fail;
4405 }
4406 INIT_DEBUGOUT("RX LRO Initialized\n");
4407 rxr->lro_enabled = TRUE;
4408 lro->ifp = adapter->ifp;
4409 }
4410
4411 IGB_RX_UNLOCK(rxr);
4412 return (0);
4413
4414 fail:
4415 igb_free_receive_ring(rxr);
4416 IGB_RX_UNLOCK(rxr);
4417 return (error);
4418 }
4419
4420
4421 /*********************************************************************
4422 *
4423 * Initialize all receive rings.
4424 *
4425 **********************************************************************/
4426 static int
4427 igb_setup_receive_structures(struct adapter *adapter)
4428 {
4429 struct rx_ring *rxr = adapter->rx_rings;
4430 int i;
4431
4432 for (i = 0; i < adapter->num_queues; i++, rxr++)
4433 if (igb_setup_receive_ring(rxr))
4434 goto fail;
4435
4436 return (0);
4437 fail:
4438 /*
4439 	 * Free RX buffers allocated so far; we will only handle
4440 	 * the rings that completed, since the failing case will have
4441 * cleaned up for itself. 'i' is the endpoint.
4442 */
4443 for (int j = 0; j < i; ++j) {
4444 rxr = &adapter->rx_rings[j];
4445 IGB_RX_LOCK(rxr);
4446 igb_free_receive_ring(rxr);
4447 IGB_RX_UNLOCK(rxr);
4448 }
4449
4450 return (ENOBUFS);
4451 }
4452
4453 /*********************************************************************
4454 *
4455 * Enable receive unit.
4456 *
4457 **********************************************************************/
4458 static void
4459 igb_initialize_receive_units(struct adapter *adapter)
4460 {
4461 struct rx_ring *rxr = adapter->rx_rings;
4462 struct ifnet *ifp = adapter->ifp;
4463 struct e1000_hw *hw = &adapter->hw;
4464 u32 rctl, rxcsum, psize, srrctl = 0;
4465
4466 INIT_DEBUGOUT("igb_initialize_receive_unit: begin");
4467
4468 /*
4469 * Make sure receives are disabled while setting
4470 * up the descriptor ring
4471 */
4472 rctl = E1000_READ_REG(hw, E1000_RCTL);
4473 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4474
4475 /*
4476 ** Set up for header split
4477 */
4478 if (igb_header_split) {
4479 /* Use a standard mbuf for the header */
4480 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4481 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4482 } else
4483 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4484
4485 /*
4486 ** Set up for jumbo frames
4487 */
4488 if (ifp->if_mtu > ETHERMTU) {
4489 rctl |= E1000_RCTL_LPE;
4490 if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4491 srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4492 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4493 } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4494 srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4495 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4496 }
4497 /* Set maximum packet len */
4498 psize = adapter->max_frame_size;
4499 /* are we on a vlan? */
4500 if (adapter->ifp->if_vlantrunk != NULL)
4501 psize += VLAN_TAG_SIZE;
4502 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4503 } else {
4504 rctl &= ~E1000_RCTL_LPE;
4505 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4506 rctl |= E1000_RCTL_SZ_2048;
4507 }
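	/*
	 * BSIZEPKT is programmed in 1KB units (a right shift of 10), so
	 * the lines above select a 2KB buffer for standard frames and
	 * 4KB or 8KB buffers for the jumbo cluster sizes.
	 */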
4508
4509 /*
4510 * If TX flow control is disabled and there's >1 queue defined,
4511 * enable DROP.
4512 *
4513 * This drops frames rather than hanging the RX MAC for all queues.
4514 */
4515 if ((adapter->num_queues > 1) &&
4516 (adapter->fc == e1000_fc_none ||
4517 adapter->fc == e1000_fc_rx_pause)) {
4518 srrctl |= E1000_SRRCTL_DROP_EN;
4519 }
4520
4521 /* Setup the Base and Length of the Rx Descriptor Rings */
4522 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4523 u64 bus_addr = rxr->rxdma.dma_paddr;
4524 u32 rxdctl;
4525
4526 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4527 adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4528 E1000_WRITE_REG(hw, E1000_RDBAH(i),
4529 (uint32_t)(bus_addr >> 32));
4530 E1000_WRITE_REG(hw, E1000_RDBAL(i),
4531 (uint32_t)bus_addr);
4532 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4533 /* Enable this Queue */
4534 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4535 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4536 rxdctl &= 0xFFF00000;
4537 rxdctl |= IGB_RX_PTHRESH;
4538 rxdctl |= IGB_RX_HTHRESH << 8;
4539 rxdctl |= IGB_RX_WTHRESH << 16;
4540 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4541 }
4542
4543 /*
4544 ** Setup for RX MultiQueue
4545 */
4546 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4547 	if (adapter->num_queues > 1) {
4548 u32 random[10], mrqc, shift = 0;
4549 union igb_reta {
4550 u32 dword;
4551 u8 bytes[4];
4552 } reta;
4553
4554 arc4rand(&random, sizeof(random), 0);
4555 if (adapter->hw.mac.type == e1000_82575)
4556 shift = 6;
4557 /* Warning FM follows */
4558 for (int i = 0; i < 128; i++) {
4559 reta.bytes[i & 3] =
4560 (i % adapter->num_queues) << shift;
4561 if ((i & 3) == 3)
4562 E1000_WRITE_REG(hw,
4563 E1000_RETA(i >> 2), reta.dword);
4564 }
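		/*
		 * Illustrative example: with num_queues = 4 and shift = 0,
		 * bytes 0..3 of each RETA dword hold queues 0,1,2,3 and that
		 * pattern repeats across all 128 entries; the 82575's shift
		 * of 6 moves the queue number into the bit positions that
		 * part expects.
		 */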
4565 /* Now fill in hash table */
4566 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4567 for (int i = 0; i < 10; i++)
4568 E1000_WRITE_REG_ARRAY(hw,
4569 E1000_RSSRK(0), i, random[i]);
4570
4571 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4572 E1000_MRQC_RSS_FIELD_IPV4_TCP);
4573 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4574 E1000_MRQC_RSS_FIELD_IPV6_TCP);
4575 		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4576 		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
4577 		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4578 		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4579
4580 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4581
4582 /*
4583 ** NOTE: Receive Full-Packet Checksum Offload
4584 		** is mutually exclusive with Multiqueue. However,
4585 		** this is not the same as TCP/IP checksums, which
4586 ** still work.
4587 */
4588 rxcsum |= E1000_RXCSUM_PCSD;
4589 #if __FreeBSD_version >= 800000
4590 /* For SCTP Offload */
4591 if ((hw->mac.type != e1000_82575) &&
4592 (ifp->if_capenable & IFCAP_RXCSUM))
4593 rxcsum |= E1000_RXCSUM_CRCOFL;
4594 #endif
4595 } else {
4596 /* Non RSS setup */
4597 if (ifp->if_capenable & IFCAP_RXCSUM) {
4598 rxcsum |= E1000_RXCSUM_IPPCSE;
4599 #if __FreeBSD_version >= 800000
4600 if (adapter->hw.mac.type != e1000_82575)
4601 rxcsum |= E1000_RXCSUM_CRCOFL;
4602 #endif
4603 } else
4604 rxcsum &= ~E1000_RXCSUM_TUOFL;
4605 }
4606 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4607
4608 /* Setup the Receive Control Register */
4609 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4610 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4611 E1000_RCTL_RDMTS_HALF |
4612 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4613 /* Strip CRC bytes. */
4614 rctl |= E1000_RCTL_SECRC;
4615 /* Make sure VLAN Filters are off */
4616 rctl &= ~E1000_RCTL_VFE;
4617 /* Don't store bad packets */
4618 rctl &= ~E1000_RCTL_SBP;
4619
4620 /* Enable Receives */
4621 E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4622
4623 /*
4624 * Setup the HW Rx Head and Tail Descriptor Pointers
4625 * - needs to be after enable
4626 */
4627 for (int i = 0; i < adapter->num_queues; i++) {
4628 rxr = &adapter->rx_rings[i];
4629 E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4630 #ifdef DEV_NETMAP
4631 /*
4632 * an init() while a netmap client is active must
4633 * preserve the rx buffers passed to userspace.
4634 * In this driver it means we adjust RDT to
4635 * something different from next_to_refresh
4636 * (which is not used in netmap mode).
4637 */
4638 if (ifp->if_capenable & IFCAP_NETMAP) {
4639 struct netmap_adapter *na = NA(adapter->ifp);
4640 struct netmap_kring *kring = &na->rx_rings[i];
4641 int t = rxr->next_to_refresh - nm_kr_rxspace(kring);
4642
4643 if (t >= adapter->num_rx_desc)
4644 t -= adapter->num_rx_desc;
4645 else if (t < 0)
4646 t += adapter->num_rx_desc;
4647 E1000_WRITE_REG(hw, E1000_RDT(i), t);
4648 } else
4649 #endif /* DEV_NETMAP */
4650 E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4651 }
4652 return;
4653 }
4654
4655 /*********************************************************************
4656 *
4657 * Free receive rings.
4658 *
4659 **********************************************************************/
4660 static void
4661 igb_free_receive_structures(struct adapter *adapter)
4662 {
4663 struct rx_ring *rxr = adapter->rx_rings;
4664
4665 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4666 struct lro_ctrl *lro = &rxr->lro;
4667 igb_free_receive_buffers(rxr);
4668 tcp_lro_free(lro);
4669 igb_dma_free(adapter, &rxr->rxdma);
4670 }
4671
4672 free(adapter->rx_rings, M_DEVBUF);
4673 }
4674
4675 /*********************************************************************
4676 *
4677 * Free receive ring data structures.
4678 *
4679 **********************************************************************/
4680 static void
4681 igb_free_receive_buffers(struct rx_ring *rxr)
4682 {
4683 struct adapter *adapter = rxr->adapter;
4684 struct igb_rx_buf *rxbuf;
4685 int i;
4686
4687 INIT_DEBUGOUT("free_receive_structures: begin");
4688
4689 /* Cleanup any existing buffers */
4690 if (rxr->rx_buffers != NULL) {
4691 for (i = 0; i < adapter->num_rx_desc; i++) {
4692 rxbuf = &rxr->rx_buffers[i];
4693 if (rxbuf->m_head != NULL) {
4694 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4695 BUS_DMASYNC_POSTREAD);
4696 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4697 rxbuf->m_head->m_flags |= M_PKTHDR;
4698 m_freem(rxbuf->m_head);
4699 }
4700 if (rxbuf->m_pack != NULL) {
4701 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4702 BUS_DMASYNC_POSTREAD);
4703 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4704 rxbuf->m_pack->m_flags |= M_PKTHDR;
4705 m_freem(rxbuf->m_pack);
4706 }
4707 rxbuf->m_head = NULL;
4708 rxbuf->m_pack = NULL;
4709 if (rxbuf->hmap != NULL) {
4710 bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4711 rxbuf->hmap = NULL;
4712 }
4713 if (rxbuf->pmap != NULL) {
4714 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4715 rxbuf->pmap = NULL;
4716 }
4717 }
4718 if (rxr->rx_buffers != NULL) {
4719 free(rxr->rx_buffers, M_DEVBUF);
4720 rxr->rx_buffers = NULL;
4721 }
4722 }
4723
4724 if (rxr->htag != NULL) {
4725 bus_dma_tag_destroy(rxr->htag);
4726 rxr->htag = NULL;
4727 }
4728 if (rxr->ptag != NULL) {
4729 bus_dma_tag_destroy(rxr->ptag);
4730 rxr->ptag = NULL;
4731 }
4732 }
4733
4734 static __inline void
4735 igb_rx_discard(struct rx_ring *rxr, int i)
4736 {
4737 struct igb_rx_buf *rbuf;
4738
4739 rbuf = &rxr->rx_buffers[i];
4740
4741 /* Partially received? Free the chain */
4742 if (rxr->fmp != NULL) {
4743 rxr->fmp->m_flags |= M_PKTHDR;
4744 m_freem(rxr->fmp);
4745 rxr->fmp = NULL;
4746 rxr->lmp = NULL;
4747 }
4748
4749 /*
4750 ** With advanced descriptors the writeback
4751 	** clobbers the buffer addrs, so it's easier
4752 	** to just free the existing mbufs and take
4753 	** the normal refresh path to get new buffers
4754 	** and mappings.
4755 */
4756 if (rbuf->m_head) {
4757 m_free(rbuf->m_head);
4758 rbuf->m_head = NULL;
4759 bus_dmamap_unload(rxr->htag, rbuf->hmap);
4760 }
4761
4762 if (rbuf->m_pack) {
4763 m_free(rbuf->m_pack);
4764 rbuf->m_pack = NULL;
4765 bus_dmamap_unload(rxr->ptag, rbuf->pmap);
4766 }
4767
4768 return;
4769 }
4770
4771 static __inline void
4772 igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4773 {
4774
4775 /*
4776 	 * At the moment LRO is only for IPv4/TCP packets whose TCP
4777 	 * checksum has been verified by hardware, and the frame must
4778 	 * not carry a VLAN tag in its ethernet header.
4779 */
4780 if (rxr->lro_enabled &&
4781 (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4782 (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4783 (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4784 (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4785 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4786 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4787 /*
4788 		 * Send to the stack if:
4789 		 *  - LRO not enabled, or
4790 		 *  - no LRO resources, or
4791 		 *  - lro enqueue fails
4792 */
4793 if (rxr->lro.lro_cnt != 0)
4794 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4795 return;
4796 }
4797 IGB_RX_UNLOCK(rxr);
4798 (*ifp->if_input)(ifp, m);
4799 IGB_RX_LOCK(rxr);
4800 }
4801
4802 /*********************************************************************
4803 *
4804 * This routine executes in interrupt context. It replenishes
4805 * the mbufs in the descriptor ring and sends data which has
4806 * been dma'ed into host memory to the upper layer.
4807 *
4808 * We loop at most count times if count is > 0, or until done if
4809 * count < 0.
4810 *
4811 * Return TRUE if more to clean, FALSE otherwise
4812 *********************************************************************/
4813 static bool
4814 igb_rxeof(struct igb_queue *que, int count, int *done)
4815 {
4816 struct adapter *adapter = que->adapter;
4817 struct rx_ring *rxr = que->rxr;
4818 struct ifnet *ifp = adapter->ifp;
4819 struct lro_ctrl *lro = &rxr->lro;
4820 struct lro_entry *queued;
4821 int i, processed = 0, rxdone = 0;
4822 u32 ptype, staterr = 0;
4823 union e1000_adv_rx_desc *cur;
4824
4825 IGB_RX_LOCK(rxr);
4826 /* Sync the ring. */
4827 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4828 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4829
4830 #ifdef DEV_NETMAP
4831 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4832 IGB_RX_UNLOCK(rxr);
4833 return (FALSE);
4834 }
4835 #endif /* DEV_NETMAP */
4836
4837 /* Main clean loop */
4838 for (i = rxr->next_to_check; count != 0;) {
4839 struct mbuf *sendmp, *mh, *mp;
4840 struct igb_rx_buf *rxbuf;
4841 u16 hlen, plen, hdr, vtag;
4842 bool eop = FALSE;
4843
4844 cur = &rxr->rx_base[i];
4845 staterr = le32toh(cur->wb.upper.status_error);
4846 if ((staterr & E1000_RXD_STAT_DD) == 0)
4847 break;
4848 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4849 break;
4850 count--;
4851 sendmp = mh = mp = NULL;
4852 cur->wb.upper.status_error = 0;
4853 rxbuf = &rxr->rx_buffers[i];
4854 plen = le16toh(cur->wb.upper.length);
4855 ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4856 if (((adapter->hw.mac.type == e1000_i350) ||
4857 (adapter->hw.mac.type == e1000_i354)) &&
4858 (staterr & E1000_RXDEXT_STATERR_LB))
4859 vtag = be16toh(cur->wb.upper.vlan);
4860 else
4861 vtag = le16toh(cur->wb.upper.vlan);
4862 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4863 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4864
4865 /*
4866 * Free the frame (all segments) if we're at EOP and
4867 * it's an error.
4868 *
4869 * The datasheet states that EOP + status is only valid for
4870 * the final segment in a multi-segment frame.
4871 */
4872 if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) {
4873 adapter->dropped_pkts++;
4874 ++rxr->rx_discarded;
4875 igb_rx_discard(rxr, i);
4876 goto next_desc;
4877 }
4878
4879 /*
4880 ** The way the hardware is configured to
4881 ** split, it will ONLY use the header buffer
4882 ** when header split is enabled, otherwise we
4883 	** get normal behavior, i.e., both header and
4884 ** payload are DMA'd into the payload buffer.
4885 **
4886 ** The fmp test is to catch the case where a
4887 ** packet spans multiple descriptors, in that
4888 ** case only the first header is valid.
4889 */
4890 if (rxr->hdr_split && rxr->fmp == NULL) {
4891 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
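			/*
			 * The descriptor reports how many bytes the NIC
			 * split into the header buffer; clamp to IGB_HDR_BUF
			 * since that is all the header mbuf was mapped for.
			 */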
4892 hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4893 E1000_RXDADV_HDRBUFLEN_SHIFT;
4894 if (hlen > IGB_HDR_BUF)
4895 hlen = IGB_HDR_BUF;
4896 mh = rxr->rx_buffers[i].m_head;
4897 mh->m_len = hlen;
4898 /* clear buf pointer for refresh */
4899 rxbuf->m_head = NULL;
4900 /*
4901 ** Get the payload length, this
4902 ** could be zero if its a small
4903 ** packet.
4904 */
4905 if (plen > 0) {
4906 mp = rxr->rx_buffers[i].m_pack;
4907 mp->m_len = plen;
4908 mh->m_next = mp;
4909 /* clear buf pointer */
4910 rxbuf->m_pack = NULL;
4911 rxr->rx_split_packets++;
4912 }
4913 } else {
4914 /*
4915 ** Either no header split, or a
4916 ** secondary piece of a fragmented
4917 ** split packet.
4918 */
4919 mh = rxr->rx_buffers[i].m_pack;
4920 mh->m_len = plen;
4921 /* clear buf info for refresh */
4922 rxbuf->m_pack = NULL;
4923 }
4924 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4925
4926 ++processed; /* So we know when to refresh */
4927
4928 /* Initial frame - setup */
4929 if (rxr->fmp == NULL) {
4930 mh->m_pkthdr.len = mh->m_len;
4931 /* Save the head of the chain */
4932 rxr->fmp = mh;
4933 rxr->lmp = mh;
4934 if (mp != NULL) {
4935 /* Add payload if split */
4936 mh->m_pkthdr.len += mp->m_len;
4937 rxr->lmp = mh->m_next;
4938 }
4939 } else {
4940 /* Chain mbuf's together */
4941 rxr->lmp->m_next = mh;
4942 rxr->lmp = rxr->lmp->m_next;
4943 rxr->fmp->m_pkthdr.len += mh->m_len;
4944 }
4945
4946 if (eop) {
4947 rxr->fmp->m_pkthdr.rcvif = ifp;
4948 ifp->if_ipackets++;
4949 rxr->rx_packets++;
4950 /* capture data for AIM */
4951 rxr->packets++;
4952 rxr->bytes += rxr->fmp->m_pkthdr.len;
4953 rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4954
4955 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4956 igb_rx_checksum(staterr, rxr->fmp, ptype);
4957
4958 if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4959 (staterr & E1000_RXD_STAT_VP) != 0) {
4960 rxr->fmp->m_pkthdr.ether_vtag = vtag;
4961 rxr->fmp->m_flags |= M_VLANTAG;
4962 }
4963
4964 /*
4965 * In case of multiqueue, we have RXCSUM.PCSD bit set
4966 * and never cleared. This means we have RSS hash
4967 * available to be used.
4968 */
4969 if (adapter->num_queues > 1) {
4970 rxr->fmp->m_pkthdr.flowid =
4971 le32toh(cur->wb.lower.hi_dword.rss);
4972 /*
4973 				 * Full RSS support is not available in
4974 				 * FreeBSD 10, so set the hash type to
4975 				 * OPAQUE.
4976 */
4977 M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE);
4978 } else {
4979 #ifndef IGB_LEGACY_TX
4980 rxr->fmp->m_pkthdr.flowid = que->msix;
4981 M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE);
4982 #endif
4983 }
4984 sendmp = rxr->fmp;
4985 /* Make sure to set M_PKTHDR. */
4986 sendmp->m_flags |= M_PKTHDR;
4987 rxr->fmp = NULL;
4988 rxr->lmp = NULL;
4989 }
4990
4991 next_desc:
4992 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4993 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4994
4995 /* Advance our pointers to the next descriptor. */
4996 if (++i == adapter->num_rx_desc)
4997 i = 0;
4998 /*
4999 ** Send to the stack or LRO
5000 */
5001 if (sendmp != NULL) {
5002 rxr->next_to_check = i;
5003 igb_rx_input(rxr, ifp, sendmp, ptype);
5004 i = rxr->next_to_check;
5005 rxdone++;
5006 }
5007
5008 		/* Refresh mbufs every 8 descriptors */
5009 if (processed == 8) {
5010 igb_refresh_mbufs(rxr, i);
5011 processed = 0;
5012 }
5013 }
5014
5015 /* Catch any remainders */
5016 if (igb_rx_unrefreshed(rxr))
5017 igb_refresh_mbufs(rxr, i);
5018
5019 rxr->next_to_check = i;
5020
5021 /*
5022 * Flush any outstanding LRO work
5023 */
5024 while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
5025 SLIST_REMOVE_HEAD(&lro->lro_active, next);
5026 tcp_lro_flush(lro, queued);
5027 }
5028
5029 if (done != NULL)
5030 *done += rxdone;
5031
5032 IGB_RX_UNLOCK(rxr);
5033 return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
5034 }
5035
5036 /*********************************************************************
5037 *
5038 * Verify that the hardware indicated that the checksum is valid.
5039 * Inform the stack about the status of the checksum so that the stack
5040 * doesn't spend time verifying the checksum.
5041 *
5042 *********************************************************************/
5043 static void
5044 igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
5045 {
5046 u16 status = (u16)staterr;
5047 u8 errors = (u8) (staterr >> 24);
5048 int sctp;
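	/*
	 * staterr carries the descriptor status in bits 15:0 and the
	 * error byte in bits 31:24. Illustrative example: IPCS|TCPCS set
	 * with a clean error byte yields CSUM_IP_CHECKED | CSUM_IP_VALID |
	 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR and csum_data of 0xffff.
	 */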
5049
5050 /* Ignore Checksum bit is set */
5051 if (status & E1000_RXD_STAT_IXSM) {
5052 mp->m_pkthdr.csum_flags = 0;
5053 return;
5054 }
5055
5056 if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
5057 (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
5058 sctp = 1;
5059 else
5060 sctp = 0;
5061 if (status & E1000_RXD_STAT_IPCS) {
5062 /* Did it pass? */
5063 if (!(errors & E1000_RXD_ERR_IPE)) {
5064 /* IP Checksum Good */
5065 mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
5066 mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
5067 } else
5068 mp->m_pkthdr.csum_flags = 0;
5069 }
5070
5071 if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
5072 u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5073 #if __FreeBSD_version >= 800000
5074 if (sctp) /* reassign */
5075 type = CSUM_SCTP_VALID;
5076 #endif
5077 /* Did it pass? */
5078 if (!(errors & E1000_RXD_ERR_TCPE)) {
5079 mp->m_pkthdr.csum_flags |= type;
5080 if (sctp == 0)
5081 mp->m_pkthdr.csum_data = htons(0xffff);
5082 }
5083 }
5084 return;
5085 }
5086
5087 /*
5088 * This routine is run via a vlan
5089 * config EVENT
5090 */
5091 static void
5092 igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5093 {
5094 struct adapter *adapter = ifp->if_softc;
5095 u32 index, bit;
5096
5097 if (ifp->if_softc != arg) /* Not our event */
5098 return;
5099
5100 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
5101 return;
5102
5103 IGB_CORE_LOCK(adapter);
5104 index = (vtag >> 5) & 0x7F;
5105 bit = vtag & 0x1F;
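	/* e.g. vtag 100 lands in dword 3, bit 4 of the 128-dword shadow VFTA */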
5106 adapter->shadow_vfta[index] |= (1 << bit);
5107 ++adapter->num_vlans;
5108 /* Change hw filter setting */
5109 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5110 igb_setup_vlan_hw_support(adapter);
5111 IGB_CORE_UNLOCK(adapter);
5112 }
5113
5114 /*
5115 * This routine is run via a vlan
5116 * unconfig EVENT
5117 */
5118 static void
5119 igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5120 {
5121 struct adapter *adapter = ifp->if_softc;
5122 u32 index, bit;
5123
5124 if (ifp->if_softc != arg)
5125 return;
5126
5127 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
5128 return;
5129
5130 IGB_CORE_LOCK(adapter);
5131 index = (vtag >> 5) & 0x7F;
5132 bit = vtag & 0x1F;
5133 adapter->shadow_vfta[index] &= ~(1 << bit);
5134 --adapter->num_vlans;
5135 /* Change hw filter setting */
5136 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5137 igb_setup_vlan_hw_support(adapter);
5138 IGB_CORE_UNLOCK(adapter);
5139 }
5140
5141 static void
5142 igb_setup_vlan_hw_support(struct adapter *adapter)
5143 {
5144 struct e1000_hw *hw = &adapter->hw;
5145 struct ifnet *ifp = adapter->ifp;
5146 u32 reg;
5147
5148 if (adapter->vf_ifp) {
5149 e1000_rlpml_set_vf(hw,
5150 adapter->max_frame_size + VLAN_TAG_SIZE);
5151 return;
5152 }
5153
5154 reg = E1000_READ_REG(hw, E1000_CTRL);
5155 reg |= E1000_CTRL_VME;
5156 E1000_WRITE_REG(hw, E1000_CTRL, reg);
5157
5158 /* Enable the Filter Table */
5159 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5160 reg = E1000_READ_REG(hw, E1000_RCTL);
5161 reg &= ~E1000_RCTL_CFIEN;
5162 reg |= E1000_RCTL_VFE;
5163 E1000_WRITE_REG(hw, E1000_RCTL, reg);
5164 }
5165
5166 /* Update the frame size */
5167 E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5168 adapter->max_frame_size + VLAN_TAG_SIZE);
5169
5170 /* Don't bother with table if no vlans */
5171 if ((adapter->num_vlans == 0) ||
5172 ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5173 return;
5174 /*
5175 	** A soft reset zeroes out the VFTA, so
5176 ** we need to repopulate it now.
5177 */
5178 for (int i = 0; i < IGB_VFTA_SIZE; i++)
5179 if (adapter->shadow_vfta[i] != 0) {
5180 if (adapter->vf_ifp)
5181 e1000_vfta_set_vf(hw,
5182 adapter->shadow_vfta[i], TRUE);
5183 else
5184 e1000_write_vfta(hw,
5185 i, adapter->shadow_vfta[i]);
5186 }
5187 }
5188
5189 static void
5190 igb_enable_intr(struct adapter *adapter)
5191 {
5192 	/* With MSIX/RSS, set up which causes to auto-clear and auto-mask */
5193 if (adapter->msix_mem) {
5194 u32 mask = (adapter->que_mask | adapter->link_mask);
5195 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5196 E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5197 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5198 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5199 E1000_IMS_LSC);
5200 } else {
5201 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5202 IMS_ENABLE_MASK);
5203 }
5204 E1000_WRITE_FLUSH(&adapter->hw);
5205
5206 return;
5207 }
5208
5209 static void
5210 igb_disable_intr(struct adapter *adapter)
5211 {
5212 if (adapter->msix_mem) {
5213 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5214 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5215 }
5216 E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5217 E1000_WRITE_FLUSH(&adapter->hw);
5218 return;
5219 }
5220
5221 /*
5222 * Bit of a misnomer: what this really means is
5223 * to enable OS management of the system, i.e.
5224 * to disable special hardware management features.
5225 */
5226 static void
5227 igb_init_manageability(struct adapter *adapter)
5228 {
5229 if (adapter->has_manage) {
5230 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5231 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5232
5233 /* disable hardware interception of ARP */
5234 manc &= ~(E1000_MANC_ARP_EN);
5235
5236 /* enable receiving management packets to the host */
5237 manc |= E1000_MANC_EN_MNG2HOST;
5238 manc2h |= 1 << 5; /* Mng Port 623 */
5239 manc2h |= 1 << 6; /* Mng Port 664 */
5240 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5241 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5242 }
5243 }
5244
5245 /*
5246 * Give control back to hardware management
5247 * controller if there is one.
5248 */
5249 static void
5250 igb_release_manageability(struct adapter *adapter)
5251 {
5252 if (adapter->has_manage) {
5253 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5254
5255 /* re-enable hardware interception of ARP */
5256 manc |= E1000_MANC_ARP_EN;
5257 manc &= ~E1000_MANC_EN_MNG2HOST;
5258
5259 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5260 }
5261 }
5262
5263 /*
5264 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5265 * For ASF and Pass Through versions of f/w this means that
5266 * the driver is loaded.
5267 *
5268 */
5269 static void
5270 igb_get_hw_control(struct adapter *adapter)
5271 {
5272 u32 ctrl_ext;
5273
5274 if (adapter->vf_ifp)
5275 return;
5276
5277 /* Let firmware know the driver has taken over */
5278 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5279 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5280 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5281 }
5282
5283 /*
5284 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5285 * For ASF and Pass Through versions of f/w this means that the
5286 * driver is no longer loaded.
5287 *
5288 */
5289 static void
5290 igb_release_hw_control(struct adapter *adapter)
5291 {
5292 u32 ctrl_ext;
5293
5294 if (adapter->vf_ifp)
5295 return;
5296
5297 	/* Let firmware take over control of h/w */
5298 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5299 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5300 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5301 }
5302
5303 static int
5304 igb_is_valid_ether_addr(uint8_t *addr)
5305 {
5306 char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5307
5308 if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5309 return (FALSE);
5310 }
5311
5312 return (TRUE);
5313 }
5314
5315
5316 /*
5317 * Enable PCI Wake On LAN capability
5318 */
5319 static void
5320 igb_enable_wakeup(device_t dev)
5321 {
5322 struct adapter *adapter = device_get_softc(dev);
5323 struct ifnet *ifp = adapter->ifp;
5324 u32 pmc, ctrl, ctrl_ext, rctl, wuc;
5325 u16 status;
5326
5327 if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0)
5328 return;
5329
5330 adapter->wol = E1000_READ_REG(&adapter->hw, E1000_WUFC);
5331 if (ifp->if_capenable & IFCAP_WOL_MAGIC)
5332 adapter->wol |= E1000_WUFC_MAG;
5333 else
5334 adapter->wol &= ~E1000_WUFC_MAG;
5335
5336 if (ifp->if_capenable & IFCAP_WOL_MCAST) {
5337 adapter->wol |= E1000_WUFC_MC;
5338 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5339 rctl |= E1000_RCTL_MPE;
5340 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5341 } else
5342 adapter->wol &= ~E1000_WUFC_MC;
5343
5344 if (ifp->if_capenable & IFCAP_WOL_UCAST)
5345 adapter->wol |= E1000_WUFC_EX;
5346 else
5347 adapter->wol &= ~E1000_WUFC_EX;
5348
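	/* Nothing armed in WUFC? Just update the PME status bits below. */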
5349 if (!(adapter->wol & (E1000_WUFC_EX | E1000_WUFC_MAG | E1000_WUFC_MC)))
5350 goto pme;
5351
5352 /* Advertise the wakeup capability */
5353 ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
5354 ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
5355 E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
5356
5357 /* Keep the laser running on Fiber adapters */
5358 if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
5359 adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
5360 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5361 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
5362 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
5363 }
5364
5365 /* Enable wakeup by the MAC */
5366 wuc = E1000_READ_REG(&adapter->hw, E1000_WUC);
5367 wuc |= E1000_WUC_PME_EN | E1000_WUC_APME;
5368 E1000_WRITE_REG(&adapter->hw, E1000_WUC, wuc);
5369 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5370
5371 pme:
5372 status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5373 status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5374 if (ifp->if_capenable & IFCAP_WOL)
5375 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5376 pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5377 }
5378
5379 static void
5380 igb_led_func(void *arg, int onoff)
5381 {
5382 struct adapter *adapter = arg;
5383
5384 IGB_CORE_LOCK(adapter);
5385 if (onoff) {
5386 e1000_setup_led(&adapter->hw);
5387 e1000_led_on(&adapter->hw);
5388 } else {
5389 e1000_led_off(&adapter->hw);
5390 e1000_cleanup_led(&adapter->hw);
5391 }
5392 IGB_CORE_UNLOCK(adapter);
5393 }
5394
5395 /**********************************************************************
5396 *
5397 * Update the board statistics counters.
5398 *
5399 **********************************************************************/
5400 static void
5401 igb_update_stats_counters(struct adapter *adapter)
5402 {
5403 struct ifnet *ifp;
5404 struct e1000_hw *hw = &adapter->hw;
5405 struct e1000_hw_stats *stats;
5406
5407 /*
5408 ** The virtual function adapter has only a
5409 	** small controlled set of stats, so do only
5410 ** those and return.
5411 */
5412 if (adapter->vf_ifp) {
5413 igb_update_vf_stats_counters(adapter);
5414 return;
5415 }
5416
5417 stats = (struct e1000_hw_stats *)adapter->stats;
5418
5419 if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5420 (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5421 stats->symerrs +=
5422 		    E1000_READ_REG(hw, E1000_SYMERRS);
5423 stats->sec += E1000_READ_REG(hw, E1000_SEC);
5424 }
5425
5426 stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5427 stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5428 stats->scc += E1000_READ_REG(hw, E1000_SCC);
5429 stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5430
5431 stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5432 stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5433 stats->colc += E1000_READ_REG(hw, E1000_COLC);
5434 stats->dc += E1000_READ_REG(hw, E1000_DC);
5435 stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5436 stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5437 stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5438 /*
5439 ** For watchdog management we need to know if we have been
5440 ** paused during the last interval, so capture that here.
5441 */
5442 adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5443 stats->xoffrxc += adapter->pause_frames;
5444 stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5445 stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5446 stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5447 stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5448 stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5449 stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5450 stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5451 stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5452 stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5453 stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5454 stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5455 stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5456
5457 /* For the 64-bit byte counters the low dword must be read first. */
5458 /* Both registers clear on the read of the high dword */
5459
5460 stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5461 ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5462 stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5463 ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5464
5465 stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5466 stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5467 stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5468 stats->roc += E1000_READ_REG(hw, E1000_ROC);
5469 stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5470
5471 stats->mgprc += E1000_READ_REG(hw, E1000_MGTPRC);
5472 stats->mgpdc += E1000_READ_REG(hw, E1000_MGTPDC);
5473 stats->mgptc += E1000_READ_REG(hw, E1000_MGTPTC);
5474
5475 stats->tor += E1000_READ_REG(hw, E1000_TORL) +
5476 ((u64)E1000_READ_REG(hw, E1000_TORH) << 32);
5477 stats->tot += E1000_READ_REG(hw, E1000_TOTL) +
5478 ((u64)E1000_READ_REG(hw, E1000_TOTH) << 32);
5479
5480 stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5481 stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5482 stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5483 stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5484 stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5485 stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5486 stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5487 stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5488 stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5489 stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5490
5491 /* Interrupt Counts */
5492
5493 stats->iac += E1000_READ_REG(hw, E1000_IAC);
5494 stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5495 stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5496 stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5497 stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5498 stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5499 stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5500 stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5501 stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5502
5503 /* Host to Card Statistics */
5504
5505 stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5506 stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5507 stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5508 stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5509 stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5510 stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5511 stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5512 stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5513 ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5514 stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5515 ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5516 stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5517 stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5518 stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5519
5520 stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5521 stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5522 stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5523 stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5524 stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5525 stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5526
5527 ifp = adapter->ifp;
5528 ifp->if_collisions = stats->colc;
5529
5530 /* Rx Errors */
5531 ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
5532 stats->crcerrs + stats->algnerrc +
5533 stats->ruc + stats->roc + stats->mpc + stats->cexterr;
5534
5535 /* Tx Errors */
5536 ifp->if_oerrors = stats->ecol +
5537 stats->latecol + adapter->watchdog_events;
5538
5539 /* Driver specific counters */
5540 adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5541 adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5542 adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5543 adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5544 adapter->packet_buf_alloc_tx =
5545 ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5546 adapter->packet_buf_alloc_rx =
5547 (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5548 }
5549
5550
5551 /**********************************************************************
5552 *
5553 * Initialize the VF board statistics counters.
5554 *
5555 **********************************************************************/
5556 static void
5557 igb_vf_init_stats(struct adapter *adapter)
5558 {
5559 struct e1000_hw *hw = &adapter->hw;
5560 struct e1000_vf_stats *stats;
5561
5562 stats = (struct e1000_vf_stats *)adapter->stats;
5563 if (stats == NULL)
5564 return;
5565 stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5566 stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5567 stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5568 stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5569 stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5570 }
5571
5572 /**********************************************************************
5573 *
5574 * Update the VF board statistics counters.
5575 *
5576 **********************************************************************/
5577 static void
5578 igb_update_vf_stats_counters(struct adapter *adapter)
5579 {
5580 struct e1000_hw *hw = &adapter->hw;
5581 struct e1000_vf_stats *stats;
5582
5583 if (adapter->link_speed == 0)
5584 return;
5585
5586 stats = (struct e1000_vf_stats *)adapter->stats;
5587
5588 UPDATE_VF_REG(E1000_VFGPRC,
5589 stats->last_gprc, stats->gprc);
5590 UPDATE_VF_REG(E1000_VFGORC,
5591 stats->last_gorc, stats->gorc);
5592 UPDATE_VF_REG(E1000_VFGPTC,
5593 stats->last_gptc, stats->gptc);
5594 UPDATE_VF_REG(E1000_VFGOTC,
5595 stats->last_gotc, stats->gotc);
5596 UPDATE_VF_REG(E1000_VFMPRC,
5597 stats->last_mprc, stats->mprc);
5598 }
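/*
 * UPDATE_VF_REG is defined elsewhere in this driver; the sketch below
 * (hypothetical name, not the driver's actual definition) shows the
 * rollover handling such a macro needs.  The VF counters are 32-bit
 * and free-running, so a wrap is detected when the current reading is
 * below the previous one, and a carry is added to the 64-bit software
 * accumulator before its low 32 bits are refreshed.
 */
#define UPDATE_VF_REG_SKETCH(reg, last, cur)			\
do {								\
	u32 cur32 = E1000_READ_REG(hw, reg);			\
	if (cur32 < (last))					\
		(cur) += (u64)1 << 32;	/* 32-bit wrap */	\
	(cur) = ((cur) & 0xFFFFFFFF00000000ULL) | cur32;	\
	(last) = cur32;						\
} while (0)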
5599
5600 /* Export a single 32-bit register via a read-only sysctl. */
5601 static int
5602 igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5603 {
5604 struct adapter *adapter;
5605 u_int val;
5606
5607 adapter = oidp->oid_arg1;
5608 val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5609 return (sysctl_handle_int(oidp, &val, 0, req));
5610 }
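/*
 * igb_sysctl_reg_handler() is attached with arg1 pointing at the
 * adapter and arg2 carrying the register offset to read; see the
 * txd_head/rxd_head sysctls created in igb_add_hw_stats() below.
 */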
5611
5612 /*
5613 ** Per-queue interrupt rate handler (read-only; derives the rate from EITR)
5614 */
5615 static int
5616 igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5617 {
5618 struct igb_queue *que = ((struct igb_queue *)oidp->oid_arg1);
5619 int error;
5620 u32 reg, usec, rate;
5621
5622 reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5623 usec = ((reg & 0x7FFC) >> 2);
5624 if (usec > 0)
5625 rate = 1000000 / usec;
5626 else
5627 rate = 0;
5628 error = sysctl_handle_int(oidp, &rate, 0, req);
5629 if (error || !req->newptr)
5630 		return (error);
5631 	return (0);
5632 }
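/*
 * Worked example for the EITR decoding above: a register value of
 * 0x03E8 masks to 0x03E8 and shifts right to 0xFA, i.e. 250 usec
 * between interrupts, which reports as 1000000 / 250 = 4000
 * interrupts per second.
 */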
5633
5634 /*
5635 * Add sysctl variables, one per statistic, to the system.
5636 */
5637 static void
5638 igb_add_hw_stats(struct adapter *adapter)
5639 {
5640 device_t dev = adapter->dev;
5641
5642 struct tx_ring *txr = adapter->tx_rings;
5643 struct rx_ring *rxr = adapter->rx_rings;
5644
5645 struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5646 struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5647 struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5648 struct e1000_hw_stats *stats = adapter->stats;
5649
5650 struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5651 struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5652
5653 #define QUEUE_NAME_LEN 32
5654 char namebuf[QUEUE_NAME_LEN];
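
	/*
	 * Everything added below hangs off dev.igb.<unit>: flat driver
	 * counters first, then per-queue "queueN" nodes, and finally
	 * the "mac_stats", "interrupts" and "host" sub-trees.
	 */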
5655
5656 /* Driver Statistics */
5657 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5658 CTLFLAG_RD, &adapter->dropped_pkts,
5659 "Driver dropped packets");
5660 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5661 CTLFLAG_RD, &adapter->link_irq,
5662 "Link MSIX IRQ Handled");
5663 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
5664 CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5665 "Defragmenting mbuf chain failed");
5666 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5667 CTLFLAG_RD, &adapter->no_tx_dma_setup,
5668 "Driver tx dma failure in xmit");
5669 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5670 CTLFLAG_RD, &adapter->rx_overruns,
5671 "RX overruns");
5672 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5673 CTLFLAG_RD, &adapter->watchdog_events,
5674 "Watchdog timeouts");
5675
5676 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
5677 CTLFLAG_RD, &adapter->device_control,
5678 "Device Control Register");
5679 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
5680 CTLFLAG_RD, &adapter->rx_control,
5681 "Receiver Control Register");
5682 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
5683 CTLFLAG_RD, &adapter->int_mask,
5684 "Interrupt Mask");
5685 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
5686 CTLFLAG_RD, &adapter->eint_mask,
5687 "Extended Interrupt Mask");
5688 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
5689 CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5690 "Transmit Buffer Packet Allocation");
5691 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
5692 CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5693 "Receive Buffer Packet Allocation");
5694 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5695 CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5696 "Flow Control High Watermark");
5697 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5698 CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5699 "Flow Control Low Watermark");
5700
5701 for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5702 struct lro_ctrl *lro = &rxr->lro;
5703
5704 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5705 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5706 CTLFLAG_RD, NULL, "Queue Name");
5707 queue_list = SYSCTL_CHILDREN(queue_node);
5708
5709 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5710 CTLTYPE_UINT | CTLFLAG_RD, &adapter->queues[i],
5711 			sizeof(adapter->queues[i]),
5712 igb_sysctl_interrupt_rate_handler,
5713 "IU", "Interrupt Rate");
5714
5715 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5716 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5717 igb_sysctl_reg_handler, "IU",
5718 "Transmit Descriptor Head");
5719 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5720 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5721 igb_sysctl_reg_handler, "IU",
5722 "Transmit Descriptor Tail");
5723 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5724 CTLFLAG_RD, &txr->no_desc_avail,
5725 "Queue No Descriptor Available");
5726 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5727 CTLFLAG_RD, &txr->total_packets,
5728 "Queue Packets Transmitted");
5729
5730 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5731 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5732 igb_sysctl_reg_handler, "IU",
5733 "Receive Descriptor Head");
5734 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5735 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5736 igb_sysctl_reg_handler, "IU",
5737 "Receive Descriptor Tail");
5738 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5739 CTLFLAG_RD, &rxr->rx_packets,
5740 "Queue Packets Received");
5741 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5742 CTLFLAG_RD, &rxr->rx_bytes,
5743 "Queue Bytes Received");
5744 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5745 CTLFLAG_RD, &lro->lro_queued, 0,
5746 "LRO Queued");
5747 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5748 CTLFLAG_RD, &lro->lro_flushed, 0,
5749 "LRO Flushed");
5750 }
5751
5752 /* MAC stats get their own sub node */
5753
5754 stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5755 CTLFLAG_RD, NULL, "MAC Statistics");
5756 stat_list = SYSCTL_CHILDREN(stat_node);
5757
5758 /*
5759 ** The VF adapter has a very limited set of stats
5760 ** since it is not managing the hardware directly.
5761 */
5762 if (adapter->vf_ifp) {
5763 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5764 CTLFLAG_RD, &stats->gprc,
5765 "Good Packets Received");
5766 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5767 CTLFLAG_RD, &stats->gptc,
5768 "Good Packets Transmitted");
5769 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5770 CTLFLAG_RD, &stats->gorc,
5771 "Good Octets Received");
5772 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5773 CTLFLAG_RD, &stats->gotc,
5774 "Good Octets Transmitted");
5775 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5776 CTLFLAG_RD, &stats->mprc,
5777 "Multicast Packets Received");
5778 return;
5779 }
5780
5781 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5782 CTLFLAG_RD, &stats->ecol,
5783 "Excessive collisions");
5784 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
5785 CTLFLAG_RD, &stats->scc,
5786 "Single collisions");
5787 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5788 CTLFLAG_RD, &stats->mcc,
5789 "Multiple collisions");
5790 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
5791 CTLFLAG_RD, &stats->latecol,
5792 "Late collisions");
5793 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
5794 CTLFLAG_RD, &stats->colc,
5795 "Collision Count");
5796 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5797 CTLFLAG_RD, &stats->symerrs,
5798 "Symbol Errors");
5799 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5800 CTLFLAG_RD, &stats->sec,
5801 "Sequence Errors");
5802 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5803 CTLFLAG_RD, &stats->dc,
5804 "Defer Count");
5805 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5806 CTLFLAG_RD, &stats->mpc,
5807 "Missed Packets");
5808 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_length_errors",
5809 CTLFLAG_RD, &stats->rlec,
5810 "Receive Length Errors");
5811 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5812 CTLFLAG_RD, &stats->rnbc,
5813 "Receive No Buffers");
5814 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5815 CTLFLAG_RD, &stats->ruc,
5816 "Receive Undersize");
5817 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5818 CTLFLAG_RD, &stats->rfc,
5819 "Fragmented Packets Received");
5820 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5821 CTLFLAG_RD, &stats->roc,
5822 "Oversized Packets Received");
5823 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5824 CTLFLAG_RD, &stats->rjc,
5825 			"Received Jabber");
5826 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5827 CTLFLAG_RD, &stats->rxerrc,
5828 "Receive Errors");
5829 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5830 CTLFLAG_RD, &stats->crcerrs,
5831 "CRC errors");
5832 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5833 CTLFLAG_RD, &stats->algnerrc,
5834 "Alignment Errors");
5835 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_no_crs",
5836 CTLFLAG_RD, &stats->tncrs,
5837 "Transmit with No CRS");
5838 /* On 82575 these are collision counts */
5839 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5840 CTLFLAG_RD, &stats->cexterr,
5841 "Collision/Carrier extension errors");
5842 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5843 CTLFLAG_RD, &stats->xonrxc,
5844 "XON Received");
5845 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5846 CTLFLAG_RD, &stats->xontxc,
5847 "XON Transmitted");
5848 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5849 CTLFLAG_RD, &stats->xoffrxc,
5850 "XOFF Received");
5851 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5852 CTLFLAG_RD, &stats->xofftxc,
5853 "XOFF Transmitted");
5854 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "unsupported_fc_recvd",
5855 CTLFLAG_RD, &stats->fcruc,
5856 "Unsupported Flow Control Received");
5857 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_recvd",
5858 CTLFLAG_RD, &stats->mgprc,
5859 "Management Packets Received");
5860 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_drop",
5861 CTLFLAG_RD, &stats->mgpdc,
5862 "Management Packets Dropped");
5863 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_txd",
5864 CTLFLAG_RD, &stats->mgptc,
5865 "Management Packets Transmitted");
5866 /* Packet Reception Stats */
5867 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5868 CTLFLAG_RD, &stats->tpr,
5869 "Total Packets Received");
5870 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5871 CTLFLAG_RD, &stats->gprc,
5872 "Good Packets Received");
5873 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5874 CTLFLAG_RD, &stats->bprc,
5875 "Broadcast Packets Received");
5876 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5877 CTLFLAG_RD, &stats->mprc,
5878 "Multicast Packets Received");
5879 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5880 CTLFLAG_RD, &stats->prc64,
5881 "64 byte frames received");
5882 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5883 CTLFLAG_RD, &stats->prc127,
5884 "65-127 byte frames received");
5885 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5886 CTLFLAG_RD, &stats->prc255,
5887 "128-255 byte frames received");
5888 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5889 CTLFLAG_RD, &stats->prc511,
5890 "256-511 byte frames received");
5891 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5892 CTLFLAG_RD, &stats->prc1023,
5893 "512-1023 byte frames received");
5894 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5895 CTLFLAG_RD, &stats->prc1522,
5896 			"1024-1522 byte frames received");
5897 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5898 CTLFLAG_RD, &stats->gorc,
5899 "Good Octets Received");
5900 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_recvd",
5901 CTLFLAG_RD, &stats->tor,
5902 "Total Octets Received");
5903
5904 /* Packet Transmission Stats */
5905 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5906 CTLFLAG_RD, &stats->gotc,
5907 "Good Octets Transmitted");
5908 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_txd",
5909 CTLFLAG_RD, &stats->tot,
5910 "Total Octets Transmitted");
5911 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5912 CTLFLAG_RD, &stats->tpt,
5913 "Total Packets Transmitted");
5914 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5915 CTLFLAG_RD, &stats->gptc,
5916 "Good Packets Transmitted");
5917 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5918 CTLFLAG_RD, &stats->bptc,
5919 "Broadcast Packets Transmitted");
5920 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5921 CTLFLAG_RD, &stats->mptc,
5922 "Multicast Packets Transmitted");
5923 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5924 CTLFLAG_RD, &stats->ptc64,
5925 "64 byte frames transmitted");
5926 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5927 CTLFLAG_RD, &stats->ptc127,
5928 "65-127 byte frames transmitted");
5929 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5930 CTLFLAG_RD, &stats->ptc255,
5931 "128-255 byte frames transmitted");
5932 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5933 CTLFLAG_RD, &stats->ptc511,
5934 "256-511 byte frames transmitted");
5935 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5936 CTLFLAG_RD, &stats->ptc1023,
5937 "512-1023 byte frames transmitted");
5938 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5939 CTLFLAG_RD, &stats->ptc1522,
5940 "1024-1522 byte frames transmitted");
5941 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5942 CTLFLAG_RD, &stats->tsctc,
5943 "TSO Contexts Transmitted");
5944 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5945 CTLFLAG_RD, &stats->tsctfc,
5946 "TSO Contexts Failed");
5947
5948
5949 /* Interrupt Stats */
5950
5951 int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5952 CTLFLAG_RD, NULL, "Interrupt Statistics");
5953 int_list = SYSCTL_CHILDREN(int_node);
5954
5955 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5956 CTLFLAG_RD, &stats->iac,
5957 "Interrupt Assertion Count");
5958
5959 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5960 CTLFLAG_RD, &stats->icrxptc,
5961 "Interrupt Cause Rx Pkt Timer Expire Count");
5962
5963 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5964 CTLFLAG_RD, &stats->icrxatc,
5965 "Interrupt Cause Rx Abs Timer Expire Count");
5966
5967 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5968 CTLFLAG_RD, &stats->ictxptc,
5969 "Interrupt Cause Tx Pkt Timer Expire Count");
5970
5971 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5972 CTLFLAG_RD, &stats->ictxatc,
5973 "Interrupt Cause Tx Abs Timer Expire Count");
5974
5975 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5976 CTLFLAG_RD, &stats->ictxqec,
5977 "Interrupt Cause Tx Queue Empty Count");
5978
5979 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5980 CTLFLAG_RD, &stats->ictxqmtc,
5981 "Interrupt Cause Tx Queue Min Thresh Count");
5982
5983 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5984 CTLFLAG_RD, &stats->icrxdmtc,
5985 "Interrupt Cause Rx Desc Min Thresh Count");
5986
5987 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5988 CTLFLAG_RD, &stats->icrxoc,
5989 "Interrupt Cause Receiver Overrun Count");
5990
5991 /* Host to Card Stats */
5992
5993 host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
5994 CTLFLAG_RD, NULL,
5995 "Host to Card Statistics");
5996
5997 host_list = SYSCTL_CHILDREN(host_node);
5998
5999 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
6000 CTLFLAG_RD, &stats->cbtmpc,
6001 "Circuit Breaker Tx Packet Count");
6002
6003 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
6004 CTLFLAG_RD, &stats->htdpmc,
6005 "Host Transmit Discarded Packets");
6006
6007 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
6008 CTLFLAG_RD, &stats->rpthc,
6009 "Rx Packets To Host");
6010
6011 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
6012 CTLFLAG_RD, &stats->cbrmpc,
6013 "Circuit Breaker Rx Packet Count");
6014
6015 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
6016 CTLFLAG_RD, &stats->cbrdpc,
6017 "Circuit Breaker Rx Dropped Count");
6018
6019 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
6020 CTLFLAG_RD, &stats->hgptc,
6021 "Host Good Packets Tx Count");
6022
6023 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
6024 CTLFLAG_RD, &stats->htcbdpc,
6025 "Host Tx Circuit Breaker Dropped Count");
6026
6027 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
6028 CTLFLAG_RD, &stats->hgorc,
6029 "Host Good Octets Received Count");
6030
6031 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
6032 CTLFLAG_RD, &stats->hgotc,
6033 "Host Good Octets Transmit Count");
6034
6035 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
6036 CTLFLAG_RD, &stats->lenerrs,
6037 "Length Errors");
6038
6039 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
6040 CTLFLAG_RD, &stats->scvpc,
6041 "SerDes/SGMII Code Violation Pkt Count");
6042
6043 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
6044 CTLFLAG_RD, &stats->hrmpc,
6045 "Header Redirection Missed Packet Count");
6046 }
6047
6048
6049 /**********************************************************************
6050 *
6051 * This routine provides a way to dump out the adapter EEPROM,
6052 * often a useful debug/service tool. Only the first 32 words
6053 * are dumped; the content that matters lies within that range.
6054 *
6055 **********************************************************************/
6056 static int
6057 igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
6058 {
6059 struct adapter *adapter;
6060 int error;
6061 int result;
6062
6063 result = -1;
6064 error = sysctl_handle_int(oidp, &result, 0, req);
6065
6066 if (error || !req->newptr)
6067 return (error);
6068
6069 /*
6070 * This value will cause a hex dump of the
6071 * first 32 16-bit words of the EEPROM to
6072 * the screen.
6073 */
6074 if (result == 1) {
6075 adapter = (struct adapter *)arg1;
6076 igb_print_nvm_info(adapter);
6077 }
6078
6079 return (error);
6080 }
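/*
 * Usage sketch, assuming this handler is attached as an "nvm" node
 * (the attach code lives elsewhere in the driver):
 *
 *	sysctl dev.igb.0.nvm=1
 *
 * Writing 1 triggers igb_print_nvm_info() below; other values are
 * ignored.
 */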
6081
6082 static void
6083 igb_print_nvm_info(struct adapter *adapter)
6084 {
6085 u16 eeprom_data;
6086 int i, j, row = 0;
6087
6088 	/* It's a bit crude, but it gets the job done */
6089 printf("\nInterface EEPROM Dump:\n");
6090 printf("Offset\n0x0000 ");
6091 for (i = 0, j = 0; i < 32; i++, j++) {
6092 		if (j == 8) { /* Start a new row; the label is the byte offset */
6093 			j = 0; ++row;
6094 			printf("\n0x00%x0 ", row);
6095 }
6096 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6097 printf("%04x ", eeprom_data);
6098 }
6099 printf("\n");
6100 }
6101
6102 static void
6103 igb_set_sysctl_value(struct adapter *adapter, const char *name,
6104 const char *description, int *limit, int value)
6105 {
6106 *limit = value;
6107 SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6108 SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6109 OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6110 }
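/*
 * Typical use at attach time, exporting a tunable with its default
 * (illustrative values, not a specific call site):
 *
 *	igb_set_sysctl_value(adapter, "rx_processing_limit",
 *	    "max packets cleaned per rx interrupt",
 *	    &adapter->rx_process_limit, 100);
 */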
6111
6112 /*
6113 ** Set flow control using sysctl:
6114 ** Flow control values:
6115 ** 0 - off
6116 ** 1 - rx pause
6117 ** 2 - tx pause
6118 ** 3 - full
6119 */
6120 static int
6121 igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
6122 {
6123 int error;
6124 	static int input = 3; /* default is full; static, so shared by all adapters */
6125 struct adapter *adapter = (struct adapter *) arg1;
6126
6127 error = sysctl_handle_int(oidp, &input, 0, req);
6128
6129 if ((error) || (req->newptr == NULL))
6130 return (error);
6131
6132 switch (input) {
6133 case e1000_fc_rx_pause:
6134 case e1000_fc_tx_pause:
6135 case e1000_fc_full:
6136 case e1000_fc_none:
6137 adapter->hw.fc.requested_mode = input;
6138 adapter->fc = input;
6139 break;
6140 	default:
6141 		/* Reject values outside the documented set */
6142 		return (EINVAL);
6143 }
6144
6145 adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6146 e1000_force_mac_fc(&adapter->hw);
6147 /* XXX TODO: update DROP_EN on each RX queue if appropriate */
6148 return (error);
6149 }
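/*
 * Usage sketch, assuming this handler is attached as an "fc" node:
 *
 *	sysctl dev.igb.0.fc=3		# request full flow control
 *
 * The values map onto the e1000_fc_* enum: 0 none, 1 rx pause,
 * 2 tx pause, 3 full.
 */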
6150
6151 /*
6152 ** Manage DMA Coalesce:
6153 ** Control values:
6154 ** 0/1 - off/on
6155 ** Legal timer values are:
6156 ** 250, 500, and 1000-10000 in increments of 1000
6157 */
6158 static int
6159 igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
6160 {
6161 struct adapter *adapter = (struct adapter *) arg1;
6162 int error;
6163
6164 error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
6165
6166 if ((error) || (req->newptr == NULL))
6167 return (error);
6168
6169 switch (adapter->dmac) {
6170 case 0:
6171 /* Disabling */
6172 break;
6173 case 1: /* Just enable and use default */
6174 adapter->dmac = 1000;
6175 break;
6176 case 250:
6177 case 500:
6178 case 1000:
6179 case 2000:
6180 case 3000:
6181 case 4000:
6182 case 5000:
6183 case 6000:
6184 case 7000:
6185 case 8000:
6186 case 9000:
6187 case 10000:
6188 /* Legal values - allow */
6189 break;
6190 default:
6191 /* Do nothing, illegal value */
6192 adapter->dmac = 0;
6193 return (EINVAL);
6194 }
6195 /* Reinit the interface */
6196 igb_init(adapter);
6197 return (error);
6198 }
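/*
 * Usage sketch, assuming this handler is attached as a "dmac" node:
 *
 *	sysctl dev.igb.0.dmac=1		# enable with the 1000 default
 *	sysctl dev.igb.0.dmac=250	# enable, shortest legal timer
 *	sysctl dev.igb.0.dmac=0		# disable DMA coalescing
 */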
6199
6200 /*
6201 ** Manage Energy Efficient Ethernet:
6202 ** Control values:
6203 **   0 - EEE enabled, 1 - EEE disabled
6204 */
6205 static int
6206 igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6207 {
6208 struct adapter *adapter = (struct adapter *) arg1;
6209 int error, value;
6210
6211 value = adapter->hw.dev_spec._82575.eee_disable;
6212 error = sysctl_handle_int(oidp, &value, 0, req);
6213 if (error || req->newptr == NULL)
6214 return (error);
6215 IGB_CORE_LOCK(adapter);
6216 adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6217 igb_init_locked(adapter);
6218 IGB_CORE_UNLOCK(adapter);
6219 return (0);
6220 }
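/*
 * Usage sketch, assuming this handler is attached as an
 * "eee_disabled" node; note that the value written is the disable
 * flag:
 *
 *	sysctl dev.igb.0.eee_disabled=1		# turn EEE off
 *	sysctl dev.igb.0.eee_disabled=0		# turn EEE on
 */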