/*-
 * Copyright (C) 2012 Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

__FBSDID("$FreeBSD$");

#ifndef __IOAT_INTERNAL_H__
#define __IOAT_INTERNAL_H__

#include <sys/_task.h>

#define DEVICE2SOFTC(dev) ((struct ioat_softc *)device_get_softc(dev))
#define KTR_IOAT KTR_SPARE3

#define ioat_read_chancnt(ioat) \
        ioat_read_1((ioat), IOAT_CHANCNT_OFFSET)

#define ioat_read_xfercap(ioat) \
        (ioat_read_1((ioat), IOAT_XFERCAP_OFFSET) & IOAT_XFERCAP_VALID_MASK)

#define ioat_write_intrctrl(ioat, value) \
        ioat_write_1((ioat), IOAT_INTRCTRL_OFFSET, (value))

#define ioat_read_cbver(ioat) \
        (ioat_read_1((ioat), IOAT_CBVER_OFFSET) & 0xFF)

#define ioat_read_dmacapability(ioat) \
        ioat_read_4((ioat), IOAT_DMACAPABILITY_OFFSET)

#define ioat_write_chanctrl(ioat, value) \
        ioat_write_2((ioat), IOAT_CHANCTRL_OFFSET, (value))

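/*
 * i386 provides no native 8-byte bus_space accessors, and the hardware
 * requires that a 64-bit register read as two 4-byte accesses be read
 * lower dword first (see ioat_get_chansts() below), so on that platform
 * 8-byte access is emulated with a pair of 4-byte accesses.
 */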
static __inline uint64_t
ioat_bus_space_read_8_lower_first(bus_space_tag_t tag,
    bus_space_handle_t handle, bus_size_t offset)
{
        return (bus_space_read_4(tag, handle, offset) |
            ((uint64_t)bus_space_read_4(tag, handle, offset + 4)) << 32);
}

static __inline void
ioat_bus_space_write_8_lower_first(bus_space_tag_t tag,
    bus_space_handle_t handle, bus_size_t offset, uint64_t val)
{
        bus_space_write_4(tag, handle, offset, val);
        bus_space_write_4(tag, handle, offset + 4, val >> 32);
}

#ifdef __i386__
#define ioat_bus_space_read_8 ioat_bus_space_read_8_lower_first
#define ioat_bus_space_write_8 ioat_bus_space_write_8_lower_first
#else
#define ioat_bus_space_read_8(tag, handle, offset) \
        bus_space_read_8((tag), (handle), (offset))
#define ioat_bus_space_write_8(tag, handle, offset, val) \
        bus_space_write_8((tag), (handle), (offset), (val))
#endif

#define ioat_read_1(ioat, offset) \
        bus_space_read_1((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
            (offset))

#define ioat_read_2(ioat, offset) \
        bus_space_read_2((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
            (offset))

#define ioat_read_4(ioat, offset) \
        bus_space_read_4((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
            (offset))

#define ioat_read_8(ioat, offset) \
        ioat_bus_space_read_8((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
            (offset))

#define ioat_read_double_4(ioat, offset) \
        ioat_bus_space_read_8_lower_first((ioat)->pci_bus_tag, \
            (ioat)->pci_bus_handle, (offset))

#define ioat_write_1(ioat, offset, value) \
        bus_space_write_1((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
            (offset), (value))

#define ioat_write_2(ioat, offset, value) \
        bus_space_write_2((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
            (offset), (value))

#define ioat_write_4(ioat, offset, value) \
        bus_space_write_4((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
            (offset), (value))

#define ioat_write_8(ioat, offset, value) \
        ioat_bus_space_write_8((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
            (offset), (value))

#define ioat_write_double_4(ioat, offset, value) \
        ioat_bus_space_write_8_lower_first((ioat)->pci_bus_tag, \
            (ioat)->pci_bus_handle, (offset), (value))

MALLOC_DECLARE(M_IOAT);

SYSCTL_DECL(_hw_ioat);

extern int g_ioat_debug_level;

struct generic_dma_control {
        uint32_t int_enable:1;
        uint32_t src_snoop_disable:1;
        uint32_t dest_snoop_disable:1;
        uint32_t completion_update:1;
        uint32_t fence:1;
        uint32_t reserved1:1;
        uint32_t src_page_break:1;
        uint32_t dest_page_break:1;
        uint32_t bundle:1;
        uint32_t dest_dca:1;
        uint32_t hint:1;
        uint32_t reserved2:13;
        uint32_t op:8;
};

struct ioat_generic_hw_descriptor {
        uint32_t size;
        union {
                uint32_t control_raw;
                struct generic_dma_control control_generic;
        } u;
        uint64_t src_addr;
        uint64_t dest_addr;
        uint64_t next;
        uint64_t reserved[4];
};

struct ioat_dma_hw_descriptor {
        uint32_t size;
        union {
                uint32_t control_raw;
                struct generic_dma_control control_generic;
                struct {
                        uint32_t int_enable:1;
                        uint32_t src_snoop_disable:1;
                        uint32_t dest_snoop_disable:1;
                        uint32_t completion_update:1;
                        uint32_t fence:1;
                        uint32_t null:1;
                        uint32_t src_page_break:1;
                        uint32_t dest_page_break:1;
                        uint32_t bundle:1;
                        uint32_t dest_dca:1;
                        uint32_t hint:1;
                        uint32_t reserved:13;
#define IOAT_OP_COPY 0x00
                        uint32_t op:8;
                } control;
        } u;
        uint64_t src_addr;
        uint64_t dest_addr;
        uint64_t next;
        uint64_t next_src_addr;
        uint64_t next_dest_addr;
        uint64_t user1;
        uint64_t user2;
};
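
/*
 * Illustrative sketch (hypothetical variable names, not driver code): a
 * plain copy is described by filling in the DMA descriptor above and
 * linking it into the ring via 'next':
 *
 *      desc->size = transfer_len;
 *      desc->u.control_raw = 0;
 *      desc->u.control.op = IOAT_OP_COPY;
 *      desc->u.control.completion_update = 1;
 *      desc->src_addr = src_bus_addr;
 *      desc->dest_addr = dst_bus_addr;
 *      desc->next = RING_PHYS_ADDR(ioat, next_index);
 */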

struct ioat_fill_hw_descriptor {
        uint32_t size;
        union {
                uint32_t control_raw;
                struct generic_dma_control control_generic;
                struct {
                        uint32_t int_enable:1;
                        uint32_t reserved:1;
                        uint32_t dest_snoop_disable:1;
                        uint32_t completion_update:1;
                        uint32_t fence:1;
                        uint32_t reserved2:2;
                        uint32_t dest_page_break:1;
                        uint32_t bundle:1;
                        uint32_t reserved3:15;
#define IOAT_OP_FILL 0x01
                        uint32_t op:8;
                } control;
        } u;
        uint64_t src_data;
        uint64_t dest_addr;
        uint64_t next;
        uint64_t reserved;
        uint64_t next_dest_addr;
        uint64_t user1;
        uint64_t user2;
};

struct ioat_crc32_hw_descriptor {
        uint32_t size;
        union {
                uint32_t control_raw;
                struct generic_dma_control control_generic;
                struct {
                        uint32_t int_enable:1;
                        uint32_t src_snoop_disable:1;
                        uint32_t dest_snoop_disable:1;
                        uint32_t completion_update:1;
                        uint32_t fence:1;
                        uint32_t reserved1:3;
                        uint32_t bundle:1;
                        uint32_t dest_dca:1;
                        uint32_t hint:1;
                        uint32_t use_seed:1;
                        /*
                         * crc_location:
                         * For IOAT_OP_MOVECRC_TEST and IOAT_OP_CRC_TEST:
                         * 0: comparison value is pointed to by CRC Address
                         *    field.
                         * 1: comparison value follows data in wire format
                         *    ("inverted reflected bit order") in the 4 bytes
                         *    following the source data.
                         *
                         * For IOAT_OP_CRC_STORE:
                         * 0: Result will be stored at location pointed to by
                         *    CRC Address field (in wire format).
                         * 1: Result will be stored directly following the
                         *    source data.
                         *
                         * For IOAT_OP_MOVECRC_STORE:
                         * 0: Result will be stored at location pointed to by
                         *    CRC Address field (in wire format).
                         * 1: Result will be stored directly following the
                         *    *destination* data.
                         */
                        uint32_t crc_location:1;
                        uint32_t reserved2:11;
                        /*
                         * MOVECRC - Move data in the same way as a standard
                         * copy operation, but also compute the CRC32.
                         *
                         * CRC - Only compute the CRC on the source data.
                         *
                         * There is a CRC accumulator register in the
                         * hardware.  If 'use_seed' is set, it is initialized
                         * to the value in 'seed.'
                         *
                         * In all modes, these operators accumulate size
                         * bytes at src_addr into the running CRC32C.
                         *
                         * Store mode emits the accumulated CRC, in wire
                         * format, as specified by the crc_location bit
                         * above.
                         *
                         * Test mode compares the accumulated CRC against the
                         * reference CRC, as described in crc_location above.
                         * On failure, it halts the DMA engine with a CRC
                         * error status.
                         */
#define IOAT_OP_MOVECRC 0x41
#define IOAT_OP_MOVECRC_TEST 0x42
#define IOAT_OP_MOVECRC_STORE 0x43
#define IOAT_OP_CRC 0x81
#define IOAT_OP_CRC_TEST 0x82
#define IOAT_OP_CRC_STORE 0x83
                        uint32_t op:8;
                } control;
        } u;
        uint64_t src_addr;
        uint64_t dest_addr;
        uint64_t next;
        uint64_t next_src_addr;
        uint64_t next_dest_addr;
        uint32_t seed;
        uint32_t reserved;
        uint64_t crc_address;
};
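
/*
 * Illustrative sketch (hypothetical names): computing a CRC32C over 'len'
 * bytes and appending the result, in wire format, after the source data:
 *
 *      desc->size = len;
 *      desc->u.control_raw = 0;
 *      desc->u.control.op = IOAT_OP_CRC_STORE;
 *      desc->u.control.crc_location = 1;
 *      desc->u.control.use_seed = 1;
 *      desc->seed = prior_accumulated_crc;
 *      desc->src_addr = src_bus_addr;
 */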

struct ioat_xor_hw_descriptor {
        uint32_t size;
        union {
                uint32_t control_raw;
                struct generic_dma_control control_generic;
                struct {
                        uint32_t int_enable:1;
                        uint32_t src_snoop_disable:1;
                        uint32_t dest_snoop_disable:1;
                        uint32_t completion_update:1;
                        uint32_t fence:1;
                        uint32_t src_count:3;
                        uint32_t bundle:1;
                        uint32_t dest_dca:1;
                        uint32_t hint:1;
                        uint32_t reserved:13;
#define IOAT_OP_XOR 0x87
#define IOAT_OP_XOR_VAL 0x88
                        uint32_t op:8;
                } control;
        } u;
        uint64_t src_addr;
        uint64_t dest_addr;
        uint64_t next;
        uint64_t src_addr2;
        uint64_t src_addr3;
        uint64_t src_addr4;
        uint64_t src_addr5;
};

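/*
 * When an XOR (or PQ, below) operation names more sources than fit in the
 * base descriptor, the remaining source addresses are expected to be
 * carried in one of these extension descriptors, which occupies the next
 * 64-byte slot in the descriptor ring.
 */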
struct ioat_xor_ext_hw_descriptor {
        uint64_t src_addr6;
        uint64_t src_addr7;
        uint64_t src_addr8;
        uint64_t next;
        uint64_t reserved[4];
};

struct ioat_pq_hw_descriptor {
        uint32_t size;
        union {
                uint32_t control_raw;
                struct generic_dma_control control_generic;
                struct {
                        uint32_t int_enable:1;
                        uint32_t src_snoop_disable:1;
                        uint32_t dest_snoop_disable:1;
                        uint32_t completion_update:1;
                        uint32_t fence:1;
                        uint32_t src_count:3;
                        uint32_t bundle:1;
                        uint32_t dest_dca:1;
                        uint32_t hint:1;
                        uint32_t p_disable:1;
                        uint32_t q_disable:1;
                        uint32_t reserved:11;
#define IOAT_OP_PQ 0x89
#define IOAT_OP_PQ_VAL 0x8a
                        uint32_t op:8;
                } control;
        } u;
        uint64_t src_addr;
        uint64_t p_addr;
        uint64_t next;
        uint64_t src_addr2;
        uint64_t src_addr3;
        uint8_t coef[8];
        uint64_t q_addr;
};

struct ioat_pq_ext_hw_descriptor {
        uint64_t src_addr4;
        uint64_t src_addr5;
        uint64_t src_addr6;
        uint64_t next;
        uint64_t src_addr7;
        uint64_t src_addr8;
        uint64_t reserved[2];
};

struct ioat_pq_update_hw_descriptor {
        uint32_t size;
        union {
                uint32_t control_raw;
                struct generic_dma_control control_generic;
                struct {
                        uint32_t int_enable:1;
                        uint32_t src_snoop_disable:1;
                        uint32_t dest_snoop_disable:1;
                        uint32_t completion_update:1;
                        uint32_t fence:1;
                        uint32_t src_cnt:3;
                        uint32_t bundle:1;
                        uint32_t dest_dca:1;
                        uint32_t hint:1;
                        uint32_t p_disable:1;
                        uint32_t q_disable:1;
                        uint32_t reserved:3;
                        uint32_t coef:8;
#define IOAT_OP_PQ_UP 0x8b
                        uint32_t op:8;
                } control;
        } u;
        uint64_t src_addr;
        uint64_t p_addr;
        uint64_t next;
        uint64_t src_addr2;
        uint64_t p_src;
        uint64_t q_src;
        uint64_t q_addr;
};

struct ioat_raw_hw_descriptor {
        uint64_t field[8];
};

struct bus_dmadesc {
        bus_dmaengine_callback_t callback_fn;
        void *callback_arg;
};

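/*
 * Per-slot software state for the descriptor ring: the consumer's
 * completion callback and the DMA maps to unload when the hardware
 * retires this slot.  'id' records the descriptor's index in the ring.
 */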
struct ioat_descriptor {
        struct bus_dmadesc bus_dmadesc;
        uint32_t id;
        bus_dmamap_t src_dmamap;
        bus_dmamap_t dst_dmamap;
        bus_dmamap_t src2_dmamap;
        bus_dmamap_t dst2_dmamap;
};

/* Unused by this driver at this time. */
#define IOAT_OP_MARKER 0x84

/*
 * Deprecated OPs -- v3 DMA generates an abort if given these, and this
 * driver doesn't support anything older than v3.
 */
#define IOAT_OP_OLD_XOR 0x85
#define IOAT_OP_OLD_XOR_VAL 0x86

/* One of these per allocated PCI device. */
struct ioat_softc {
        bus_dmaengine_t dmaengine;
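/*
 * Recover the containing softc from a bus_dmaengine_t handle
 * (container_of-style; the handle points at the 'dmaengine' member
 * above), e.g. 'struct ioat_softc *sc = to_ioat_softc(dmaengine);'.
 */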
#define to_ioat_softc(_dmaeng) \
({ \
        bus_dmaengine_t *_p = (_dmaeng); \
        (struct ioat_softc *)((char *)_p - \
            offsetof(struct ioat_softc, dmaengine)); \
})

        device_t device;
        int domain;
        int cpu;
        int version;
        unsigned chan_idx;

        bus_space_tag_t pci_bus_tag;
        bus_space_handle_t pci_bus_handle;
        struct resource *pci_resource;
        int pci_resource_id;
        uint32_t max_xfer_size;
        uint32_t capabilities;
        uint32_t ring_size_order;
        uint16_t intrdelay_max;
        uint16_t cached_intrdelay;

        int rid;
        struct resource *res;
        void *tag;

        bus_dma_tag_t hw_desc_tag;
        bus_dmamap_t hw_desc_map;

        bus_dma_tag_t data_tag;

        bus_dma_tag_t comp_update_tag;
        bus_dmamap_t comp_update_map;
        uint64_t *comp_update;
        bus_addr_t comp_update_bus_addr;

        boolean_t quiescing;
        boolean_t destroying;
        boolean_t is_submitter_processing;
        boolean_t intrdelay_supported;
        boolean_t resetting; /* submit_lock */
        boolean_t resetting_cleanup; /* cleanup_lock */

        struct ioat_descriptor *ring;

        union ioat_hw_descriptor {
                struct ioat_generic_hw_descriptor generic;
                struct ioat_dma_hw_descriptor dma;
                struct ioat_fill_hw_descriptor fill;
                struct ioat_crc32_hw_descriptor crc32;
                struct ioat_xor_hw_descriptor xor;
                struct ioat_xor_ext_hw_descriptor xor_ext;
                struct ioat_pq_hw_descriptor pq;
                struct ioat_pq_ext_hw_descriptor pq_ext;
                struct ioat_raw_hw_descriptor raw;
        } *hw_desc_ring;
        bus_addr_t hw_desc_bus_addr;
#define RING_PHYS_ADDR(sc, i) ((sc)->hw_desc_bus_addr + \
    (((i) % (1 << (sc)->ring_size_order)) * sizeof(struct ioat_dma_hw_descriptor)))
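/*
 * Note: every descriptor variant in the union above is 64 bytes, so
 * sizeof(struct ioat_dma_hw_descriptor) also serves as the ring stride;
 * RING_PHYS_ADDR() wraps the index modulo the ring size.
 */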

        struct mtx_padalign submit_lock;
        struct callout poll_timer;
        struct task reset_task;
        struct mtx_padalign cleanup_lock;

        uint32_t refcnt;
        uint32_t head;
        uint32_t acq_head;
        uint32_t tail;
        bus_addr_t last_seen;

        struct {
                uint64_t interrupts;
                uint64_t descriptors_processed;
                uint64_t descriptors_error;
                uint64_t descriptors_submitted;

                uint32_t channel_halts;
                uint32_t last_halt_chanerr;
        } stats;
};

void ioat_test_attach(void);
void ioat_test_detach(void);

/*
 * XXX DO NOT USE this routine for obtaining the current completed
 * descriptor.
 *
 * The double_4 read on ioat<3.3 appears to result in torn reads, and v3.2
 * hardware is still commonplace (Broadwell Xeon has it).  Instead, use the
 * device-pushed *comp_update.
 *
 * It is safe to use ioat_get_chansts() for the low status bits.
 */
static inline uint64_t
ioat_get_chansts(struct ioat_softc *ioat)
{
        uint64_t status;

        if (ioat->version >= IOAT_VER_3_3)
                status = ioat_read_8(ioat, IOAT_CHANSTS_OFFSET);
        else
                /* Must read lower 4 bytes before upper 4 bytes. */
                status = ioat_read_double_4(ioat, IOAT_CHANSTS_OFFSET);
        return (status);
}

static inline void
ioat_write_chancmp(struct ioat_softc *ioat, uint64_t addr)
{

        if (ioat->version >= IOAT_VER_3_3)
                ioat_write_8(ioat, IOAT_CHANCMP_OFFSET_LOW, addr);
        else
                ioat_write_double_4(ioat, IOAT_CHANCMP_OFFSET_LOW, addr);
}

static inline void
ioat_write_chainaddr(struct ioat_softc *ioat, uint64_t addr)
{

        if (ioat->version >= IOAT_VER_3_3)
                ioat_write_8(ioat, IOAT_CHAINADDR_OFFSET_LOW, addr);
        else
                ioat_write_double_4(ioat, IOAT_CHAINADDR_OFFSET_LOW, addr);
}

static inline boolean_t
is_ioat_active(uint64_t status)
{
        return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_ACTIVE);
}

static inline boolean_t
is_ioat_idle(uint64_t status)
{
        return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_IDLE);
}

static inline boolean_t
is_ioat_halted(uint64_t status)
{
        return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_HALTED);
}

static inline boolean_t
is_ioat_suspended(uint64_t status)
{
        return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_SUSPENDED);
}
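
/*
 * Typical polling pattern (sketch; the surrounding code is hypothetical):
 *
 *      uint64_t status = ioat_get_chansts(ioat);
 *      if (is_ioat_halted(status))
 *              ... the channel stopped on an error; consult CHANERR ...
 *      else if (is_ioat_idle(status))
 *              ... all submitted descriptors have completed ...
 */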

static inline void
ioat_suspend(struct ioat_softc *ioat)
{
        ioat_write_1(ioat, IOAT_CHANCMD_OFFSET, IOAT_CHANCMD_SUSPEND);
}

static inline void
ioat_reset(struct ioat_softc *ioat)
{
        ioat_write_1(ioat, IOAT_CHANCMD_OFFSET, IOAT_CHANCMD_RESET);
}

static inline boolean_t
ioat_reset_pending(struct ioat_softc *ioat)
{
        uint8_t cmd;

        cmd = ioat_read_1(ioat, IOAT_CHANCMD_OFFSET);
        return ((cmd & IOAT_CHANCMD_RESET) != 0);
}

#endif /* __IOAT_INTERNAL_H__ */