/*-
 * Copyright (c) 2021-2022 NVIDIA corporation & affiliates.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

/*
 * The internal queue, IQ, code is more or less a stripped-down copy
 * of the existing SQ management code, with the following exceptions:
 *
 * - an optional single-segment memory buffer, which can be read or
 *   written as a whole by the hardware, may be provided.
 *
 * - an optional completion callback for all transmit operations may
 *   be provided.
 *
 * - mbufs are not supported.
 */

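/*
 * Producer-side usage, sketched here for orientation only; the actual
 * callers live outside this file and the exact WQE contents depend on
 * the command being posted:
 *
 *     mtx_lock(&iq->lock);
 *     pi = mlx5e_iq_get_producer_index(iq);
 *     if (pi >= 0) {
 *             wqe = mlx5_wq_cyc_get_wqe(&iq->wq, pi);
 *             ... build the control and data segments ...
 *             mlx5e_iq_load_memory_single(iq, pi, buf, size,
 *                 &dma_address, BUS_DMASYNC_PREWRITE);
 *             ... fill in iq->data[pi], copy the control segment
 *                 into iq->doorbell and advance iq->pc ...
 *             mlx5e_iq_notify_hw(iq);
 *     }
 *     mtx_unlock(&iq->lock);
 */
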
#include <dev/mlx5/mlx5_en/en.h>

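/*
 * Reap up to "budget" completions from the IQ's completion queue:
 * sync and unload any DMA mapping attached to each completed slot,
 * invoke the optional completion callback, drop the optional refcount
 * and then advance the consumer counter.
 */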
static void
mlx5e_iq_poll(struct mlx5e_iq *iq, int budget)
{
        const struct mlx5_cqe64 *cqe;
        u16 ci;
        u16 iqcc;

        /*
         * iq->cc must be updated only after mlx5_cqwq_update_db_record(),
         * otherwise a cq overrun may occur
         */
        iqcc = iq->cc;

        while (budget-- > 0) {

                cqe = mlx5e_get_cqe(&iq->cq);
                if (!cqe)
                        break;

                mlx5_cqwq_pop(&iq->cq.wq);

                ci = iqcc & iq->wq.sz_m1;

                if (likely(iq->data[ci].dma_sync != 0)) {
                        /* make sure data written by hardware is visible to CPU */
                        bus_dmamap_sync(iq->dma_tag, iq->data[ci].dma_map,
                            iq->data[ci].dma_sync);
                        bus_dmamap_unload(iq->dma_tag, iq->data[ci].dma_map);

                        iq->data[ci].dma_sync = 0;
                }

                if (likely(iq->data[ci].callback != NULL)) {
                        iq->data[ci].callback(iq->data[ci].arg);
                        iq->data[ci].callback = NULL;
                }

                if (unlikely(iq->data[ci].p_refcount != NULL)) {
                        atomic_add_int(iq->data[ci].p_refcount, -1);
                        iq->data[ci].p_refcount = NULL;
                }
                iqcc += iq->data[ci].num_wqebbs;
        }

        mlx5_cqwq_update_db_record(&iq->cq.wq);

        /* Ensure cq space is freed before enabling more cqes */
        atomic_thread_fence_rel();

        iq->cc = iqcc;
}

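/*
 * Completion handler registered with the IQ's completion queue; also
 * invoked directly via iq->cq.mcq.comp by the drain loop below. Polls
 * the IQ and re-arms the completion queue.
 */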
static void
mlx5e_iq_completion(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe __unused)
{
        struct mlx5e_iq *iq = container_of(mcq, struct mlx5e_iq, cq.mcq);

        mtx_lock(&iq->comp_lock);
        mlx5e_iq_poll(iq, MLX5E_BUDGET_MAX);
        mlx5e_cq_arm(&iq->cq, MLX5_GET_DOORBELL_LOCK(&iq->priv->doorbell_lock));
        mtx_unlock(&iq->comp_lock);
}

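/*
 * Post a NOP work request covering "ds_cnt" data segments. Used, among
 * other things, to pad out the work queue so that a following WQE does
 * not wrap around the end of the ring (see
 * mlx5e_iq_get_producer_index()).
 */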
void
mlx5e_iq_send_nop(struct mlx5e_iq *iq, u32 ds_cnt)
{
        u16 pi = iq->pc & iq->wq.sz_m1;
        struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&iq->wq, pi);

        mtx_assert(&iq->lock, MA_OWNED);

        memset(&wqe->ctrl, 0, sizeof(wqe->ctrl));

        wqe->ctrl.opmod_idx_opcode = cpu_to_be32((iq->pc << 8) | MLX5_OPCODE_NOP);
        wqe->ctrl.qpn_ds = cpu_to_be32((iq->sqn << 8) | ds_cnt);
        wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;

        /* Copy data for doorbell */
        memcpy(iq->doorbell.d32, &wqe->ctrl, sizeof(iq->doorbell.d32));

        iq->data[pi].callback = NULL;
        iq->data[pi].arg = NULL;
        iq->data[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
        iq->data[pi].dma_sync = 0;
        iq->pc += iq->data[pi].num_wqebbs;
}

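/*
 * Release all per-WQE bookkeeping: finish any outstanding DMA maps,
 * callbacks and refcounts, destroy the DMA maps and free the data
 * array.
 */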
static void
mlx5e_iq_free_db(struct mlx5e_iq *iq)
{
        int wq_sz = mlx5_wq_cyc_get_size(&iq->wq);
        int x;

        for (x = 0; x != wq_sz; x++) {
                if (likely(iq->data[x].dma_sync != 0)) {
                        bus_dmamap_unload(iq->dma_tag, iq->data[x].dma_map);
                        iq->data[x].dma_sync = 0;
                }
                if (likely(iq->data[x].callback != NULL)) {
                        iq->data[x].callback(iq->data[x].arg);
                        iq->data[x].callback = NULL;
                }
                if (unlikely(iq->data[x].p_refcount != NULL)) {
                        atomic_add_int(iq->data[x].p_refcount, -1);
                        iq->data[x].p_refcount = NULL;
                }
                bus_dmamap_destroy(iq->dma_tag, iq->data[x].dma_map);
        }
        free(iq->data, M_MLX5EN);
}

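/*
 * Allocate the per-WQE bookkeeping array on the device's NUMA domain
 * and create one DMA map per work queue entry.
 */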
static int
mlx5e_iq_alloc_db(struct mlx5e_iq *iq)
{
        int wq_sz = mlx5_wq_cyc_get_size(&iq->wq);
        int err;
        int x;

        iq->data = malloc_domainset(wq_sz * sizeof(iq->data[0]), M_MLX5EN,
            mlx5_dev_domainset(iq->priv->mdev), M_WAITOK | M_ZERO);

        /* Create DMA descriptor maps */
        for (x = 0; x != wq_sz; x++) {
                err = -bus_dmamap_create(iq->dma_tag, 0, &iq->data[x].dma_map);
                if (err != 0) {
                        while (x--)
                                bus_dmamap_destroy(iq->dma_tag, iq->data[x].dma_map);
                        free(iq->data, M_MLX5EN);
                        return (err);
                }
        }
        return (0);
}

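/*
 * Allocate the software resources backing the IQ: the DMA tag used for
 * single-segment buffer mappings, the cyclic work queue and the
 * per-WQE bookkeeping array.
 */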
static int
mlx5e_iq_create(struct mlx5e_channel *c,
    struct mlx5e_sq_param *param,
    struct mlx5e_iq *iq)
{
        struct mlx5e_priv *priv = c->priv;
        struct mlx5_core_dev *mdev = priv->mdev;
        void *sqc = param->sqc;
        void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
        int err;

        /* Create DMA descriptor TAG */
        if ((err = -bus_dma_tag_create(
            bus_get_dma_tag(mdev->pdev->dev.bsddev),
            1,                          /* any alignment */
            0,                          /* no boundary */
            BUS_SPACE_MAXADDR,          /* lowaddr */
            BUS_SPACE_MAXADDR,          /* highaddr */
            NULL, NULL,                 /* filter, filterarg */
            PAGE_SIZE,                  /* maxsize */
            1,                          /* nsegments */
            PAGE_SIZE,                  /* maxsegsize */
            0,                          /* flags */
            NULL, NULL,                 /* lockfunc, lockfuncarg */
            &iq->dma_tag)))
                goto done;

        iq->mkey_be = cpu_to_be32(priv->mr.key);
        iq->priv = priv;

        err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq,
            &iq->wq, &iq->wq_ctrl);
        if (err)
                goto err_free_dma_tag;

        iq->wq.db = &iq->wq.db[MLX5_SND_DBR];

        err = mlx5e_iq_alloc_db(iq);
        if (err)
                goto err_iq_wq_destroy;

        return (0);

err_iq_wq_destroy:
        mlx5_wq_destroy(&iq->wq_ctrl);

err_free_dma_tag:
        bus_dma_tag_destroy(iq->dma_tag);
done:
        return (err);
}

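/* Tear down the resources allocated by mlx5e_iq_create(). */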
static void
mlx5e_iq_destroy(struct mlx5e_iq *iq)
{
        mlx5e_iq_free_db(iq);
        mlx5_wq_destroy(&iq->wq_ctrl);
        bus_dma_tag_destroy(iq->dma_tag);
}

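/*
 * Create the hardware send queue object backing the IQ. The SQ is
 * created in the RST state and bound to the given TIS, the IQ's
 * completion queue and the provided blue flame register.
 */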
static int
mlx5e_iq_enable(struct mlx5e_iq *iq, struct mlx5e_sq_param *param,
    const struct mlx5_sq_bfreg *bfreg, int tis_num)
{
        void *in;
        void *sqc;
        void *wq;
        int inlen;
        int err;
        u8 ts_format;

        inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
            sizeof(u64) * iq->wq_ctrl.buf.npages;
        in = mlx5_vzalloc(inlen);
        if (in == NULL)
                return (-ENOMEM);

        iq->uar_map = bfreg->map;

        ts_format = mlx5_get_sq_default_ts(iq->priv->mdev);
        sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
        wq = MLX5_ADDR_OF(sqc, sqc, wq);

        memcpy(sqc, param->sqc, sizeof(param->sqc));

        MLX5_SET(sqc, sqc, tis_num_0, tis_num);
        MLX5_SET(sqc, sqc, cqn, iq->cq.mcq.cqn);
        MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
        MLX5_SET(sqc, sqc, ts_format, ts_format);
        MLX5_SET(sqc, sqc, tis_lst_sz, 1);
        MLX5_SET(sqc, sqc, flush_in_error_en, 1);
        MLX5_SET(sqc, sqc, allow_swp, 1);

        /* SQ remap support requires reg_umr privileges level */
        if (MLX5_CAP_QOS(iq->priv->mdev, qos_remap_pp)) {
                MLX5_SET(sqc, sqc, qos_remap_en, 1);
                if (MLX5_CAP_ETH(iq->priv->mdev, reg_umr_sq))
                        MLX5_SET(sqc, sqc, reg_umr, 1);
                else
                        mlx5_en_err(iq->priv->ifp,
                            "No reg umr SQ capability, SQ remap disabled\n");
        }

        MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
        MLX5_SET(wq, wq, uar_page, bfreg->index);
        MLX5_SET(wq, wq, log_wq_pg_sz, iq->wq_ctrl.buf.page_shift -
            MLX5_ADAPTER_PAGE_SHIFT);
        MLX5_SET64(wq, wq, dbr_addr, iq->wq_ctrl.db.dma);

        mlx5_fill_page_array(&iq->wq_ctrl.buf,
            (__be64 *) MLX5_ADDR_OF(wq, wq, pas));

        err = mlx5_core_create_sq(iq->priv->mdev, in, inlen, &iq->sqn);

        kvfree(in);

        return (err);
}

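/* Transition the hardware send queue from "curr_state" to "next_state". */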
static int
mlx5e_iq_modify(struct mlx5e_iq *iq, int curr_state, int next_state)
{
        void *in;
        void *sqc;
        int inlen;
        int err;

        inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
        in = mlx5_vzalloc(inlen);
        if (in == NULL)
                return (-ENOMEM);

        sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);

        MLX5_SET(modify_sq_in, in, sqn, iq->sqn);
        MLX5_SET(modify_sq_in, in, sq_state, curr_state);
        MLX5_SET(sqc, sqc, state, next_state);

        err = mlx5_core_modify_sq(iq->priv->mdev, in, inlen);

        kvfree(in);

        return (err);
}

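/* Destroy the hardware send queue object. */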
static void
mlx5e_iq_disable(struct mlx5e_iq *iq)
{
        mlx5_core_destroy_sq(iq->priv->mdev, iq->sqn);
}

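/*
 * Bring up a complete IQ: open its completion queue, allocate the
 * software state, create the hardware SQ and move it to the RDY state
 * before marking the queue as running.
 */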
int
mlx5e_iq_open(struct mlx5e_channel *c,
    struct mlx5e_sq_param *sq_param,
    struct mlx5e_cq_param *cq_param,
    struct mlx5e_iq *iq)
{
        int err;

        err = mlx5e_open_cq(c->priv, cq_param, &iq->cq,
            &mlx5e_iq_completion, c->ix);
        if (err)
                return (err);

        err = mlx5e_iq_create(c, sq_param, iq);
        if (err)
                goto err_close_cq;

        err = mlx5e_iq_enable(iq, sq_param, &c->bfreg, c->priv->tisn[0]);
        if (err)
                goto err_destroy_sq;

        err = mlx5e_iq_modify(iq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY);
        if (err)
                goto err_disable_sq;

        WRITE_ONCE(iq->running, 1);

        return (0);

err_disable_sq:
        mlx5e_iq_disable(iq);
err_destroy_sq:
        mlx5e_iq_destroy(iq);
err_close_cq:
        mlx5e_close_cq(&iq->cq);

        return (err);
}

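/*
 * Stop accepting new work requests and wait for the hardware to
 * complete, or error out, all outstanding ones.
 */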
static void
mlx5e_iq_drain(struct mlx5e_iq *iq)
{
        struct mlx5_core_dev *mdev = iq->priv->mdev;

        /*
         * Check if already stopped.
         *
         * NOTE: Serialization of this function is managed by the
         * caller ensuring the priv's state lock is locked or in case
         * of rate limit support, a single thread manages drain and
         * resume of SQs. The "running" variable can therefore safely
         * be read without any locks.
         */
        if (READ_ONCE(iq->running) == 0)
                return;

        /* don't put more packets into the SQ */
        WRITE_ONCE(iq->running, 0);

        /* wait till SQ is empty or link is down */
        mtx_lock(&iq->lock);
        while (iq->cc != iq->pc &&
            (iq->priv->media_status_last & IFM_ACTIVE) != 0 &&
            mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR &&
            pci_channel_offline(mdev->pdev) == 0) {
                mtx_unlock(&iq->lock);
                msleep(1);
                iq->cq.mcq.comp(&iq->cq.mcq, NULL);
                mtx_lock(&iq->lock);
        }
        mtx_unlock(&iq->lock);

        /* error out remaining requests */
        (void) mlx5e_iq_modify(iq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);

        /* wait till SQ is empty */
        mtx_lock(&iq->lock);
        while (iq->cc != iq->pc &&
            mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR &&
            pci_channel_offline(mdev->pdev) == 0) {
                mtx_unlock(&iq->lock);
                msleep(1);
                iq->cq.mcq.comp(&iq->cq.mcq, NULL);
                mtx_lock(&iq->lock);
        }
        mtx_unlock(&iq->lock);
}

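/* Drain and tear down the IQ and its completion queue. */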
void
mlx5e_iq_close(struct mlx5e_iq *iq)
{
        mlx5e_iq_drain(iq);
        mlx5e_iq_disable(iq);
        mlx5e_iq_destroy(iq);
        mlx5e_close_cq(&iq->cq);
}

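/*
 * The "static" init/destroy functions manage only the IQ's send and
 * completion mutexes; queue state itself is handled by mlx5e_iq_open()
 * and mlx5e_iq_close().
 */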
void
mlx5e_iq_static_init(struct mlx5e_iq *iq)
{
        mtx_init(&iq->lock, "mlx5iq",
            MTX_NETWORK_LOCK " IQ", MTX_DEF);
        mtx_init(&iq->comp_lock, "mlx5iq_comp",
            MTX_NETWORK_LOCK " IQ COMP", MTX_DEF);
}

void
mlx5e_iq_static_destroy(struct mlx5e_iq *iq)
{
        mtx_destroy(&iq->lock);
        mtx_destroy(&iq->comp_lock);
}

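/*
 * Ring the doorbell for any work requests posted since the last
 * notification: update the doorbell record and write the saved control
 * segment to the UAR, unless doorbell writes are currently inhibited
 * or nothing is pending.
 */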
void
mlx5e_iq_notify_hw(struct mlx5e_iq *iq)
{
        mtx_assert(&iq->lock, MA_OWNED);

        /* Check if we need to write the doorbell */
        if (unlikely(iq->db_inhibit != 0 || iq->doorbell.d64 == 0))
                return;

        /* Ensure wqe is visible to device before updating doorbell record */
        wmb();

        *iq->wq.db = cpu_to_be32(iq->pc);

        /*
         * Ensure the doorbell record is visible to device before ringing
         * the doorbell:
         */
        wmb();

        mlx5_write64(iq->doorbell.d32, iq->uar_map,
            MLX5_GET_DOORBELL_LOCK(&iq->priv->doorbell_lock));

        iq->doorbell.d64 = 0;
}

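/*
 * Return true if at least "n" WQEBBs are free between the producer and
 * consumer counters.
 */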
static inline bool
mlx5e_iq_has_room_for(struct mlx5e_iq *iq, u16 n)
{
        u16 cc = iq->cc;
        u16 pc = iq->pc;

        return ((iq->wq.sz_m1 & (cc - pc)) >= n || cc == pc);
}

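/*
 * Reserve space for a new work request. Returns the producer index to
 * use, or -1 if the queue is not running or lacks room. NOPs are
 * posted as needed so that a maximum-sized WQE never wraps around the
 * ring edge.
 */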
int
mlx5e_iq_get_producer_index(struct mlx5e_iq *iq)
{
        u16 pi;

        mtx_assert(&iq->lock, MA_OWNED);

        if (unlikely(iq->running == 0))
                return (-1);
        if (unlikely(!mlx5e_iq_has_room_for(iq, 2 * MLX5_SEND_WQE_MAX_WQEBBS)))
                return (-1);

        /* Align IQ edge with NOPs to avoid WQE wrap around */
        pi = ((~iq->pc) & iq->wq.sz_m1);
        if (unlikely(pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1))) {
                /* Send one multi NOP message instead of many */
                mlx5e_iq_send_nop(iq, (pi + 1) * MLX5_SEND_WQEBB_NUM_DS);
                pi = ((~iq->pc) & iq->wq.sz_m1);
                if (unlikely(pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)))
                        return (-1);
        }
        return (iq->pc & iq->wq.sz_m1);
}

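/*
 * Bus DMA callback used by mlx5e_iq_load_memory_single(); stores the
 * bus address of the single supported segment.
 */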
static void
mlx5e_iq_load_memory_cb(void *arg, bus_dma_segment_t *segs,
    int nseg, int error)
{
        u64 *pdma_address = arg;

        if (unlikely(error || nseg != 1))
                panic("mlx5e_iq_load_memory_cb: error=%d nseg=%d", error, nseg);

        *pdma_address = segs[0].ds_addr;
}

CTASSERT(BUS_DMASYNC_POSTREAD != 0);
CTASSERT(BUS_DMASYNC_POSTWRITE != 0);

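/*
 * Map "buffer" for DMA at producer index "pi" and return its bus
 * address through "pdma_address". The matching POSTREAD/POSTWRITE
 * operation is recorded in the slot's dma_sync field, which is
 * guaranteed non-zero by the CTASSERTs above, so the completion path
 * knows how to sync and unload the map.
 */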
void
mlx5e_iq_load_memory_single(struct mlx5e_iq *iq, u16 pi, void *buffer, size_t size,
    u64 *pdma_address, u32 dma_sync)
{
        int error;

        error = bus_dmamap_load(iq->dma_tag, iq->data[pi].dma_map, buffer, size,
            &mlx5e_iq_load_memory_cb, pdma_address, BUS_DMA_NOWAIT);
        if (unlikely(error))
                panic("mlx5e_iq_load_memory: error=%d buffer=%p size=%zd",
                    error, buffer, size);

        switch (dma_sync) {
        case BUS_DMASYNC_PREREAD:
                iq->data[pi].dma_sync = BUS_DMASYNC_POSTREAD;
                break;
        case BUS_DMASYNC_PREWRITE:
                iq->data[pi].dma_sync = BUS_DMASYNC_POSTWRITE;
                break;
        default:
                panic("mlx5e_iq_load_memory_single: Invalid DMA sync operation(%d)",
                    dma_sync);
        }

        /* make sure data in buffer is visible to hardware */
        bus_dmamap_sync(iq->dma_tag, iq->data[pi].dma_map, dma_sync);
}