1 /*-
2 * Copyright (c) 2013-2021, Mellanox Technologies, Ltd. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 *
25 * $FreeBSD$
26 */
27
28 #include "opt_rss.h"
29 #include "opt_ratelimit.h"
30
31 #include <linux/kref.h>
32 #include <linux/random.h>
33 #include <linux/delay.h>
34 #include <linux/sched.h>
35 #include <rdma/ib_umem.h>
36 #include <rdma/ib_umem_odp.h>
37 #include <rdma/ib_verbs.h>
38 #include <dev/mlx5/mlx5_ib/mlx5_ib.h>
39
40 enum {
41 MAX_PENDING_REG_MR = 8,
42 };
43
44 #define MLX5_UMR_ALIGN 2048
45 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
46 static __be64 mlx5_ib_update_mtt_emergency_buffer[
47 MLX5_UMR_MTT_MIN_CHUNK_SIZE/sizeof(__be64)]
48 __aligned(MLX5_UMR_ALIGN);
49 static DEFINE_MUTEX(mlx5_ib_update_mtt_emergency_buffer_mutex);
50 #endif
51
52 static int clean_mr(struct mlx5_ib_mr *mr);
53
54 static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
55 {
56 int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
57
58 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
59 /* Wait until all page fault handlers using the mr complete. */
60 synchronize_srcu(&dev->mr_srcu);
61 #endif
62
63 return err;
64 }
65
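/*
 * Map an MR order (log2 of the number of pages) to an index into the
 * MR cache entry array.  Orders below the smallest cached order map to
 * entry 0.
 */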
66 static int order2idx(struct mlx5_ib_dev *dev, int order)
67 {
68 struct mlx5_mr_cache *cache = &dev->cache;
69
70 if (order < cache->ent[0].order)
71 return 0;
72 else
73 return order - cache->ent[0].order;
74 }
75
76 static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
77 {
78 return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >=
79 length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
80 }
81
82 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
83 static void update_odp_mr(struct mlx5_ib_mr *mr)
84 {
85 if (mr->umem->odp_data) {
		/*
		 * This barrier keeps the compiler from reordering the
		 * store of umem->odp_data->private (pointing at our MR)
		 * before reg_umr has finished, so that MR initialization
		 * has completed before we start handling invalidations.
		 */
93 smp_wmb();
94 mr->umem->odp_data->private = mr;
		/*
		 * Make sure the new umem->odp_data->private value is
		 * visible to the invalidation routines before page
		 * faults can occur on the MR.  Page faults may happen
		 * as soon as the MR is inserted into the tree below
		 * this line; without the barrier, a fault could be
		 * handled and an invalidation run before the
		 * invalidation handler sees umem->odp_data->private == mr.
		 */
105 smp_wmb();
106 }
107 }
108 #endif
109
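/*
 * Completion callback for the asynchronous CREATE_MKEY commands posted
 * by add_keys().  On success the new mkey gets a fresh variant key, is
 * added to its cache entry list and inserted into the device's mkey
 * radix tree; on failure the MR is freed and cache refilling is
 * throttled via the delay timer.
 */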
110 static void reg_mr_callback(int status, struct mlx5_async_work *context)
111 {
112 struct mlx5_ib_mr *mr =
113 container_of(context, struct mlx5_ib_mr, cb_work);
114 struct mlx5_ib_dev *dev = mr->dev;
115 struct mlx5_mr_cache *cache = &dev->cache;
116 int c = order2idx(dev, mr->order);
117 struct mlx5_cache_ent *ent = &cache->ent[c];
118 u8 key;
119 unsigned long flags;
120 struct mlx5_mr_table *table = &dev->mdev->priv.mr_table;
121 int err;
122
123 spin_lock_irqsave(&ent->lock, flags);
124 ent->pending--;
125 spin_unlock_irqrestore(&ent->lock, flags);
126 if (status) {
127 mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
128 kfree(mr);
129 dev->fill_delay = 1;
130 mod_timer(&dev->delay_timer, jiffies + HZ);
131 return;
132 }
133
134 spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
135 key = dev->mdev->priv.mkey_key++;
136 spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
137 mr->mmkey.key = mlx5_idx_to_mkey(MLX5_GET(create_mkey_out, mr->out, mkey_index)) | key;
138
139 cache->last_add = jiffies;
140
141 spin_lock_irqsave(&ent->lock, flags);
142 list_add_tail(&mr->list, &ent->head);
143 ent->cur++;
144 ent->size++;
145 spin_unlock_irqrestore(&ent->lock, flags);
146
147 spin_lock_irqsave(&table->lock, flags);
148 err = radix_tree_insert(&table->tree, mlx5_mkey_to_idx(mr->mmkey.key),
149 &mr->mmkey);
150 if (err)
151 pr_err("Error inserting to mkey tree. 0x%x\n", -err);
152 spin_unlock_irqrestore(&table->lock, flags);
153 }
154
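/*
 * Post up to 'num' asynchronous CREATE_MKEY commands to grow cache
 * entry 'c'; each command completes in reg_mr_callback().  At most
 * MAX_PENDING_REG_MR commands may be outstanding per entry, and
 * -EAGAIN is returned when that limit is reached.
 */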
155 static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
156 {
157 struct mlx5_mr_cache *cache = &dev->cache;
158 struct mlx5_cache_ent *ent = &cache->ent[c];
159 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
160 struct mlx5_ib_mr *mr;
161 int npages = 1 << ent->order;
162 void *mkc;
163 u32 *in;
164 int err = 0;
165 int i;
166
167 in = kzalloc(inlen, GFP_KERNEL);
168 if (!in)
169 return -ENOMEM;
170
171 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
172 for (i = 0; i < num; i++) {
173 if (ent->pending >= MAX_PENDING_REG_MR) {
174 err = -EAGAIN;
175 break;
176 }
177
178 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
179 if (!mr) {
180 err = -ENOMEM;
181 break;
182 }
183 mr->order = ent->order;
184 mr->umred = 1;
185 mr->dev = dev;
186
187 MLX5_SET(mkc, mkc, free, 1);
188 MLX5_SET(mkc, mkc, umr_en, 1);
189 MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_MTT);
190
191 MLX5_SET(mkc, mkc, qpn, 0xffffff);
192 MLX5_SET(mkc, mkc, translations_octword_size, (npages + 1) / 2);
193 MLX5_SET(mkc, mkc, log_page_size, 12);
194
195 spin_lock_irq(&ent->lock);
196 ent->pending++;
197 spin_unlock_irq(&ent->lock);
198 err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey,
199 &dev->async_ctx, in, inlen,
200 mr->out, sizeof(mr->out),
201 reg_mr_callback, &mr->cb_work);
202 if (err) {
203 spin_lock_irq(&ent->lock);
204 ent->pending--;
205 spin_unlock_irq(&ent->lock);
206 mlx5_ib_warn(dev, "create mkey failed %d\n", err);
207 kfree(mr);
208 break;
209 }
210 }
211
212 kfree(in);
213 return err;
214 }
215
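/*
 * Remove up to 'num' cached MRs from entry 'c' and destroy their mkeys.
 */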
216 static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
217 {
218 struct mlx5_mr_cache *cache = &dev->cache;
219 struct mlx5_cache_ent *ent = &cache->ent[c];
220 struct mlx5_ib_mr *mr;
221 int err;
222 int i;
223
224 for (i = 0; i < num; i++) {
225 spin_lock_irq(&ent->lock);
226 if (list_empty(&ent->head)) {
227 spin_unlock_irq(&ent->lock);
228 return;
229 }
230 mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
231 list_del(&mr->list);
232 ent->cur--;
233 ent->size--;
234 spin_unlock_irq(&ent->lock);
235 err = destroy_mkey(dev, mr);
236 if (err)
237 mlx5_ib_warn(dev, "failed destroy mkey\n");
238 else
239 kfree(mr);
240 }
241 }
242
243 static int someone_adding(struct mlx5_mr_cache *cache)
244 {
245 int i;
246
247 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
248 if (cache->ent[i].cur < cache->ent[i].limit)
249 return 1;
250 }
251
252 return 0;
253 }
254
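/*
 * Per-entry cache maintenance: replenish the entry with new mkeys while
 * it holds fewer than twice its limit, and shrink it (lazily, see the
 * comment below) when it holds more than twice its limit.
 */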
255 static void __cache_work_func(struct mlx5_cache_ent *ent)
256 {
257 struct mlx5_ib_dev *dev = ent->dev;
258 struct mlx5_mr_cache *cache = &dev->cache;
259 int i = order2idx(dev, ent->order);
260 int err;
261
262 if (cache->stopped)
263 return;
264
265 ent = &dev->cache.ent[i];
266 if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
267 err = add_keys(dev, i, 1);
268 if (ent->cur < 2 * ent->limit) {
269 if (err == -EAGAIN) {
270 mlx5_ib_dbg(dev, "returned eagain, order %d\n",
271 i + 2);
272 queue_delayed_work(cache->wq, &ent->dwork,
273 msecs_to_jiffies(3));
274 } else if (err) {
275 mlx5_ib_warn(dev, "command failed order %d, err %d\n",
276 i + 2, err);
277 queue_delayed_work(cache->wq, &ent->dwork,
278 msecs_to_jiffies(1000));
279 } else {
280 queue_work(cache->wq, &ent->work);
281 }
282 }
283 } else if (ent->cur > 2 * ent->limit) {
		/*
		 * The remove_keys() logic is performed as a garbage
		 * collection task.  Such a task is intended to run only
		 * when no other active processes are running.
		 *
		 * need_resched() returns true if there are user tasks
		 * to be activated in the near future.
		 *
		 * In that case we skip remove_keys() and postpone the
		 * garbage collection work to the next cycle, in order
		 * to free CPU resources for other tasks.
		 */
296 if (!need_resched() && !someone_adding(cache) &&
297 time_after(jiffies, cache->last_add + 300 * HZ)) {
298 remove_keys(dev, i, 1);
299 if (ent->cur > ent->limit)
300 queue_work(cache->wq, &ent->work);
301 } else {
302 queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
303 }
304 }
305 }
306
307 static void delayed_cache_work_func(struct work_struct *work)
308 {
309 struct mlx5_cache_ent *ent;
310
311 ent = container_of(work, struct mlx5_cache_ent, dwork.work);
312 __cache_work_func(ent);
313 }
314
315 static void cache_work_func(struct work_struct *work)
316 {
317 struct mlx5_cache_ent *ent;
318
319 ent = container_of(work, struct mlx5_cache_ent, work);
320 __cache_work_func(ent);
321 }
322
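/*
 * Take an MR from the cache.  Start at the entry matching 'order' and
 * fall back to larger orders if that entry is empty, kicking the cache
 * work so that depleted entries get refilled.  Returns NULL on a miss.
 */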
323 static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
324 {
325 struct mlx5_mr_cache *cache = &dev->cache;
326 struct mlx5_ib_mr *mr = NULL;
327 struct mlx5_cache_ent *ent;
328 int c;
329 int i;
330
331 c = order2idx(dev, order);
332 if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
333 mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
334 return NULL;
335 }
336
337 for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
338 ent = &cache->ent[i];
339
340 mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
341
342 spin_lock_irq(&ent->lock);
343 if (!list_empty(&ent->head)) {
344 mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
345 list);
346 list_del(&mr->list);
347 ent->cur--;
348 spin_unlock_irq(&ent->lock);
349 if (ent->cur < ent->limit)
350 queue_work(cache->wq, &ent->work);
351 break;
352 }
353 spin_unlock_irq(&ent->lock);
354
355 queue_work(cache->wq, &ent->work);
356 }
357
358 if (!mr)
359 cache->ent[c].miss++;
360
361 return mr;
362 }
363
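/*
 * Return an MR to its cache entry; schedule the cache work if the entry
 * has grown beyond twice its limit so it can be shrunk.
 */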
364 static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
365 {
366 struct mlx5_mr_cache *cache = &dev->cache;
367 struct mlx5_cache_ent *ent;
368 int shrink = 0;
369 int c;
370
371 c = order2idx(dev, mr->order);
372 if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
373 mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
374 return;
375 }
376 ent = &cache->ent[c];
377 spin_lock_irq(&ent->lock);
378 list_add_tail(&mr->list, &ent->head);
379 ent->cur++;
380 if (ent->cur > 2 * ent->limit)
381 shrink = 1;
382 spin_unlock_irq(&ent->lock);
383
384 if (shrink)
385 queue_work(cache->wq, &ent->work);
386 }
387
388 static void clean_keys(struct mlx5_ib_dev *dev, int c)
389 {
390 struct mlx5_mr_cache *cache = &dev->cache;
391 struct mlx5_cache_ent *ent = &cache->ent[c];
392 struct mlx5_ib_mr *mr;
393 int err;
394
395 cancel_delayed_work(&ent->dwork);
396 while (1) {
397 spin_lock_irq(&ent->lock);
398 if (list_empty(&ent->head)) {
399 spin_unlock_irq(&ent->lock);
400 return;
401 }
402 mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
403 list_del(&mr->list);
404 ent->cur--;
405 ent->size--;
406 spin_unlock_irq(&ent->lock);
407 err = destroy_mkey(dev, mr);
408 if (err)
409 mlx5_ib_warn(dev, "failed destroy mkey\n");
410 else
411 kfree(mr);
412 }
413 }
414
415 static void delay_time_func(unsigned long ctx)
416 {
417 struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx;
418
419 dev->fill_delay = 0;
420 }
421
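/*
 * Create the MR cache: one ordered workqueue plus MAX_MR_CACHE_ENTRIES
 * entries, where entry i caches mkeys of order i + 2.  Each entry is
 * seeded by queuing its cache work.
 */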
422 int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
423 {
424 struct mlx5_mr_cache *cache = &dev->cache;
425 struct mlx5_cache_ent *ent;
426 int limit;
427 int i;
428
429 mutex_init(&dev->slow_path_mutex);
430 cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
431 if (!cache->wq) {
432 mlx5_ib_warn(dev, "failed to create work queue\n");
433 return -ENOMEM;
434 }
435
436 mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx);
437 setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];
		INIT_LIST_HEAD(&ent->head);
		spin_lock_init(&ent->lock);
445 ent->order = i + 2;
446 ent->dev = dev;
447
448 if (dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE)
449 limit = dev->mdev->profile->mr_cache[i].limit;
450 else
451 limit = 0;
452
453 INIT_WORK(&ent->work, cache_work_func);
454 INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
455 ent->limit = limit;
456 queue_work(cache->wq, &ent->work);
457 }
458
459 return 0;
460 }
461
462 int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
463 {
464 int i;
465
466 dev->cache.stopped = 1;
467 flush_workqueue(dev->cache.wq);
468 mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);
469
470 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
471 clean_keys(dev, i);
472
473 destroy_workqueue(dev->cache.wq);
474 del_timer_sync(&dev->delay_timer);
475
476 return 0;
477 }
478
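/*
 * ib_get_dma_mr() implementation: create an mkey in physical-address
 * access mode that spans the whole address space (length64 set) with
 * the requested access rights.
 */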
479 struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
480 {
481 struct mlx5_ib_dev *dev = to_mdev(pd->device);
482 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
483 struct mlx5_core_dev *mdev = dev->mdev;
484 struct mlx5_ib_mr *mr;
485 void *mkc;
486 u32 *in;
487 int err;
488
489 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
490 if (!mr)
491 return ERR_PTR(-ENOMEM);
492
493 in = kzalloc(inlen, GFP_KERNEL);
494 if (!in) {
495 err = -ENOMEM;
496 goto err_free;
497 }
498
499 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
500
501 MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_PA);
502 MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
503 MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
504 MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
505 MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
506 MLX5_SET(mkc, mkc, lr, 1);
507
508 MLX5_SET(mkc, mkc, length64, 1);
509 MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
510 MLX5_SET(mkc, mkc, qpn, 0xffffff);
511 MLX5_SET64(mkc, mkc, start_addr, 0);
512
513 err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen);
514 if (err)
515 goto err_in;
516
517 kfree(in);
518 mr->ibmr.lkey = mr->mmkey.key;
519 mr->ibmr.rkey = mr->mmkey.key;
520 mr->umem = NULL;
521
522 return &mr->ibmr;
523
524 err_in:
525 kfree(in);
526
527 err_free:
528 kfree(mr);
529
530 return ERR_PTR(err);
531 }
532
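/*
 * Number of 16-byte translation "octowords" needed to cover 'len' bytes
 * starting at 'addr' with the given page size.  Each octoword holds two
 * 8-byte entries, hence the (npages + 1) / 2.
 *
 * Example: addr = 0x1000, len = 0x10000, page_size = 4096 gives
 * offset = 0, npages = 16 and therefore (16 + 1) / 2 = 8 octowords.
 */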
533 static int get_octo_len(u64 addr, u64 len, int page_size)
534 {
535 u64 offset;
536 int npages;
537
538 offset = addr & (page_size - 1);
539 npages = ALIGN(len + offset, page_size) >> ilog2(page_size);
540 return (npages + 1) / 2;
541 }
542
543 static int use_umr(int order)
544 {
545 return order <= MLX5_MAX_UMR_SHIFT;
546 }
547
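/*
 * Allocate a 2KB-aligned page-address (pas) array for a UMR, populate
 * it from the umem, zero the alignment padding and DMA-map it for the
 * device.  On success *mr_pas holds the raw allocation (to be kfree'd)
 * and *dma the mapped, aligned buffer of *size bytes.
 */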
548 static int dma_map_mr_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
549 int npages, int page_shift, int *size,
550 __be64 **mr_pas, dma_addr_t *dma)
551 {
552 __be64 *pas;
553 struct device *ddev = dev->ib_dev.dma_device;
554
555 /*
556 * UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
557 * To avoid copying garbage after the pas array, we allocate
558 * a little more.
559 */
560 *size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT);
561 *mr_pas = kmalloc(*size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
562 if (!(*mr_pas))
563 return -ENOMEM;
564
565 pas = PTR_ALIGN(*mr_pas, MLX5_UMR_ALIGN);
566 mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT);
567 /* Clear padding after the actual pages. */
568 memset(pas + npages, 0, *size - npages * sizeof(u64));
569
570 *dma = dma_map_single(ddev, pas, *size, DMA_TO_DEVICE);
571 if (dma_mapping_error(ddev, *dma)) {
572 kfree(*mr_pas);
573 return -ENOMEM;
574 }
575
576 return 0;
577 }
578
579 static void prep_umr_wqe_common(struct ib_pd *pd, struct mlx5_umr_wr *umrwr,
580 struct ib_sge *sg, u64 dma, int n, u32 key,
581 int page_shift)
582 {
583 struct mlx5_ib_dev *dev = to_mdev(pd->device);
584
585 sg->addr = dma;
586 sg->length = ALIGN(sizeof(u64) * n, 64);
587 sg->lkey = dev->umrc.pd->local_dma_lkey;
588
589 umrwr->wr.next = NULL;
590 umrwr->wr.sg_list = sg;
591 if (n)
592 umrwr->wr.num_sge = 1;
593 else
594 umrwr->wr.num_sge = 0;
595
596 umrwr->wr.opcode = MLX5_IB_WR_UMR;
597
598 umrwr->npages = n;
599 umrwr->page_shift = page_shift;
600 umrwr->mkey = key;
601 }
602
603 static void prep_umr_reg_wqe(struct ib_pd *pd, struct mlx5_umr_wr *umrwr,
604 struct ib_sge *sg, u64 dma, int n, u32 key,
605 int page_shift, u64 virt_addr, u64 len,
606 int access_flags)
607 {
608 prep_umr_wqe_common(pd, umrwr, sg, dma, n, key, page_shift);
609
610 umrwr->wr.send_flags = 0;
611
612 umrwr->target.virt_addr = virt_addr;
613 umrwr->length = len;
614 umrwr->access_flags = access_flags;
615 umrwr->pd = pd;
616 }
617
618 static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
619 struct mlx5_umr_wr *umrwr, u32 key)
620 {
621 umrwr->wr.send_flags = MLX5_IB_SEND_UMR_UNREG | MLX5_IB_SEND_UMR_FAIL_IF_FREE;
622 umrwr->wr.opcode = MLX5_IB_WR_UMR;
623 umrwr->mkey = key;
624 }
625
626 static struct ib_umem *mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
627 int access_flags, int *npages,
628 int *page_shift, int *ncont, int *order)
629 {
630 struct mlx5_ib_dev *dev = to_mdev(pd->device);
631 struct ib_umem *umem = ib_umem_get(pd->uobject->context, start, length,
632 access_flags, 0);
633 if (IS_ERR(umem)) {
634 mlx5_ib_err(dev, "umem get failed (%ld)\n", PTR_ERR(umem));
635 return (void *)umem;
636 }
637
638 mlx5_ib_cont_pages(umem, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages, page_shift, ncont, order);
639 if (!*npages) {
640 mlx5_ib_warn(dev, "avoid zero region\n");
641 ib_umem_release(umem);
642 return ERR_PTR(-EINVAL);
643 }
644
645 mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
646 *npages, *ncont, *order, *page_shift);
647
648 return umem;
649 }
650
651 static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
652 {
653 struct mlx5_ib_umr_context *context =
654 container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);
655
656 context->status = wc->status;
657 complete(&context->done);
658 }
659
660 static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
661 {
662 context->cqe.done = mlx5_ib_umr_done;
663 context->status = -1;
664 init_completion(&context->done);
665 }
666
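/*
 * Fast registration path: take a pre-created mkey from the MR cache and
 * program it for the given umem by posting a UMR work request on the
 * dedicated UMR QP, then wait for its completion.
 */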
667 static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
668 u64 virt_addr, u64 len, int npages,
669 int page_shift, int order, int access_flags)
670 {
671 struct mlx5_ib_dev *dev = to_mdev(pd->device);
672 struct device *ddev = dev->ib_dev.dma_device;
673 struct umr_common *umrc = &dev->umrc;
674 struct mlx5_ib_umr_context umr_context;
675 struct mlx5_umr_wr umrwr = {};
676 const struct ib_send_wr *bad;
677 struct mlx5_ib_mr *mr;
678 struct ib_sge sg;
679 int size;
680 __be64 *mr_pas;
681 dma_addr_t dma;
682 int err = 0;
683 int i;
684
685 for (i = 0; i < 1; i++) {
686 mr = alloc_cached_mr(dev, order);
687 if (mr)
688 break;
689
690 err = add_keys(dev, order2idx(dev, order), 1);
691 if (err && err != -EAGAIN) {
692 mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
693 break;
694 }
695 }
696
697 if (!mr)
698 return ERR_PTR(-EAGAIN);
699
700 err = dma_map_mr_pas(dev, umem, npages, page_shift, &size, &mr_pas,
701 &dma);
702 if (err)
703 goto free_mr;
704
705 mlx5_ib_init_umr_context(&umr_context);
706
707 umrwr.wr.wr_cqe = &umr_context.cqe;
708 prep_umr_reg_wqe(pd, &umrwr, &sg, dma, npages, mr->mmkey.key,
709 page_shift, virt_addr, len, access_flags);
710
711 down(&umrc->sem);
712 err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
713 if (err) {
714 mlx5_ib_warn(dev, "post send failed, err %d\n", err);
715 goto unmap_dma;
716 } else {
717 wait_for_completion(&umr_context.done);
718 if (umr_context.status != IB_WC_SUCCESS) {
719 mlx5_ib_warn(dev, "reg umr failed\n");
720 err = -EFAULT;
721 }
722 }
723
724 mr->mmkey.iova = virt_addr;
725 mr->mmkey.size = len;
726 mr->mmkey.pd = to_mpd(pd)->pdn;
727
728 mr->live = 1;
729
730 unmap_dma:
731 up(&umrc->sem);
732 dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
733
734 kfree(mr_pas);
735
736 free_mr:
737 if (err) {
738 free_cached_mr(dev, mr);
739 return ERR_PTR(err);
740 }
741
742 return mr;
743 }
744
745 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
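/*
 * Update (or, with 'zap', invalidate) a range of MTT entries of an ODP
 * MR via UMR work requests, in chunks aligned to MLX5_UMR_MTT_ALIGNMENT.
 * May be called from invalidation context, hence the GFP_ATOMIC
 * allocation and the pre-allocated emergency buffer fallback.
 */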
746 int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
747 int zap)
748 {
749 struct mlx5_ib_dev *dev = mr->dev;
750 struct device *ddev = dev->ib_dev.dma_device;
751 struct umr_common *umrc = &dev->umrc;
752 struct mlx5_ib_umr_context umr_context;
753 struct ib_umem *umem = mr->umem;
754 int size;
755 __be64 *pas;
756 dma_addr_t dma;
757 const struct ib_send_wr *bad;
758 struct mlx5_umr_wr wr;
759 struct ib_sge sg;
760 int err = 0;
761 const int page_index_alignment = MLX5_UMR_MTT_ALIGNMENT / sizeof(u64);
762 const int page_index_mask = page_index_alignment - 1;
763 size_t pages_mapped = 0;
764 size_t pages_to_map = 0;
765 size_t pages_iter = 0;
766 int use_emergency_buf = 0;
767
768 /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
769 * so we need to align the offset and length accordingly */
770 if (start_page_index & page_index_mask) {
771 npages += start_page_index & page_index_mask;
772 start_page_index &= ~page_index_mask;
773 }
774
775 pages_to_map = ALIGN(npages, page_index_alignment);
776
777 if (start_page_index + pages_to_map > MLX5_MAX_UMR_PAGES)
778 return -EINVAL;
779
780 size = sizeof(u64) * pages_to_map;
781 size = min_t(int, PAGE_SIZE, size);
782 /* We allocate with GFP_ATOMIC to avoid recursion into page-reclaim
783 * code, when we are called from an invalidation. The pas buffer must
784 * be 2k-aligned for Connect-IB. */
785 pas = (__be64 *)get_zeroed_page(GFP_ATOMIC);
786 if (!pas) {
787 mlx5_ib_warn(dev, "unable to allocate memory during MTT update, falling back to slower chunked mechanism.\n");
788 pas = mlx5_ib_update_mtt_emergency_buffer;
789 size = MLX5_UMR_MTT_MIN_CHUNK_SIZE;
790 use_emergency_buf = 1;
791 mutex_lock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
792 memset(pas, 0, size);
793 }
794 pages_iter = size / sizeof(u64);
795 dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
796 if (dma_mapping_error(ddev, dma)) {
797 mlx5_ib_err(dev, "unable to map DMA during MTT update.\n");
798 err = -ENOMEM;
799 goto free_pas;
800 }
801
802 for (pages_mapped = 0;
803 pages_mapped < pages_to_map && !err;
804 pages_mapped += pages_iter, start_page_index += pages_iter) {
805 dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
806
807 npages = min_t(size_t,
808 pages_iter,
809 ib_umem_num_pages(umem) - start_page_index);
810
811 if (!zap) {
812 __mlx5_ib_populate_pas(dev, umem, PAGE_SHIFT,
813 start_page_index, npages, pas,
814 MLX5_IB_MTT_PRESENT);
815 /* Clear padding after the pages brought from the
816 * umem. */
817 memset(pas + npages, 0, size - npages * sizeof(u64));
818 }
819
820 dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
821
822 mlx5_ib_init_umr_context(&umr_context);
823
824 memset(&wr, 0, sizeof(wr));
825 wr.wr.wr_cqe = &umr_context.cqe;
826
827 sg.addr = dma;
828 sg.length = ALIGN(npages * sizeof(u64),
829 MLX5_UMR_MTT_ALIGNMENT);
830 sg.lkey = dev->umrc.pd->local_dma_lkey;
831
832 wr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
833 MLX5_IB_SEND_UMR_UPDATE_MTT;
834 wr.wr.sg_list = &sg;
835 wr.wr.num_sge = 1;
836 wr.wr.opcode = MLX5_IB_WR_UMR;
837 wr.npages = sg.length / sizeof(u64);
838 wr.page_shift = PAGE_SHIFT;
839 wr.mkey = mr->mmkey.key;
840 wr.target.offset = start_page_index;
841
842 down(&umrc->sem);
843 err = ib_post_send(umrc->qp, &wr.wr, &bad);
844 if (err) {
845 mlx5_ib_err(dev, "UMR post send failed, err %d\n", err);
846 } else {
847 wait_for_completion(&umr_context.done);
848 if (umr_context.status != IB_WC_SUCCESS) {
849 mlx5_ib_err(dev, "UMR completion failed, code %d\n",
850 umr_context.status);
851 err = -EFAULT;
852 }
853 }
854 up(&umrc->sem);
855 }
856 dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
857
858 free_pas:
859 if (!use_emergency_buf)
860 free_page((unsigned long)pas);
861 else
862 mutex_unlock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
863
864 return err;
865 }
866 #endif
867
/*
 * If ibmr is NULL, reg_create() allocates a new mlx5_ib_mr.
 * Otherwise the given ibmr is reused.
 */
872 static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
873 u64 virt_addr, u64 length,
874 struct ib_umem *umem, int npages,
875 int page_shift, int access_flags)
876 {
877 struct mlx5_ib_dev *dev = to_mdev(pd->device);
878 struct mlx5_ib_mr *mr;
879 __be64 *pas;
880 void *mkc;
881 int inlen;
882 u32 *in;
883 int err;
884 bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));
885
886 mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL);
887 if (!mr)
888 return ERR_PTR(-ENOMEM);
889
890 inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
891 sizeof(*pas) * ((npages + 1) / 2) * 2;
892 in = mlx5_vzalloc(inlen);
893 if (!in) {
894 err = -ENOMEM;
895 goto err_1;
896 }
897 pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
898 mlx5_ib_populate_pas(dev, umem, page_shift, pas,
899 pg_cap ? MLX5_IB_MTT_PRESENT : 0);
900
901 /* The pg_access bit allows setting the access flags
902 * in the page list submitted with the command. */
903 MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));
904
905 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
906 MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_MTT);
907 MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
908 MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
909 MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
910 MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
911 MLX5_SET(mkc, mkc, lr, 1);
912
913 MLX5_SET64(mkc, mkc, start_addr, virt_addr);
914 MLX5_SET64(mkc, mkc, len, length);
915 MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
916 MLX5_SET(mkc, mkc, bsf_octword_size, 0);
917 MLX5_SET(mkc, mkc, translations_octword_size,
918 get_octo_len(virt_addr, length, 1 << page_shift));
919 MLX5_SET(mkc, mkc, log_page_size, page_shift);
920 MLX5_SET(mkc, mkc, qpn, 0xffffff);
921 MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
922 get_octo_len(virt_addr, length, 1 << page_shift));
923
924 err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
925 if (err) {
926 mlx5_ib_warn(dev, "create mkey failed\n");
927 goto err_2;
928 }
929 mr->umem = umem;
930 mr->dev = dev;
931 mr->live = 1;
932 kvfree(in);
933
934 mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);
935
936 return mr;
937
938 err_2:
939 kvfree(in);
940
941 err_1:
942 if (!ibmr)
943 kfree(mr);
944
945 return ERR_PTR(err);
946 }
947
static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
			  int npages, u64 length, int access_flags)
950 {
951 mr->npages = npages;
952 atomic_add(npages, &dev->mdev->priv.reg_pages);
953 mr->ibmr.lkey = mr->mmkey.key;
954 mr->ibmr.rkey = mr->mmkey.key;
955 mr->ibmr.length = length;
956 mr->access_flags = access_flags;
957 }
958
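/*
 * ib_reg_user_mr() implementation: pin the user memory, then register
 * it either through the UMR fast path (cached mkey) when the order
 * allows it, or through a blocking CREATE_MKEY command (reg_create)
 * otherwise.
 */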
959 struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
960 u64 virt_addr, int access_flags,
961 struct ib_udata *udata)
962 {
963 struct mlx5_ib_dev *dev = to_mdev(pd->device);
964 struct mlx5_ib_mr *mr = NULL;
965 struct ib_umem *umem;
966 int page_shift;
967 int npages;
968 int ncont;
969 int order;
970 int err;
971
972 mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
973 (long long)start, (long long)virt_addr, (long long)length, access_flags);
974 umem = mr_umem_get(pd, start, length, access_flags, &npages,
975 &page_shift, &ncont, &order);
976
977 if (IS_ERR(umem))
978 return (void *)umem;
979
980 if (use_umr(order)) {
981 mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
982 order, access_flags);
		if (PTR_ERR(mr) == -EAGAIN) {
			mlx5_ib_dbg(dev, "cache empty for order %d\n", order);
			mr = NULL;
		}
	} else if (access_flags & IB_ACCESS_ON_DEMAND) {
		err = -EINVAL;
		pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB\n");
		goto error;
	}
992
993 if (!mr) {
994 mutex_lock(&dev->slow_path_mutex);
995 mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
996 page_shift, access_flags);
997 mutex_unlock(&dev->slow_path_mutex);
998 }
999
1000 if (IS_ERR(mr)) {
1001 err = PTR_ERR(mr);
1002 goto error;
1003 }
1004
1005 mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);
1006
1007 mr->umem = umem;
	set_mr_fields(dev, mr, npages, length, access_flags);
1009
1010 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
1011 update_odp_mr(mr);
1012 #endif
1013
1014 return &mr->ibmr;
1015
1016 error:
1017 ib_umem_release(umem);
1018 return ERR_PTR(err);
1019 }
1020
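/*
 * Invalidate a UMR-registered mkey by posting an unregister UMR work
 * request and waiting for its completion.  Skipped when the device is
 * in internal error state.
 */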
1021 static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
1022 {
1023 struct mlx5_core_dev *mdev = dev->mdev;
1024 struct umr_common *umrc = &dev->umrc;
1025 struct mlx5_ib_umr_context umr_context;
1026 struct mlx5_umr_wr umrwr = {};
1027 const struct ib_send_wr *bad;
1028 int err;
1029
1030 if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
1031 return 0;
1032
1033 mlx5_ib_init_umr_context(&umr_context);
1034
1035 umrwr.wr.wr_cqe = &umr_context.cqe;
1036 prep_umr_unreg_wqe(dev, &umrwr, mr->mmkey.key);
1037
1038 down(&umrc->sem);
1039 err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
1040 if (err) {
1041 up(&umrc->sem);
1042 mlx5_ib_dbg(dev, "err %d\n", err);
1043 goto error;
1044 } else {
1045 wait_for_completion(&umr_context.done);
1046 up(&umrc->sem);
1047 }
1048 if (umr_context.status != IB_WC_SUCCESS) {
1049 mlx5_ib_warn(dev, "unreg umr failed\n");
1050 err = -EFAULT;
1051 goto error;
1052 }
1053 return 0;
1054
1055 error:
1056 return err;
1057 }
1058
1059 static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, u64 virt_addr,
1060 u64 length, int npages, int page_shift, int order,
1061 int access_flags, int flags)
1062 {
1063 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1064 struct device *ddev = dev->ib_dev.dma_device;
1065 struct mlx5_ib_umr_context umr_context;
1066 const struct ib_send_wr *bad;
1067 struct mlx5_umr_wr umrwr = {};
1068 struct ib_sge sg;
1069 struct umr_common *umrc = &dev->umrc;
1070 dma_addr_t dma = 0;
1071 __be64 *mr_pas = NULL;
1072 int size;
1073 int err;
1074
1075 mlx5_ib_init_umr_context(&umr_context);
1076
1077 umrwr.wr.wr_cqe = &umr_context.cqe;
1078 umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE;
1079
1080 if (flags & IB_MR_REREG_TRANS) {
1081 err = dma_map_mr_pas(dev, mr->umem, npages, page_shift, &size,
1082 &mr_pas, &dma);
1083 if (err)
1084 return err;
1085
1086 umrwr.target.virt_addr = virt_addr;
1087 umrwr.length = length;
1088 umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
1089 }
1090
1091 prep_umr_wqe_common(pd, &umrwr, &sg, dma, npages, mr->mmkey.key,
1092 page_shift);
1093
1094 if (flags & IB_MR_REREG_PD) {
1095 umrwr.pd = pd;
1096 umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD;
1097 }
1098
1099 if (flags & IB_MR_REREG_ACCESS) {
1100 umrwr.access_flags = access_flags;
1101 umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_ACCESS;
1102 }
1103
1104 /* post send request to UMR QP */
1105 down(&umrc->sem);
1106 err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
1107
1108 if (err) {
1109 mlx5_ib_warn(dev, "post send failed, err %d\n", err);
1110 } else {
1111 wait_for_completion(&umr_context.done);
1112 if (umr_context.status != IB_WC_SUCCESS) {
1113 mlx5_ib_warn(dev, "reg umr failed (%u)\n",
1114 umr_context.status);
1115 err = -EFAULT;
1116 }
1117 }
1118
1119 up(&umrc->sem);
1120 if (flags & IB_MR_REREG_TRANS) {
1121 dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
1122 kfree(mr_pas);
1123 }
1124 return err;
1125 }
1126
1127 int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
1128 u64 length, u64 virt_addr, int new_access_flags,
1129 struct ib_pd *new_pd, struct ib_udata *udata)
1130 {
1131 struct mlx5_ib_dev *dev = to_mdev(ib_mr->device);
1132 struct mlx5_ib_mr *mr = to_mmr(ib_mr);
1133 struct ib_pd *pd = (flags & IB_MR_REREG_PD) ? new_pd : ib_mr->pd;
1134 int access_flags = flags & IB_MR_REREG_ACCESS ?
1135 new_access_flags :
1136 mr->access_flags;
1137 u64 addr = (flags & IB_MR_REREG_TRANS) ? virt_addr : mr->umem->address;
1138 u64 len = (flags & IB_MR_REREG_TRANS) ? length : mr->umem->length;
1139 int page_shift = 0;
1140 int npages = 0;
1141 int ncont = 0;
1142 int order = 0;
1143 int err;
1144
1145 mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
1146 (long long)start, (long long)virt_addr, (long long)length, access_flags);
1147
1148 if (flags != IB_MR_REREG_PD) {
1149 /*
1150 * Replace umem. This needs to be done whether or not UMR is
1151 * used.
1152 */
1153 flags |= IB_MR_REREG_TRANS;
1154 ib_umem_release(mr->umem);
1155 mr->umem = mr_umem_get(pd, addr, len, access_flags, &npages,
1156 &page_shift, &ncont, &order);
1157 if (IS_ERR(mr->umem)) {
1158 err = PTR_ERR(mr->umem);
1159 mr->umem = NULL;
1160 return err;
1161 }
1162 }
1163
1164 if (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len)) {
1165 /*
1166 * UMR can't be used - MKey needs to be replaced.
1167 */
1168 if (mr->umred) {
1169 err = unreg_umr(dev, mr);
1170 if (err)
1171 mlx5_ib_warn(dev, "Failed to unregister MR\n");
1172 } else {
1173 err = destroy_mkey(dev, mr);
1174 if (err)
1175 mlx5_ib_warn(dev, "Failed to destroy MKey\n");
1176 }
1177 if (err)
1178 return err;
1179
1180 mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont,
1181 page_shift, access_flags);
1182
1183 if (IS_ERR(mr))
1184 return PTR_ERR(mr);
1185
1186 mr->umred = 0;
1187 } else {
1188 /*
1189 * Send a UMR WQE
1190 */
1191 err = rereg_umr(pd, mr, addr, len, npages, page_shift,
1192 order, access_flags, flags);
1193 if (err) {
1194 mlx5_ib_warn(dev, "Failed to rereg UMR\n");
1195 return err;
1196 }
1197 }
1198
1199 if (flags & IB_MR_REREG_PD) {
1200 ib_mr->pd = pd;
1201 mr->mmkey.pd = to_mpd(pd)->pdn;
1202 }
1203
1204 if (flags & IB_MR_REREG_ACCESS)
1205 mr->access_flags = access_flags;
1206
1207 if (flags & IB_MR_REREG_TRANS) {
1208 atomic_sub(mr->npages, &dev->mdev->priv.reg_pages);
		set_mr_fields(dev, mr, npages, len, access_flags);
1210 mr->mmkey.iova = addr;
1211 mr->mmkey.size = len;
1212 }
1213 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
1214 update_odp_mr(mr);
1215 #endif
1216
1217 return 0;
1218 }
1219
1220 static int
1221 mlx5_alloc_priv_descs(struct ib_device *device,
1222 struct mlx5_ib_mr *mr,
1223 int ndescs,
1224 int desc_size)
1225 {
1226 int size = ndescs * desc_size;
1227 int add_size;
1228 int ret;
1229
1230 add_size = max_t(int, MLX5_UMR_ALIGN - 1, 0);
1231
1232 mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL);
1233 if (!mr->descs_alloc)
1234 return -ENOMEM;
1235
1236 mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);
1237
1238 mr->desc_map = dma_map_single(device->dma_device, mr->descs,
1239 size, DMA_TO_DEVICE);
1240 if (dma_mapping_error(device->dma_device, mr->desc_map)) {
1241 ret = -ENOMEM;
1242 goto err;
1243 }
1244
1245 return 0;
1246 err:
1247 kfree(mr->descs_alloc);
1248
1249 return ret;
1250 }
1251
1252 static void
1253 mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
1254 {
1255 if (mr->descs) {
1256 struct ib_device *device = mr->ibmr.device;
1257 int size = mr->max_descs * mr->desc_size;
1258
1259 dma_unmap_single(device->dma_device, mr->desc_map,
1260 size, DMA_TO_DEVICE);
1261 kfree(mr->descs_alloc);
1262 mr->descs = NULL;
1263 }
1264 }
1265
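/*
 * Tear down the HW resources of an MR: destroy its signature PSVs and
 * private descriptors, then either destroy the mkey (non-cached MRs) or
 * invalidate it and return it to the MR cache (UMR/cached MRs).
 */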
1266 static int clean_mr(struct mlx5_ib_mr *mr)
1267 {
1268 struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
1269 int umred = mr->umred;
1270 int err;
1271
1272 if (mr->sig) {
1273 if (mlx5_core_destroy_psv(dev->mdev,
1274 mr->sig->psv_memory.psv_idx))
1275 mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
1276 mr->sig->psv_memory.psv_idx);
1277 if (mlx5_core_destroy_psv(dev->mdev,
1278 mr->sig->psv_wire.psv_idx))
1279 mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
1280 mr->sig->psv_wire.psv_idx);
1281 kfree(mr->sig);
1282 mr->sig = NULL;
1283 }
1284
1285 mlx5_free_priv_descs(mr);
1286
1287 if (!umred) {
1288 u32 key = mr->mmkey.key;
1289
1290 err = destroy_mkey(dev, mr);
1291 kfree(mr);
1292 if (err) {
1293 mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
1294 key, err);
1295 return err;
1296 }
1297 } else {
1298 err = unreg_umr(dev, mr);
1299 if (err) {
1300 mlx5_ib_warn(dev, "failed unregister\n");
1301 return err;
1302 }
1303 free_cached_mr(dev, mr);
1304 }
1305
1306 return 0;
1307 }
1308
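/*
 * ib_dereg_mr() implementation.  For ODP MRs the umem is torn down
 * first (after quiescing page-fault handlers) so that no invalidation
 * can race with freeing the MR.
 */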
1309 int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
1310 {
1311 struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
1312 struct mlx5_ib_mr *mr = to_mmr(ibmr);
1313 int npages = mr->npages;
1314 struct ib_umem *umem = mr->umem;
1315
1316 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
1317 if (umem && umem->odp_data) {
1318 /* Prevent new page faults from succeeding */
1319 mr->live = 0;
1320 /* Wait for all running page-fault handlers to finish. */
1321 synchronize_srcu(&dev->mr_srcu);
1322 /* Destroy all page mappings */
1323 mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
1324 ib_umem_end(umem));
		/*
		 * For ODP we release the umem before the MR so that no
		 * invalidations remain in flight that could still look
		 * at the *mr struct.
		 */
1330 ib_umem_release(umem);
1331 atomic_sub(npages, &dev->mdev->priv.reg_pages);
1332
1333 /* Avoid double-freeing the umem. */
1334 umem = NULL;
1335 }
1336 #endif
1337
1338 clean_mr(mr);
1339
1340 if (umem) {
1341 ib_umem_release(umem);
1342 atomic_sub(npages, &dev->mdev->priv.reg_pages);
1343 }
1344
1345 return 0;
1346 }
1347
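/*
 * ib_alloc_mr() implementation for fast-registration MRs: MTT
 * descriptors for IB_MR_TYPE_MEM_REG, KLMs for IB_MR_TYPE_SG_GAPS, and
 * KLMs plus memory/wire PSVs for IB_MR_TYPE_INTEGRITY.
 */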
1348 struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
1349 enum ib_mr_type mr_type,
1350 u32 max_num_sg, struct ib_udata *udata)
1351 {
1352 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1353 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1354 int ndescs = ALIGN(max_num_sg, 4);
1355 struct mlx5_ib_mr *mr;
1356 void *mkc;
1357 u32 *in;
1358 int err;
1359
1360 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1361 if (!mr)
1362 return ERR_PTR(-ENOMEM);
1363
1364 in = kzalloc(inlen, GFP_KERNEL);
1365 if (!in) {
1366 err = -ENOMEM;
1367 goto err_free;
1368 }
1369
1370 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1371 MLX5_SET(mkc, mkc, free, 1);
1372 MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
1373 MLX5_SET(mkc, mkc, qpn, 0xffffff);
1374 MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
1375
1376 if (mr_type == IB_MR_TYPE_MEM_REG) {
1377 mr->access_mode = MLX5_ACCESS_MODE_MTT;
1378 MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
1379 err = mlx5_alloc_priv_descs(pd->device, mr,
1380 ndescs, sizeof(u64));
1381 if (err)
1382 goto err_free_in;
1383
1384 mr->desc_size = sizeof(u64);
1385 mr->max_descs = ndescs;
1386 } else if (mr_type == IB_MR_TYPE_SG_GAPS) {
1387 mr->access_mode = MLX5_ACCESS_MODE_KLM;
1388
1389 err = mlx5_alloc_priv_descs(pd->device, mr,
1390 ndescs, sizeof(struct mlx5_klm));
1391 if (err)
1392 goto err_free_in;
1393 mr->desc_size = sizeof(struct mlx5_klm);
1394 mr->max_descs = ndescs;
1395 } else if (mr_type == IB_MR_TYPE_INTEGRITY) {
1396 u32 psv_index[2];
1397
1398 MLX5_SET(mkc, mkc, bsf_en, 1);
1399 MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE);
1400 mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
1401 if (!mr->sig) {
1402 err = -ENOMEM;
1403 goto err_free_in;
1404 }
1405
1406 /* create mem & wire PSVs */
1407 err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn,
1408 2, psv_index);
1409 if (err)
1410 goto err_free_sig;
1411
1412 mr->access_mode = MLX5_ACCESS_MODE_KLM;
1413 mr->sig->psv_memory.psv_idx = psv_index[0];
1414 mr->sig->psv_wire.psv_idx = psv_index[1];
1415
1416 mr->sig->sig_status_checked = true;
1417 mr->sig->sig_err_exists = false;
1418 /* Next UMR, Arm SIGERR */
1419 ++mr->sig->sigerr_count;
1420 } else {
1421 mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type);
1422 err = -EINVAL;
1423 goto err_free_in;
1424 }
1425
1426 MLX5_SET(mkc, mkc, access_mode, mr->access_mode);
1427 MLX5_SET(mkc, mkc, umr_en, 1);
1428
1429 err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
1430 if (err)
1431 goto err_destroy_psv;
1432
1433 mr->ibmr.lkey = mr->mmkey.key;
1434 mr->ibmr.rkey = mr->mmkey.key;
1435 mr->umem = NULL;
1436 kfree(in);
1437
1438 return &mr->ibmr;
1439
1440 err_destroy_psv:
1441 if (mr->sig) {
1442 if (mlx5_core_destroy_psv(dev->mdev,
1443 mr->sig->psv_memory.psv_idx))
1444 mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
1445 mr->sig->psv_memory.psv_idx);
1446 if (mlx5_core_destroy_psv(dev->mdev,
1447 mr->sig->psv_wire.psv_idx))
1448 mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
1449 mr->sig->psv_wire.psv_idx);
1450 }
1451 mlx5_free_priv_descs(mr);
1452 err_free_sig:
1453 kfree(mr->sig);
1454 err_free_in:
1455 kfree(in);
1456 err_free:
1457 kfree(mr);
1458 return ERR_PTR(err);
1459 }
1460
1461 struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
1462 struct ib_udata *udata)
1463 {
1464 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1465 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1466 struct mlx5_ib_mw *mw = NULL;
1467 u32 *in = NULL;
1468 void *mkc;
1469 int ndescs;
1470 int err;
1471 struct mlx5_ib_alloc_mw req = {};
1472 struct {
1473 __u32 comp_mask;
1474 __u32 response_length;
1475 } resp = {};
1476
1477 err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
1478 if (err)
1479 return ERR_PTR(err);
1480
1481 if (req.comp_mask || req.reserved1 || req.reserved2)
1482 return ERR_PTR(-EOPNOTSUPP);
1483
1484 if (udata->inlen > sizeof(req) &&
1485 !ib_is_udata_cleared(udata, sizeof(req),
1486 udata->inlen - sizeof(req)))
1487 return ERR_PTR(-EOPNOTSUPP);
1488
1489 ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4);
1490
1491 mw = kzalloc(sizeof(*mw), GFP_KERNEL);
1492 in = kzalloc(inlen, GFP_KERNEL);
1493 if (!mw || !in) {
1494 err = -ENOMEM;
1495 goto free;
1496 }
1497
1498 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1499
1500 MLX5_SET(mkc, mkc, free, 1);
1501 MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
1502 MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
1503 MLX5_SET(mkc, mkc, umr_en, 1);
1504 MLX5_SET(mkc, mkc, lr, 1);
1505 MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_KLM);
1506 MLX5_SET(mkc, mkc, en_rinval, !!((type == IB_MW_TYPE_2)));
1507 MLX5_SET(mkc, mkc, qpn, 0xffffff);
1508
1509 err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, inlen);
1510 if (err)
1511 goto free;
1512
1513 mw->ibmw.rkey = mw->mmkey.key;
1514
1515 resp.response_length = min(offsetof(typeof(resp), response_length) +
1516 sizeof(resp.response_length), udata->outlen);
1517 if (resp.response_length) {
1518 err = ib_copy_to_udata(udata, &resp, resp.response_length);
1519 if (err) {
1520 mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey);
1521 goto free;
1522 }
1523 }
1524
1525 kfree(in);
1526 return &mw->ibmw;
1527
1528 free:
1529 kfree(mw);
1530 kfree(in);
1531 return ERR_PTR(err);
1532 }
1533
1534 int mlx5_ib_dealloc_mw(struct ib_mw *mw)
1535 {
1536 struct mlx5_ib_mw *mmw = to_mmw(mw);
1537 int err;
1538
1539 err = mlx5_core_destroy_mkey((to_mdev(mw->device))->mdev,
1540 &mmw->mmkey);
1541 if (!err)
1542 kfree(mmw);
1543 return err;
1544 }
1545
1546 int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
1547 struct ib_mr_status *mr_status)
1548 {
1549 struct mlx5_ib_mr *mmr = to_mmr(ibmr);
1550 int ret = 0;
1551
1552 if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
1553 pr_err("Invalid status check mask\n");
1554 ret = -EINVAL;
1555 goto done;
1556 }
1557
1558 mr_status->fail_status = 0;
1559 if (check_mask & IB_MR_CHECK_SIG_STATUS) {
1560 if (!mmr->sig) {
1561 ret = -EINVAL;
1562 pr_err("signature status check requested on a non-signature enabled MR\n");
1563 goto done;
1564 }
1565
1566 mmr->sig->sig_status_checked = true;
1567 if (!mmr->sig->sig_err_exists)
1568 goto done;
1569
1570 if (ibmr->lkey == mmr->sig->err_item.key)
1571 memcpy(&mr_status->sig_err, &mmr->sig->err_item,
1572 sizeof(mr_status->sig_err));
1573 else {
1574 mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
1575 mr_status->sig_err.sig_err_offset = 0;
1576 mr_status->sig_err.key = mmr->sig->err_item.key;
1577 }
1578
1579 mmr->sig->sig_err_exists = false;
1580 mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
1581 }
1582
1583 done:
1584 return ret;
1585 }
1586
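/*
 * Translate a scatterlist into KLM descriptors (address/length/lkey
 * triplets), honouring an optional offset into the first entry.
 * Returns the number of entries consumed.
 */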
1587 static int
1588 mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
1589 struct scatterlist *sgl,
1590 unsigned short sg_nents,
1591 unsigned int *sg_offset_p)
1592 {
1593 struct scatterlist *sg = sgl;
1594 struct mlx5_klm *klms = mr->descs;
1595 unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
1596 u32 lkey = mr->ibmr.pd->local_dma_lkey;
1597 int i;
1598
1599 mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
1600 mr->ibmr.length = 0;
1601 mr->ndescs = sg_nents;
1602
1603 for_each_sg(sgl, sg, sg_nents, i) {
		if (unlikely(i >= mr->max_descs))
1605 break;
1606 klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset);
1607 klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset);
1608 klms[i].key = cpu_to_be32(lkey);
1609 mr->ibmr.length += sg_dma_len(sg);
1610
1611 sg_offset = 0;
1612 }
1613
1614 if (sg_offset_p)
1615 *sg_offset_p = sg_offset;
1616
1617 return i;
1618 }
1619
1620 static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
1621 {
1622 struct mlx5_ib_mr *mr = to_mmr(ibmr);
1623 __be64 *descs;
1624
1625 if (unlikely(mr->ndescs == mr->max_descs))
1626 return -ENOMEM;
1627
1628 descs = mr->descs;
1629 descs[mr->ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
1630
1631 return 0;
1632 }
1633
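/*
 * ib_map_mr_sg() implementation: lay the scatterlist out in the MR's
 * private descriptor buffer, as KLMs for SG_GAPS MRs and as page-sized
 * MTT entries (via ib_sg_to_pages/mlx5_set_page) otherwise, syncing the
 * buffer around the update.  ULPs reach this through ib_map_mr_sg()
 * before posting a fast-registration work request.
 */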
1634 int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
1635 unsigned int *sg_offset)
1636 {
1637 struct mlx5_ib_mr *mr = to_mmr(ibmr);
1638 int n;
1639
1640 mr->ndescs = 0;
1641
1642 ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map,
1643 mr->desc_size * mr->max_descs,
1644 DMA_TO_DEVICE);
1645
1646 if (mr->access_mode == MLX5_ACCESS_MODE_KLM)
1647 n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset);
1648 else
1649 n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
1650 mlx5_set_page);
1651
1652 ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
1653 mr->desc_size * mr->max_descs,
1654 DMA_TO_DEVICE);
1655
1656 return n;
1657 }