The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/ofed/drivers/infiniband/core/ib_umem.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
    3  *
    4  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
    5  * Copyright (c) 2005 Cisco Systems.  All rights reserved.
    6  * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
    7  *
    8  * This software is available to you under a choice of one of two
    9  * licenses.  You may choose to be licensed under the terms of the GNU
   10  * General Public License (GPL) Version 2, available from the file
   11  * COPYING in the main directory of this source tree, or the
   12  * OpenIB.org BSD license below:
   13  *
   14  *     Redistribution and use in source and binary forms, with or
   15  *     without modification, are permitted provided that the following
   16  *     conditions are met:
   17  *
   18  *      - Redistributions of source code must retain the above
   19  *        copyright notice, this list of conditions and the following
   20  *        disclaimer.
   21  *
   22  *      - Redistributions in binary form must reproduce the above
   23  *        copyright notice, this list of conditions and the following
   24  *        disclaimer in the documentation and/or other materials
   25  *        provided with the distribution.
   26  *
   27  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   28  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   29  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   30  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
   31  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   32  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   33  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   34  * SOFTWARE.
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __FBSDID("$FreeBSD$");
   39 
   40 #define LINUXKPI_PARAM_PREFIX ibcore_
   41 
   42 #include <linux/mm.h>
   43 #include <linux/dma-mapping.h>
   44 #include <linux/sched.h>
   45 #include <linux/slab.h>
   46 #include <linux/wait.h>
   47 #include <rdma/ib_umem_odp.h>
   48 
   49 #include "uverbs.h"
   50 
   51 #include <sys/priv.h>
   52 
   53 static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
   54 {
   55         struct scatterlist *sg;
   56         struct page *page;
   57         int i;
   58 
   59         if (umem->nmap > 0)
   60                 ib_dma_unmap_sg(dev, umem->sg_head.sgl,
   61                                 umem->nmap,
   62                                 DMA_BIDIRECTIONAL);
   63 
   64         for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) {
   65 
   66                 page = sg_page(sg);
   67                 put_page(page);
   68         }
   69 
   70         sg_free_table(&umem->sg_head);
   71         return;
   72 
   73 }
   74 
   75 /**
   76  * ib_umem_get - Pin and DMA map userspace memory.
   77  *
   78  * If access flags indicate ODP memory, avoid pinning. Instead, stores
   79  * the mm for future page fault handling in conjunction with MMU notifiers.
   80  *
   81  * @context: userspace context to pin memory for
   82  * @addr: userspace virtual address to start at
   83  * @size: length of region to pin
   84  * @access: IB_ACCESS_xxx flags for memory being pinned
   85  * @dmasync: flush in-flight DMA when the memory region is written
   86  */
   87 struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
   88                             size_t size, int access, int dmasync)
   89 {
   90         struct ib_umem *umem;
   91         struct page **page_list;
   92         struct vm_area_struct **vma_list;
   93         unsigned long locked;
   94         unsigned long cur_base;
   95         unsigned long npages;
   96         int ret;
   97         int i;
   98         struct dma_attrs dma_attrs = { 0 };
   99         struct scatterlist *sg, *sg_list_start;
  100         int need_release = 0;
  101         unsigned int gup_flags = FOLL_WRITE;
  102 
  103         if (dmasync)
  104                 dma_attrs.flags |= DMA_ATTR_WRITE_BARRIER;
  105 
  106         if (!size)
  107                 return ERR_PTR(-EINVAL);
  108 
  109         /*
  110          * If the combination of the addr and size requested for this memory
  111          * region causes an integer overflow, return error.
  112          */
  113         if (((addr + size) < addr) ||
  114             PAGE_ALIGN(addr + size) < (addr + size))
  115                 return ERR_PTR(-EINVAL);
  116 
  117         if (priv_check(curthread, PRIV_VM_MLOCK) != 0)
  118                 return ERR_PTR(-EPERM);
  119 
  120         umem = kzalloc(sizeof *umem, GFP_KERNEL);
  121         if (!umem)
  122                 return ERR_PTR(-ENOMEM);
  123 
  124         umem->context   = context;
  125         umem->length    = size;
  126         umem->address   = addr;
  127         umem->page_size = PAGE_SIZE;
  128         umem->pid       = get_pid(task_pid(current));
  129         /*
  130          * We ask for writable memory if any of the following
  131          * access flags are set.  "Local write" and "remote write"
  132          * obviously require write access.  "Remote atomic" can do
  133          * things like fetch and add, which will modify memory, and
  134          * "MW bind" can change permissions by binding a window.
  135          */
  136         umem->writable  = !!(access &
  137                 (IB_ACCESS_LOCAL_WRITE   | IB_ACCESS_REMOTE_WRITE |
  138                  IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND));
  139 
  140         if (access & IB_ACCESS_ON_DEMAND) {
  141                 ret = ib_umem_odp_get(context, umem);
  142                 if (ret) {
  143                         kfree(umem);
  144                         return ERR_PTR(ret);
  145                 }
  146                 return umem;
  147         }
  148 
  149         umem->odp_data = NULL;
  150 
  151         page_list = (struct page **) __get_free_page(GFP_KERNEL);
  152         if (!page_list) {
  153                 kfree(umem);
  154                 return ERR_PTR(-ENOMEM);
  155         }
  156 
  157         vma_list = (struct vm_area_struct **) __get_free_page(GFP_KERNEL);
  158 
  159         npages = ib_umem_num_pages(umem);
  160 
  161         down_write(&current->mm->mmap_sem);
  162 
  163         locked     = npages + current->mm->pinned_vm;
  164 
  165         cur_base = addr & PAGE_MASK;
  166 
  167         if (npages == 0 || npages > UINT_MAX) {
  168                 ret = -EINVAL;
  169                 goto out;
  170         }
  171 
  172         ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL);
  173         if (ret)
  174                 goto out;
  175 
  176         if (!umem->writable)
  177                 gup_flags |= FOLL_FORCE;
  178 
  179         need_release = 1;
  180         sg_list_start = umem->sg_head.sgl;
  181 
  182         while (npages) {
  183                 ret = get_user_pages(cur_base,
  184                                      min_t(unsigned long, npages,
  185                                            PAGE_SIZE / sizeof (struct page *)),
  186                                      gup_flags, page_list, vma_list);
  187 
  188                 if (ret < 0)
  189                         goto out;
  190 
  191                 umem->npages += ret;
  192                 cur_base += ret * PAGE_SIZE;
  193                 npages   -= ret;
  194 
  195                 for_each_sg(sg_list_start, sg, ret, i) {
  196                         sg_set_page(sg, page_list[i], PAGE_SIZE, 0);
  197                 }
  198 
  199                 /* preparing for next loop */
  200                 sg_list_start = sg;
  201         }
  202 
  203         umem->nmap = ib_dma_map_sg_attrs(context->device,
  204                                   umem->sg_head.sgl,
  205                                   umem->npages,
  206                                   DMA_BIDIRECTIONAL,
  207                                   &dma_attrs);
  208 
  209         if (umem->nmap <= 0) {
  210                 ret = -ENOMEM;
  211                 goto out;
  212         }
  213 
  214         ret = 0;
  215 
  216 out:
  217         if (ret < 0) {
  218                 if (need_release)
  219                         __ib_umem_release(context->device, umem, 0);
  220                 put_pid(umem->pid);
  221                 kfree(umem);
  222         } else
  223                 current->mm->pinned_vm = locked;
  224 
  225         up_write(&current->mm->mmap_sem);
  226         if (vma_list)
  227                 free_page((unsigned long) vma_list);
  228         free_page((unsigned long) page_list);
  229 
  230         return ret < 0 ? ERR_PTR(ret) : umem;
  231 }
  232 EXPORT_SYMBOL(ib_umem_get);
  233 
  234 static void ib_umem_account(struct work_struct *work)
  235 {
  236         struct ib_umem *umem = container_of(work, struct ib_umem, work);
  237 
  238         down_write(&umem->mm->mmap_sem);
  239         umem->mm->pinned_vm -= umem->diff;
  240         up_write(&umem->mm->mmap_sem);
  241         mmput(umem->mm);
  242         kfree(umem);
  243 }
  244 
  245 /**
  246  * ib_umem_release - release memory pinned with ib_umem_get
  247  * @umem: umem struct to release
  248  */
  249 void ib_umem_release(struct ib_umem *umem)
  250 {
  251         struct mm_struct *mm;
  252         struct task_struct *task;
  253         unsigned long diff;
  254 
  255         if (!umem)
  256                 return;
  257 
  258         if (umem->odp_data) {
  259                 ib_umem_odp_release(umem);
  260                 return;
  261         }
  262 
  263         __ib_umem_release(umem->context->device, umem, 1);
  264 
  265         task = get_pid_task(umem->pid, PIDTYPE_PID);
  266         put_pid(umem->pid);
  267         if (!task)
  268                 goto out;
  269         mm = get_task_mm(task);
  270         put_task_struct(task);
  271         if (!mm)
  272                 goto out;
  273 
  274         diff = ib_umem_num_pages(umem);
  275 
  276         /*
  277          * We may be called with the mm's mmap_sem already held.  This
  278          * can happen when a userspace munmap() is the call that drops
  279          * the last reference to our file and calls our release
  280          * method.  If there are memory regions to destroy, we'll end
  281          * up here and not be able to take the mmap_sem.  In that case
  282          * we defer the vm_locked accounting to the system workqueue.
  283          */
  284         if (umem->context->closing) {
  285                 if (!down_write_trylock(&mm->mmap_sem)) {
  286                         INIT_WORK(&umem->work, ib_umem_account);
  287                         umem->mm   = mm;
  288                         umem->diff = diff;
  289 
  290                         queue_work(ib_wq, &umem->work);
  291                         return;
  292                 }
  293         } else
  294                 down_write(&mm->mmap_sem);
  295 
  296         mm->pinned_vm -= diff;
  297         up_write(&mm->mmap_sem);
  298         mmput(mm);
  299 out:
  300         kfree(umem);
  301 }
  302 EXPORT_SYMBOL(ib_umem_release);
  303 
  304 int ib_umem_page_count(struct ib_umem *umem)
  305 {
  306         int shift;
  307         int i;
  308         int n;
  309         struct scatterlist *sg;
  310 
  311         if (umem->odp_data)
  312                 return ib_umem_num_pages(umem);
  313 
  314         shift = ilog2(umem->page_size);
  315 
  316         n = 0;
  317         for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i)
  318                 n += sg_dma_len(sg) >> shift;
  319 
  320         return n;
  321 }
  322 EXPORT_SYMBOL(ib_umem_page_count);
  323 
  324 /*
  325  * Copy from the given ib_umem's pages to the given buffer.
  326  *
  327  * umem - the umem to copy from
  328  * offset - offset to start copying from
  329  * dst - destination buffer
  330  * length - buffer length
  331  *
  332  * Returns 0 on success, or an error code.
  333  */
  334 int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
  335                       size_t length)
  336 {
  337         size_t end = offset + length;
  338         int ret;
  339 
  340         if (offset > umem->length || length > umem->length - offset) {
  341                 pr_err("ib_umem_copy_from not in range. offset: %zd umem length: %zd end: %zd\n",
  342                        offset, umem->length, end);
  343                 return -EINVAL;
  344         }
  345 
  346 #ifdef __linux__
  347         ret = sg_pcopy_to_buffer(umem->sg_head.sgl, umem->nmap, dst, length,
  348                                  offset + ib_umem_offset(umem));
  349 #else
  350         ret = 0;
  351 #endif
  352         if (ret < 0)
  353                 return ret;
  354         else if (ret != length)
  355                 return -EINVAL;
  356         else
  357                 return 0;
  358 }
  359 EXPORT_SYMBOL(ib_umem_copy_from);

Cache object: ced23ec96cff42bd2930d447bf3ecf85


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.