The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/ofed/drivers/infiniband/core/ib_core_uverbs.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
    3  *
    4  * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
    5  * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
    6  * Copyright 2019 Marvell. All rights reserved.
    7  *
    8  * This software is available to you under a choice of one of two
    9  * licenses.  You may choose to be licensed under the terms of the GNU
   10  * General Public License (GPL) Version 2, available from the file
   11  * COPYING in the main directory of this source tree, or the
   12  * OpenIB.org BSD license below:
   13  *
   14  *     Redistribution and use in source and binary forms, with or
   15  *     without modification, are permitted provided that the following
   16  *     conditions are met:
   17  *
   18  *      - Redistributions of source code must retain the above
   19  *        copyright notice, this list of conditions and the following
   20  *        disclaimer.
   21  *
   22  *      - Redistributions in binary form must reproduce the above
   23  *        copyright notice, this list of conditions and the following
   24  *        disclaimer in the documentation and/or other materials
   25  *        provided with the distribution.
   26  *
   27  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   28  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   29  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   30  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
   31  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   32  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   33  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   34  * SOFTWARE.
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __FBSDID("$FreeBSD$");
   39 
   40 #include <linux/xarray.h>
   41 #include "uverbs.h"
   42 #include "core_priv.h"
   43 
   44 /**
   45  * rdma_umap_priv_init() - Initialize the private data of a vma
   46  *
   47  * @priv: The already allocated private data
   48  * @vma: The vm area struct that needs private data
   49  * @entry: entry into the mmap_xa that needs to be linked with
   50  *       this vma
   51  *
   52  * Each time we map IO memory into user space this keeps track of the
   53  * mapping. When the device is hot-unplugged we 'zap' the mmaps in user space
   54  * to point to the zero page and allow the hot unplug to proceed.
   55  *
   56  * This is necessary for cases like PCI physical hot unplug as the actual BAR
   57  * memory may vanish after this and access to it from userspace could MCE.
   58  *
   59  * RDMA drivers supporting disassociation must have their user space designed
   60  * to cope in some way with their IO pages going to the zero page.
   61  *
   62  */
   63 void rdma_umap_priv_init(struct rdma_umap_priv *priv,
   64                          struct vm_area_struct *vma,
   65                          struct rdma_user_mmap_entry *entry)
   66 {
   67         struct ib_uverbs_file *ufile = vma->vm_file->private_data;
   68 
   69         priv->vma = vma;
   70         if (entry) {
   71                 kref_get(&entry->ref);
   72                 priv->entry = entry;
   73         }
   74         vma->vm_private_data = priv;
   75         /* vm_ops is setup in ib_uverbs_mmap() to avoid module dependencies */
   76 
   77         mutex_lock(&ufile->umap_lock);
   78         list_add(&priv->list, &ufile->umaps);
   79         mutex_unlock(&ufile->umap_lock);
   80 }
   81 EXPORT_SYMBOL(rdma_umap_priv_init);
   82 
   83 /**
   84  * rdma_user_mmap_io() - Map IO memory into a process
   85  *
   86  * @ucontext: associated user context
   87  * @vma: the vma related to the current mmap call
   88  * @pfn: pfn to map
   89  * @size: size to map
   90  * @prot: pgprot to use in remap call
   91  * @entry: mmap_entry retrieved from rdma_user_mmap_entry_get(), or NULL
   92  *         if mmap_entry is not used by the driver
   93  *
   94  * This is to be called by drivers as part of their mmap() functions if they
   95  * wish to send something like PCI-E BAR memory to userspace.
   96  *
   97  * Return -EINVAL on wrong flags or size, -EAGAIN on failure to map. 0 on
   98  * success.
   99  */
  100 int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
  101                       unsigned long pfn, unsigned long size, pgprot_t prot,
  102                       struct rdma_user_mmap_entry *entry)
  103 {
  104         struct ib_uverbs_file *ufile = ucontext->ufile;
  105         struct rdma_umap_priv *priv;
  106 
  107         if (!(vma->vm_flags & VM_SHARED))
  108                 return -EINVAL;
  109 
  110         if (vma->vm_end - vma->vm_start != size)
  111                 return -EINVAL;
  112 
  113         /* Driver is using this wrong, must be called by ib_uverbs_mmap */
  114         if (WARN_ON(!vma->vm_file ||
  115                     vma->vm_file->private_data != ufile))
  116                 return -EINVAL;
  117 
  118         priv = kzalloc(sizeof(*priv), GFP_KERNEL);
  119         if (!priv)
  120                 return -ENOMEM;
  121 
  122         vma->vm_page_prot = prot;
  123         if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) {
  124                 kfree(priv);
  125                 return -EAGAIN;
  126         }
  127 
  128         rdma_umap_priv_init(priv, vma, entry);
  129         return 0;
  130 }
  131 EXPORT_SYMBOL(rdma_user_mmap_io);
  132 
  133 /**
  134  * rdma_user_mmap_entry_get_pgoff() - Get an entry from the mmap_xa
  135  *
  136  * @ucontext: associated user context
  137  * @pgoff: The mmap offset >> PAGE_SHIFT
  138  *
  139  * This function is called when a user tries to mmap with an offset (returned
  140  * by rdma_user_mmap_get_offset()) it initially received from the driver. The
  141  * rdma_user_mmap_entry was created by the function
  142  * rdma_user_mmap_entry_insert().  This function increases the refcnt of the
  143  * entry so that it won't be deleted from the xarray in the meantime.
  144  *
  145  * Return an reference to an entry if exists or NULL if there is no
  146  * match. rdma_user_mmap_entry_put() must be called to put the reference.
  147  */
  148 struct rdma_user_mmap_entry *
  149 rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext,
  150                                unsigned long pgoff)
  151 {
  152         struct rdma_user_mmap_entry *entry;
  153 
  154         if (pgoff > U32_MAX)
  155                 return NULL;
  156 
  157         xa_lock(&ucontext->mmap_xa);
  158 
  159         entry = xa_load(&ucontext->mmap_xa, pgoff);
  160 
  161         /*
  162          * If refcount is zero, entry is already being deleted, driver_removed
  163          * indicates that the no further mmaps are possible and we waiting for
  164          * the active VMAs to be closed.
  165          */
  166         if (!entry || entry->start_pgoff != pgoff || entry->driver_removed ||
  167             !kref_get_unless_zero(&entry->ref))
  168                 goto err;
  169 
  170         xa_unlock(&ucontext->mmap_xa);
  171 
  172         return entry;
  173 
  174 err:
  175         xa_unlock(&ucontext->mmap_xa);
  176         return NULL;
  177 }
  178 EXPORT_SYMBOL(rdma_user_mmap_entry_get_pgoff);
  179 
  180 /**
  181  * rdma_user_mmap_entry_get() - Get an entry from the mmap_xa
  182  *
  183  * @ucontext: associated user context
  184  * @vma: the vma being mmap'd into
  185  *
  186  * This function is like rdma_user_mmap_entry_get_pgoff() except that it also
  187  * checks that the VMA is correct.
  188  */
  189 struct rdma_user_mmap_entry *
  190 rdma_user_mmap_entry_get(struct ib_ucontext *ucontext,
  191                          struct vm_area_struct *vma)
  192 {
  193         struct rdma_user_mmap_entry *entry;
  194 
  195         if (!(vma->vm_flags & VM_SHARED))
  196                 return NULL;
  197         entry = rdma_user_mmap_entry_get_pgoff(ucontext, vma->vm_pgoff);
  198         if (!entry)
  199                 return NULL;
  200         if (entry->npages * PAGE_SIZE != vma->vm_end - vma->vm_start) {
  201                 rdma_user_mmap_entry_put(entry);
  202                 return NULL;
  203         }
  204         return entry;
  205 }
  206 EXPORT_SYMBOL(rdma_user_mmap_entry_get);
  207 
  208 static void rdma_user_mmap_entry_free(struct kref *kref)
  209 {
  210         struct rdma_user_mmap_entry *entry =
  211                 container_of(kref, struct rdma_user_mmap_entry, ref);
  212         struct ib_ucontext *ucontext = entry->ucontext;
  213         unsigned long i;
  214 
  215         /*
  216          * Erase all entries occupied by this single entry, this is deferred
  217          * until all VMA are closed so that the mmap offsets remain unique.
  218          */
  219         xa_lock(&ucontext->mmap_xa);
  220         for (i = 0; i < entry->npages; i++)
  221                 __xa_erase(&ucontext->mmap_xa, entry->start_pgoff + i);
  222         xa_unlock(&ucontext->mmap_xa);
  223 
  224         if (ucontext->device->mmap_free)
  225                 ucontext->device->mmap_free(entry);
  226 }
  227 
  228 /**
  229  * rdma_user_mmap_entry_put() - Drop reference to the mmap entry
  230  *
  231  * @entry: an entry in the mmap_xa
  232  *
  233  * This function is called when the mapping is closed if it was
  234  * an io mapping or when the driver is done with the entry for
  235  * some other reason.
  236  * Should be called after rdma_user_mmap_entry_get was called
  237  * and entry is no longer needed. This function will erase the
  238  * entry and free it if its refcnt reaches zero.
  239  */
  240 void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry)
  241 {
  242         kref_put(&entry->ref, rdma_user_mmap_entry_free);
  243 }
  244 EXPORT_SYMBOL(rdma_user_mmap_entry_put);
  245 
  246 /**
  247  * rdma_user_mmap_entry_remove() - Drop reference to entry and
  248  *                                 mark it as unmmapable
  249  *
  250  * @entry: the entry to insert into the mmap_xa
  251  *
  252  * Drivers can call this to prevent userspace from creating more mappings for
  253  * entry, however existing mmaps continue to exist and ops->mmap_free() will
  254  * not be called until all user mmaps are destroyed.
  255  */
  256 void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry)
  257 {
  258         if (!entry)
  259                 return;
  260 
  261         xa_lock(&entry->ucontext->mmap_xa);
  262         entry->driver_removed = true;
  263         xa_unlock(&entry->ucontext->mmap_xa);
  264         kref_put(&entry->ref, rdma_user_mmap_entry_free);
  265 }
  266 EXPORT_SYMBOL(rdma_user_mmap_entry_remove);
  267 
  268 /**
  269  * rdma_user_mmap_entry_insert_range() - Insert an entry to the mmap_xa
  270  *                                       in a given range.
  271  *
  272  * @ucontext: associated user context.
  273  * @entry: the entry to insert into the mmap_xa
  274  * @length: length of the address that will be mmapped
  275  * @min_pgoff: minimum pgoff to be returned
  276  * @max_pgoff: maximum pgoff to be returned
  277  *
  278  * This function should be called by drivers that use the rdma_user_mmap
  279  * interface for implementing their mmap syscall A database of mmap offsets is
  280  * handled in the core and helper functions are provided to insert entries
  281  * into the database and extract entries when the user calls mmap with the
  282  * given offset. The function allocates a unique page offset in a given range
  283  * that should be provided to user, the user will use the offset to retrieve
  284  * information such as address to be mapped and how.
  285  *
  286  * Return: 0 on success and -ENOMEM on failure
  287  */
  288 int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
  289                                       struct rdma_user_mmap_entry *entry,
  290                                       size_t length, u32 min_pgoff,
  291                                       u32 max_pgoff)
  292 {
  293         struct ib_uverbs_file *ufile = ucontext->ufile;
  294         u32 xa_first, xa_last, npages;
  295         int err;
  296         u32 i;
  297         u32 j;
  298 
  299         if (!entry)
  300                 return -EINVAL;
  301 
  302         kref_init(&entry->ref);
  303         entry->ucontext = ucontext;
  304 
  305         /*
  306          * We want the whole allocation to be done without interruption from a
  307          * different thread. The allocation requires finding a free range and
  308          * storing. During the xa_insert the lock could be released, possibly
  309          * allowing another thread to choose the same range.
  310          */
  311         mutex_lock(&ufile->umap_lock);
  312 
  313         xa_lock(&ucontext->mmap_xa);
  314 
  315         /* We want to find an empty range */
  316         npages = (u32)DIV_ROUND_UP(length, PAGE_SIZE);
  317         entry->npages = npages;
  318 
  319         /* Find an empty range */
  320         for (i = min_pgoff, j = 0; (i + j) <= max_pgoff && j != npages; ) {
  321                 if (xa_load(&ucontext->mmap_xa, i + j) != NULL) {
  322                         if (unlikely(i + j == max_pgoff))
  323                                 break;
  324                         i = i + j + 1;
  325                         j = 0;
  326                 } else {
  327                         if (unlikely(i + j == max_pgoff))
  328                                 break;
  329                         j++;
  330                 }
  331         }
  332 
  333         if (j != npages)
  334                 goto err_unlock;
  335 
  336         xa_first = i;
  337         xa_last = i + j;
  338 
  339         for (i = xa_first; i < xa_last; i++) {
  340                 err = __xa_insert(&ucontext->mmap_xa, i, entry, GFP_KERNEL);
  341                 if (err)
  342                         goto err_undo;
  343         }
  344 
  345         /*
  346          * Internally the kernel uses a page offset, in libc this is a byte
  347          * offset. Drivers should not return pgoff to userspace.
  348          */
  349         entry->start_pgoff = xa_first;
  350         xa_unlock(&ucontext->mmap_xa);
  351         mutex_unlock(&ufile->umap_lock);
  352 
  353         return 0;
  354 
  355 err_undo:
  356         for (; i > xa_first; i--)
  357                 __xa_erase(&ucontext->mmap_xa, i - 1);
  358 
  359 err_unlock:
  360         xa_unlock(&ucontext->mmap_xa);
  361         mutex_unlock(&ufile->umap_lock);
  362         return -ENOMEM;
  363 }
  364 EXPORT_SYMBOL(rdma_user_mmap_entry_insert_range);
  365 
  366 /**
  367  * rdma_user_mmap_entry_insert() - Insert an entry to the mmap_xa.
  368  *
  369  * @ucontext: associated user context.
  370  * @entry: the entry to insert into the mmap_xa
  371  * @length: length of the address that will be mmapped
  372  *
  373  * This function should be called by drivers that use the rdma_user_mmap
  374  * interface for handling user mmapped addresses. The database is handled in
  375  * the core and helper functions are provided to insert entries into the
  376  * database and extract entries when the user calls mmap with the given offset.
  377  * The function allocates a unique page offset that should be provided to user,
  378  * the user will use the offset to retrieve information such as address to
  379  * be mapped and how.
  380  *
  381  * Return: 0 on success and -ENOMEM on failure
  382  */
  383 int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
  384                                 struct rdma_user_mmap_entry *entry,
  385                                 size_t length)
  386 {
  387         return rdma_user_mmap_entry_insert_range(ucontext, entry, length, 0,
  388                                                  U32_MAX);
  389 }
  390 EXPORT_SYMBOL(rdma_user_mmap_entry_insert);

Cache object: 54f91a81454489620ebfdd08fc6846af


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.