/*-
 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
 *
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
 * Copyright 2019 Marvell. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <linux/xarray.h>
#include "uverbs.h"
#include "core_priv.h"

/**
 * rdma_umap_priv_init() - Initialize the private data of a vma
 *
 * @priv: The already allocated private data
 * @vma: The vm area struct that needs private data
 * @entry: entry into the mmap_xa that needs to be linked with
 *         this vma
 *
 * Each time we map IO memory into user space this keeps track of the
 * mapping. When the device is hot-unplugged we 'zap' the mmaps in user space
 * to point to the zero page and allow the hot unplug to proceed.
 *
 * This is necessary for cases like PCI physical hot unplug as the actual BAR
 * memory may vanish after this and access to it from userspace could MCE.
 *
 * RDMA drivers supporting disassociation must have their user space designed
 * to cope in some way with their IO pages going to the zero page.
 *
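 * A minimal usage sketch; the remap helper hypo_remap() is hypothetical,
 * standing in for a driver's own remapping logic:
 *
 *        priv = kzalloc(sizeof(*priv), GFP_KERNEL);
 *        if (!priv)
 *                return -ENOMEM;
 *        if (hypo_remap(vma)) {
 *                kfree(priv);
 *                return -EAGAIN;
 *        }
 *        rdma_umap_priv_init(priv, vma, entry);
 *        return 0;
 *
 * rdma_user_mmap_io() below wraps this same sequence around
 * io_remap_pfn_range(), so most drivers use that helper rather than calling
 * this directly.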
 */
void rdma_umap_priv_init(struct rdma_umap_priv *priv,
                         struct vm_area_struct *vma,
                         struct rdma_user_mmap_entry *entry)
{
        struct ib_uverbs_file *ufile = vma->vm_file->private_data;

        priv->vma = vma;
        if (entry) {
                kref_get(&entry->ref);
                priv->entry = entry;
        }
        vma->vm_private_data = priv;
        /* vm_ops is set up in ib_uverbs_mmap() to avoid module dependencies */

        mutex_lock(&ufile->umap_lock);
        list_add(&priv->list, &ufile->umaps);
        mutex_unlock(&ufile->umap_lock);
}
EXPORT_SYMBOL(rdma_umap_priv_init);

/**
 * rdma_user_mmap_io() - Map IO memory into a process
 *
 * @ucontext: associated user context
 * @vma: the vma related to the current mmap call
 * @pfn: pfn to map
 * @size: size to map
 * @prot: pgprot to use in remap call
 * @entry: mmap_entry retrieved from rdma_user_mmap_entry_get(), or NULL
 *         if mmap_entry is not used by the driver
 *
 * This is to be called by drivers as part of their mmap() functions if they
 * wish to send something like PCI-E BAR memory to userspace.
 *
 * Return: -EINVAL on wrong flags or size, -EAGAIN on failure to map, and 0
 * on success.
 *
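 * A minimal sketch of a driver mmap() handler built on this helper; the
 * entry point hypo_mmap() and the pfn lookup hypo_entry_to_pfn() are
 * hypothetical names, not part of this API:
 *
 *        static int hypo_mmap(struct ib_ucontext *uctx,
 *                             struct vm_area_struct *vma)
 *        {
 *                struct rdma_user_mmap_entry *entry;
 *                int ret;
 *
 *                entry = rdma_user_mmap_entry_get(uctx, vma);
 *                if (!entry)
 *                        return -EINVAL;
 *
 *                ret = rdma_user_mmap_io(uctx, vma, hypo_entry_to_pfn(entry),
 *                                        vma->vm_end - vma->vm_start,
 *                                        pgprot_noncached(vma->vm_page_prot),
 *                                        entry);
 *                rdma_user_mmap_entry_put(entry);
 *                return ret;
 *        }
 *
 * On success rdma_user_mmap_io() holds its own reference on @entry (taken
 * in rdma_umap_priv_init()), so the lookup reference can be dropped
 * unconditionally.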
 */
int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
                      unsigned long pfn, unsigned long size, pgprot_t prot,
                      struct rdma_user_mmap_entry *entry)
{
        struct ib_uverbs_file *ufile = ucontext->ufile;
        struct rdma_umap_priv *priv;

        if (!(vma->vm_flags & VM_SHARED))
                return -EINVAL;

        if (vma->vm_end - vma->vm_start != size)
                return -EINVAL;

        /* Driver is using this wrong, must be called by ib_uverbs_mmap */
        if (WARN_ON(!vma->vm_file ||
                    vma->vm_file->private_data != ufile))
                return -EINVAL;

        priv = kzalloc(sizeof(*priv), GFP_KERNEL);
        if (!priv)
                return -ENOMEM;

        vma->vm_page_prot = prot;
        if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) {
                kfree(priv);
                return -EAGAIN;
        }

        rdma_umap_priv_init(priv, vma, entry);
        return 0;
}
EXPORT_SYMBOL(rdma_user_mmap_io);

/**
 * rdma_user_mmap_entry_get_pgoff() - Get an entry from the mmap_xa
 *
 * @ucontext: associated user context
 * @pgoff: The mmap offset >> PAGE_SHIFT
 *
 * This function is called when a user tries to mmap with an offset (returned
 * by rdma_user_mmap_get_offset()) it initially received from the driver. The
 * rdma_user_mmap_entry was created by the function
 * rdma_user_mmap_entry_insert(). This function increases the refcnt of the
 * entry so that it won't be deleted from the xarray in the meantime.
 *
 * Return: a reference to the entry if it exists, or NULL if there is no
 * match. rdma_user_mmap_entry_put() must be called to put the reference.
 *
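 * A short lookup sketch (error handling elided):
 *
 *        entry = rdma_user_mmap_entry_get_pgoff(ucontext, vma->vm_pgoff);
 *        if (!entry)
 *                return -EINVAL;
 *        ...use the entry's driver data to decide what to map...
 *        rdma_user_mmap_entry_put(entry);
 *
 * When a vma is available, prefer rdma_user_mmap_entry_get() below, which
 * also validates the mapping size against the entry.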
 */
struct rdma_user_mmap_entry *
rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext,
                               unsigned long pgoff)
{
        struct rdma_user_mmap_entry *entry;

        if (pgoff > U32_MAX)
                return NULL;

        xa_lock(&ucontext->mmap_xa);

        entry = xa_load(&ucontext->mmap_xa, pgoff);

        /*
         * If the refcount is zero, the entry is already being deleted;
         * driver_removed indicates that no further mmaps are possible and
         * that we are waiting for the active VMAs to be closed.
         */
        if (!entry || entry->start_pgoff != pgoff || entry->driver_removed ||
            !kref_get_unless_zero(&entry->ref))
                goto err;

        xa_unlock(&ucontext->mmap_xa);

        return entry;

err:
        xa_unlock(&ucontext->mmap_xa);
        return NULL;
}
EXPORT_SYMBOL(rdma_user_mmap_entry_get_pgoff);

/**
 * rdma_user_mmap_entry_get() - Get an entry from the mmap_xa
 *
 * @ucontext: associated user context
 * @vma: the vma being mmap'd into
 *
 * This function is like rdma_user_mmap_entry_get_pgoff() except that it also
 * checks that the VMA is correct.
 */
struct rdma_user_mmap_entry *
rdma_user_mmap_entry_get(struct ib_ucontext *ucontext,
                         struct vm_area_struct *vma)
{
        struct rdma_user_mmap_entry *entry;

        if (!(vma->vm_flags & VM_SHARED))
                return NULL;
        entry = rdma_user_mmap_entry_get_pgoff(ucontext, vma->vm_pgoff);
        if (!entry)
                return NULL;
        if (entry->npages * PAGE_SIZE != vma->vm_end - vma->vm_start) {
                rdma_user_mmap_entry_put(entry);
                return NULL;
        }
        return entry;
}
EXPORT_SYMBOL(rdma_user_mmap_entry_get);

static void rdma_user_mmap_entry_free(struct kref *kref)
{
        struct rdma_user_mmap_entry *entry =
                container_of(kref, struct rdma_user_mmap_entry, ref);
        struct ib_ucontext *ucontext = entry->ucontext;
        unsigned long i;

        /*
         * Erase all of the xarray indices occupied by this entry; this is
         * deferred until all VMAs are closed so that the mmap offsets remain
         * unique.
         */
        xa_lock(&ucontext->mmap_xa);
        for (i = 0; i < entry->npages; i++)
                __xa_erase(&ucontext->mmap_xa, entry->start_pgoff + i);
        xa_unlock(&ucontext->mmap_xa);

        if (ucontext->device->mmap_free)
                ucontext->device->mmap_free(entry);
}

/**
 * rdma_user_mmap_entry_put() - Drop reference to the mmap entry
 *
 * @entry: an entry in the mmap_xa
 *
 * This function is called when the mapping is closed if it was
 * an io mapping, or when the driver is done with the entry for
 * some other reason.
 * It should be called after rdma_user_mmap_entry_get() was called
 * and the entry is no longer needed. This function will erase the
 * entry and free it if its refcnt reaches zero.
 */
void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry)
{
        kref_put(&entry->ref, rdma_user_mmap_entry_free);
}
EXPORT_SYMBOL(rdma_user_mmap_entry_put);

/**
 * rdma_user_mmap_entry_remove() - Drop reference to entry and
 * mark it as unmappable
 *
 * @entry: the entry to remove from the mmap_xa
 *
 * Drivers can call this to prevent userspace from creating more mappings for
 * entry, however existing mmaps continue to exist and ops->mmap_free() will
 * not be called until all user mmaps are destroyed.
 *
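 * Because freeing is deferred, the driver's mmap_free() callback is the
 * right place to release the memory backing the entry. A minimal sketch,
 * assuming a hypothetical container struct hypo_user_mmap_entry that embeds
 * the rdma entry:
 *
 *        static void hypo_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
 *        {
 *                struct hypo_user_mmap_entry *hentry =
 *                        container_of(rdma_entry,
 *                                     struct hypo_user_mmap_entry,
 *                                     rdma_entry);
 *
 *                kfree(hentry);
 *        }
 *
 * Teardown then reduces to rdma_user_mmap_entry_remove(&hentry->rdma_entry);
 * hypo_mmap_free() runs only once the last reference is dropped.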
 */
void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry)
{
        if (!entry)
                return;

        xa_lock(&entry->ucontext->mmap_xa);
        entry->driver_removed = true;
        xa_unlock(&entry->ucontext->mmap_xa);
        kref_put(&entry->ref, rdma_user_mmap_entry_free);
}
EXPORT_SYMBOL(rdma_user_mmap_entry_remove);

/**
 * rdma_user_mmap_entry_insert_range() - Insert an entry into the mmap_xa
 * in a given range.
 *
 * @ucontext: associated user context.
 * @entry: the entry to insert into the mmap_xa
 * @length: length of the address that will be mmapped
 * @min_pgoff: minimum pgoff to be returned
 * @max_pgoff: maximum pgoff to be returned
 *
 * This function should be called by drivers that use the rdma_user_mmap
 * interface for implementing their mmap syscall. A database of mmap offsets
 * is handled in the core and helper functions are provided to insert entries
 * into the database and extract entries when the user calls mmap with the
 * given offset. The function allocates a unique page offset in the given
 * range that should be provided to the user; the user will use the offset to
 * retrieve information such as the address to be mapped and how.
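 *
 * A hedged usage sketch; the container struct and the range bounds
 * HYPO_MMAP_DB_FIRST/HYPO_MMAP_DB_LAST are hypothetical driver choices,
 * not part of this API:
 *
 *        struct hypo_user_mmap_entry {
 *                struct rdma_user_mmap_entry rdma_entry;
 *                u64 bar_addr;
 *        };
 *
 *        hentry = kzalloc(sizeof(*hentry), GFP_KERNEL);
 *        if (!hentry)
 *                return -ENOMEM;
 *        err = rdma_user_mmap_entry_insert_range(ucontext,
 *                                                &hentry->rdma_entry,
 *                                                PAGE_SIZE,
 *                                                HYPO_MMAP_DB_FIRST,
 *                                                HYPO_MMAP_DB_LAST);
 *        if (err) {
 *                kfree(hentry);
 *                return err;
 *        }
 *        offset = rdma_user_mmap_get_offset(&hentry->rdma_entry);
 *
 * The offset returned by rdma_user_mmap_get_offset() is a byte offset that
 * the driver hands back to userspace for the later mmap() call.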
 *
 * Return: 0 on success and -ENOMEM on failure
 */
int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
                                      struct rdma_user_mmap_entry *entry,
                                      size_t length, u32 min_pgoff,
                                      u32 max_pgoff)
{
        struct ib_uverbs_file *ufile = ucontext->ufile;
        u32 xa_first, xa_last, npages;
        int err;
        u32 i;
        u32 j;

        if (!entry)
                return -EINVAL;

        kref_init(&entry->ref);
        entry->ucontext = ucontext;

        /*
         * We want the whole allocation to be done without interruption from a
         * different thread. The allocation requires finding a free range and
         * storing. During the xa_insert the lock could be released, possibly
         * allowing another thread to choose the same range.
         */
        mutex_lock(&ufile->umap_lock);

        xa_lock(&ucontext->mmap_xa);

        npages = (u32)DIV_ROUND_UP(length, PAGE_SIZE);
        entry->npages = npages;

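        /*
         * Find an empty range: i tracks the candidate start of the range and
         * j counts the consecutive free indices seen so far; hitting an
         * occupied index restarts the window just past it.
         */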
        for (i = min_pgoff, j = 0; (i + j) <= max_pgoff && j != npages; ) {
                if (xa_load(&ucontext->mmap_xa, i + j) != NULL) {
                        if (unlikely(i + j == max_pgoff))
                                break;
                        i = i + j + 1;
                        j = 0;
                } else {
                        if (unlikely(i + j == max_pgoff))
                                break;
                        j++;
                }
        }

        if (j != npages)
                goto err_unlock;

        xa_first = i;
        xa_last = i + j;

        for (i = xa_first; i < xa_last; i++) {
                err = __xa_insert(&ucontext->mmap_xa, i, entry, GFP_KERNEL);
                if (err)
                        goto err_undo;
        }


        /*
         * Internally the kernel uses a page offset; in libc this is a byte
         * offset. Drivers should not return pgoff to userspace.
         */
        entry->start_pgoff = xa_first;
        xa_unlock(&ucontext->mmap_xa);
        mutex_unlock(&ufile->umap_lock);

        return 0;

err_undo:
        for (; i > xa_first; i--)
                __xa_erase(&ucontext->mmap_xa, i - 1);

err_unlock:
        xa_unlock(&ucontext->mmap_xa);
        mutex_unlock(&ufile->umap_lock);
        return -ENOMEM;
}
EXPORT_SYMBOL(rdma_user_mmap_entry_insert_range);

/**
 * rdma_user_mmap_entry_insert() - Insert an entry into the mmap_xa.
 *
 * @ucontext: associated user context.
 * @entry: the entry to insert into the mmap_xa
 * @length: length of the address that will be mmapped
 *
 * This function should be called by drivers that use the rdma_user_mmap
 * interface for handling user mmapped addresses. The database is handled in
 * the core and helper functions are provided to insert entries into the
 * database and extract entries when the user calls mmap with the given
 * offset. The function allocates a unique page offset that should be
 * provided to the user; the user will use the offset to retrieve information
 * such as the address to be mapped and how.
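 *
 * This is rdma_user_mmap_entry_insert_range() over the full pgoff space, so
 * a driver with no range partitioning needs only the following (reusing the
 * hypothetical hypo_user_mmap_entry container sketched above):
 *
 *        err = rdma_user_mmap_entry_insert(ucontext, &hentry->rdma_entry,
 *                                          length);
 *        if (!err)
 *                offset = rdma_user_mmap_get_offset(&hentry->rdma_entry);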
 *
 * Return: 0 on success and -ENOMEM on failure
 */
int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
                                struct rdma_user_mmap_entry *entry,
                                size_t length)
{
        return rdma_user_mmap_entry_insert_range(ucontext, entry, length, 0,
                                                 U32_MAX);
}
EXPORT_SYMBOL(rdma_user_mmap_entry_insert);