FreeBSD/Linux Kernel Cross Reference
sys/ofed/drivers/infiniband/core/ib_fmr_pool.c


/*-
 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
 *
 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <linux/errno.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/jhash.h>
#include <linux/kthread.h>
#include <linux/wait.h>

#include <rdma/ib_fmr_pool.h>

#include "core_priv.h"

#define PFX "fmr_pool: "

enum {
        IB_FMR_MAX_REMAPS = 32,

        IB_FMR_HASH_BITS  = 8,
        IB_FMR_HASH_SIZE  = 1 << IB_FMR_HASH_BITS,
        IB_FMR_HASH_MASK  = IB_FMR_HASH_SIZE - 1
};
   59 
   60 /*
   61  * If an FMR is not in use, then the list member will point to either
   62  * its pool's free_list (if the FMR can be mapped again; that is,
   63  * remap_count < pool->max_remaps) or its pool's dirty_list (if the
   64  * FMR needs to be unmapped before being remapped).  In either of
   65  * these cases it is a bug if the ref_count is not 0.  In other words,
   66  * if ref_count is > 0, then the list member must not be linked into
   67  * either free_list or dirty_list.
   68  *
   69  * The cache_node member is used to link the FMR into a cache bucket
   70  * (if caching is enabled).  This is independent of the reference
   71  * count of the FMR.  When a valid FMR is released, its ref_count is
   72  * decremented, and if ref_count reaches 0, the FMR is placed in
   73  * either free_list or dirty_list as appropriate.  However, it is not
   74  * removed from the cache and may be "revived" if a call to
   75  * ib_fmr_register_physical() occurs before the FMR is remapped.  In
   76  * this case we just increment the ref_count and remove the FMR from
   77  * free_list/dirty_list.
   78  *
   79  * Before we remap an FMR from free_list, we remove it from the cache
   80  * (to prevent another user from obtaining a stale FMR).  When an FMR
   81  * is released, we add it to the tail of the free list, so that our
   82  * cache eviction policy is "least recently used."
   83  *
   84  * All manipulation of ref_count, list and cache_node is protected by
   85  * pool_lock to maintain consistency.
   86  */
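
/*
 * Illustrative sketch (not part of the original driver, never
 * compiled): the invariant above could be spelled out as a debug
 * check.  The helper name below is hypothetical.
 */
#if 0
/* Caller must hold pool->pool_lock. */
static void ib_fmr_pool_check_idle_lists(struct ib_fmr_pool *pool)
{
        struct ib_pool_fmr *fmr;

        /* Every FMR parked on free_list or dirty_list must be unreferenced. */
        list_for_each_entry(fmr, &pool->free_list, list)
                WARN_ON(fmr->ref_count != 0);
        list_for_each_entry(fmr, &pool->dirty_list, list)
                WARN_ON(fmr->ref_count != 0);
}
#endif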

struct ib_fmr_pool {
        spinlock_t                pool_lock;

        int                       pool_size;
        int                       max_pages;
        int                       max_remaps;
        int                       dirty_watermark;
        int                       dirty_len;
        struct list_head          free_list;
        struct list_head          dirty_list;
        struct hlist_head        *cache_bucket;

        void                     (*flush_function)(struct ib_fmr_pool *pool,
                                                   void *              arg);
        void                     *flush_arg;

        struct task_struct       *thread;

        atomic_t                  req_ser;        /* serial of last flush requested */
        atomic_t                  flush_ser;      /* serial of last flush completed */

        wait_queue_head_t         force_wait;
};

static inline u32 ib_fmr_hash(u64 first_page)
{
        return jhash_2words((u32) first_page, (u32) (first_page >> 32), 0) &
                (IB_FMR_HASH_SIZE - 1);
}

/* Caller must hold pool_lock */
static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool,
                                                      u64 *page_list,
                                                      int  page_list_len,
                                                      u64  io_virtual_address)
{
        struct hlist_head *bucket;
        struct ib_pool_fmr *fmr;

        if (!pool->cache_bucket)
                return NULL;

        bucket = pool->cache_bucket + ib_fmr_hash(*page_list);

        hlist_for_each_entry(fmr, bucket, cache_node)
                if (io_virtual_address == fmr->io_virtual_address &&
                    page_list_len      == fmr->page_list_len      &&
                    !memcmp(page_list, fmr->page_list,
                            page_list_len * sizeof *page_list))
                        return fmr;

        return NULL;
}

static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
{
        int                 ret;
        struct ib_pool_fmr *fmr;
        LIST_HEAD(unmap_list);
        LIST_HEAD(fmr_list);

        spin_lock_irq(&pool->pool_lock);

        list_for_each_entry(fmr, &pool->dirty_list, list) {
                hlist_del_init(&fmr->cache_node);
                fmr->remap_count = 0;
                list_add_tail(&fmr->fmr->list, &fmr_list);

#ifdef DEBUG
                if (fmr->ref_count != 0) {
                        pr_warn(PFX "Unmapping FMR %p with ref count %d\n",
                                fmr, fmr->ref_count);
                }
#endif
        }

        list_splice_init(&pool->dirty_list, &unmap_list);
        pool->dirty_len = 0;

        spin_unlock_irq(&pool->pool_lock);

        if (list_empty(&unmap_list))
                return;

        ret = ib_unmap_fmr(&fmr_list);
        if (ret)
                pr_warn(PFX "ib_unmap_fmr returned %d\n", ret);

        spin_lock_irq(&pool->pool_lock);
        list_splice(&unmap_list, &pool->free_list);
        spin_unlock_irq(&pool->pool_lock);
}

static int ib_fmr_cleanup_thread(void *pool_ptr)
{
        struct ib_fmr_pool *pool = pool_ptr;

        do {
                if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0) {
                        ib_fmr_batch_release(pool);

                        atomic_inc(&pool->flush_ser);
                        wake_up_interruptible(&pool->force_wait);

                        if (pool->flush_function)
                                pool->flush_function(pool, pool->flush_arg);
                }

                set_current_state(TASK_INTERRUPTIBLE);
                if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) >= 0 &&
                    !kthread_should_stop())
                        schedule();
                __set_current_state(TASK_RUNNING);
        } while (!kthread_should_stop());

        return 0;
}
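
/*
 * Note on the handshake (editorial, not from the original comments):
 * req_ser counts flush requests and flush_ser counts completed
 * flushes.  Requesters (ib_fmr_pool_unmap() at the dirty watermark,
 * ib_flush_fmr_pool()) increment req_ser and wake this thread; each
 * ib_fmr_batch_release() pass increments flush_ser and wakes sleepers
 * on force_wait.  Comparing the signed difference of the two counters
 * against zero, rather than the counters themselves, keeps the test
 * correct even after the 32-bit serial numbers wrap around.
 */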

/**
 * ib_create_fmr_pool - Create an FMR pool
 * @pd: Protection domain for FMRs
 * @params: FMR pool parameters
 *
 * Create a pool of FMRs.  Return value is pointer to new pool or
 * error code if creation failed.
 */
struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd             *pd,
                                       struct ib_fmr_pool_param *params)
{
        struct ib_device   *device;
        struct ib_fmr_pool *pool;
        int i;
        int ret;
        int max_remaps;

        if (!params)
                return ERR_PTR(-EINVAL);

        device = pd->device;
        if (!device->alloc_fmr    || !device->dealloc_fmr  ||
            !device->map_phys_fmr || !device->unmap_fmr) {
                pr_info(PFX "Device %s does not support FMRs\n", device->name);
                return ERR_PTR(-ENOSYS);
        }

        if (!device->attrs.max_map_per_fmr)
                max_remaps = IB_FMR_MAX_REMAPS;
        else
                max_remaps = device->attrs.max_map_per_fmr;

        pool = kmalloc(sizeof *pool, GFP_KERNEL);
        if (!pool)
                return ERR_PTR(-ENOMEM);

        pool->cache_bucket   = NULL;
        pool->flush_function = params->flush_function;
        pool->flush_arg      = params->flush_arg;

        INIT_LIST_HEAD(&pool->free_list);
        INIT_LIST_HEAD(&pool->dirty_list);

        if (params->cache) {
                pool->cache_bucket =
                        kmalloc(IB_FMR_HASH_SIZE * sizeof *pool->cache_bucket,
                                GFP_KERNEL);
                if (!pool->cache_bucket) {
                        pr_warn(PFX "Failed to allocate cache in pool\n");
                        ret = -ENOMEM;
                        goto out_free_pool;
                }

                for (i = 0; i < IB_FMR_HASH_SIZE; ++i)
                        INIT_HLIST_HEAD(pool->cache_bucket + i);
        }

        pool->pool_size       = 0;
        pool->max_pages       = params->max_pages_per_fmr;
        pool->max_remaps      = max_remaps;
        pool->dirty_watermark = params->dirty_watermark;
        pool->dirty_len       = 0;
        spin_lock_init(&pool->pool_lock);
        atomic_set(&pool->req_ser,   0);
        atomic_set(&pool->flush_ser, 0);
        init_waitqueue_head(&pool->force_wait);

        pool->thread = kthread_run(ib_fmr_cleanup_thread,
                                   pool,
                                   "ib_fmr(%s)",
                                   device->name);
        if (IS_ERR(pool->thread)) {
                pr_warn(PFX "couldn't start cleanup thread\n");
                ret = PTR_ERR(pool->thread);
                goto out_free_pool;
        }

        {
                struct ib_pool_fmr *fmr;
                struct ib_fmr_attr fmr_attr = {
                        .max_pages  = params->max_pages_per_fmr,
                        .max_maps   = pool->max_remaps,
                        .page_shift = params->page_shift
                };
                int bytes_per_fmr = sizeof *fmr;

                if (pool->cache_bucket)
                        bytes_per_fmr += params->max_pages_per_fmr * sizeof (u64);

                for (i = 0; i < params->pool_size; ++i) {
                        fmr = kmalloc(bytes_per_fmr, GFP_KERNEL);
                        if (!fmr)
                                goto out_fail;

                        fmr->pool             = pool;
                        fmr->remap_count      = 0;
                        fmr->ref_count        = 0;
                        INIT_HLIST_NODE(&fmr->cache_node);

                        fmr->fmr = ib_alloc_fmr(pd, params->access, &fmr_attr);
                        if (IS_ERR(fmr->fmr)) {
                                pr_warn(PFX "fmr_create failed for FMR %d\n",
                                        i);
                                kfree(fmr);
                                goto out_fail;
                        }

                        list_add_tail(&fmr->list, &pool->free_list);
                        ++pool->pool_size;
                }
        }

        return pool;

 out_free_pool:
        kfree(pool->cache_bucket);
        kfree(pool);

        return ERR_PTR(ret);

 out_fail:
        ib_destroy_fmr_pool(pool);

        return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL(ib_create_fmr_pool);
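
/*
 * Illustrative sketch (not part of the original driver, never
 * compiled): how a consumer might create a pool and later tear it
 * down with ib_destroy_fmr_pool().  The protection domain "pd", the
 * callback name, and all parameter values are assumptions chosen for
 * illustration only.
 */
#if 0
static void example_flush_cb(struct ib_fmr_pool *pool, void *arg)
{
        /* Called from the cleanup thread after a batch of unmaps. */
}

static struct ib_fmr_pool *example_create_pool(struct ib_pd *pd)
{
        struct ib_fmr_pool_param params = {
                .max_pages_per_fmr = 64,
                .page_shift        = PAGE_SHIFT,
                .access            = IB_ACCESS_LOCAL_WRITE |
                                     IB_ACCESS_REMOTE_READ,
                .pool_size         = 1024,
                .dirty_watermark   = 32,
                .flush_function    = example_flush_cb,
                .flush_arg         = NULL,
                .cache             = 1,
        };
        struct ib_fmr_pool *pool;

        pool = ib_create_fmr_pool(pd, &params);
        if (IS_ERR(pool))
                return pool;    /* e.g. -ENOSYS if the device lacks FMRs */

        /* ... use the pool; pair with ib_destroy_fmr_pool(pool) ... */
        return pool;
}
#endif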

/**
 * ib_destroy_fmr_pool - Free FMR pool
 * @pool: FMR pool to free
 *
 * Destroy an FMR pool and free all associated resources.
 */
void ib_destroy_fmr_pool(struct ib_fmr_pool *pool)
{
        struct ib_pool_fmr *fmr;
        struct ib_pool_fmr *tmp;
        LIST_HEAD(fmr_list);
        int                 i;

        kthread_stop(pool->thread);
        ib_fmr_batch_release(pool);

        i = 0;
        list_for_each_entry_safe(fmr, tmp, &pool->free_list, list) {
                if (fmr->remap_count) {
                        INIT_LIST_HEAD(&fmr_list);
                        list_add_tail(&fmr->fmr->list, &fmr_list);
                        ib_unmap_fmr(&fmr_list);
                }
                ib_dealloc_fmr(fmr->fmr);
                list_del(&fmr->list);
                kfree(fmr);
                ++i;
        }

        if (i < pool->pool_size)
                pr_warn(PFX "pool still has %d regions registered\n",
                        pool->pool_size - i);

        kfree(pool->cache_bucket);
        kfree(pool);
}
EXPORT_SYMBOL(ib_destroy_fmr_pool);

/**
 * ib_flush_fmr_pool - Invalidate all unmapped FMRs
 * @pool: FMR pool to flush
 *
 * Ensure that all unmapped FMRs are fully invalidated.
 */
int ib_flush_fmr_pool(struct ib_fmr_pool *pool)
{
        int serial;
        struct ib_pool_fmr *fmr, *next;

        /*
         * The free_list holds FMRs that may have been used
         * but have not been remapped enough times to be dirty.
         * Put them on the dirty list now so that the cleanup
         * thread will reap them too.
         */
        spin_lock_irq(&pool->pool_lock);
        list_for_each_entry_safe(fmr, next, &pool->free_list, list) {
                if (fmr->remap_count > 0)
                        list_move(&fmr->list, &pool->dirty_list);
        }
        spin_unlock_irq(&pool->pool_lock);

        serial = atomic_inc_return(&pool->req_ser);
        wake_up_process(pool->thread);

        if (wait_event_interruptible(pool->force_wait,
                                     atomic_read(&pool->flush_ser) - serial >= 0))
                return -EINTR;

        return 0;
}
EXPORT_SYMBOL(ib_flush_fmr_pool);
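
/*
 * Illustrative sketch (not part of the original driver, never
 * compiled): forcing a flush before handing memory back, from a
 * hypothetical teardown path.  Error handling mirrors the -EINTR
 * return above.
 */
#if 0
static int example_quiesce(struct ib_fmr_pool *pool)
{
        int ret;

        ret = ib_flush_fmr_pool(pool);
        if (ret)        /* interrupted by a signal */
                return ret;

        /* All previously unmapped FMRs are now invalidated. */
        return 0;
}
#endif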

/**
 * ib_fmr_pool_map_phys - Map an FMR from an FMR pool
 * @pool_handle: FMR pool to allocate FMR from
 * @page_list: List of pages to map
 * @list_len: Number of pages in @page_list
 * @io_virtual_address: I/O virtual address for new FMR
 *
 * Map an FMR from an FMR pool.
 */
struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle,
                                         u64                *page_list,
                                         int                 list_len,
                                         u64                 io_virtual_address)
{
        struct ib_fmr_pool *pool = pool_handle;
        struct ib_pool_fmr *fmr;
        unsigned long       flags;
        int                 result;

        if (list_len < 1 || list_len > pool->max_pages)
                return ERR_PTR(-EINVAL);

        spin_lock_irqsave(&pool->pool_lock, flags);
        fmr = ib_fmr_cache_lookup(pool,
                                  page_list,
                                  list_len,
                                  io_virtual_address);
        if (fmr) {
                /* found in cache */
                ++fmr->ref_count;
                if (fmr->ref_count == 1)
                        list_del(&fmr->list);

                spin_unlock_irqrestore(&pool->pool_lock, flags);

                return fmr;
        }

        if (list_empty(&pool->free_list)) {
                spin_unlock_irqrestore(&pool->pool_lock, flags);
                return ERR_PTR(-EAGAIN);
        }

        fmr = list_entry(pool->free_list.next, struct ib_pool_fmr, list);
        list_del(&fmr->list);
        hlist_del_init(&fmr->cache_node);
        spin_unlock_irqrestore(&pool->pool_lock, flags);

        result = ib_map_phys_fmr(fmr->fmr, page_list, list_len,
                                 io_virtual_address);

        if (result) {
                spin_lock_irqsave(&pool->pool_lock, flags);
                list_add(&fmr->list, &pool->free_list);
                spin_unlock_irqrestore(&pool->pool_lock, flags);

                pr_warn(PFX "fmr_map returns %d\n", result);

                return ERR_PTR(result);
        }

        ++fmr->remap_count;
        fmr->ref_count = 1;

        if (pool->cache_bucket) {
                fmr->io_virtual_address = io_virtual_address;
                fmr->page_list_len      = list_len;
                memcpy(fmr->page_list, page_list, list_len * sizeof(*page_list));

                spin_lock_irqsave(&pool->pool_lock, flags);
                hlist_add_head(&fmr->cache_node,
                               pool->cache_bucket + ib_fmr_hash(fmr->page_list[0]));
                spin_unlock_irqrestore(&pool->pool_lock, flags);
        }

        return fmr;
}
EXPORT_SYMBOL(ib_fmr_pool_map_phys);

/**
 * ib_fmr_pool_unmap - Unmap FMR
 * @fmr: FMR to unmap
 *
 * Unmap an FMR.  The FMR mapping may remain valid until the FMR is
 * reused (or until ib_flush_fmr_pool() is called).
 */
int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
{
        struct ib_fmr_pool *pool;
        unsigned long flags;

        pool = fmr->pool;

        spin_lock_irqsave(&pool->pool_lock, flags);

        --fmr->ref_count;
        if (!fmr->ref_count) {
                if (fmr->remap_count < pool->max_remaps) {
                        list_add_tail(&fmr->list, &pool->free_list);
                } else {
                        list_add_tail(&fmr->list, &pool->dirty_list);
                        if (++pool->dirty_len >= pool->dirty_watermark) {
                                atomic_inc(&pool->req_ser);
                                wake_up_process(pool->thread);
                        }
                }
        }

#ifdef DEBUG
        if (fmr->ref_count < 0)
                pr_warn(PFX "FMR %p has ref count %d < 0\n",
                        fmr, fmr->ref_count);
#endif

        spin_unlock_irqrestore(&pool->pool_lock, flags);

        return 0;
}
EXPORT_SYMBOL(ib_fmr_pool_unmap);
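
/*
 * Illustrative sketch (not part of the original driver, never
 * compiled): a typical map/use/unmap cycle.  The page array and the
 * way the underlying fmr->fmr handle would be used for work requests
 * are assumptions for illustration.
 */
#if 0
static int example_map_cycle(struct ib_fmr_pool *pool,
                             u64 *pages, int npages, u64 iova)
{
        struct ib_pool_fmr *fmr;

        fmr = ib_fmr_pool_map_phys(pool, pages, npages, iova);
        if (IS_ERR(fmr))
                return PTR_ERR(fmr);    /* -EAGAIN: pool exhausted */

        /*
         * Post work requests using fmr->fmr (struct ib_fmr) here; the
         * mapping stays valid until the FMR is remapped or flushed.
         */

        ib_fmr_pool_unmap(fmr);
        return 0;
}
#endif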





This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.