FreeBSD/Linux Kernel Cross Reference
sys/ofed/drivers/infiniband/core/ib_roce_gid_mgmt.c


/*-
 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
 *
 * Copyright (c) 2015-2017, Mellanox Technologies inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "core_priv.h"
#include <sys/eventhandler.h>

#include <linux/in.h>
#include <linux/in6.h>
#include <linux/rcupdate.h>

#include <rdma/ib_cache.h>
#include <rdma/ib_addr.h>

#include <netinet6/scope6_var.h>

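/*
 * RoCE GID management: keep the RoCE GID tables of registered ib_devices
 * in sync with the IPv4/IPv6 addresses configured on the network
 * interfaces (and stacked VLAN interfaces) that back their ports.  All
 * updates are deferred to an ordered workqueue so that notifier and
 * event callbacks return quickly.
 */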
static struct workqueue_struct *roce_gid_mgmt_wq;

enum gid_op_type {
        GID_DEL = 0,
        GID_ADD
};

struct roce_netdev_event_work {
        struct work_struct work;
        struct ifnet *ndev;
};

struct roce_rescan_work {
        struct work_struct      work;
        struct ib_device        *ib_dev;
};

static const struct {
        bool (*is_supported)(const struct ib_device *device, u8 port_num);
        enum ib_gid_type gid_type;
} PORT_CAP_TO_GID_TYPE[] = {
        {rdma_protocol_roce_eth_encap, IB_GID_TYPE_ROCE},
        {rdma_protocol_roce_udp_encap, IB_GID_TYPE_ROCE_UDP_ENCAP},
};

#define CAP_TO_GID_TABLE_SIZE   ARRAY_SIZE(PORT_CAP_TO_GID_TYPE)

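/*
 * Return a bitmask of the GID types supported by the given port.  Ports
 * that do not run RoCE report plain IB_GID_TYPE_IB only.
 */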
unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port)
{
        int i;
        unsigned int ret_flags = 0;

        if (!rdma_protocol_roce(ib_dev, port))
                return 1UL << IB_GID_TYPE_IB;

        for (i = 0; i < CAP_TO_GID_TABLE_SIZE; i++)
                if (PORT_CAP_TO_GID_TYPE[i].is_supported(ib_dev, port))
                        ret_flags |= 1UL << PORT_CAP_TO_GID_TYPE[i].gid_type;

        return ret_flags;
}
EXPORT_SYMBOL(roce_gid_type_mask_support);

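/*
 * Add or delete the given GID in the port's GID cache, once for every
 * GID type the port supports.
 */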
static void update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev,
    u8 port, union ib_gid *gid, struct ifnet *ndev)
{
        int i;
        unsigned long gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
        struct ib_gid_attr gid_attr;

        memset(&gid_attr, 0, sizeof(gid_attr));
        gid_attr.ndev = ndev;

        for (i = 0; i != IB_GID_TYPE_SIZE; i++) {
                if ((1UL << i) & gid_type_mask) {
                        gid_attr.gid_type = i;
                        switch (gid_op) {
                        case GID_ADD:
                                ib_cache_gid_add(ib_dev, port,
                                                 gid, &gid_attr);
                                break;
                        case GID_DEL:
                                ib_cache_gid_del(ib_dev, port,
                                                 gid, &gid_attr);
                                break;
                        }
                }
        }
}

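/*
 * Enumeration filter: match only the network interface passed via the
 * cookie.
 */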
static int
roce_gid_match_netdev(struct ib_device *ib_dev, u8 port,
    struct ifnet *idev, void *cookie)
{
        struct ifnet *ndev = (struct ifnet *)cookie;

        if (idev == NULL)
                return (0);
        return (ndev == idev);
}

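/*
 * Enumeration filter: match any network interface attached to the port.
 */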
static int
roce_gid_match_all(struct ib_device *ib_dev, u8 port,
    struct ifnet *idev, void *cookie)
{
        if (idev == NULL)
                return (0);
        return (1);
}

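/*
 * Install the port's default GIDs and return the number of default
 * entries reserved (one per supported GID type).
 */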
static int
roce_gid_enum_netdev_default(struct ib_device *ib_dev,
    u8 port, struct ifnet *idev)
{
        unsigned long gid_type_mask;

        gid_type_mask = roce_gid_type_mask_support(ib_dev, port);

        ib_cache_gid_set_default_gid(ib_dev, port, idev, gid_type_mask,
                                     IB_CACHE_GID_DEFAULT_MODE_SET);

        return (hweight_long(gid_type_mask));
}

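/*
 * Synchronize the GID table of the given port with the IPv4/IPv6
 * addresses currently configured on the matching interface and on any
 * VLAN interfaces stacked on top of it.  Addresses are snapshotted under
 * the network epoch, then missing GIDs are added and stale non-default
 * entries removed.
 */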
static void
roce_gid_update_addr_callback(struct ib_device *device, u8 port,
    struct ifnet *ndev, void *cookie)
{
        struct ipx_entry {
                STAILQ_ENTRY(ipx_entry) entry;
                union ipx_addr {
                        struct sockaddr sa[0];
                        struct sockaddr_in v4;
                        struct sockaddr_in6 v6;
                } ipx_addr;
                struct ifnet *ndev;
        };
        struct ipx_entry *entry;
        struct ifnet *idev;
#if defined(INET) || defined(INET6)
        struct ifaddr *ifa;
#endif
        VNET_ITERATOR_DECL(vnet_iter);
        struct ib_gid_attr gid_attr;
        union ib_gid gid;
        int default_gids;
        u16 index_num;
        int i;

        STAILQ_HEAD(, ipx_entry) ipx_head;

        STAILQ_INIT(&ipx_head);

        /* make sure default GIDs are in */
        default_gids = roce_gid_enum_netdev_default(device, port, ndev);

        VNET_LIST_RLOCK();
        VNET_FOREACH(vnet_iter) {
            CURVNET_SET(vnet_iter);
            IFNET_RLOCK();
            CK_STAILQ_FOREACH(idev, &V_ifnet, if_link) {
                struct epoch_tracker et;

                if (idev != ndev) {
                        if (idev->if_type != IFT_L2VLAN)
                                continue;
                        if (ndev != rdma_vlan_dev_real_dev(idev))
                                continue;
                }

                /* clone address information for IPv4 and IPv6 */
                NET_EPOCH_ENTER(et);
#if defined(INET)
                CK_STAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) {
                        if (ifa->ifa_addr == NULL ||
                            ifa->ifa_addr->sa_family != AF_INET)
                                continue;
                        entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
                        if (entry == NULL) {
                                pr_warn("roce_gid_update_addr_callback: "
                                    "couldn't allocate entry for IPv4 update\n");
                                continue;
                        }
                        entry->ipx_addr.v4 = *((struct sockaddr_in *)ifa->ifa_addr);
                        entry->ndev = idev;
                        STAILQ_INSERT_TAIL(&ipx_head, entry, entry);
                }
#endif
#if defined(INET6)
                CK_STAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) {
                        if (ifa->ifa_addr == NULL ||
                            ifa->ifa_addr->sa_family != AF_INET6)
                                continue;
                        entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
                        if (entry == NULL) {
                                pr_warn("roce_gid_update_addr_callback: "
                                    "couldn't allocate entry for IPv6 update\n");
                                continue;
                        }
                        entry->ipx_addr.v6 = *((struct sockaddr_in6 *)ifa->ifa_addr);
                        entry->ndev = idev;

                        /* trash IPv6 scope ID */
                        sa6_recoverscope(&entry->ipx_addr.v6);
                        entry->ipx_addr.v6.sin6_scope_id = 0;

                        STAILQ_INSERT_TAIL(&ipx_head, entry, entry);
                }
#endif
                NET_EPOCH_EXIT(et);
            }
            IFNET_RUNLOCK();
            CURVNET_RESTORE();
        }
        VNET_LIST_RUNLOCK();

        /* add missing GIDs, if any */
        STAILQ_FOREACH(entry, &ipx_head, entry) {
                unsigned long gid_type_mask = roce_gid_type_mask_support(device, port);

                if (rdma_ip2gid(&entry->ipx_addr.sa[0], &gid) != 0)
                        continue;

                for (i = 0; i != IB_GID_TYPE_SIZE; i++) {
                        if (!((1UL << i) & gid_type_mask))
                                continue;
                        /* check if entry found */
                        if (ib_find_cached_gid_by_port(device, &gid, i,
                            port, entry->ndev, &index_num) == 0)
                                break;
                }
                if (i != IB_GID_TYPE_SIZE)
                        continue;
                /* add new GID */
                update_gid(GID_ADD, device, port, &gid, entry->ndev);
        }

        /* remove stale GIDs, if any */
        for (i = default_gids; ib_get_cached_gid(device, port, i, &gid, &gid_attr) == 0; i++) {
                union ipx_addr ipx;

                /* check for valid network device pointer */
                ndev = gid_attr.ndev;
                if (ndev == NULL)
                        continue;
                dev_put(ndev);

                /* don't delete empty entries */
                if (memcmp(&gid, &zgid, sizeof(zgid)) == 0)
                        continue;

                /* zero default */
                memset(&ipx, 0, sizeof(ipx));

                rdma_gid2ip(&ipx.sa[0], &gid);

                STAILQ_FOREACH(entry, &ipx_head, entry) {
                        if (entry->ndev == ndev &&
                            memcmp(&entry->ipx_addr, &ipx, sizeof(ipx)) == 0)
                                break;
                }
                /* check if entry found */
                if (entry != NULL)
                        continue;

                /* remove GID */
                update_gid(GID_DEL, device, port, &gid, ndev);
        }

        while ((entry = STAILQ_FIRST(&ipx_head))) {
                STAILQ_REMOVE_HEAD(&ipx_head, entry);
                kfree(entry);
        }
}

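/*
 * Deferred work: rescan the addresses of a single network interface on
 * every RoCE port it is attached to.
 */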
static void
roce_gid_queue_scan_event_handler(struct work_struct *_work)
{
        struct roce_netdev_event_work *work =
                container_of(_work, struct roce_netdev_event_work, work);

        ib_enum_all_roce_netdevs(roce_gid_match_netdev, work->ndev,
            roce_gid_update_addr_callback, NULL);

        dev_put(work->ndev);
        kfree(work);
}

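/*
 * Queue an address rescan for the given interface.  VLAN interfaces are
 * resolved to their parent Ethernet interface first; other interface
 * types are ignored.
 */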
static void
roce_gid_queue_scan_event(struct ifnet *ndev)
{
        struct roce_netdev_event_work *work;

retry:
        switch (ndev->if_type) {
        case IFT_ETHER:
                break;
        case IFT_L2VLAN:
                ndev = rdma_vlan_dev_real_dev(ndev);
                if (ndev != NULL)
                        goto retry;
                /* FALLTHROUGH */
        default:
                return;
        }

        work = kmalloc(sizeof(*work), GFP_ATOMIC);
        if (!work) {
                pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n");
                return;
        }

        INIT_WORK(&work->work, roce_gid_queue_scan_event_handler);
        dev_hold(ndev);

        work->ndev = ndev;

        queue_work(roce_gid_mgmt_wq, &work->work);
}

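/*
 * Deferred work: drop every cached GID that references the given
 * network interface.
 */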
static void
roce_gid_delete_all_event_handler(struct work_struct *_work)
{
        struct roce_netdev_event_work *work =
                container_of(_work, struct roce_netdev_event_work, work);

        ib_cache_gid_del_all_by_netdev(work->ndev);
        dev_put(work->ndev);
        kfree(work);
}

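/*
 * Queue deletion of all GIDs belonging to the given interface and wait
 * for the work to complete before returning.
 */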
static void
roce_gid_delete_all_event(struct ifnet *ndev)
{
        struct roce_netdev_event_work *work;

        work = kmalloc(sizeof(*work), GFP_ATOMIC);
        if (!work) {
                pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n");
                return;
        }

        INIT_WORK(&work->work, roce_gid_delete_all_event_handler);
        dev_hold(ndev);
        work->ndev = ndev;
        queue_work(roce_gid_mgmt_wq, &work->work);

        /* make sure job is complete before returning */
        flush_workqueue(roce_gid_mgmt_wq);
}

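/*
 * Notifier callback: translate interface and address change events into
 * GID table updates.  Unregister events purge the interface's GIDs;
 * register and address changes trigger a rescan.
 */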
static int
inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
{
        struct ifnet *ndev = netdev_notifier_info_to_ifp(ptr);

        switch (event) {
        case NETDEV_UNREGISTER:
                roce_gid_delete_all_event(ndev);
                break;
        case NETDEV_REGISTER:
        case NETDEV_CHANGEADDR:
        case NETDEV_CHANGEIFADDR:
                roce_gid_queue_scan_event(ndev);
                break;
        default:
                break;
        }
        return NOTIFY_DONE;
}

static struct notifier_block nb_inetaddr = {
        .notifier_call = inetaddr_event
};

static eventhandler_tag eh_ifnet_event;

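/*
 * FreeBSD ifnet event handler: when the PCP (priority code point)
 * setting of a non-VLAN interface changes, force a full reload of its
 * GIDs.
 */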
static void
roce_ifnet_event(void *arg, struct ifnet *ifp, int event)
{
        if (event != IFNET_EVENT_PCP || is_vlan_dev(ifp))
                return;

        /* make sure GID table is reloaded */
        roce_gid_delete_all_event(ifp);
        roce_gid_queue_scan_event(ifp);
}

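/*
 * Deferred work: rescan all network interfaces attached to the ports of
 * a single ib_device.
 */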
static void
roce_rescan_device_handler(struct work_struct *_work)
{
        struct roce_rescan_work *work =
            container_of(_work, struct roce_rescan_work, work);

        ib_enum_roce_netdev(work->ib_dev, roce_gid_match_all, NULL,
            roce_gid_update_addr_callback, NULL);
        kfree(work);
}

/* Caller must flush system workqueue before removing the ib_device */
int roce_rescan_device(struct ib_device *ib_dev)
{
        struct roce_rescan_work *work = kmalloc(sizeof(*work), GFP_KERNEL);

        if (!work)
                return -ENOMEM;

        work->ib_dev = ib_dev;
        INIT_WORK(&work->work, roce_rescan_device_handler);
        queue_work(roce_gid_mgmt_wq, &work->work);

        return 0;
}

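/*
 * Module setup: create the ordered workqueue and hook into the address
 * notifier, the netdevice notifier and the ifnet event handler so that
 * GID tables track future network configuration changes.
 */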
int __init roce_gid_mgmt_init(void)
{
        roce_gid_mgmt_wq = alloc_ordered_workqueue("roce_gid_mgmt_wq", 0);
        if (!roce_gid_mgmt_wq) {
                pr_warn("roce_gid_mgmt: can't allocate work queue\n");
                return -ENOMEM;
        }

        register_inetaddr_notifier(&nb_inetaddr);

        /*
         * We rely on the netdevice notifier to enumerate all existing
         * devices in the system. Register to this notifier last to
         * make sure we will not miss any IP add/del callbacks.
         */
        register_netdevice_notifier(&nb_inetaddr);

        eh_ifnet_event = EVENTHANDLER_REGISTER(ifnet_event,
            roce_ifnet_event, NULL, EVENTHANDLER_PRI_ANY);

        return 0;
}

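/*
 * Module teardown: detach from all notifiers and event handlers, then
 * drain and destroy the workqueue.
 */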
void __exit roce_gid_mgmt_cleanup(void)
{

        if (eh_ifnet_event != NULL)
                EVENTHANDLER_DEREGISTER(ifnet_event, eh_ifnet_event);

        unregister_inetaddr_notifier(&nb_inetaddr);
        unregister_netdevice_notifier(&nb_inetaddr);

        /*
         * Ensure all gid deletion tasks complete before we go down,
         * to avoid any reference to free'd memory. By the time
         * ib-core is removed, all physical devices have been removed,
         * so no issue with remaining hardware contexts.
         */
        synchronize_rcu();
        drain_workqueue(roce_gid_mgmt_wq);
        destroy_workqueue(roce_gid_mgmt_wq);
}

This page is part of the FreeBSD/Linux Kernel Cross-Reference and was automatically generated using a modified version of the LXR engine.