The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
    3  *
    4  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
    5  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
    6  * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
    7  *
    8  * This software is available to you under a choice of one of two
    9  * licenses.  You may choose to be licensed under the terms of the GNU
   10  * General Public License (GPL) Version 2, available from the file
   11  * COPYING in the main directory of this source tree, or the
   12  * OpenIB.org BSD license below:
   13  *
   14  *     Redistribution and use in source and binary forms, with or
   15  *     without modification, are permitted provided that the following
   16  *     conditions are met:
   17  *
   18  *      - Redistributions of source code must retain the above
   19  *        copyright notice, this list of conditions and the following
   20  *        disclaimer.
   21  *
   22  *      - Redistributions in binary form must reproduce the above
   23  *        copyright notice, this list of conditions and the following
   24  *        disclaimer in the documentation and/or other materials
   25  *        provided with the distribution.
   26  *
   27  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   28  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   29  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   30  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
   31  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   32  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   33  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   34  * SOFTWARE.
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __FBSDID("$FreeBSD$");
   39 
   40 #include "ipoib.h"
   41 #include <sys/eventhandler.h>
   42 
   43 #include <linux/module.h>
   44 
   45 #include <linux/slab.h>
   46 #include <linux/kernel.h>
   47 #include <linux/vmalloc.h>
   48 
   49 #include <linux/if_vlan.h>
   50 
   51 #include <net/infiniband.h>
   52 
   53 #include <rdma/ib_addr.h>
   54 #include <rdma/ib_cache.h>
   55 
   56 MODULE_AUTHOR("Roland Dreier");
   57 MODULE_DESCRIPTION("IP-over-InfiniBand net driver");
   58 MODULE_LICENSE("Dual BSD/GPL");
   59 
/* Send/receive queue depths; default to the compile-time ring sizes. */
int ipoib_sendq_size = IPOIB_TX_RING_SIZE;
int ipoib_recvq_size = IPOIB_RX_RING_SIZE;

/* Both depths are exposed as module parameters with mode 0444. */
module_param_named(send_queue_size, ipoib_sendq_size, int, 0444);
MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue");
module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444);
MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue");

#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
/* Debug tracing level, exposed as the "debug_level" parameter (mode 0644). */
int ipoib_debug_level = 1;

module_param_named(debug_level, ipoib_debug_level, int, 0644);
MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
#endif
   74 
/*
 * Cursor used by the debug path iterator functions.  It holds a private
 * snapshot of the most recently visited path rather than a pointer into
 * the live tree.
 */
struct ipoib_path_iter {
        struct ipoib_dev_priv *priv;    /* device whose path tree is walked */
        struct ipoib_path  path;        /* copy of the last path returned */
};
   79 
/*
 * Template broadcast hardware address (20 bytes).  ipoib_priv_alloc()
 * copies INFINIBAND_ALEN bytes of it into priv->broadcastaddr.
 */
static const u8 ipv4_bcast_addr[] = {
        0x00, 0xff, 0xff, 0xff,
        0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff
};
   85 
/* Driver-wide workqueue; work items such as flush_light are queued on it. */
struct workqueue_struct *ipoib_workqueue;

/* SA client handle passed to ib_sa_path_rec_get() for path record queries. */
struct ib_sa_client ipoib_sa_client;

/* Forward declarations for functions referenced before their definitions. */
static void ipoib_add_one(struct ib_device *device);
static void ipoib_remove_one(struct ib_device *device, void *client_data);
static struct ifnet *ipoib_get_net_dev_by_params(
                struct ib_device *dev, u8 port, u16 pkey,
                const union ib_gid *gid, const struct sockaddr *addr,
                void *client_data);
static void ipoib_start(struct ifnet *dev);
static int ipoib_ioctl(struct ifnet *ifp, u_long command, caddr_t data);

/* Unit-number allocator; created by ipoib_unrhdr_init(). */
static struct unrhdr *ipoib_unrhdr;
  100 
  101 static void
  102 ipoib_unrhdr_init(void *arg)
  103 {
  104 
  105         ipoib_unrhdr = new_unrhdr(0, 65535, NULL);
  106 }
  107 SYSINIT(ipoib_unrhdr_init, SI_SUB_KLD - 1, SI_ORDER_ANY, ipoib_unrhdr_init, NULL);
  108 
  109 static void
  110 ipoib_unrhdr_uninit(void *arg)
  111 {
  112 
  113         if (ipoib_unrhdr != NULL) {
  114                 struct unrhdr *hdr;
  115 
  116                 hdr = ipoib_unrhdr;
  117                 ipoib_unrhdr = NULL;
  118 
  119                 delete_unrhdr(hdr);
  120         }
  121 }
  122 SYSUNINIT(ipoib_unrhdr_uninit, SI_SUB_KLD - 1, SI_ORDER_ANY, ipoib_unrhdr_uninit, NULL);
  123 
/*
 * IB client descriptor for this driver: names the client "ipoib" and
 * supplies the add/remove and net-device lookup callbacks.
 */
static struct ib_client ipoib_client = {
        .name   = "ipoib",
        .add    = ipoib_add_one,
        .remove = ipoib_remove_one,
        .get_net_dev_by_params = ipoib_get_net_dev_by_params,
};
  130 
  131 int
  132 ipoib_open(struct ipoib_dev_priv *priv)
  133 {
  134         struct ifnet *dev = priv->dev;
  135 
  136         ipoib_dbg(priv, "bringing up interface\n");
  137 
  138         set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
  139 
  140         if (ipoib_pkey_dev_delay_open(priv))
  141                 return 0;
  142 
  143         if (ipoib_ib_dev_open(priv))
  144                 goto err_disable;
  145 
  146         if (ipoib_ib_dev_up(priv))
  147                 goto err_stop;
  148 
  149         if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
  150                 struct ipoib_dev_priv *cpriv;
  151 
  152                 /* Bring up any child interfaces too */
  153                 mutex_lock(&priv->vlan_mutex);
  154                 list_for_each_entry(cpriv, &priv->child_intfs, list)
  155                         if ((cpriv->dev->if_drv_flags & IFF_DRV_RUNNING) == 0)
  156                                 ipoib_open(cpriv);
  157                 mutex_unlock(&priv->vlan_mutex);
  158         }
  159         dev->if_drv_flags |= IFF_DRV_RUNNING;
  160         dev->if_drv_flags &= ~IFF_DRV_OACTIVE;
  161 
  162         return 0;
  163 
  164 err_stop:
  165         ipoib_ib_dev_stop(priv, 1);
  166 
  167 err_disable:
  168         clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
  169 
  170         return -EINVAL;
  171 }
  172 
  173 static void
  174 ipoib_init(void *arg)
  175 {
  176         struct ifnet *dev;
  177         struct ipoib_dev_priv *priv;
  178 
  179         priv = arg;
  180         dev = priv->dev;
  181         if ((dev->if_drv_flags & IFF_DRV_RUNNING) == 0)
  182                 ipoib_open(priv);
  183         queue_work(ipoib_workqueue, &priv->flush_light);
  184 }
  185 
  186 
  187 static int
  188 ipoib_stop(struct ipoib_dev_priv *priv)
  189 {
  190         struct ifnet *dev = priv->dev;
  191 
  192         ipoib_dbg(priv, "stopping interface\n");
  193 
  194         clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
  195 
  196         dev->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
  197 
  198         ipoib_ib_dev_down(priv, 0);
  199         ipoib_ib_dev_stop(priv, 0);
  200 
  201         if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
  202                 struct ipoib_dev_priv *cpriv;
  203 
  204                 /* Bring down any child interfaces too */
  205                 mutex_lock(&priv->vlan_mutex);
  206                 list_for_each_entry(cpriv, &priv->child_intfs, list)
  207                         if ((cpriv->dev->if_drv_flags & IFF_DRV_RUNNING) != 0)
  208                                 ipoib_stop(cpriv);
  209                 mutex_unlock(&priv->vlan_mutex);
  210         }
  211 
  212         return 0;
  213 }
  214 
  215 static int
  216 ipoib_propagate_ifnet_mtu(struct ipoib_dev_priv *priv, int new_mtu,
  217     bool propagate)
  218 {
  219         struct ifnet *ifp;
  220         struct ifreq ifr;
  221         int error;
  222 
  223         ifp = priv->dev;
  224         if (ifp->if_mtu == new_mtu)
  225                 return (0);
  226         if (propagate) {
  227                 strlcpy(ifr.ifr_name, if_name(ifp), IFNAMSIZ);
  228                 ifr.ifr_mtu = new_mtu;
  229                 CURVNET_SET(ifp->if_vnet);
  230                 error = ifhwioctl(SIOCSIFMTU, ifp, (caddr_t)&ifr, curthread);
  231                 CURVNET_RESTORE();
  232         } else {
  233                 ifp->if_mtu = new_mtu;
  234                 error = 0;
  235         }
  236         return (error);
  237 }
  238 
  239 int
  240 ipoib_change_mtu(struct ipoib_dev_priv *priv, int new_mtu, bool propagate)
  241 {
  242         int error, prev_admin_mtu;
  243 
  244         /* dev->if_mtu > 2K ==> connected mode */
  245         if (ipoib_cm_admin_enabled(priv)) {
  246                 if (new_mtu > IPOIB_CM_MTU(ipoib_cm_max_mtu(priv)))
  247                         return -EINVAL;
  248 
  249                 if (new_mtu > priv->mcast_mtu)
  250                         ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n",
  251                                    priv->mcast_mtu);
  252 
  253                 return (ipoib_propagate_ifnet_mtu(priv, new_mtu, propagate));
  254         }
  255 
  256         if (new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu))
  257                 return -EINVAL;
  258 
  259         prev_admin_mtu = priv->admin_mtu;
  260         priv->admin_mtu = new_mtu;
  261         error = ipoib_propagate_ifnet_mtu(priv, min(priv->mcast_mtu,
  262             priv->admin_mtu), propagate);
  263         if (error == 0) {
  264                 /* check for MTU change to avoid infinite loop */
  265                 if (prev_admin_mtu != new_mtu)
  266                         queue_work(ipoib_workqueue, &priv->flush_light);
  267         } else
  268                 priv->admin_mtu = prev_admin_mtu;
  269         return (error);
  270 }
  271 
  272 static int
  273 ipoib_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
  274 {
  275         struct ipoib_dev_priv *priv = ifp->if_softc;
  276         struct ifaddr *ifa = (struct ifaddr *) data;
  277         struct ifreq *ifr = (struct ifreq *) data;
  278         int error = 0;
  279 
  280         /* check if detaching */
  281         if (priv == NULL)
  282                 return (ENXIO);
  283         /* wait for device to become ready, if any */
  284         while (priv->gone == 2)
  285                 pause("W", 1);
  286         /* check for device gone */
  287         if (priv->gone != 0)
  288                 return (ENXIO);
  289 
  290         switch (command) {
  291         case SIOCSIFFLAGS:
  292                 if (ifp->if_flags & IFF_UP) {
  293                         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
  294                                 error = -ipoib_open(priv);
  295                 } else
  296                         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
  297                                 ipoib_stop(priv);
  298                 break;
  299         case SIOCADDMULTI:
  300         case SIOCDELMULTI:
  301                 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
  302                         queue_work(ipoib_workqueue, &priv->restart_task);
  303                 break;
  304         case SIOCSIFADDR:
  305                 ifp->if_flags |= IFF_UP;
  306 
  307                 switch (ifa->ifa_addr->sa_family) {
  308 #ifdef INET
  309                 case AF_INET:
  310                         ifp->if_init(ifp->if_softc);    /* before arpwhohas */
  311                         arp_ifinit(ifp, ifa);
  312                         break;
  313 #endif
  314                 default:
  315                         ifp->if_init(ifp->if_softc);
  316                         break;
  317                 }
  318                 break;
  319 
  320         case SIOCGIFADDR:
  321                         bcopy(IF_LLADDR(ifp), &ifr->ifr_addr.sa_data[0],
  322                             INFINIBAND_ALEN);
  323                 break;
  324 
  325         case SIOCSIFMTU:
  326                 /*
  327                  * Set the interface MTU.
  328                  */
  329                 error = -ipoib_change_mtu(priv, ifr->ifr_mtu, false);
  330                 break;
  331         default:
  332                 error = EINVAL;
  333                 break;
  334         }
  335         return (error);
  336 }
  337 
  338 
  339 static struct ipoib_path *
  340 __path_find(struct ipoib_dev_priv *priv, void *gid)
  341 {
  342         struct rb_node *n = priv->path_tree.rb_node;
  343         struct ipoib_path *path;
  344         int ret;
  345 
  346         while (n) {
  347                 path = rb_entry(n, struct ipoib_path, rb_node);
  348 
  349                 ret = memcmp(gid, path->pathrec.dgid.raw,
  350                              sizeof (union ib_gid));
  351 
  352                 if (ret < 0)
  353                         n = n->rb_left;
  354                 else if (ret > 0)
  355                         n = n->rb_right;
  356                 else
  357                         return path;
  358         }
  359 
  360         return NULL;
  361 }
  362 
  363 static int
  364 __path_add(struct ipoib_dev_priv *priv, struct ipoib_path *path)
  365 {
  366         struct rb_node **n = &priv->path_tree.rb_node;
  367         struct rb_node *pn = NULL;
  368         struct ipoib_path *tpath;
  369         int ret;
  370 
  371         while (*n) {
  372                 pn = *n;
  373                 tpath = rb_entry(pn, struct ipoib_path, rb_node);
  374 
  375                 ret = memcmp(path->pathrec.dgid.raw, tpath->pathrec.dgid.raw,
  376                              sizeof (union ib_gid));
  377                 if (ret < 0)
  378                         n = &pn->rb_left;
  379                 else if (ret > 0)
  380                         n = &pn->rb_right;
  381                 else
  382                         return -EEXIST;
  383         }
  384 
  385         rb_link_node(&path->rb_node, pn, n);
  386         rb_insert_color(&path->rb_node, &priv->path_tree);
  387 
  388         list_add_tail(&path->list, &priv->path_list);
  389 
  390         return 0;
  391 }
  392 
  393 void
  394 ipoib_path_free(struct ipoib_dev_priv *priv, struct ipoib_path *path)
  395 {
  396 
  397         _IF_DRAIN(&path->queue);
  398 
  399         if (path->ah)
  400                 ipoib_put_ah(path->ah);
  401         if (ipoib_cm_get(path))
  402                 ipoib_cm_destroy_tx(ipoib_cm_get(path));
  403 
  404         kfree(path);
  405 }
  406 
  407 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
  408 
  409 struct ipoib_path_iter *
  410 ipoib_path_iter_init(struct ipoib_dev_priv *priv)
  411 {
  412         struct ipoib_path_iter *iter;
  413 
  414         iter = kmalloc(sizeof *iter, GFP_KERNEL);
  415         if (!iter)
  416                 return NULL;
  417 
  418         iter->priv = priv;
  419         memset(iter->path.pathrec.dgid.raw, 0, 16);
  420 
  421         if (ipoib_path_iter_next(iter)) {
  422                 kfree(iter);
  423                 return NULL;
  424         }
  425 
  426         return iter;
  427 }
  428 
  429 int
  430 ipoib_path_iter_next(struct ipoib_path_iter *iter)
  431 {
  432         struct ipoib_dev_priv *priv = iter->priv;
  433         struct rb_node *n;
  434         struct ipoib_path *path;
  435         int ret = 1;
  436 
  437         spin_lock_irq(&priv->lock);
  438 
  439         n = rb_first(&priv->path_tree);
  440 
  441         while (n) {
  442                 path = rb_entry(n, struct ipoib_path, rb_node);
  443 
  444                 if (memcmp(iter->path.pathrec.dgid.raw, path->pathrec.dgid.raw,
  445                            sizeof (union ib_gid)) < 0) {
  446                         iter->path = *path;
  447                         ret = 0;
  448                         break;
  449                 }
  450 
  451                 n = rb_next(n);
  452         }
  453 
  454         spin_unlock_irq(&priv->lock);
  455 
  456         return ret;
  457 }
  458 
  459 void
  460 ipoib_path_iter_read(struct ipoib_path_iter *iter, struct ipoib_path *path)
  461 {
  462         *path = iter->path;
  463 }
  464 
  465 #endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */
  466 
  467 void
  468 ipoib_mark_paths_invalid(struct ipoib_dev_priv *priv)
  469 {
  470         struct ipoib_path *path, *tp;
  471 
  472         spin_lock_irq(&priv->lock);
  473 
  474         list_for_each_entry_safe(path, tp, &priv->path_list, list) {
  475                 ipoib_dbg(priv, "mark path LID 0x%04x GID %16D invalid\n",
  476                         be16_to_cpu(path->pathrec.dlid),
  477                         path->pathrec.dgid.raw, ":");
  478                 path->valid =  0;
  479         }
  480 
  481         spin_unlock_irq(&priv->lock);
  482 }
  483 
/*
 * Remove and free every path known to priv.
 *
 * Under priv->lock the whole path_list is moved onto a private list and
 * each entry is erased from the path tree.  Each path is then torn down
 * individually: any outstanding SA query is cancelled, the lock is
 * released, the path's completion is awaited, and the path is freed.
 */
void
ipoib_flush_paths(struct ipoib_dev_priv *priv)
{
        struct ipoib_path *path, *tp;
        LIST_HEAD(remove_list);
        unsigned long flags;

        spin_lock_irqsave(&priv->lock, flags);

        /* Detach all paths at once; priv->path_list is left empty. */
        list_splice_init(&priv->path_list, &remove_list);

        list_for_each_entry(path, &remove_list, list)
                rb_erase(&path->rb_node, &priv->path_tree);

        list_for_each_entry_safe(path, tp, &remove_list, list) {
                if (path->query)
                        ib_sa_cancel_query(path->query_id, path->query);
                /*
                 * Drop the lock around the wait: path_rec_completion()
                 * takes priv->lock before it signals path->done.
                 */
                spin_unlock_irqrestore(&priv->lock, flags);
                wait_for_completion(&path->done);
                ipoib_path_free(priv, path);
                spin_lock_irqsave(&priv->lock, flags);
        }

        spin_unlock_irqrestore(&priv->lock, flags);
}
  509 
/*
 * Completion callback for the SA path record query started by
 * path_rec_start().
 *
 * status   0 on success; any other value means no usable record.
 * pathrec  the returned path record (only read when status is 0).
 * path_ptr the struct ipoib_path the query was issued for.
 *
 * On success an address handle is created from the record and installed
 * on the path, the mbufs that were parked on the path are moved to a
 * local queue, and the path is marked valid.  In all cases the query
 * pointer is cleared and path->done is completed.  The parked mbufs are
 * handed back to if_transmit after priv->lock has been dropped.
 */
static void
path_rec_completion(int status, struct ib_sa_path_rec *pathrec, void *path_ptr)
{
        struct ipoib_path *path = path_ptr;
        struct ipoib_dev_priv *priv = path->priv;
        struct ifnet *dev = priv->dev;
        struct ipoib_ah *ah = NULL;
        struct ipoib_ah *old_ah = NULL;
        struct epoch_tracker et;
        struct ifqueue mbqueue;
        struct mbuf *mb;
        unsigned long flags;

        if (!status)
                ipoib_dbg(priv, "PathRec LID 0x%04x for GID %16D\n",
                          be16_to_cpu(pathrec->dlid), pathrec->dgid.raw, ":");
        else
                ipoib_dbg(priv, "PathRec status %d for GID %16D\n",
                          status, path->pathrec.dgid.raw, ":");

        /* Local holding queue for mbufs to resend once the lock is dropped. */
        bzero(&mbqueue, sizeof(mbqueue));

        if (!status) {
                struct ib_ah_attr av;

                /* ah stays NULL if the attributes cannot be derived. */
                if (!ib_init_ah_from_path(priv->ca, priv->port, pathrec, &av))
                        ah = ipoib_create_ah(priv, priv->pd, &av);
        }

        spin_lock_irqsave(&priv->lock, flags);

        if (ah) {
                path->pathrec = *pathrec;

                /* Swap in the new handle; the old one is released below. */
                old_ah   = path->ah;
                path->ah = ah;

                ipoib_dbg(priv, "created address handle %p for LID 0x%04x, SL %d\n",
                          ah, be16_to_cpu(pathrec->dlid), pathrec->sl);

                /* Move everything parked on the path to the local queue. */
                for (;;) {
                        _IF_DEQUEUE(&path->queue, mb);
                        if (mb == NULL)
                                break;
                        _IF_ENQUEUE(&mbqueue, mb);
                }

#ifdef CONFIG_INFINIBAND_IPOIB_CM
                if (ipoib_cm_enabled(priv, path->hwaddr) && !ipoib_cm_get(path))
                        ipoib_cm_set(path, ipoib_cm_create_tx(priv, path));
#endif

                path->valid = 1;
        }

        /* The query is finished; wake anyone waiting on path->done. */
        path->query = NULL;
        complete(&path->done);

        spin_unlock_irqrestore(&priv->lock, flags);

        if (old_ah)
                ipoib_put_ah(old_ah);

        /* Resubmit the parked mbufs through the normal transmit entry. */
        NET_EPOCH_ENTER(et);
        for (;;) {
                _IF_DEQUEUE(&mbqueue, mb);
                if (mb == NULL)
                        break;
                mb->m_pkthdr.rcvif = dev;
                if (dev->if_transmit(dev, mb))
                        ipoib_warn(priv, "dev_queue_xmit failed "
                                   "to requeue packet\n");
        }
        NET_EPOCH_EXIT(et);
}
  585 
  586 static struct ipoib_path *
  587 path_rec_create(struct ipoib_dev_priv *priv, uint8_t *hwaddr)
  588 {
  589         struct ipoib_path *path;
  590 
  591         if (!priv->broadcast)
  592                 return NULL;
  593 
  594         path = kzalloc(sizeof *path, GFP_ATOMIC);
  595         if (!path)
  596                 return NULL;
  597 
  598         path->priv = priv;
  599 
  600         bzero(&path->queue, sizeof(path->queue));
  601 
  602 #ifdef CONFIG_INFINIBAND_IPOIB_CM
  603         memcpy(&path->hwaddr, hwaddr, INFINIBAND_ALEN);
  604 #endif
  605         memcpy(path->pathrec.dgid.raw, &hwaddr[4], sizeof (union ib_gid));
  606         path->pathrec.sgid          = priv->local_gid;
  607         path->pathrec.pkey          = cpu_to_be16(priv->pkey);
  608         path->pathrec.numb_path     = 1;
  609         path->pathrec.traffic_class = priv->broadcast->mcmember.traffic_class;
  610 
  611         return path;
  612 }
  613 
  614 static int
  615 path_rec_start(struct ipoib_dev_priv *priv, struct ipoib_path *path)
  616 {
  617         struct ifnet *dev = priv->dev;
  618 
  619         ib_sa_comp_mask comp_mask = IB_SA_PATH_REC_MTU_SELECTOR | IB_SA_PATH_REC_MTU;
  620         struct ib_sa_path_rec p_rec;
  621 
  622         p_rec = path->pathrec;
  623         p_rec.mtu_selector = IB_SA_GT;
  624 
  625         switch (roundup_pow_of_two(dev->if_mtu + IPOIB_ENCAP_LEN)) {
  626         case 512:
  627                 p_rec.mtu = IB_MTU_256;
  628                 break;
  629         case 1024:
  630                 p_rec.mtu = IB_MTU_512;
  631                 break;
  632         case 2048:
  633                 p_rec.mtu = IB_MTU_1024;
  634                 break;
  635         case 4096:
  636                 p_rec.mtu = IB_MTU_2048;
  637                 break;
  638         default:
  639                 /* Wildcard everything */
  640                 comp_mask = 0;
  641                 p_rec.mtu = 0;
  642                 p_rec.mtu_selector = 0;
  643         }
  644 
  645         ipoib_dbg(priv, "Start path record lookup for %16D MTU > %d\n",
  646                   p_rec.dgid.raw, ":",
  647                   comp_mask ? ib_mtu_enum_to_int(p_rec.mtu) : 0);
  648 
  649         init_completion(&path->done);
  650 
  651         path->query_id =
  652                 ib_sa_path_rec_get(&ipoib_sa_client, priv->ca, priv->port,
  653                                    &p_rec, comp_mask            |
  654                                    IB_SA_PATH_REC_DGID          |
  655                                    IB_SA_PATH_REC_SGID          |
  656                                    IB_SA_PATH_REC_NUMB_PATH     |
  657                                    IB_SA_PATH_REC_TRAFFIC_CLASS |
  658                                    IB_SA_PATH_REC_PKEY,
  659                                    1000, GFP_ATOMIC,
  660                                    path_rec_completion,
  661                                    path, &path->query);
  662         if (path->query_id < 0) {
  663                 ipoib_warn(priv, "ib_sa_path_rec_get failed: %d\n", path->query_id);
  664                 path->query = NULL;
  665                 complete(&path->done);
  666                 return path->query_id;
  667         }
  668 
  669         return 0;
  670 }
  671 
/*
 * Transmit one unicast mbuf.
 *
 * The destination path is looked up by the GID at eh->hwaddr + 4.  If no
 * valid path exists, the mbuf is parked on the path's queue (creating the
 * path first when necessary) and a path record query is started.  With a
 * valid path the mbuf goes out over connected mode when a CM context is
 * up, otherwise over the path's address handle.  The mbuf is consumed on
 * every branch: sent, queued, or freed with IFCOUNTER_OERRORS bumped.
 */
static void
ipoib_unicast_send(struct mbuf *mb, struct ipoib_dev_priv *priv, struct ipoib_header *eh)
{
        struct ipoib_path *path;

        path = __path_find(priv, eh->hwaddr + 4);
        if (!path || !path->valid) {
                int new_path = 0;

                if (!path) {
                        path = path_rec_create(priv, eh->hwaddr);
                        new_path = 1;
                }
                if (path) {
                        /* Park the mbuf unless the per-path queue is full. */
                        if (_IF_QLEN(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE)
                                _IF_ENQUEUE(&path->queue, mb);
                        else {
                                if_inc_counter(priv->dev, IFCOUNTER_OERRORS, 1);
                                m_freem(mb);
                        }

                        /*
                         * Start a query only if none is outstanding.  If
                         * starting fails, a path created above is freed
                         * again (which also drains the mbuf just queued).
                         */
                        if (!path->query && path_rec_start(priv, path)) {
                                if (new_path)
                                        ipoib_path_free(priv, path);
                                return;
                        } else
                                /*
                                 * For a path that is already in the tree
                                 * __path_add() returns -EEXIST and changes
                                 * nothing; the result is ignored.
                                 */
                                __path_add(priv, path);
                } else {
                        /* No path could be created: drop the mbuf. */
                        if_inc_counter(priv->dev, IFCOUNTER_OERRORS, 1);
                        m_freem(mb);
                }

                return;
        }

        if (ipoib_cm_get(path) && ipoib_cm_up(path)) {
                ipoib_cm_send(priv, mb, ipoib_cm_get(path));
        } else if (path->ah) {
                ipoib_send(priv, mb, path->ah, IPOIB_QPN(eh->hwaddr));
        } else if ((path->query || !path_rec_start(priv, path)) &&
                    path->queue.ifq_len < IPOIB_MAX_PATH_REC_QUEUE) {
                /* Valid path without a handle yet: queue behind the query. */
                _IF_ENQUEUE(&path->queue, mb);
        } else {
                if_inc_counter(priv->dev, IFCOUNTER_OERRORS, 1);
                m_freem(mb);
        }
}
  719 
  720 static int
  721 ipoib_send_one(struct ipoib_dev_priv *priv, struct mbuf *mb)
  722 {
  723         struct ipoib_header *eh;
  724 
  725         eh = mtod(mb, struct ipoib_header *);
  726         if (IPOIB_IS_MULTICAST(eh->hwaddr)) {
  727                 /* Add in the P_Key for multicast*/
  728                 eh->hwaddr[8] = (priv->pkey >> 8) & 0xff;
  729                 eh->hwaddr[9] = priv->pkey & 0xff;
  730 
  731                 ipoib_mcast_send(priv, eh->hwaddr + 4, mb);
  732         } else
  733                 ipoib_unicast_send(mb, priv, eh);
  734 
  735         return 0;
  736 }
  737 
  738 void
  739 ipoib_start_locked(struct ifnet *dev, struct ipoib_dev_priv *priv)
  740 {
  741         struct mbuf *mb;
  742 
  743         assert_spin_locked(&priv->lock);
  744 
  745         while (!IFQ_DRV_IS_EMPTY(&dev->if_snd) &&
  746             (dev->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
  747                 IFQ_DRV_DEQUEUE(&dev->if_snd, mb);
  748                 if (mb == NULL)
  749                         break;
  750                 INFINIBAND_BPF_MTAP(dev, mb);
  751                 ipoib_send_one(priv, mb);
  752         }
  753 }
  754 
  755 static void
  756 _ipoib_start(struct ifnet *dev, struct ipoib_dev_priv *priv)
  757 {
  758 
  759         if ((dev->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
  760             IFF_DRV_RUNNING)
  761                 return;
  762 
  763         spin_lock(&priv->lock);
  764         ipoib_start_locked(dev, priv);
  765         spin_unlock(&priv->lock);
  766 }
  767 
  768 static void
  769 ipoib_start(struct ifnet *dev)
  770 {
  771         _ipoib_start(dev, dev->if_softc);
  772 }
  773 
  774 static void
  775 ipoib_vlan_start(struct ifnet *dev)
  776 {
  777         struct ipoib_dev_priv *priv;
  778         struct mbuf *mb;
  779 
  780         priv = VLAN_COOKIE(dev);
  781         if (priv != NULL)
  782                 return _ipoib_start(dev, priv);
  783         while (!IFQ_DRV_IS_EMPTY(&dev->if_snd)) {
  784                 IFQ_DRV_DEQUEUE(&dev->if_snd, mb);
  785                 if (mb == NULL)
  786                         break;
  787                 m_freem(mb);
  788                 if_inc_counter(dev, IFCOUNTER_OERRORS, 1);
  789         }
  790 }
  791 
  792 int
  793 ipoib_dev_init(struct ipoib_dev_priv *priv, struct ib_device *ca, int port)
  794 {
  795 
  796         /* Allocate RX/TX "rings" to hold queued mbs */
  797         priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
  798                                 GFP_KERNEL);
  799         if (!priv->rx_ring) {
  800                 printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
  801                        ca->name, ipoib_recvq_size);
  802                 goto out;
  803         }
  804 
  805         priv->tx_ring = kzalloc(ipoib_sendq_size * sizeof *priv->tx_ring, GFP_KERNEL);
  806         if (!priv->tx_ring) {
  807                 printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n",
  808                        ca->name, ipoib_sendq_size);
  809                 goto out_rx_ring_cleanup;
  810         }
  811         memset(priv->tx_ring, 0, ipoib_sendq_size * sizeof *priv->tx_ring);
  812 
  813         /* priv->tx_head, tx_tail & tx_outstanding are already 0 */
  814 
  815         if (ipoib_ib_dev_init(priv, ca, port))
  816                 goto out_tx_ring_cleanup;
  817 
  818         return 0;
  819 
  820 out_tx_ring_cleanup:
  821         kfree(priv->tx_ring);
  822 
  823 out_rx_ring_cleanup:
  824         kfree(priv->rx_ring);
  825 
  826 out:
  827         return -ENOMEM;
  828 }
  829 
  830 static void
  831 ipoib_ifdetach(struct ipoib_dev_priv *priv)
  832 {
  833         struct ifnet *dev;
  834 
  835         dev = priv->dev;
  836         if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
  837                 priv->gone = 1;
  838                 infiniband_ifdetach(dev);
  839         }
  840 }
  841 
  842 static void
  843 ipoib_detach(struct ipoib_dev_priv *priv)
  844 {
  845         struct ifnet *dev;
  846 
  847         dev = priv->dev;
  848         if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
  849                 if_free(dev);
  850                 free_unr(ipoib_unrhdr, priv->unit);
  851         } else
  852                 VLAN_SETCOOKIE(priv->dev, NULL);
  853 
  854         free(priv, M_TEMP);
  855 }
  856 
  857 void
  858 ipoib_dev_cleanup(struct ipoib_dev_priv *priv)
  859 {
  860         struct ipoib_dev_priv *cpriv, *tcpriv;
  861 
  862         /* Delete any child interfaces first */
  863         list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
  864                 ipoib_ifdetach(cpriv);
  865                 ipoib_dev_cleanup(cpriv);
  866                 ipoib_detach(cpriv);
  867         }
  868 
  869         ipoib_ib_dev_cleanup(priv);
  870 
  871         kfree(priv->rx_ring);
  872         kfree(priv->tx_ring);
  873 
  874         priv->rx_ring = NULL;
  875         priv->tx_ring = NULL;
  876 }
  877 
  878 static struct ipoib_dev_priv *
  879 ipoib_priv_alloc(void)
  880 {
  881         struct ipoib_dev_priv *priv;
  882 
  883         priv = malloc(sizeof(struct ipoib_dev_priv), M_TEMP, M_ZERO|M_WAITOK);
  884         spin_lock_init(&priv->lock);
  885         spin_lock_init(&priv->drain_lock);
  886         mutex_init(&priv->vlan_mutex);
  887         INIT_LIST_HEAD(&priv->path_list);
  888         INIT_LIST_HEAD(&priv->child_intfs);
  889         INIT_LIST_HEAD(&priv->dead_ahs);
  890         INIT_LIST_HEAD(&priv->multicast_list);
  891         INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll);
  892         INIT_DELAYED_WORK(&priv->mcast_task,   ipoib_mcast_join_task);
  893         INIT_WORK(&priv->carrier_on_task, ipoib_mcast_carrier_on_task);
  894         INIT_WORK(&priv->flush_light,   ipoib_ib_dev_flush_light);
  895         INIT_WORK(&priv->flush_normal,   ipoib_ib_dev_flush_normal);
  896         INIT_WORK(&priv->flush_heavy,   ipoib_ib_dev_flush_heavy);
  897         INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
  898         INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah);
  899         memcpy(priv->broadcastaddr, ipv4_bcast_addr, INFINIBAND_ALEN);
  900 
  901         return (priv);
  902 }
  903 
  904 struct ipoib_dev_priv *
  905 ipoib_intf_alloc(const char *name)
  906 {
  907         struct ipoib_dev_priv *priv;
  908         struct ifnet *dev;
  909 
  910         priv = ipoib_priv_alloc();
  911         dev = priv->dev = if_alloc(IFT_INFINIBAND);
  912         if (!dev) {
  913                 free(priv, M_TEMP);
  914                 return NULL;
  915         }
  916         dev->if_softc = priv;
  917         priv->gone = 2; /* initializing */
  918         priv->unit = alloc_unr(ipoib_unrhdr);
  919         if (priv->unit == -1) {
  920                 if_free(dev);
  921                 free(priv, M_TEMP);
  922                 return NULL;
  923         }
  924         if_initname(dev, name, priv->unit);
  925         dev->if_flags = IFF_BROADCAST | IFF_MULTICAST;
  926 
  927         infiniband_ifattach(priv->dev, NULL, priv->broadcastaddr);
  928 
  929         dev->if_init = ipoib_init;
  930         dev->if_ioctl = ipoib_ioctl;
  931         dev->if_start = ipoib_start;
  932 
  933         dev->if_snd.ifq_maxlen = ipoib_sendq_size * 2;
  934 
  935         priv->dev = dev;
  936         if_link_state_change(priv->dev, LINK_STATE_DOWN);
  937 
  938         return dev->if_softc;
  939 }
  940 
  941 int
  942 ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
  943 {
  944         struct ib_device_attr *device_attr = &hca->attrs;
  945 
  946         priv->hca_caps = device_attr->device_cap_flags;
  947 
  948         priv->dev->if_hwassist = 0;
  949         priv->dev->if_capabilities = 0;
  950 
  951 #ifndef CONFIG_INFINIBAND_IPOIB_CM
  952         if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
  953                 set_bit(IPOIB_FLAG_CSUM, &priv->flags);
  954                 priv->dev->if_hwassist = CSUM_IP | CSUM_TCP | CSUM_UDP;
  955                 priv->dev->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
  956         }
  957 
  958 #if 0
  959         if (priv->dev->features & NETIF_F_SG && priv->hca_caps & IB_DEVICE_UD_TSO) {
  960                 priv->dev->if_capabilities |= IFCAP_TSO4;
  961                 priv->dev->if_hwassist |= CSUM_TSO;
  962         }
  963 #endif
  964 #endif
  965         priv->dev->if_capabilities |=
  966             IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_LINKSTATE;
  967         priv->dev->if_capenable = priv->dev->if_capabilities;
  968 
  969         return 0;
  970 }
  971 
  972 
  973 static struct ifnet *
  974 ipoib_add_port(const char *format, struct ib_device *hca, u8 port)
  975 {
  976         struct ipoib_dev_priv *priv;
  977         struct ib_port_attr attr;
  978         int result = -ENOMEM;
  979 
  980         priv = ipoib_intf_alloc(format);
  981         if (!priv)
  982                 goto alloc_mem_failed;
  983 
  984         if (!ib_query_port(hca, port, &attr))
  985                 priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu);
  986         else {
  987                 printk(KERN_WARNING "%s: ib_query_port %d failed\n",
  988                        hca->name, port);
  989                 goto device_init_failed;
  990         }
  991 
  992         /* MTU will be reset when mcast join happens */
  993         priv->dev->if_mtu = IPOIB_UD_MTU(priv->max_ib_mtu);
  994         priv->mcast_mtu = priv->admin_mtu = priv->dev->if_mtu;
  995 
  996         result = ib_query_pkey(hca, port, 0, &priv->pkey);
  997         if (result) {
  998                 printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n",
  999                        hca->name, port, result);
 1000                 goto device_init_failed;
 1001         }
 1002 
 1003         if (ipoib_set_dev_features(priv, hca))
 1004                 goto device_init_failed;
 1005 
 1006         /*
 1007          * Set the full membership bit, so that we join the right
 1008          * broadcast group, etc.
 1009          */
 1010         priv->pkey |= 0x8000;
 1011 
 1012         priv->broadcastaddr[8] = priv->pkey >> 8;
 1013         priv->broadcastaddr[9] = priv->pkey & 0xff;
 1014 
 1015         result = ib_query_gid(hca, port, 0, &priv->local_gid, NULL);
 1016         if (result) {
 1017                 printk(KERN_WARNING "%s: ib_query_gid port %d failed (ret = %d)\n",
 1018                        hca->name, port, result);
 1019                 goto device_init_failed;
 1020         }
 1021         memcpy(IF_LLADDR(priv->dev) + 4, priv->local_gid.raw, sizeof(union ib_gid));
 1022 
 1023         result = ipoib_dev_init(priv, hca, port);
 1024         if (result < 0) {
 1025                 printk(KERN_WARNING "%s: failed to initialize port %d (ret = %d)\n",
 1026                        hca->name, port, result);
 1027                 goto device_init_failed;
 1028         }
 1029         if (ipoib_cm_admin_enabled(priv))
 1030                 priv->dev->if_mtu = IPOIB_CM_MTU(ipoib_cm_max_mtu(priv));
 1031 
 1032         INIT_IB_EVENT_HANDLER(&priv->event_handler,
 1033                               priv->ca, ipoib_event);
 1034         result = ib_register_event_handler(&priv->event_handler);
 1035         if (result < 0) {
 1036                 printk(KERN_WARNING "%s: ib_register_event_handler failed for "
 1037                        "port %d (ret = %d)\n",
 1038                        hca->name, port, result);
 1039                 goto event_failed;
 1040         }
 1041         if_printf(priv->dev, "Attached to %s port %d\n", hca->name, port);
 1042 
 1043         priv->gone = 0; /* ready */
 1044 
 1045         return priv->dev;
 1046 
 1047 event_failed:
 1048         ipoib_dev_cleanup(priv);
 1049 
 1050 device_init_failed:
 1051         ipoib_ifdetach(priv);
 1052         ipoib_detach(priv);
 1053 
 1054 alloc_mem_failed:
 1055         return ERR_PTR(result);
 1056 }
 1057 
 1058 static void
 1059 ipoib_add_one(struct ib_device *device)
 1060 {
 1061         struct list_head *dev_list;
 1062         struct ifnet *dev;
 1063         struct ipoib_dev_priv *priv;
 1064         int s, e, p;
 1065 
 1066         if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
 1067                 return;
 1068 
 1069         dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL);
 1070         if (!dev_list)
 1071                 return;
 1072 
 1073         INIT_LIST_HEAD(dev_list);
 1074 
 1075         if (device->node_type == RDMA_NODE_IB_SWITCH) {
 1076                 s = 0;
 1077                 e = 0;
 1078         } else {
 1079                 s = 1;
 1080                 e = device->phys_port_cnt;
 1081         }
 1082 
 1083         for (p = s; p <= e; ++p) {
 1084                 if (rdma_port_get_link_layer(device, p) != IB_LINK_LAYER_INFINIBAND)
 1085                         continue;
 1086                 dev = ipoib_add_port("ib", device, p);
 1087                 if (!IS_ERR(dev)) {
 1088                         priv = dev->if_softc;
 1089                         list_add_tail(&priv->list, dev_list);
 1090                 }
 1091         }
 1092 
 1093         ib_set_client_data(device, &ipoib_client, dev_list);
 1094 }
 1095 
 1096 static void
 1097 ipoib_remove_one(struct ib_device *device, void *client_data)
 1098 {
 1099         struct ipoib_dev_priv *priv, *tmp;
 1100         struct list_head *dev_list = client_data;
 1101 
 1102         if (!dev_list)
 1103                 return;
 1104 
 1105         if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
 1106                 return;
 1107 
 1108         list_for_each_entry_safe(priv, tmp, dev_list, list) {
 1109                 if (rdma_port_get_link_layer(device, priv->port) != IB_LINK_LAYER_INFINIBAND)
 1110                         continue;
 1111 
 1112                 ipoib_ifdetach(priv);
 1113                 ipoib_stop(priv);
 1114 
 1115                 ib_unregister_event_handler(&priv->event_handler);
 1116 
 1117                 flush_workqueue(ipoib_workqueue);
 1118 
 1119                 ipoib_dev_cleanup(priv);
 1120                 ipoib_detach(priv);
 1121         }
 1122 
 1123         kfree(dev_list);
 1124 }
 1125 
 1126 static int
 1127 ipoib_match_dev_addr(const struct sockaddr *addr, struct ifnet *dev)
 1128 {
 1129         struct epoch_tracker et;
 1130         struct ifaddr *ifa;
 1131         int retval = 0;
 1132 
 1133         NET_EPOCH_ENTER(et);
 1134         CK_STAILQ_FOREACH(ifa, &dev->if_addrhead, ifa_link) {
 1135                 if (ifa->ifa_addr == NULL ||
 1136                     ifa->ifa_addr->sa_family != addr->sa_family ||
 1137                     ifa->ifa_addr->sa_len != addr->sa_len) {
 1138                         continue;
 1139                 }
 1140                 if (memcmp(ifa->ifa_addr, addr, addr->sa_len) == 0) {
 1141                         retval = 1;
 1142                         break;
 1143                 }
 1144         }
 1145         NET_EPOCH_EXIT(et);
 1146 
 1147         return (retval);
 1148 }
 1149 
 1150 /*
 1151  * ipoib_match_gid_pkey_addr - returns the number of IPoIB netdevs on
 1152  * top a given ipoib device matching a pkey_index and address, if one
 1153  * exists.
 1154  *
 1155  * @found_net_dev: contains a matching net_device if the return value
 1156  * >= 1, with a reference held.
 1157  */
 1158 static int
 1159 ipoib_match_gid_pkey_addr(struct ipoib_dev_priv *priv,
 1160     const union ib_gid *gid, u16 pkey_index, const struct sockaddr *addr,
 1161     struct ifnet **found_net_dev)
 1162 {
 1163         struct ipoib_dev_priv *child_priv;
 1164         int matches = 0;
 1165 
 1166         if (priv->pkey_index == pkey_index &&
 1167             (!gid || !memcmp(gid, &priv->local_gid, sizeof(*gid)))) {
 1168                 if (addr == NULL || ipoib_match_dev_addr(addr, priv->dev) != 0) {
 1169                         if (*found_net_dev == NULL) {
 1170                                 struct ifnet *net_dev;
 1171 
 1172                                 if (priv->parent != NULL)
 1173                                         net_dev = priv->parent;
 1174                                 else
 1175                                         net_dev = priv->dev;
 1176                                 *found_net_dev = net_dev;
 1177                                 dev_hold(net_dev);
 1178                         }
 1179                         matches++;
 1180                 }
 1181         }
 1182 
 1183         /* Check child interfaces */
 1184         mutex_lock(&priv->vlan_mutex);
 1185         list_for_each_entry(child_priv, &priv->child_intfs, list) {
 1186                 matches += ipoib_match_gid_pkey_addr(child_priv, gid,
 1187                     pkey_index, addr, found_net_dev);
 1188                 if (matches > 1)
 1189                         break;
 1190         }
 1191         mutex_unlock(&priv->vlan_mutex);
 1192 
 1193         return matches;
 1194 }
 1195 
 1196 /*
 1197  * __ipoib_get_net_dev_by_params - returns the number of matching
 1198  * net_devs found (between 0 and 2). Also return the matching
 1199  * net_device in the @net_dev parameter, holding a reference to the
 1200  * net_device, if the number of matches >= 1
 1201  */
 1202 static int
 1203 __ipoib_get_net_dev_by_params(struct list_head *dev_list, u8 port,
 1204     u16 pkey_index, const union ib_gid *gid,
 1205     const struct sockaddr *addr, struct ifnet **net_dev)
 1206 {
 1207         struct ipoib_dev_priv *priv;
 1208         int matches = 0;
 1209 
 1210         *net_dev = NULL;
 1211 
 1212         list_for_each_entry(priv, dev_list, list) {
 1213                 if (priv->port != port)
 1214                         continue;
 1215 
 1216                 matches += ipoib_match_gid_pkey_addr(priv, gid, pkey_index,
 1217                     addr, net_dev);
 1218 
 1219                 if (matches > 1)
 1220                         break;
 1221         }
 1222 
 1223         return matches;
 1224 }
 1225 
 1226 static struct ifnet *
 1227 ipoib_get_net_dev_by_params(struct ib_device *dev, u8 port, u16 pkey,
 1228     const union ib_gid *gid, const struct sockaddr *addr, void *client_data)
 1229 {
 1230         struct ifnet *net_dev;
 1231         struct list_head *dev_list = client_data;
 1232         u16 pkey_index;
 1233         int matches;
 1234         int ret;
 1235 
 1236         if (!rdma_protocol_ib(dev, port))
 1237                 return NULL;
 1238 
 1239         ret = ib_find_cached_pkey(dev, port, pkey, &pkey_index);
 1240         if (ret)
 1241                 return NULL;
 1242 
 1243         if (!dev_list)
 1244                 return NULL;
 1245 
 1246         /* See if we can find a unique device matching the L2 parameters */
 1247         matches = __ipoib_get_net_dev_by_params(dev_list, port, pkey_index,
 1248                                                 gid, NULL, &net_dev);
 1249 
 1250         switch (matches) {
 1251         case 0:
 1252                 return NULL;
 1253         case 1:
 1254                 return net_dev;
 1255         }
 1256 
 1257         dev_put(net_dev);
 1258 
 1259         /* Couldn't find a unique device with L2 parameters only. Use L3
 1260          * address to uniquely match the net device */
 1261         matches = __ipoib_get_net_dev_by_params(dev_list, port, pkey_index,
 1262                                                 gid, addr, &net_dev);
 1263         switch (matches) {
 1264         case 0:
 1265                 return NULL;
 1266         default:
 1267                 dev_warn_ratelimited(&dev->dev,
 1268                                      "duplicate IP address detected\n");
 1269                 /* Fall through */
 1270         case 1:
 1271                 return net_dev;
 1272         }
 1273 }
 1274 
 1275 static void
 1276 ipoib_config_vlan(void *arg, struct ifnet *ifp, uint16_t vtag)
 1277 {
 1278         struct ipoib_dev_priv *parent;
 1279         struct ipoib_dev_priv *priv;
 1280         struct epoch_tracker et;
 1281         struct ifnet *dev;
 1282         uint16_t pkey;
 1283         int error;
 1284 
 1285         if (ifp->if_type != IFT_INFINIBAND)
 1286                 return;
 1287         NET_EPOCH_ENTER(et);
 1288         dev = VLAN_DEVAT(ifp, vtag);
 1289         NET_EPOCH_EXIT(et);
 1290         if (dev == NULL)
 1291                 return;
 1292         priv = NULL;
 1293         error = 0;
 1294         parent = ifp->if_softc;
 1295         /* We only support 15 bits of pkey. */
 1296         if (vtag & 0x8000)
 1297                 return;
 1298         pkey = vtag | 0x8000;   /* Set full membership bit. */
 1299         if (pkey == parent->pkey)
 1300                 return;
 1301         /* Check for dups */
 1302         mutex_lock(&parent->vlan_mutex);
 1303         list_for_each_entry(priv, &parent->child_intfs, list) {
 1304                 if (priv->pkey == pkey) {
 1305                         priv = NULL;
 1306                         error = EBUSY;
 1307                         goto out;
 1308                 }
 1309         }
 1310         priv = ipoib_priv_alloc();
 1311         priv->dev = dev;
 1312         priv->max_ib_mtu = parent->max_ib_mtu;
 1313         priv->mcast_mtu = priv->admin_mtu = parent->dev->if_mtu;
 1314         set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags);
 1315         error = ipoib_set_dev_features(priv, parent->ca);
 1316         if (error)
 1317                 goto out;
 1318         priv->pkey = pkey;
 1319         priv->broadcastaddr[8] = pkey >> 8;
 1320         priv->broadcastaddr[9] = pkey & 0xff;
 1321         dev->if_broadcastaddr = priv->broadcastaddr;
 1322         error = ipoib_dev_init(priv, parent->ca, parent->port);
 1323         if (error)
 1324                 goto out;
 1325         priv->parent = parent->dev;
 1326         list_add_tail(&priv->list, &parent->child_intfs);
 1327         VLAN_SETCOOKIE(dev, priv);
 1328         dev->if_start = ipoib_vlan_start;
 1329         dev->if_drv_flags &= ~IFF_DRV_RUNNING;
 1330         dev->if_hdrlen = IPOIB_HEADER_LEN;
 1331         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
 1332                 ipoib_open(priv);
 1333         mutex_unlock(&parent->vlan_mutex);
 1334         return;
 1335 out:
 1336         mutex_unlock(&parent->vlan_mutex);
 1337         if (priv)
 1338                 free(priv, M_TEMP);
 1339         if (error)
 1340                 ipoib_warn(parent,
 1341                     "failed to initialize subinterface: device %s, port %d vtag 0x%X",
 1342                     parent->ca->name, parent->port, vtag);
 1343         return;
 1344 }
 1345 
 1346 static void
 1347 ipoib_unconfig_vlan(void *arg, struct ifnet *ifp, uint16_t vtag)
 1348 {
 1349         struct ipoib_dev_priv *parent;
 1350         struct ipoib_dev_priv *priv;
 1351         struct epoch_tracker et;
 1352         struct ifnet *dev;
 1353         uint16_t pkey;
 1354 
 1355         if (ifp->if_type != IFT_INFINIBAND)
 1356                 return;
 1357 
 1358         NET_EPOCH_ENTER(et);
 1359         dev = VLAN_DEVAT(ifp, vtag);
 1360         NET_EPOCH_EXIT(et);
 1361         if (dev)
 1362                 VLAN_SETCOOKIE(dev, NULL);
 1363         pkey = vtag | 0x8000;
 1364         parent = ifp->if_softc;
 1365         mutex_lock(&parent->vlan_mutex);
 1366         list_for_each_entry(priv, &parent->child_intfs, list) {
 1367                 if (priv->pkey == pkey) {
 1368                         ipoib_dev_cleanup(priv);
 1369                         list_del(&priv->list);
 1370                         break;
 1371                 }
 1372         }
 1373         mutex_unlock(&parent->vlan_mutex);
 1374 }
 1375 
 1376 eventhandler_tag ipoib_vlan_attach;
 1377 eventhandler_tag ipoib_vlan_detach;
 1378 
 1379 static int __init
 1380 ipoib_init_module(void)
 1381 {
 1382         int ret;
 1383 
 1384         ipoib_recvq_size = roundup_pow_of_two(ipoib_recvq_size);
 1385         ipoib_recvq_size = min(ipoib_recvq_size, IPOIB_MAX_QUEUE_SIZE);
 1386         ipoib_recvq_size = max(ipoib_recvq_size, IPOIB_MIN_QUEUE_SIZE);
 1387 
 1388         ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size);
 1389         ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE);
 1390         ipoib_sendq_size = max(ipoib_sendq_size, max(2 * MAX_SEND_CQE,
 1391                                                      IPOIB_MIN_QUEUE_SIZE));
 1392 #ifdef CONFIG_INFINIBAND_IPOIB_CM
 1393         ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP);
 1394 #endif
 1395 
 1396         ipoib_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
 1397                 ipoib_config_vlan, NULL, EVENTHANDLER_PRI_FIRST);
 1398         ipoib_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
 1399                 ipoib_unconfig_vlan, NULL, EVENTHANDLER_PRI_FIRST);
 1400 
 1401         /*
 1402          * We create our own workqueue mainly because we want to be
 1403          * able to flush it when devices are being removed.  We can't
 1404          * use schedule_work()/flush_scheduled_work() because both
 1405          * unregister_netdev() and linkwatch_event take the rtnl lock,
 1406          * so flush_scheduled_work() can deadlock during device
 1407          * removal.
 1408          */
 1409         ipoib_workqueue = create_singlethread_workqueue("ipoib");
 1410         if (!ipoib_workqueue) {
 1411                 ret = -ENOMEM;
 1412                 goto err_fs;
 1413         }
 1414 
 1415         ib_sa_register_client(&ipoib_sa_client);
 1416 
 1417         ret = ib_register_client(&ipoib_client);
 1418         if (ret)
 1419                 goto err_sa;
 1420 
 1421         return 0;
 1422 
 1423 err_sa:
 1424         ib_sa_unregister_client(&ipoib_sa_client);
 1425         destroy_workqueue(ipoib_workqueue);
 1426 
 1427 err_fs:
 1428         return ret;
 1429 }
 1430 
 1431 static void __exit
 1432 ipoib_cleanup_module(void)
 1433 {
 1434 
 1435         EVENTHANDLER_DEREGISTER(vlan_config, ipoib_vlan_attach);
 1436         EVENTHANDLER_DEREGISTER(vlan_unconfig, ipoib_vlan_detach);
 1437         ib_unregister_client(&ipoib_client);
 1438         ib_sa_unregister_client(&ipoib_sa_client);
 1439         destroy_workqueue(ipoib_workqueue);
 1440 }
 1441 module_init_order(ipoib_init_module, SI_ORDER_FIFTH);
 1442 module_exit_order(ipoib_cleanup_module, SI_ORDER_FIFTH);
 1443 
 1444 static int
 1445 ipoib_evhand(module_t mod, int event, void *arg)
 1446 {
 1447         return (0);
 1448 }
 1449 
 1450 static moduledata_t ipoib_mod = {
 1451         .name = "ipoib",
 1452         .evhand = ipoib_evhand,
 1453 };
 1454 
 1455 DECLARE_MODULE(ipoib, ipoib_mod, SI_SUB_LAST, SI_ORDER_ANY);
 1456 MODULE_DEPEND(ipoib, ibcore, 1, 1, 1);
 1457 MODULE_DEPEND(ipoib, if_infiniband, 1, 1, 1);
 1458 MODULE_DEPEND(ipoib, linuxkpi, 1, 1, 1);

Cache object: c404b5950c78d27440a3b45d3d843133


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.